def test_map_chromosomes(self):
    """Check chromosome-name translation with and without `keep`.

    The input mixes several names that are all expected to come out as
    'chrIV', plus one name ('sth') that is expected to stay untranslated.
    With keep=True the untranslated row must be passed through unchanged;
    with keep=False it must be dropped from the output.
    """
    rows = [('chrIV', 1), ('IV', 2), (2780, 3), ('NC_001136.9', 4), ('sth', 5)]
    canonical = [('chrIV', 1), ('chrIV', 2), ('chrIV', 3), ('chrIV', 4), ('sth', 5)]

    # keep=True: the unknown name survives as-is.
    # Each run gets its own copy of the rows and a fresh stream.
    kept_stream = fstream(list(rows), fields=['chr', 'start'])
    yeast = genrep.Assembly('sacCer2')
    kept = list(map_chromosomes(kept_stream, yeast.chromosomes, keep=True))
    self.assertListEqual(kept, canonical)

    # keep=False: the unknown name (last row) is filtered out.
    filtered_stream = fstream(list(rows), fields=['chr', 'start'])
    filtered = list(map_chromosomes(filtered_stream, yeast.chromosomes, keep=False))
    self.assertListEqual(filtered, canonical[:-1])
def test_map_chromosomes(self):
    """Verify `map_chromosomes` against the 'sacCer2' assembly.

    Four different spellings are expected to be rewritten to 'chrIV';
    the name 'sth' is expected to be kept untouched when keep=True and
    removed from the result when keep=False.
    """
    def make_stream():
        # Build a fresh stream for each run.
        return fstream(
            [('chrIV', 1), ('IV', 2), (2780, 3), ('NC_001136.9', 4), ('sth', 5)],
            fields=['chr', 'start'],
        )

    source = make_stream()
    assembly = genrep.Assembly('sacCer2')
    chromosomes = assembly.chromosomes
    expected = [('chrIV', 1), ('chrIV', 2), ('chrIV', 3), ('chrIV', 4), ('sth', 5)]

    # keep=True
    self.assertListEqual(
        list(map_chromosomes(source, chromosomes, keep=True)),
        expected,
    )

    # keep=False
    source = make_stream()
    self.assertListEqual(
        list(map_chromosomes(source, chromosomes, keep=False)),
        expected[:-1],
    )
def convert_junc_file(self, filename):
    """Convert a .junc SOAPsplice output file to bed format. Return the file name.

    The input is read as a 'txt' track with the fields
    chr, start, end, strand, score. Chromosome names are translated using
    `self.assembly.chromosomes`, a 'name' field is added, and the result is
    written to a new track whose name ends in '.bed'.

    :param filename: (str) name of the .junc file to convert.
    :returns: (str) name of the bed file that was written.
    """
    junc = track(filename, format='txt',
                 fields=['chr', 'start', 'end', 'strand', 'score'],
                 chrmeta=self.assembly.chrmeta)
    stream = junc.read()

    # Translate chromosome names
    translated = map_chromosomes(stream, self.assembly.chromosomes)

    # Add junction IDs: create the 'name' field as a copy of 'strand', then
    # overwrite its value with 'junction<N>' (the copied value is ignored).
    with_name = duplicate(translated, 'strand', 'name')
    counter = itertools.count()  # numbering starts at 0
    # The builtin next() works on Python 2.6+ and 3; the `.next()` method
    # exists only on Python 2 iterators.
    named = apply(with_name, 'name', lambda x: 'junction' + str(next(counter)))

    # Convert to bed format
    outfile = unique_filename_in()
    bed = outfile + '.bed'
    out = track(bed, fields=named.fields, chrmeta=self.assembly.chrmeta)
    out.write(named)
    # NOTE(review): neither track is explicitly closed here -- confirm
    # whether the track API requires it.
    return bed
def convert_junc_file(self, filename):
    """Convert a .junc SOAPsplice output file to bed format. Return the file name.

    Steps: read `filename` as a 'txt' track (fields chr, start, end, strand,
    score), translate its chromosome names with `self.assembly.chromosomes`,
    add a 'name' field holding a generated junction ID, and write the
    resulting stream to a fresh '.bed' track.

    :param filename: (str) name of the .junc file to convert.
    :returns: (str) name of the generated bed file.
    """
    t = track(filename, format='txt',
              fields=['chr', 'start', 'end', 'strand', 'score'],
              chrmeta=self.assembly.chrmeta)
    stream = t.read()

    # Translate chromosome names
    s1 = map_chromosomes(stream, self.assembly.chromosomes)

    # Add junction IDs. 'name' starts as a copy of 'strand' and is then
    # replaced by 'junction' followed by a running number from 0.
    s2 = duplicate(s1, 'strand', 'name')
    junction_numbers = itertools.count()
    # Use the builtin next(): iterator objects have no `.next()` method on
    # Python 3, while next() is available on Python 2.6+ as well.
    s3 = apply(s2, 'name',
               lambda x: 'junction' + str(next(junction_numbers)))

    # Convert to bed format
    outfile = unique_filename_in()
    bed = outfile + '.bed'
    out = track(bed, fields=s3.fields, chrmeta=self.assembly.chrmeta)
    out.write(s3)
    return bed