def test_parse(self):
    """Parse a small GenomeDiff text and check every item the parser yields.

    The fixture contains two metadata lines, one SNP line with a trailing
    ``gene_name`` key/value field, and one RA line with a trailing
    ``frequency`` key/value field.
    """
    # Columns are separated by tabs. Spelling them as escapes keeps the
    # separators visible and stops editors from turning them into spaces.
    lines = [
        '#=GENOME_DIFF 1.0',
        '#=AUTHOR test',
        'SNP\t1\t23423\tNC_000913\t223\tA\tgene_name=mhpE',
        # The RA row has no parent ids, so its third column is left empty.
        'RA\t2\t\tNC_000913\t223\t0\tG\tA\tfrequency=0.1366',
    ]
    handle = StringIO('\n'.join(lines))
    parser = GenomeDiffParser(fsock=handle)

    expected = [
        Metadata('GENOME_DIFF', '1.0'),
        Metadata('AUTHOR', 'test'),
        Record('SNP', 1, parent_ids=[23423], new_seq='A',
               seq_id='NC_000913', position=223, gene_name='mhpE'),
        Record('RA', 2, new_base='A', frequency=0.1366, position=223,
               seq_id='NC_000913', insert_position=0, ref_base='G'),
    ]
    self.assertEqual(expected, list(parser))
def test_document(self):
    """Read a GenomeDiff text into a document and check its contents.

    Verifies the metadata mapping, that the SNP row is listed under
    ``mutations`` and the RA row under ``evidence``, and that indexing the
    document by record id returns the matching record.
    """
    # Columns are separated by tabs. Spelling them as escapes keeps the
    # separators visible and stops editors from turning them into spaces.
    lines = [
        '#=GENOME_DIFF 1.0',
        '#=AUTHOR test',
        'SNP\t1\t23423\tNC_000913\t223\tA',
        # The RA row has no parent ids, so its third column is left empty.
        'RA\t2\t\tNC_000913\t223\t0\tG\tA',
    ]
    document = GenomeDiff.read(StringIO('\n'.join(lines)))

    self.assertEqual({'AUTHOR': 'test', 'GENOME_DIFF': '1.0'},
                     document.metadata)

    snp_record = Record('SNP', 1, document, [23423],
                        seq_id='NC_000913', new_seq='A', position=223)
    ra_record = Record('RA', 2, document, None,
                       position=223, seq_id='NC_000913', insert_position=0,
                       new_base='A', ref_base='G')

    self.assertEqual([snp_record], document.mutations)
    self.assertEqual([ra_record], document.evidence)

    # Records are looked up by their id column.
    self.assertEqual(snp_record, document[1])
    self.assertEqual(ra_record, document[2])
def test_simple(self):
    """A Record exposes its type, id and keyword fields as attributes."""
    record = Record(
        'SNP',
        1,
        parent_ids=[23423],
        seq_id='NC_000913',
        new_seq='A',
        position=223,
        test='more',
    )

    # (attribute name, expected value), checked in this order.
    expectations = (
        ('type', 'SNP'),
        ('id', 1),
        ('new_seq', 'A'),
        ('test', 'more'),
    )
    for attribute, expected in expectations:
        self.assertEqual(expected, getattr(record, attribute))
def __iter__(self):
    """Yield one ``Metadata`` or ``Record`` per meaningful line of ``self._fsock``.

    Lines are handled as follows:

    * blank or whitespace-only lines are skipped;
    * lines starting with ``#`` yield a ``Metadata`` when they look like
      ``#=KEY value`` and are otherwise ignored;
    * every other line must be a tab-separated record: type, id, an
      optional comma-separated list of parent ids, then the fields named
      by ``TYPE_SPECIFIC_FIELDS[type]`` followed by any number of
      ``key=value`` fields.

    Field values are passed through ``self._convert_value`` before being
    handed to ``Record`` as keyword arguments, in file order.

    Raises:
        Exception: if a record line does not match the expected layout or
            has fewer fields than its type requires. The message carries
            the 1-based line number and the offending line.
    """
    metadata_pattern = re.compile(r'^#=(\w+)\s+(.*)$')
    # Raw strings throughout: regex escapes such as \d and \s are not valid
    # string escapes, and the regex engine itself understands \t as a tab.
    mutation_pattern = re.compile(
        r'^(?P<type>[A-Z]{2,4})'
        r'\t(?P<id>\d+)'
        r'\t((?P<parent_ids>\d+(,\s*\d+)*)|\.?)'
        r'\t(?P<extra>.+)?$')

    for line_number, line in enumerate(self._fsock, 1):
        # Lines read from a file keep their newline, so an emptiness test
        # on the raw line would never be true for a blank line.
        if not line.strip():
            continue

        if line.startswith('#'):
            match = metadata_pattern.match(line)
            if match:
                yield Metadata(*match.group(1, 2))
            continue

        match = mutation_pattern.match(line)
        if not match:
            raise Exception('Could not parse line #{}: {}'.format(
                line_number, line))

        record_type = match.group('type')
        record_id = int(match.group('id'))

        parent_ids = match.group('parent_ids')
        if parent_ids:
            parent_ids = [int(pid) for pid in parent_ids.split(',')]

        # The trailing field group is optional in the pattern, so it may
        # be absent altogether.
        extra = match.group('extra')
        fields = extra.split('\t') if extra else []

        required = tuple(TYPE_SPECIFIC_FIELDS[record_type])
        if len(fields) < len(required):
            raise Exception('Could not parse line #{}: {}'.format(
                line_number, line))

        extra_dct = OrderedDict()
        # Leading fields are positional and named by the record type.
        for name, value in zip(required, fields):
            extra_dct[name] = self._convert_value(value)
        # Whatever remains is a sequence of key=value pairs.
        for item in fields[len(required):]:
            key, value = item.split('=', 1)
            extra_dct[key] = self._convert_value(value)

        yield Record(record_type, record_id, self._document, parent_ids,
                     **extra_dct)