def test_parse_contig_descritpion(self):
    # Parse one sample description line and verify each extracted field.
    description = "my_genome_name_uid123_100 my_family|my_genus|my_genus my_species"
    parsed = parse_contig_description(description)

    # (key in the parsed result, value it must hold), checked in order.
    expected_fields = [
        ("genome", "my_genome_name_uid123"),
        ("contig_id", 100),
        ("family", "my_family"),
        ("genus", "my_genus"),
        ("species", "my_genus my_species"),
    ]
    for key, expected in expected_fields:
        assert_equal(expected, parsed[key])
def read_contigs_file(open_contigs_file, start_position=False, taxonomy_info=True):
    """Read contigs file generated by generate_contigs script.

    Every FASTA record is converted into a ``DNA`` object.

    Args:
        open_contigs_file: FASTA source passed unchanged to
            ``SeqIO.parse(..., "fasta")``.
        start_position: Forwarded to ``parse_contig_description``. When
            truthy, the parsed ``"start_position"`` entry is also stored
            on the contig. Ignored when ``taxonomy_info`` is falsy.
        taxonomy_info: When truthy, the record description is parsed with
            ``parse_contig_description`` and the genome, family, genus,
            species and contig id are taken from the parsed result.
            When falsy, the record id is used both as the ``DNA`` id and
            as ``contig_id``, and no taxonomy attributes are set.

    Returns:
        A list of ``DNA`` objects, one per record, in the order
        ``SeqIO.parse`` yields them.
    """
    contigs = []
    # SeqIO.parse already yields records one at a time; iterate it directly
    # rather than copying every record into a throwaway list first.
    for record in SeqIO.parse(open_contigs_file, "fasta"):
        if taxonomy_info:
            parsed = parse_contig_description(
                record.description, start_position=start_position
            )
            contig = DNA(id=parsed["genome"], seq=str(record.seq))
            if start_position:
                contig.start_position = parsed["start_position"]
            contig.family = parsed["family"]
            contig.genus = parsed["genus"]
            contig.species = parsed["species"]
            contig.contig_id = parsed["contig_id"]
        else:
            # Without taxonomy the record id is the only identifier available.
            contig = DNA(id=record.id, seq=str(record.seq))
            contig.contig_id = record.id
        contigs.append(contig)
    return contigs