예제 #1
0
 def test_parse_contig_descritpion(self):
     """Check every field parsed out of a sample contig description line."""
     description = "my_genome_name_uid123_100 my_family|my_genus|my_genus my_species"
     parsed = parse_contig_description(description)

     # (expected value, key in the parsed mapping), verified in this order.
     expectations = (
         ("my_genome_name_uid123", "genome"),
         (100, "contig_id"),
         ("my_family", "family"),
         ("my_genus", "genus"),
         ("my_genus my_species", "species"),
     )
     for expected, key in expectations:
         assert_equal(expected, parsed[key])
예제 #2
0
def read_contigs_file(open_contigs_file, start_position=False, taxonomy_info=True):
    """Read a contigs file generated by the generate_contigs script.

    Args:
        open_contigs_file: FASTA source passed unchanged to ``SeqIO.parse``.
        start_position: Forwarded to ``parse_contig_description``. When
            truthy, the parsed ``"start_position"`` value is also stored on
            each contig. Ignored when ``taxonomy_info`` is falsy.
        taxonomy_info: When truthy, each record's description is parsed and
            the genome name becomes the contig's id, with ``family``,
            ``genus``, ``species`` and ``contig_id`` set from the parsed
            fields. When falsy, the record's own id is used both as the
            contig's id and as its ``contig_id``.

    Returns:
        A list of ``DNA`` objects, one per record, in the order the records
        are parsed.
    """
    contigs = []
    # Iterate the parser directly: wrapping it in list() would keep every
    # record in memory at once even though each one is only needed once.
    for seq in SeqIO.parse(open_contigs_file, "fasta"):
        if taxonomy_info:
            contig_id_hash = parse_contig_description(
                seq.description, start_position=start_position
            )
            contig = DNA(id=contig_id_hash["genome"], seq=str(seq.seq))
            if start_position:
                contig.start_position = contig_id_hash["start_position"]
            contig.family = contig_id_hash["family"]
            contig.genus = contig_id_hash["genus"]
            contig.species = contig_id_hash["species"]
            contig.contig_id = contig_id_hash["contig_id"]
        else:
            contig = DNA(id=seq.id, seq=str(seq.seq))
            contig.contig_id = seq.id
        contigs.append(contig)

    return contigs