def main(open_name_file, dir_path, l):
    """Split every genome of every group into parts and print them to stdout.

    Args:
        open_name_file: Handed to ``read_parsed_taxonomy_file`` to obtain the
            groups (presumably an open parsed-taxonomy file -- confirm
            against the caller).
        dir_path: Handed to ``read_FASTA_files`` together with the groups
            (presumably the directory holding the genome FASTA files --
            confirm).
        l: Handed unchanged to ``genome.split_seq`` (presumably the length
            of each part -- confirm against ``split_seq``).
    """
    # NOTE(review): presumably sets up k-mer state needed by the calls below;
    # kept first, exactly as in the original ordering -- confirm.
    DNA.generate_kmer_hash(1)

    groups = read_parsed_taxonomy_file(open_name_file)

    # Read in the FASTA files for each genome.
    read_FASTA_files(groups, dir_path)

    # For each genome, generate its parts and print them. A single Uniq_id
    # instance is shared by every print_parts call.
    id_generator = Uniq_id(1000)
    for group in groups:
        for genome in group.genomes:
            parts = genome.split_seq(l)
            print_parts(parts, sys.stdout, id_generator, genome)
# Example #2
# 0
 def test_print_parts(self):
     """Split the fixture genomes, print the parts, and parse them back.

     Every genome from the fixture taxonomy file is split with
     ``split_seq(10000)`` and written to a temporary file via
     ``print_parts``. The file is then parsed as FASTA, and the number of
     records and the id of the first record are checked.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir, "fixtures/parsed_gen_2_2_test.txt")
     # Open the taxonomy file in a context manager so the handle is always
     # closed, even if parsing raises.
     # NOTE(review): ``groups`` is iterated after the file is closed; this
     # assumes the parser reads the file eagerly -- confirm.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)
     dir_path = os.path.join(cur_dir, "fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)
     uniq_id = Uniq_id(10)
     with tempfile.NamedTemporaryFile() as tmp_file:
         for group in groups:
             for genome in group.genomes:
                 parts = genome.split_seq(10000)
                 print_parts(parts, tmp_file, uniq_id, genome)
         # Rewind so the parser reads what was just written.
         tmp_file.seek(0)
         genome_parts = list(SeqIO.parse(tmp_file, "fasta"))
         assert_equal(len(genome_parts), 1788)
         assert_equal(genome_parts[0].id, "Ehrlichia_canis_Jake_uid58071_10_0")