def test_generate_group_contigs(self):
     """Check contig generation for the last group of the fixture taxonomy.

     Reads the parsed taxonomy fixture and the reference genome fixtures,
     generates contigs for the last group, and verifies that the last
     contig of the last genome has a ``full_seq`` of length 1100 and that
     the first and last genomes of the group hold 20 contigs in total.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir,"fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises, instead of leaking it for the rest of the test run.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)

     dir_path = os.path.join(cur_dir,"fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)

     uniq_id = Uniq_id(10)
     group = groups[-1]
     # NOTE(review): presumably 20 is the contig count and 1100/1100 the
     # min/max contig length -- confirm against SampleSetting.
     s_set = SampleSetting("genomes", 20, 1100, 1100, True)
     sg = SampleGroup(s_set, group, uniq_id)
     sg.generate_group_contigs()

     # The contigs must be visible both through the SampleGroup and through
     # the group object that was handed to it.
     assert_equal(len(sg.group.genomes[-1].contigs[-1].full_seq),1100)
     assert_equal(len(group.genomes[-1].contigs[-1].full_seq),1100)
     assert_equal(len(group.genomes[-1].contigs)+len(group.genomes[0].contigs), 20)
예제 #2
0
def main(open_name_file, dir_path, x_set, start_position=False):
    """Generate contigs for every group and print them to standard output.

    Args:
        open_name_file: Handle passed to ``read_parsed_taxonomy_file`` to
            obtain the groups.
        dir_path: Directory passed to ``read_FASTA_files`` together with
            the groups.
        x_set: Settings object handed to every ``SampleGroup``.
        start_position: Forwarded unchanged to ``generate_group_contigs``
            and ``print_group_contigs``.
    """
    # Best-effort initialisation: a failure here is deliberately ignored.
    # Catch ``Exception`` rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate.
    # NOTE(review): presumably this raises when the hash already exists --
    # confirm and narrow to that specific exception type.
    try:
        DNA.generate_kmer_hash(2)
    except Exception:
        pass

    groups = read_parsed_taxonomy_file(open_name_file)

    # Read in the FASTA files for each genome
    read_FASTA_files(groups, dir_path)

    # For each group, generate its contigs and write them to stdout.
    # One id generator is shared across all groups.
    id_generator = Uniq_id(1000)
    for group in groups:
        sg = SampleGroup(x_set, group, id_generator)
        sg.generate_group_contigs(start_position=start_position)
        sg.print_group_contigs(sys.stdout, start_position=start_position)
예제 #3
0
 def test_print_group_contigs(self):
     """Check the FASTA output written by ``print_group_contigs``.

     Generates contigs for the last group of the fixture taxonomy, prints
     them to a temporary file and parses that file back as FASTA. Expects
     10 records and checks the id and description of the first one.
     """
     cur_dir = os.path.dirname(__file__)
     parsed_file_name = os.path.join(cur_dir,"fixtures/parsed_gen_2_2_test.txt")
     # Use a context manager so the fixture handle is closed even if
     # parsing raises, instead of leaking it for the rest of the test run.
     with open(parsed_file_name, 'r') as open_file:
         groups = read_parsed_taxonomy_file(open_file)

     dir_path = os.path.join(cur_dir,"fixtures/reference_genomes")
     read_FASTA_files(groups, dir_path)

     uniq_id = Uniq_id(10)
     group = groups[-1]
     # NOTE(review): presumably 10 is the contig count and 1100/1100 the
     # min/max contig length -- confirm against SampleSetting.
     s_set = SampleSetting("genomes", 10, 1100, 1100, True)
     sg = SampleGroup(s_set, group, uniq_id)
     sg.generate_group_contigs()

     with tempfile.NamedTemporaryFile() as tmp_file:
         sg.print_group_contigs(tmp_file)
         # Rewind so the parser reads what was just written.
         tmp_file.seek(0)
         contig_seqs = list(SeqIO.parse(tmp_file, "fasta"))
         assert_equal(len(contig_seqs),10)
         assert_equal(contig_seqs[0].id, "Capnocytophaga_canimorsus_Cc5_uid70727_10")
         d_string = "Capnocytophaga_canimorsus_Cc5_uid70727_10 Flavobacteriaceae|Capnocytophaga|Capnocytophaga canimorsus"
         assert_equal(contig_seqs[0].description, d_string)