def test_run_chain_pick_otus(self):
    """run_chain_pick_otus runs serially without error.

    Runs the chained OTU-picking workflow on the original FASTA input,
    then compares each output file (exact-match OTUs, uclust_ref OTUs,
    merged OTUs, failures, OTU table) against module-level expected
    values, and finally checks that a non-empty log file was written.
    """
    # Run the chained OTU-picking workflow serially (parallel=False).
    run_chain_pick_otus(self.fna_original_fp, self.wf_out,
                        call_commands_serially, self.params,
                        self.qiime_config, False, no_status_updates)

    # load the exact match OTUs and check if they are valid
    exact_otus_fp = join(self.wf_out, 'pick_otus_exact', 'test_otus.txt')
    obs_exact_otus = open(exact_otus_fp).read()
    self.assertEqual(obs_exact_otus, exp_exact_otus)

    # load the uclust_ref picked OTUs and check if they are valid
    uclust_ref_otus_fp = join(self.wf_out, 'picked_otus_UCLUST_REF_97',
                              'leftover_otus.txt')
    obs_uclust_ref_otus = open(uclust_ref_otus_fp).read()
    self.assertEqual(obs_uclust_ref_otus, exp_uclust_ref_otus)

    # load the merged OTUs and check if they are valid
    # BUG FIX: the original chained assignment
    # (all_otus_fp = uclust_ref_otus_fp = join(...)) needlessly
    # clobbered uclust_ref_otus_fp with the merged-OTUs path; only
    # all_otus_fp is assigned here.
    all_otus_fp = join(self.wf_out, 'exact_uclust_ref_otus.txt')
    obs_all_otus = open(all_otus_fp).read()
    self.assertEqual(obs_all_otus, exp_all_otus)

    # load the sample failures and check if they are valid
    otus_failures_fp = join(self.wf_out, 'all_failures.txt')
    obs_otu_failures = open(otus_failures_fp).read()
    self.assertEqual(obs_otu_failures, exp_otu_failures)

    # load the otu table and check if they are valid
    otus_table_fp = join(self.wf_out, 'exact_uclust_ref_otu_table.txt')
    obs_otu_table = open(otus_table_fp).read()
    self.assertEqual(obs_otu_table, exp_otu_table)

    # Check that the log file is created and has size > 0
    log_fp = glob(join(self.wf_out, 'log*.txt'))[0]
    self.assertTrue(getsize(log_fp) > 0)
def test_submit_processed_data_to_db_fasta(self):
    """submit_processed_data_to_db_fasta runs without error.

    Processes FASTA input through split-libraries and chained
    OTU-picking, loads the results into the test database, then
    verifies the SEQ_RUN, split-library and OTU rows via direct SQL
    queries before deleting the test analysis.
    """
    # process the sequence data before loading anything into the DB
    run_process_fasta_through_split_lib(0,'Fasting_subset',
                input_fp=','.join(self.fasta_fps),
                mapping_fp=self.fasta_map_fp,
                output_dir=self.wf_out,
                command_handler=call_commands_serially,
                params=self.params,
                qiime_config=self.qiime_config,
                write_to_all_fasta=False,
                status_update_callback=no_status_updates)

    # filepaths of key workflow outputs
    input_file_basename = splitext(split(self.sff_fp)[1])[0]
    otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
    split_lib_seqs_fp = join(self.wf_out,'split_libraries',
                             'seqs.fna')

    # run chained OTU-picking (parallel) on the split-library seqs
    run_chain_pick_otus(split_lib_seqs_fp,
                        output_dir=self.gg_out,
                        command_handler=call_commands_serially,
                        params=self.params,
                        qiime_config=self.qiime_config,parallel=True,
                        status_update_callback=no_status_updates)

    input_fname = splitext(split(self.sff_fp)[-1])[0]
    db_input_fp = join(self.wf_out,input_fname)

    # load the FASTA/split-library data, then the OTU mapping
    analysis_id=submit_fasta_and_split_lib(data_access,
                                           ','.join(self.fasta_fps),
                                           0, self.wf_out)
    load_otu_mapping(data_access,self.wf_out,analysis_id)
    print 'Analysis ID is: %s' % str(analysis_id)

    ### TEST the SEQ_RUN load
    print 'Testing the SEQ_RUN loading!'
    # expected values for the loaded sequencing run
    exp_sff_md5=['412eee0be168a285415d9e4db3dbbf2f']
    exp_num_seqs=22
    exp_instr_code='FASTA'
    exp_sff_fname=['test_split_lib_seqs']

    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    # query joining analysis -> sff_file -> sequencing_run for this analysis
    seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum, h.instrument_code from analysis j inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id inner join sff_file f on f.sff_file_id=s.sff_file_id inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
    seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))
    results = cur.execute(seq_run_info)
    #print 'Calling getTestFlowData...'

    # check each returned row against the expected values
    for data in results:
        obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
        obs_instrument_code = data
        print 'After getTestSeqRunData...'
        self.assertTrue(obs_sff_filename in exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertTrue(obs_sff_md5 in exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
    print 'Done testing SEQ_RUN!'

    ### TEST the split-library load
    print 'Testing Split-Library Data'
    # expected values for the loaded split-library sequence
    exp_split_lib_seq='TTGGGCCGTGTCTCAGTCCCAATGTGGCCGATCAGTCTCTTAACTCGGCTATGCATCATTGCCTTGGTAAGCCGTTACCTTACCAACTAGCTAATGCACCGCAGGTCCATCCAAGAGTGATAGCAGAACCATCTTTCAAACTCTAGACATGCGTCTAGTGTTGTTATCCGGTATTAGCATCTGTTTCCAGGTGTTATCCCAGTCTCTTGGG'
    exp_split_lib_md5='412eee0be168a285415d9e4db3dbbf2f'
    exp_split_lib_seq_md5='59843d3394983f2caa26f583014a3389'
    # query joining analysis -> split_library_run -> ssu_sequence
    split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum, s.sequence_string,s.md5_checksum from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
    split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))
    results = cur.execute(split_lib_info)
    #print 'Calling getTestFlowData...'
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = data
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

    ### TEST the OTU load
    print 'Testing OTU Data!'
    #exp_prokmsa=97550
    exp_otu_md5='cec9b6c184ffdb12d9de4450034ab775'
    exp_threshold=97
    # query joining analysis -> otu_table -> otu_picking_run
    otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id, ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file, p.threshold from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
    otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))
    results = cur.execute(otu_info)
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = data
        #self.assertEqual(obs_prokmsa,exp_prokmsa)
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)

    ### TEST the OTU-picking failures load
    otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
    otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx635.281531\'" % (str(analysis_id))
    results = cur.execute(otu_fail_info)
    for data in results:
        obs_seq_run_id,obs_ssu_id= data
        # any zero id would indicate a failed/partial load
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

    # clean up: remove the test analysis from the DB
    valid=data_access.deleteTestAnalysis(True,analysis_id)
    if not valid:
        print "Error: Could not delete data from DB!"
def test_submit_processed_data_to_db_illumina(self):
    """run_process_illumina_through_pick_otus runs without error.

    Processes Illumina input through split-libraries and chained
    OTU-picking, loads the results into the test database, then
    verifies the SEQ_RUN, split-library and OTU rows via direct SQL
    queries before deleting the test analysis.
    """
    # process the sequence data before loading anything into the DB
    run_process_illumina_through_split_lib(0,'Fasting_subset',
                input_fp=','.join(self.illumina_fps),
                mapping_fp=self.illumina_map_fp,
                output_dir=self.wf_out,
                command_handler=call_commands_serially,
                params=self.params,
                qiime_config=self.qiime_config,
                write_to_all_fasta=False,
                status_update_callback=no_status_updates)

    # filepaths of key workflow outputs
    input_file_basename = splitext(split(self.sff_fp)[1])[0]
    otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
    split_lib_seqs_fp = join(self.wf_out,'split_libraries',
                             'seqs.fna')

    # run chained OTU-picking (parallel) on the split-library seqs
    run_chain_pick_otus(split_lib_seqs_fp,
                        output_dir=self.gg_out,
                        command_handler=call_commands_serially,
                        params=self.params,
                        qiime_config=self.qiime_config,parallel=True,
                        status_update_callback=no_status_updates)

    input_fname = splitext(split(self.sff_fp)[-1])[0]
    db_input_fp = join(self.wf_out,input_fname)

    # load the Illumina/split-library data, then the OTU mapping
    analysis_id=submit_illumina_and_split_lib(data_access,
                                              ','.join(self.illumina_fps),
                                              0, self.wf_out)
    load_otu_mapping(data_access,self.wf_out,analysis_id)
    print 'Analysis ID is: %s' % str(analysis_id)

    ### TEST the SEQ_RUN load
    print 'Testing the SEQ_RUN loading!'
    # expected values: two input fastq files may be returned
    exp_sff_md5=['2b14442f7df4d06ac1e241816bf3ce4a','53181ca3427e5b4ce28a6b13cb3b98dd']
    exp_num_seqs=100
    exp_instr_code='ILLUMINA'
    exp_sff_fname=['s_8_2_sequence_100_records','s_8_1_sequence_100_records']

    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    # query joining analysis -> sff_file -> sequencing_run for this analysis
    seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum, h.instrument_code from analysis j inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id inner join sff_file f on f.sff_file_id=s.sff_file_id inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
    seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(seq_run_info)
    #print 'Calling getTestFlowData...'

    # check each returned row against the expected values
    for data in results:
        obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
        obs_instrument_code = data
        print 'After getTestSeqRunData...'
        self.assertTrue(obs_sff_filename in exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertTrue(obs_sff_md5 in exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
    print 'Done testing SEQ_RUN!'

    ### TEST the split-library load
    print 'Testing Split-Library Data'
    # expected values for the loaded split-library sequence
    exp_split_lib_seq='TACGAAGGGAGCTAGCGTTATTCGGAATGATTGGGTGTAAAGAGTTTGTAGATTGCAAAATTTTTGTTATTAGTAAAAAATTGAATTTATTATTTAAAGATGCTTTTAATACAATTTTGCTTGAGTATAGTAGAGGAAAAT'
    exp_split_lib_md5='1443e25614090e660b003c5774ed4cba'
    exp_split_lib_seq_md5='7e8278ef1f5561d997cad48eabe40847'
    # query joining analysis -> split_library_run -> ssu_sequence
    split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum, s.sequence_string,s.md5_checksum from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
    split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(split_lib_info)
    #print 'Calling getTestFlowData...'
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = data
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

    ### TEST the OTU load
    print 'Testing OTU Data!'
    #exp_prokmsa=97550
    exp_otu_md5='56222e11026575d9850009768c0b8885'
    exp_threshold=97
    # query joining analysis -> otu_table -> otu_picking_run
    otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id, ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file, p.threshold from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
    otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'SSBH05July07\'" % (str(analysis_id))
    results = cur.execute(otu_info)
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = data
        #self.assertEqual(obs_prokmsa,exp_prokmsa)
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)

    ### TEST the OTU-picking failures load
    otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
    otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(otu_fail_info)
    for data in results:
        obs_seq_run_id,obs_ssu_id= data
        # any zero id would indicate a failed/partial load
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

    # clean up: remove the test analysis from the DB
    valid=data_access.deleteTestAnalysis(True,analysis_id)
    if not valid:
        print "Error: Could not delete data from DB!"
def test_submit_processed_data_to_db(self):
    """run_process_sff_through_pick_otus runs without error.

    Processes SFF input through split-libraries and chained
    OTU-picking, loads the results into the test database, then
    verifies the flow-data, split-library and OTU rows via direct SQL
    queries before deleting the test analysis. Several older
    data_access-based checks are retained below as commented-out
    ('''...''') dead code.
    """
    # process the sequence data before loading anything into the DB
    run_process_sff_through_split_lib(0,'Fasting_subset',
                sff_input_fp=self.sff_fp,
                mapping_fp=self.fasting_mapping_fp,
                output_dir=self.wf_out,
                command_handler=call_commands_serially,
                params=self.params,
                qiime_config=self.qiime_config,convert_to_flx=False,
                write_to_all_fasta=False,
                status_update_callback=no_status_updates)

    # filepaths of key workflow outputs
    input_file_basename = splitext(split(self.sff_fp)[1])[0]
    otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
    split_lib_seqs_fp = join(self.wf_out,'split_libraries',
                             'seqs.fna')

    # run chained OTU-picking (serial) on the split-library seqs
    run_chain_pick_otus(split_lib_seqs_fp,
                        output_dir=self.gg_out,
                        command_handler=call_commands_serially,
                        params=self.params,
                        qiime_config=self.qiime_config,parallel=False,
                        status_update_callback=no_status_updates)

    input_fname = splitext(split(self.sff_fp)[-1])[0]
    db_input_fp = join(self.wf_out,input_fname)

    # load the SFF/split-library data, then the OTU mapping
    analysis_id=submit_sff_and_split_lib(data_access,
                                         db_input_fp+'.fna',0)
    load_otu_mapping(data_access,self.wf_out,analysis_id)
    print 'Analysis ID is: %s' % str(analysis_id)

    ### TEST the FLOW_DATA load
    print 'Testing the FLOW_DATA loading!'
    # expected values for the loaded sequencing run
    exp_sff_md5='314f4000857668d45a413d2e94a755fc'
    exp_num_seqs=22
    exp_read_id='FLP3FBN01ELBSX'
    exp_instr_code='GS FLX'
    exp_sff_fname='Fasting_subset'

    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    # query joining analysis -> sff_file -> sequencing_run for this analysis
    seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum, h.instrument_code from analysis j inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id inner join sff_file f on f.sff_file_id=s.sff_file_id inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
    seq_run_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
    results = cur.execute(seq_run_info)
    #print 'Calling getTestFlowData...'

    # check each returned row against the expected values
    for data in results:
        obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
        obs_instrument_code = data
        self.assertEqual(obs_sff_filename,exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertEqual(obs_sff_md5,exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)

    # NOTE: commented-out legacy check via data_access.getTestFlowData
    '''
    print 'After getTestFlowData...'
    #print 'Calling getTestFlowData...'
    obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
    obs_instrument_code,obs_read_id,obs_read_seq,obs_flow_string,\
    obs_qual_string = data_access.getTestFlowData(True,analysis_id, 'test.PCx634_1')
    #print 'After getTestFlowData...'
    self.assertEqual(obs_sff_filename,exp_sff_fname)
    self.assertEqual(obs_num_of_reads,exp_num_seqs)
    self.assertEqual(obs_sff_md5,exp_sff_md5)
    self.assertEqual(obs_instrument_code,exp_instr_code)
    self.assertEqual(obs_read_id,exp_read_id)
    self.assertEqual(obs_read_seq,exp_read_seq)
    self.assertEqual(str(obs_flow_string),exp_flow_string)
    self.assertEqual(str(obs_qual_string),exp_qual_string)
    print 'Done testing Flow_Data!'
    '''

    ### TEST the split-library load
    print 'Testing Split-Library Data'
    # expected values for the loaded split-library sequence
    exp_split_lib_seq='CTGGGCCGTGTCTCAGTCCCAATGTGGCCGTTTACCCTCTCAGGCCGGCTACGCATCATCGCCTTGGTGGGCCGTTACCTCACCAACTAGCTAATGCGCCGCAGGTCCATCCATGTTCACGCCTTGATGGGCGCTTTAATATACTGAGCATGCGCTCTGTATACCTATCCGGTTTTAGCTACCGTTTCCAGCAGTTATCCCGGACACATGGGCTAGG'
    exp_split_lib_md5='2c67e0acf745bef73e26c36f0b3bd00a'
    exp_split_lib_seq_md5='008918f7469f8e33d5dd6e01075d5194'
    # query joining analysis -> split_library_run -> ssu_sequence
    split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum, s.sequence_string,s.md5_checksum from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
    split_lib_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
    results = cur.execute(split_lib_info)
    #print 'Calling getTestFlowData...'
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = data
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

    # NOTE: commented-out legacy check via data_access.getTestSplitLibData
    '''
    obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
    obs_split_lib_seq,obs_split_lib_seq_md5 = \
    data_access.getTestSplitLibData(True,analysis_id, 'test.PCx634_1')
    self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
    self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
    self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
    '''

    ### TEST the OTU load
    print 'Testing OTU Data!'
    #exp_prokmsa=83669
    exp_otu_md5='0b8edcf8a4275730001877496b41cf55'
    exp_threshold=97
    # query joining analysis -> otu_table -> otu_picking_run
    otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id, ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file, p.threshold from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
    otu_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_2\'" % (str(analysis_id))
    results = cur.execute(otu_info)
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = data
        # NOTE: commented-out legacy check via data_access.getTestOTUData
        '''
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = \
        data_access.getTestOTUData(True,analysis_id, 'test.PCx634_2')
        '''
        #self.assertEqual(obs_prokmsa,exp_prokmsa)
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)

    ### TEST the OTU-picking failures load
    otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
    otu_fail_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_14\'" % (str(analysis_id))
    results = cur.execute(otu_fail_info)
    for data in results:
        obs_seq_run_id,obs_ssu_id= data
        # NOTE: commented-out legacy check via data_access.getTestOTUFailureData
        '''
        obs_seq_run_id,obs_ssu_id = \
        data_access.getTestOTUFailureData(True,analysis_id, 'test.PCx634_14')
        '''
        # any zero id would indicate a failed/partial load
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

    # clean up: remove the test analysis from the DB
    valid=data_access.deleteTestAnalysis(True,analysis_id)
    if not valid:
        print "Error: Could not delete data from DB!"
def main():
    """Command-line entry point: run chained OTU-picking.

    Reads the split-library FASTA path, parameter file and output
    directory from the command line, then delegates to
    run_chain_pick_otus with a 'chain_picked_otus' subdirectory of the
    chosen output directory as the workflow output location.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_file = opts.split_lib_seqs
    verbose = opts.verbose
    print_only = opts.print_only
    parallel = opts.parallel
    output_dir=opts.output_dir

    # Resolve the working directory; fall back to the CWD when no
    # output directory was supplied.
    if output_dir:
        if exists(output_dir):
            dir_path=output_dir
        else:
            try:
                mkdir(output_dir)
                dir_path=output_dir
            except OSError:
                # NOTE(review): a failed mkdir is silently ignored here,
                # which leaves dir_path unset and raises NameError at the
                # makedirs call below — confirm intended behavior.
                pass
    else:
        dir_path='./'

    if parallel:
        # fail early if the parallel framework is not available
        raise_error_on_parallel_unavailable()

    try:
        parameter_f = open(opts.parameter_fp)
    except IOError:
        raise IOError,\
         "Can't open parameters file (%s). Does it exist? Do you have read access?"\
         % opts.parameter_fp

    try:
        makedirs(dir_path)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            # NOTE(review): dir_path always exists by this point (it was
            # just created or verified above, or is './'), so makedirs
            # always raises and, without -f, this branch always exits —
            # confirm whether the earlier mkdir defeats this check.
            print "Output directory already exists. Please choose "+\
                    "a different directory, or force overwrite with -f."
            exit(1)

    # select how commands are executed / how progress is reported
    if print_only:
        command_handler = print_commands
    else:
        command_handler = web_app_call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    # run the chained OTU-picking workflow in its own subdirectory
    new_output_dir=join(dir_path,'chain_picked_otus')
    create_dir(new_output_dir)

    run_chain_pick_otus(fasta_file=fasta_file,
                        output_dir=new_output_dir,
                        command_handler=command_handler,
                        params=parse_qiime_parameters(parameter_f),
                        qiime_config=qiime_config,
                        parallel=parallel,
                        status_update_callback=status_update_callback)
def test_submit_processed_data_to_db_illumina(self):
    """run_process_illumina_through_pick_otus runs without error.

    Processes Illumina input through split-libraries and chained
    OTU-picking, loads the study, OTU mapping and split-library
    sequences into the test database, then verifies the SEQ_RUN,
    split-library, OTU and OTU-failure rows via direct SQL queries
    before deleting the test analysis.
    """
    self.files_to_remove.append(join(self.wf_out,'s8_map_incomplete.txt'))

    # process the sequence data first before loading
    run_process_illumina_through_split_lib(0,'Fasting_subset',
                input_fp=','.join(self.illumina_fps),
                mapping_fp=self.illumina_map_fp,
                output_dir=self.wf_out,
                command_handler=call_commands_serially,
                params=self.params,
                qiime_config=self.qiime_config,
                write_to_all_fasta=False,
                status_update_callback=no_status_updates)

    # get the filepaths of key files
    input_file_basename = splitext(split(self.sff_fp)[1])[0]
    otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
    split_lib_seqs_fp = join(self.wf_out,'split_libraries',
                             'seqs.fna')

    # run chained OTU-picking
    run_chain_pick_otus(split_lib_seqs_fp,
                        output_dir=self.gg_out,
                        command_handler=call_commands_serially,
                        params=self.params,
                        qiime_config=self.qiime_config,parallel=True,
                        status_update_callback=no_status_updates)

    input_fname = splitext(split(self.sff_fp)[-1])[0]
    db_input_fp = join(self.wf_out,input_fname)

    # load the study
    analysis_id, input_dir, seq_run_id, split_lib_input_md5sum = \
        submit_illumina_and_split_lib(data_access,
                                      ','.join(self.illumina_fps),
                                      self.study_id,
                                      self.wf_out)
    # load the OTU table data
    load_otu_mapping(data_access,self.wf_out,analysis_id)
    # load the split-library sequence data
    split_library_id=load_split_lib_sequences(data_access,input_dir,
                                              analysis_id, seq_run_id,
                                              split_lib_input_md5sum)

    ### TEST raw sequence data load
    # get expected results
    print 'Analysis ID is: %s' % str(analysis_id)
    print 'Testing the SEQ_RUN loading!'
    # expected values: two input fastq files may be returned
    exp_fastq_md5=['6e3c114cec9bdc8708aaa9077fd71aa6','685cac31968b74c5d99b294ac29e9fd9']
    exp_num_seqs=100
    exp_instr_code='ILLUMINA'
    exp_fastq_fname=['s_8_2_sequence_100_records.txt','s_8_1_sequence_100_records.txt']

    # define the query to pull data from DB
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum, h.instrument_code from analysis j inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id inner join sff_file f on f.sff_file_id=s.sff_file_id inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
    seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(seq_run_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_fastq_filename,obs_num_of_reads,obs_fastq_md5,\
        obs_instrument_code = data
        # check results
        self.assertTrue(obs_fastq_filename in exp_fastq_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertTrue(obs_fastq_md5 in exp_fastq_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
    print 'Done testing SEQ_RUN!'

    # TEST split-library sequence data
    # get expected results
    print 'Testing Split-Library Data'
    exp_split_lib_seq='TACGAAGGGAGCTAGCGTTATTCGGAATGATTGGGTGTAAAGAGTTTGTAGATTGCAAAATTTTTGTTATTAGTAAAAAATTGAATTTATTATTTAAAGATGCTTTTAATACAATTTTGCTTGAGTATAGTAGAGGAAAAT'
    exp_split_lib_md5='700a9b08947589cfdd96525c97f9bcb4'
    exp_split_lib_seq_md5='7e8278ef1f5561d997cad48eabe40847'
    # define the query to pull data from DB
    split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum, s.sequence_string,s.md5_checksum from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
    split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(split_lib_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = data
        # check results
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

    ### TEST OTU table load
    # get expected results
    print 'Testing OTU Data!'
    exp_otu_md5='6bc1d4693d57ddfa6abe9bd94103476d'
    exp_threshold=97
    # define the query to pull data from DB
    otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id, ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file, p.threshold from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
    otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'SSBH05July07\'" % (str(analysis_id))
    results = cur.execute(otu_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = data
        # check results
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)

    ### TEST OTU failures load
    # define the query to pull data from DB
    otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
    otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    results = cur.execute(otu_fail_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_id= data
        # check results
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

    # delete loaded study data
    valid=data_access.deleteTestAnalysis(True,analysis_id)
    if not valid:
        print "Error: Could not delete data from DB!"
def test_submit_processed_data_to_db(self):
    """run_process_sff_through_pick_otus runs without error.

    Processes SFF input through split-libraries and chained
    OTU-picking, loads the study, OTU mapping and split-library
    sequences into the test database, then verifies the flow-data,
    split-library, OTU and OTU-failure rows via direct SQL queries
    before deleting the test analysis.
    """
    # register generated files for teardown cleanup
    self.files_to_remove.append(join(self.wf_out,'Fasting_subset.fna'))
    self.files_to_remove.append(join(self.wf_out,'Fasting_subset.qual'))
    self.files_to_remove.append(join(self.wf_out,'Fasting_subset.txt'))

    # remove generated mapping file
    moved_mapping_file=join(self.wf_out,split(self.fasting_mapping_fp)[-1])
    self.files_to_remove.append(moved_mapping_file)

    # process the sequence data first before loading
    run_process_sff_through_split_lib(0,'Fasting_subset',
                sff_input_fp=self.sff_fp,
                mapping_fp=self.fasting_mapping_fp,
                output_dir=self.wf_out,
                command_handler=call_commands_serially,
                params=self.params,
                qiime_config=self.qiime_config,convert_to_flx=False,
                write_to_all_fasta=False,
                status_update_callback=no_status_updates)

    # get the file basename
    input_file_basename = splitext(split(self.sff_fp)[1])[0]
    # get key filepaths
    otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
    split_lib_seqs_fp = join(self.wf_out,'split_libraries',
                             'seqs.fna')

    # run chained OTU-picking
    run_chain_pick_otus(split_lib_seqs_fp,
                        output_dir=self.gg_out,
                        command_handler=call_commands_serially,
                        params=self.params,
                        qiime_config=self.qiime_config,parallel=False,
                        status_update_callback=no_status_updates)

    input_fname = splitext(split(self.sff_fp)[-1])[0]
    db_input_fp = join(self.wf_out,input_fname)

    # submit the data
    analysis_id, input_dir, seq_run_id, split_lib_input_md5sum = \
        submit_sff_and_split_lib(data_access,db_input_fp+'.fna',
                                 self.study_id)
    # load OTU picking
    load_otu_mapping(data_access,self.wf_out,analysis_id)
    # load split-lib sequences
    split_library_id=load_split_lib_sequences(data_access,input_dir,
                                              analysis_id, seq_run_id,
                                              split_lib_input_md5sum)

    ### TEST raw sequence data load
    # expected results
    print 'Analysis ID is: %s' % str(analysis_id)
    print 'Testing the FLOW_DATA loading!'
    exp_sff_md5='314f4000857668d45a413d2e94a755fc'
    exp_num_seqs=22
    exp_read_id='FLP3FBN01ELBSX'
    exp_instr_code='GS FLX'
    exp_sff_fname='Fasting_subset'

    # define the query to pull data from DB
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()
    seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum, h.instrument_code from analysis j inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id inner join sff_file f on f.sff_file_id=s.sff_file_id inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
    seq_run_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
    results = cur.execute(seq_run_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
        obs_instrument_code = data
        # check results
        self.assertEqual(obs_sff_filename,exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertEqual(obs_sff_md5,exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)

    # TEST split-library data load
    # expected results
    print 'Testing Split-Library Data'
    exp_split_lib_seq='CTGGGCCGTGTCTCAGTCCCAATGTGGCCGTTTACCCTCTCAGGCCGGCTACGCATCATCGCCTTGGTGGGCCGTTACCTCACCAACTAGCTAATGCGCCGCAGGTCCATCCATGTTCACGCCTTGATGGGCGCTTTAATATACTGAGCATGCGCTCTGTATACCTATCCGGTTTTAGCTACCGTTTCCAGCAGTTATCCCGGACACATGGGCTAGG'
    exp_split_lib_md5='2c67e0acf745bef73e26c36f0b3bd00a'
    exp_split_lib_seq_md5='008918f7469f8e33d5dd6e01075d5194'
    # define the query to pull data from DB
    split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum, s.sequence_string,s.md5_checksum from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
    split_lib_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
    results = cur.execute(split_lib_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = data
        # check results
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

    # TEST OTU-table data load
    # expected results
    print 'Testing OTU Data!'
    exp_otu_md5='0b8edcf8a4275730001877496b41cf55'
    exp_threshold=97
    # define the query to pull data from DB
    otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id, ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file, p.threshold from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
    otu_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_2\'" % (str(analysis_id))
    results = cur.execute(otu_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = data
        # check results
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)

    # TEST OTU-failures data load
    # define the query to pull data from DB
    otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id from analysis j inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
    otu_fail_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_14\'" \
        % (str(analysis_id))
    results = cur.execute(otu_fail_info)

    # get observed values
    for data in results:
        obs_seq_run_id,obs_ssu_id= data
        # check results
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

    # delete the loaded data
    valid=data_access.deleteTestAnalysis(True,analysis_id)
    if not valid:
        print "Error: Could not delete data from DB!"