def test_run_chain_pick_otus(self):
     """run_chain_pick_otus runs serially without error"""
     run_chain_pick_otus(self.fna_original_fp, self.wf_out,\
                 call_commands_serially, \
                 self.params, self.qiime_config, False,\
                 no_status_updates)
     
     #load the exact match OTUs and check if they are valid
     exact_otus_fp=join(self.wf_out,'pick_otus_exact',\
                        'test_otus.txt')
     obs_exact_otus=open(exact_otus_fp).read()
     
     self.assertEqual(obs_exact_otus,exp_exact_otus)
     
     #load the uclust_ref picked OTUs and check if they are valid
     uclust_ref_otus_fp=join(self.wf_out,'picked_otus_UCLUST_REF_97',
                         'leftover_otus.txt')
     obs_uclust_ref_otus=open(uclust_ref_otus_fp).read()
     
     self.assertEqual(obs_uclust_ref_otus,exp_uclust_ref_otus)
     
     #load the merged OTUs and check if they are valid
     all_otus_fp=join(self.wf_out,'exact_uclust_ref_otus.txt')
     obs_all_otus=open(all_otus_fp).read()
     
     self.assertEqual(obs_all_otus,exp_all_otus)
     
     #load the OTU-picking failures and check if they are valid
     otus_failures_fp=join(self.wf_out, 'all_failures.txt')
     obs_otu_failures=open(otus_failures_fp).read()
     
     self.assertEqual(obs_otu_failures,exp_otu_failures)
     
     #load the OTU table and check if it is valid
     otus_table_fp=join(self.wf_out, 'exact_uclust_ref_otu_table.txt')
     obs_otu_table=open(otus_table_fp).read()
     
     self.assertEqual(obs_otu_table,exp_otu_table)
     
     # Check that the log file is created and has size > 0
     log_fp = glob(join(self.wf_out,'log*.txt'))[0]
     self.assertTrue(getsize(log_fp) > 0)
    def test_submit_processed_data_to_db_fasta(self):
           """submit_processed_data_to_db_fasta runs without error"""

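           # process the sequence data first before loading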
           run_process_fasta_through_split_lib(0,'Fasting_subset',\
            input_fp=','.join(self.fasta_fps),\
            mapping_fp=self.fasta_map_fp,\
            output_dir=self.wf_out, \
            command_handler=call_commands_serially,\
            params=self.params,\
            qiime_config=self.qiime_config,\
            write_to_all_fasta=False,\
            status_update_callback=no_status_updates)


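           # get the filepaths of key files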
           input_file_basename = splitext(split(self.sff_fp)[1])[0]
           otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
           split_lib_seqs_fp = join(self.wf_out,'split_libraries',\
                                    'seqs.fna')

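           # run chained OTU-picking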
           run_chain_pick_otus(split_lib_seqs_fp,
                               output_dir=self.gg_out,
                               command_handler=call_commands_serially,
                               params=self.params,
                               qiime_config=self.qiime_config,parallel=True,
                               status_update_callback=no_status_updates)

           input_fname = splitext(split(self.sff_fp)[-1])[0]
           db_input_fp = join(self.wf_out,input_fname)


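           # submit the data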
           analysis_id=submit_fasta_and_split_lib(data_access,
                                                ','.join(self.fasta_fps),
                                                0, self.wf_out)

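           # load the OTU table data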
           load_otu_mapping(data_access,self.wf_out,analysis_id)

           print 'Analysis ID is: %s' % str(analysis_id)
           print 'Testing the SEQ_RUN loading!'
           exp_sff_md5=['412eee0be168a285415d9e4db3dbbf2f']
           exp_num_seqs=22

           exp_instr_code='FASTA'
           exp_sff_fname=['test_split_lib_seqs']
           con = data_access.getSFFDatabaseConnection()
           cur = con.cursor()
           seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
                 h.instrument_code
                 from analysis j
                 inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
                 inner join sff_file f on f.sff_file_id=s.sff_file_id
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
                 inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
           seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))
           results = cur.execute(seq_run_info)

           # get observed values
           for data in results:
               obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
               obs_instrument_code = data

           print 'After getTestSeqRunData...'

           self.assertTrue(obs_sff_filename in exp_sff_fname)
           self.assertEqual(obs_num_of_reads,exp_num_seqs)
           self.assertTrue(obs_sff_md5 in exp_sff_md5)
           self.assertEqual(obs_instrument_code,exp_instr_code)

           print 'Done testing SEQ_RUN!'

           print 'Testing Split-Library Data'
           exp_split_lib_seq='TTGGGCCGTGTCTCAGTCCCAATGTGGCCGATCAGTCTCTTAACTCGGCTATGCATCATTGCCTTGGTAAGCCGTTACCTTACCAACTAGCTAATGCACCGCAGGTCCATCCAAGAGTGATAGCAGAACCATCTTTCAAACTCTAGACATGCGTCTAGTGTTGTTATCCGGTATTAGCATCTGTTTCCAGGTGTTATCCCAGTCTCTTGGG'
           exp_split_lib_md5='412eee0be168a285415d9e4db3dbbf2f'
           exp_split_lib_seq_md5='59843d3394983f2caa26f583014a3389'

           split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
                 s.sequence_string,s.md5_checksum
                 from analysis j
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
                 inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
                 inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
           split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))

           results = cur.execute(split_lib_info)

           # get observed values
           for data in results:
               obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
               obs_split_lib_seq,obs_split_lib_seq_md5 = data

           self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
           self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
           self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)

           print 'Testing OTU Data!'

           #exp_prokmsa=97550
           exp_otu_md5='cec9b6c184ffdb12d9de4450034ab775'
           exp_threshold=97

           otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
               ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
               p.threshold
               from analysis j
               inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
               inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
               inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
               inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
           otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx354.281526\'" % (str(analysis_id))

           results = cur.execute(otu_info)

           for data in results:
               obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
               obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
               obs_otu_md5,obs_threshold = data

           #self.assertEqual(obs_prokmsa,exp_prokmsa)
           self.assertEqual(obs_otu_md5,exp_otu_md5)
           self.assertEqual(obs_threshold,exp_threshold)

           otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id
                 from analysis j
                 inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
                 inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
           otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'test.PCx635.281531\'" % (str(analysis_id))

           results = cur.execute(otu_fail_info)

           for data in results:
               obs_seq_run_id,obs_ssu_id= data


           self.failIfEqual(obs_seq_run_id,0)
           self.failIfEqual(obs_ssu_id,0)

           valid=data_access.deleteTestAnalysis(True,analysis_id)
           if not valid:
               print "Error: Could not delete data from DB!"
    def test_submit_processed_data_to_db_illumina(self):
        """run_process_illumina_through_pick_otus runs without error"""
        
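        # process the sequence data first before loading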
        run_process_illumina_through_split_lib(0,'Fasting_subset',\
         input_fp=','.join(self.illumina_fps),\
         mapping_fp=self.illumina_map_fp,\
         output_dir=self.wf_out, \
         command_handler=call_commands_serially,\
         params=self.params,\
         qiime_config=self.qiime_config,\
         write_to_all_fasta=False,\
         status_update_callback=no_status_updates)
        
        
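        # get the filepaths of key files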
        input_file_basename = splitext(split(self.sff_fp)[1])[0]
        otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
        split_lib_seqs_fp = join(self.wf_out,'split_libraries',\
                                 'seqs.fna')
                                 
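        # run chained OTU-picking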
        run_chain_pick_otus(split_lib_seqs_fp,
                            output_dir=self.gg_out,
                            command_handler=call_commands_serially,
                            params=self.params,
                            qiime_config=self.qiime_config,parallel=True,
                            status_update_callback=no_status_updates)
                                 
        input_fname = splitext(split(self.sff_fp)[-1])[0]
        db_input_fp = join(self.wf_out,input_fname)
        
        
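        # load the study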
        analysis_id=submit_illumina_and_split_lib(data_access,
                                                  ','.join(self.illumina_fps),
                                                  0,
                                                  self.wf_out)
        
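        # load the OTU table data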
        load_otu_mapping(data_access,self.wf_out,analysis_id)
        
        print 'Analysis ID is: %s' % str(analysis_id)
        print 'Testing the SEQ_RUN loading!'
        exp_sff_md5=['2b14442f7df4d06ac1e241816bf3ce4a','53181ca3427e5b4ce28a6b13cb3b98dd']
        exp_num_seqs=100
       
        exp_instr_code='ILLUMINA'
        exp_sff_fname=['s_8_2_sequence_100_records','s_8_1_sequence_100_records']
        con = data_access.getSFFDatabaseConnection()
        cur = con.cursor()
        seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
              h.instrument_code
              from analysis j
              inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
              inner join sff_file f on f.sff_file_id=s.sff_file_id
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
        seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
        results = cur.execute(seq_run_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
            obs_instrument_code = data
            
        print 'After getTestSeqRunData...'
        
        self.assertTrue(obs_sff_filename in exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertTrue(obs_sff_md5 in exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
        
        print 'Done testing SEQ_RUN!'
        
        print 'Testing Split-Library Data'
        exp_split_lib_seq='TACGAAGGGAGCTAGCGTTATTCGGAATGATTGGGTGTAAAGAGTTTGTAGATTGCAAAATTTTTGTTATTAGTAAAAAATTGAATTTATTATTTAAAGATGCTTTTAATACAATTTTGCTTGAGTATAGTAGAGGAAAAT'
        exp_split_lib_md5='1443e25614090e660b003c5774ed4cba'
        exp_split_lib_seq_md5='7e8278ef1f5561d997cad48eabe40847'

        split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
              s.sequence_string,s.md5_checksum
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
              inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
              inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
        split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    
        results = cur.execute(split_lib_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
            obs_split_lib_seq,obs_split_lib_seq_md5 = data
                                                            
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
        
        print 'Testing OTU Data!'
        
        #exp_prokmsa=97550
        exp_otu_md5='56222e11026575d9850009768c0b8885'
        exp_threshold=97
        
        otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
            ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
            p.threshold
            from analysis j
            inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
            inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
            inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
            inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
        otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'SSBH05July07\'" % (str(analysis_id))
    
        results = cur.execute(otu_info)
        
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
            obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
            obs_otu_md5,obs_threshold = data
        
        #self.assertEqual(obs_prokmsa,exp_prokmsa)
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)
        
        otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
        otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
    
        results = cur.execute(otu_fail_info)
        
        for data in results:
            obs_seq_run_id,obs_ssu_id= data
        
        
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)

        valid=data_access.deleteTestAnalysis(True,analysis_id)
        if not valid:
            print "Error: Could not delete data from DB!"
    def test_submit_processed_data_to_db(self):
        """run_process_sff_through_pick_otus runs without error"""
        
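        # process the sequence data first before loading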
        run_process_sff_through_split_lib(0,'Fasting_subset',
          sff_input_fp=self.sff_fp, 
          mapping_fp=self.fasting_mapping_fp,
          output_dir=self.wf_out,
          command_handler=call_commands_serially,
          params=self.params,
          qiime_config=self.qiime_config,convert_to_flx=False,
          write_to_all_fasta=False,
          status_update_callback=no_status_updates)
        
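        # get the file basename and key filepaths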
        input_file_basename = splitext(split(self.sff_fp)[1])[0]
        otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
        split_lib_seqs_fp = join(self.wf_out,'split_libraries',\
                                 'seqs.fna')
                                 
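        # run chained OTU-picking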
        run_chain_pick_otus(split_lib_seqs_fp,
                            output_dir=self.gg_out,
                            command_handler=call_commands_serially,
                            params=self.params,
                            qiime_config=self.qiime_config,parallel=False,
                            status_update_callback=no_status_updates)
                                 
        input_fname = splitext(split(self.sff_fp)[-1])[0]
        db_input_fp = join(self.wf_out,input_fname)

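        # submit the data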
        analysis_id=submit_sff_and_split_lib(data_access,\
                                                db_input_fp+'.fna',0)
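        # load OTU picking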
        load_otu_mapping(data_access,self.wf_out,analysis_id)
                                                
        print 'Analysis ID is: %s' % str(analysis_id)
        print 'Testing the FLOW_DATA loading!'
        exp_sff_md5='314f4000857668d45a413d2e94a755fc'
        exp_num_seqs=22
        exp_read_id='FLP3FBN01ELBSX'
        exp_instr_code='GS FLX'
        exp_sff_fname='Fasting_subset'
       
       
        con = data_access.getSFFDatabaseConnection()
        cur = con.cursor()
        seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
              h.instrument_code
              from analysis j
              inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
              inner join sff_file f on f.sff_file_id=s.sff_file_id
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
        seq_run_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
        results = cur.execute(seq_run_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
            obs_instrument_code = data
        
        self.assertEqual(obs_sff_filename,exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertEqual(obs_sff_md5,exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
        
        '''
        print 'After getTestFlowData...'
        #print 'Calling getTestFlowData...' 
        obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
        obs_instrument_code,obs_read_id,obs_read_seq,obs_flow_string,\
        obs_qual_string = data_access.getTestFlowData(True,analysis_id,
                                                            'test.PCx634_1')

        #print 'After getTestFlowData...'                                                   
        self.assertEqual(obs_sff_filename,exp_sff_fname)    
        self.assertEqual(obs_num_of_reads,exp_num_seqs)            
        self.assertEqual(obs_sff_md5,exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
        self.assertEqual(obs_read_id,exp_read_id)
        self.assertEqual(obs_read_seq,exp_read_seq)
        self.assertEqual(str(obs_flow_string),exp_flow_string)
        self.assertEqual(str(obs_qual_string),exp_qual_string)
        
        print 'Done testing Flow_Data!'
        
        '''
        
        print 'Testing Split-Library Data'
        exp_split_lib_seq='CTGGGCCGTGTCTCAGTCCCAATGTGGCCGTTTACCCTCTCAGGCCGGCTACGCATCATCGCCTTGGTGGGCCGTTACCTCACCAACTAGCTAATGCGCCGCAGGTCCATCCATGTTCACGCCTTGATGGGCGCTTTAATATACTGAGCATGCGCTCTGTATACCTATCCGGTTTTAGCTACCGTTTCCAGCAGTTATCCCGGACACATGGGCTAGG'
        exp_split_lib_md5='2c67e0acf745bef73e26c36f0b3bd00a'
        exp_split_lib_seq_md5='008918f7469f8e33d5dd6e01075d5194'

        
        split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
              s.sequence_string,s.md5_checksum
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
              inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
              inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
        split_lib_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
    
        results = cur.execute(split_lib_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
            obs_split_lib_seq,obs_split_lib_seq_md5 = data
                                                            
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
        
        '''
        obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
        obs_split_lib_seq,obs_split_lib_seq_md5 = \
                    data_access.getTestSplitLibData(True,analysis_id,
                                                            'test.PCx634_1')
                                                            
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
        '''
        print 'Testing OTU Data!'
        
        #exp_prokmsa=83669
        exp_otu_md5='0b8edcf8a4275730001877496b41cf55'
        exp_threshold=97
        
        otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
            ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
            p.threshold
            from analysis j
            inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
            inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
            inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
            inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
        otu_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_2\'" % (str(analysis_id))
    
        results = cur.execute(otu_info)
        
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
            obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
            obs_otu_md5,obs_threshold = data
        
        '''
        obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
        obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
        obs_otu_md5,obs_threshold = \
                    data_access.getTestOTUData(True,analysis_id,
                                                            'test.PCx634_2')
        '''
        #self.assertEqual(obs_prokmsa,exp_prokmsa)
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)
        
        otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
        otu_fail_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_14\'" % (str(analysis_id))
    
        results = cur.execute(otu_fail_info)
        
        for data in results:
            obs_seq_run_id,obs_ssu_id= data
        
        '''
        obs_seq_run_id,obs_ssu_id = \
                    data_access.getTestOTUFailureData(True,analysis_id,
                                                            'test.PCx634_14')
        '''
        
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)
        
        valid=data_access.deleteTestAnalysis(True,analysis_id)
        if not valid:
            print "Error: Could not delete data from DB!"
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    fasta_file = opts.split_lib_seqs
    verbose = opts.verbose
    print_only = opts.print_only
    parallel = opts.parallel
    output_dir=opts.output_dir
    if output_dir:
        dir_path=output_dir
        if not exists(output_dir):
            try:
                mkdir(output_dir)
            except OSError:
                # defer error handling to the makedirs/--force check below
                pass
    else:
        dir_path='./'
        
    if parallel: 
        raise_error_on_parallel_unavailable()

    try:
        parameter_f = open(opts.parameter_fp)
    except IOError:
        raise IOError,\
            "Can't open parameters file (%s). Does it exist? Do you have read access?"\
            % opts.parameter_fp

    try:
        makedirs(dir_path)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose "+\
            "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = web_app_call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    new_output_dir=join(dir_path,'chain_picked_otus')
    create_dir(new_output_dir)
    
    run_chain_pick_otus(fasta_file=fasta_file,\
     output_dir=new_output_dir,\
     command_handler=command_handler,\
     params=parse_qiime_parameters(parameter_f),\
     qiime_config=qiime_config,\
     parallel=parallel,\
     status_update_callback=status_update_callback)
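
# Entry-point guard (a minimal sketch, assumed rather than taken from this
# excerpt): the script would typically be invoked via the standard pattern
# below, with main() defined as above.
if __name__ == "__main__":
    main()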
    def test_submit_processed_data_to_db_illumina(self):
        """run_process_illumina_through_pick_otus runs without error"""
        
        self.files_to_remove.append(join(self.wf_out,'s8_map_incomplete.txt'))
        
        # process the sequence data first before loading
        run_process_illumina_through_split_lib(0,'Fasting_subset',\
         input_fp=','.join(self.illumina_fps),\
         mapping_fp=self.illumina_map_fp,\
         output_dir=self.wf_out, \
         command_handler=call_commands_serially,\
         params=self.params,\
         qiime_config=self.qiime_config,\
         write_to_all_fasta=False,\
         status_update_callback=no_status_updates)
        
        # get the filepaths of key files
        input_file_basename = splitext(split(self.sff_fp)[1])[0]
        otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
        split_lib_seqs_fp = join(self.wf_out,'split_libraries',\
                                 'seqs.fna')
        
        # run chained OTU-picking
        run_chain_pick_otus(split_lib_seqs_fp,
                            output_dir=self.gg_out,
                            command_handler=call_commands_serially,
                            params=self.params,
                            qiime_config=self.qiime_config,parallel=True,
                            status_update_callback=no_status_updates)
                                 
        input_fname = splitext(split(self.sff_fp)[-1])[0]
        db_input_fp = join(self.wf_out,input_fname)
        
        # load the study
        analysis_id, input_dir, seq_run_id, split_lib_input_md5sum = \
            submit_illumina_and_split_lib(data_access,
                                          ','.join(self.illumina_fps),
                                          self.study_id, self.wf_out)
        
        # load the OTU table data
        load_otu_mapping(data_access,self.wf_out,analysis_id)

        # load the split-library sequence data
        split_library_id=load_split_lib_sequences(data_access,input_dir,
                                              analysis_id, seq_run_id,
                                              split_lib_input_md5sum)
        
        ### TEST raw sequence data load
        # get expected results 
        print 'Analysis ID is: %s' % str(analysis_id)
        print 'Testing the SEQ_RUN loading!'
        exp_fastq_md5=['6e3c114cec9bdc8708aaa9077fd71aa6','685cac31968b74c5d99b294ac29e9fd9']
        exp_num_seqs=100
        exp_instr_code='ILLUMINA'
        exp_fastq_fname=['s_8_2_sequence_100_records.txt','s_8_1_sequence_100_records.txt']
        
        # define the query to pull data from DB
        con = data_access.getSFFDatabaseConnection()
        cur = con.cursor()
        seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
              h.instrument_code
              from analysis j
              inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
              inner join sff_file f on f.sff_file_id=s.sff_file_id
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
        seq_run_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
        results = cur.execute(seq_run_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_fastq_filename,obs_num_of_reads,obs_fastq_md5,\
            obs_instrument_code = data
            
        # check results
        self.assertTrue(obs_fastq_filename in exp_fastq_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertTrue(obs_fastq_md5 in exp_fastq_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
        
        print 'Done testing SEQ_RUN!'
        
        # TEST split-library sequence data
        # get expected results 
        print 'Testing Split-Library Data'
        exp_split_lib_seq='TACGAAGGGAGCTAGCGTTATTCGGAATGATTGGGTGTAAAGAGTTTGTAGATTGCAAAATTTTTGTTATTAGTAAAAAATTGAATTTATTATTTAAAGATGCTTTTAATACAATTTTGCTTGAGTATAGTAGAGGAAAAT'
        exp_split_lib_md5='700a9b08947589cfdd96525c97f9bcb4'
        exp_split_lib_seq_md5='7e8278ef1f5561d997cad48eabe40847'
        
        # define the query to pull data from DB
        split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
              s.sequence_string,s.md5_checksum
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
              inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
              inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
        split_lib_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
        results = cur.execute(split_lib_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
            obs_split_lib_seq,obs_split_lib_seq_md5 = data
        
        # check results                                                    
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
        
        ### TEST OTU table load
        # get expected results
        print 'Testing OTU Data!'
        exp_otu_md5='6bc1d4693d57ddfa6abe9bd94103476d'
        exp_threshold=97
        
        # define the query to pull data from DB
        otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
            ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
            p.threshold
            from analysis j
            inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
            inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
            inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
            inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
        otu_info+=" where j.analysis_id=%s and slrm.sample_name=\'SSBH05July07\'" % (str(analysis_id))
        results = cur.execute(otu_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
            obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
            obs_otu_md5,obs_threshold = data
        
        # check results 
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)
        
        ### TEST OTU failures load
        # define the query to pull data from DB
        otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
        otu_fail_info+=" where j.analysis_id=%s and slrm.sample_name=\'HKE08Aug07\'" % (str(analysis_id))
        results = cur.execute(otu_fail_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_id= data
        
        # check results 
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)
        
        # delete loaded study data
        valid=data_access.deleteTestAnalysis(True,analysis_id)
        if not valid:
            print "Error: Could not delete data from DB!"
    def test_submit_processed_data_to_db(self):
        """run_process_sff_through_pick_otus runs without error"""
        
        self.files_to_remove.append(join(self.wf_out,'Fasting_subset.fna'))
        self.files_to_remove.append(join(self.wf_out,'Fasting_subset.qual'))
        self.files_to_remove.append(join(self.wf_out,'Fasting_subset.txt'))
        
        # remove generated mapping file
        moved_mapping_file=join(self.wf_out,split(self.fasting_mapping_fp)[-1])
        self.files_to_remove.append(moved_mapping_file)
        
        # process the sequence data first before loading
        run_process_sff_through_split_lib(0,'Fasting_subset',
          sff_input_fp=self.sff_fp, 
          mapping_fp=self.fasting_mapping_fp,
          output_dir=self.wf_out,
          command_handler=call_commands_serially,
          params=self.params,
          qiime_config=self.qiime_config,convert_to_flx=False,
          write_to_all_fasta=False,
          status_update_callback=no_status_updates)
        
        # get the file basename
        input_file_basename = splitext(split(self.sff_fp)[1])[0]
        
        # get key filepaths
        otu_fp = join(self.wf_out,'picked_otus','seqs_otus.txt')
        split_lib_seqs_fp = join(self.wf_out,'split_libraries',\
                                 'seqs.fna')
        
        # run chained OTU-picking
        run_chain_pick_otus(split_lib_seqs_fp,
                            output_dir=self.gg_out,
                            command_handler=call_commands_serially,
                            params=self.params,
                            qiime_config=self.qiime_config,parallel=False,
                            status_update_callback=no_status_updates)
                                 
        input_fname = splitext(split(self.sff_fp)[-1])[0]
        db_input_fp = join(self.wf_out,input_fname)

        # submit the data
        analysis_id, input_dir, seq_run_id, split_lib_input_md5sum = \
            submit_sff_and_split_lib(data_access,db_input_fp+'.fna',
                                     self.study_id)
        # load OTU picking
        load_otu_mapping(data_access,self.wf_out,analysis_id)
        
        # load split-lib sequences
        split_library_id=load_split_lib_sequences(data_access,input_dir,
                                                analysis_id, seq_run_id,
                                                split_lib_input_md5sum)
        
        ### TEST raw sequence data load
        # expected results
        print 'Analysis ID is: %s' % str(analysis_id)
        print 'Testing the FLOW_DATA loading!'
        exp_sff_md5='314f4000857668d45a413d2e94a755fc'
        exp_num_seqs=22
        exp_read_id='FLP3FBN01ELBSX'
        exp_instr_code='GS FLX'
        exp_sff_fname='Fasting_subset'
        
        # define the query to pull data from DB
        con = data_access.getSFFDatabaseConnection()
        cur = con.cursor()
        seq_run_info="""select j.seq_run_id,f.sff_filename,f.number_of_reads,f.md5_checksum,
              h.instrument_code
              from analysis j
              inner join seq_run_to_sff_file s on j.seq_run_id=s.seq_run_id
              inner join sff_file f on f.sff_file_id=s.sff_file_id
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join sequencing_run h on h.seq_run_id=s.seq_run_id"""
        seq_run_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
        results = cur.execute(seq_run_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_sff_filename,obs_num_of_reads,obs_sff_md5,\
            obs_instrument_code = data
        
        # check results
        self.assertEqual(obs_sff_filename,exp_sff_fname)
        self.assertEqual(obs_num_of_reads,exp_num_seqs)
        self.assertEqual(obs_sff_md5,exp_sff_md5)
        self.assertEqual(obs_instrument_code,exp_instr_code)
        
        # TEST split-library data load
        # expected results
        print 'Testing Split-Library Data'
        exp_split_lib_seq='CTGGGCCGTGTCTCAGTCCCAATGTGGCCGTTTACCCTCTCAGGCCGGCTACGCATCATCGCCTTGGTGGGCCGTTACCTCACCAACTAGCTAATGCGCCGCAGGTCCATCCATGTTCACGCCTTGATGGGCGCTTTAATATACTGAGCATGCGCTCTGTATACCTATCCGGTTTTAGCTACCGTTTCCAGCAGTTATCCCGGACACATGGGCTAGG'
        exp_split_lib_md5='2c67e0acf745bef73e26c36f0b3bd00a'
        exp_split_lib_seq_md5='008918f7469f8e33d5dd6e01075d5194'

        # define the query to pull data from DB
        split_lib_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,l.command,l.md5_checksum,
              s.sequence_string,s.md5_checksum
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
              inner join ssu_sequence s on slrm.ssu_sequence_id=s.ssu_sequence_id
              inner join split_library_run l on j.split_library_run_id=l.split_library_run_id"""
        split_lib_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_1\'" % (str(analysis_id))
        results = cur.execute(split_lib_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_split_lib_cmd,obs_split_lib_md5,\
            obs_split_lib_seq,obs_split_lib_seq_md5 = data
        
        # check results                                            
        self.assertEqual(obs_split_lib_md5,exp_split_lib_md5)
        self.assertEqual(obs_split_lib_seq,exp_split_lib_seq)
        self.assertEqual(obs_split_lib_seq_md5,exp_split_lib_seq_md5)
        
        # TEST OTU-table data load
        # expected results
        print 'Testing OTU Data!'
        exp_otu_md5='0b8edcf8a4275730001877496b41cf55'
        exp_threshold=97
        
        # define the query to pull data from DB
        otu_info="""select distinct j.seq_run_id,slrm.ssu_sequence_id,ot.reference_id,gr.ssu_sequence_id,
            ot.reference_id,j.otu_picking_run_id,p.command,p.md5_sum_input_file,
            p.threshold
            from analysis j
            inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id and j.split_library_run_id=slrm.split_library_run_id
            inner join otu_table ot on j.otu_run_set_id=ot.otu_run_set_id
            inner join gg_plus_denovo_reference gr on ot.reference_id=gr.reference_id
            inner join otu_picking_run p on j.otu_picking_run_id=p.otu_picking_run_id"""
        otu_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_2\'" % (str(analysis_id))
        results = cur.execute(otu_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_seq_id,obs_otu_id,obs_otu_ssu_id,\
            obs_prokmsa,obs_otu_picking_run_id,obs_pick_otu_cmd,\
            obs_otu_md5,obs_threshold = data
        
        # check results  
        self.assertEqual(obs_otu_md5,exp_otu_md5)
        self.assertEqual(obs_threshold,exp_threshold)
        
        # TEST OTU-failures data load
        # define the query to pull data from DB
        otu_fail_info="""select distinct j.seq_run_id,f.ssu_sequence_id
              from analysis j
              inner join split_library_read_map slrm on j.seq_run_id=slrm.seq_run_id
              inner join otu_picking_failures f on slrm.ssu_sequence_id=f.ssu_sequence_id"""
        otu_fail_info+=" where j.analysis_id=%s and slrm.sequence_name=\'test.PCx634_14\'" % (str(analysis_id))
    
        results = cur.execute(otu_fail_info)
        
        # get observed values
        for data in results:
            obs_seq_run_id,obs_ssu_id= data
        
        # check results  
        self.failIfEqual(obs_seq_run_id,0)
        self.failIfEqual(obs_ssu_id,0)
        
        # delete the loaded data
        valid=data_access.deleteTestAnalysis(True,analysis_id)
        if not valid:
            print "Error: Could not delete data from DB!"