def chimera(runobj):
    """Run de novo and reference chimera checking and record the outcome.

    The outcome is written to both the chimera status file and the run
    status file.  When checking passes, the chimeras are written to the
    deleted files and the cleaned fasta, names, uniques and abundance
    files are rewritten.

    Args:
        runobj: run object.  This function reads its
            ``run_status_file_name``, ``chimera_status_file_name`` and
            ``use_cluster`` attributes, and sets ``run_status_file_h`` and
            ``chimera_status_file_h`` to open file handles.  Both handles
            are left open on return.

    Raises:
        SystemExit: if the chimera check fails, or if the cluster jobs do
            not finish successfully.
    """

    def _code_for(status):
        # Anything other than SUCCESS or NOREGION is treated as a failure.
        if status == 'SUCCESS':
            return 'PASS'
        if status == 'NOREGION':
            return 'NOREGION'
        return 'FAIL'

    def _record(message):
        # Mirror the outcome into both status files (chimera file first).
        # Flush immediately: the handles stay open on runobj and the
        # failure paths call sys.exit() right after writing, so the status
        # must not be left sitting in a write buffer.
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(message)
            handle.flush()

    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # lets read the trim status file out here and keep those details out of
    # the Chimera code
    idx_keys = get_keys(runobj)

    # Open the run STATUS file in append mode because we may start the run
    # in the middle (say at the gast stage) and don't want to overwrite it.
    # If trimming is re-run we'll get two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    chimera_code = _code_for(c_den[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]  # add a list to a list
        logger.debug("chimera_cluster_ids: " + ' '.join(chimera_cluster_ids))
    logger.debug("Chimera DeNovo Code: " + chimera_code)

    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    # NOTE(review): the reference result replaces the de novo code outright,
    # so a de novo failure followed by a reference success is treated as
    # PASS.  Behavior kept as-is -- confirm this is intended.
    chimera_code = _code_for(c_ref[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    if chimera_code == 'PASS':
        if runobj.use_cluster:
            chimera_cluster_code = wait_for_cluster_to_finish(
                chimera_cluster_ids)
            if chimera_cluster_code[0] == 'SUCCESS':
                logger.info("Chimera checking finished successfully")
                _record("CHIMERA SUCCESS\n")
            else:
                logger.info("3-Chimera checking Failed")
                _record("3-CHIMERA ERROR: " + str(chimera_cluster_code[1]) +
                        " " + str(chimera_cluster_code[2]) + "\n")
                sys.exit("3-Chimera checking Failed")
        else:
            chimera_cluster_code = ['SUCCESS', 'Not using cluster']
            logger.info("Chimera checking finished without using cluster")
            _record("CHIMERA SUCCESS--no cluster\n")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        _record("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here: _code_for() never returns
        # anything besides PASS, NOREGION or FAIL.
        logger.info("1-Chimera checking Failed")
        _record("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)

    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        logger.info("Writing Chimeras to deleted files")
        mychimera.write_chimeras_to_deleted_file()

        # should also recreate fasta
        # then read chimera files and place (or replace) any chimeric
        # read_id into the deleted file.
        mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)

        # Write new cleaned files that remove chimeras if appropriate
        # (these writers are in fasta_mbl_pipeline.py).  The cleaned files
        # are renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa    -- this file is for the uclust chimera script
        #   lane_key.deleted.txt -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()
def chimera(runobj):
    """Run de novo and reference chimera checking and record the outcome.

    The outcome is written to both the chimera status file and the run
    status file.  When checking passes, the chimeras are written to the
    deleted files, the cleaned fasta, names, uniques and abundance files
    are rewritten, and the clean files are written to the database.

    Args:
        runobj: run object.  This function reads its
            ``chimera_status_file_name`` attribute and sets
            ``chimera_status_file_h`` to an open file handle, which is
            left open on return.  ``run_status_file_h`` is written to but
            not opened here, so it must already be an open, writable
            handle.

    Raises:
        SystemExit: if the chimera check fails, or if the cluster jobs do
            not finish successfully.
    """

    def _code_for(status):
        # Anything other than SUCCESS or NOREGION is treated as a failure.
        if status == 'SUCCESS':
            return 'PASS'
        if status == 'NOREGION':
            return 'NOREGION'
        return 'FAIL'

    def _record(message):
        # Mirror the outcome into both status files (chimera file first).
        # Flush immediately: the handles stay open on runobj and the
        # failure paths call sys.exit() right after writing, so the status
        # must not be left sitting in a write buffer.
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(message)
            handle.flush()

    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # lets read the trim status file out here and keep those details out of
    # the Chimera code
    idx_keys = get_keys(runobj)

    mychimera = Chimera(runobj)

    c_den = mychimera.chimera_denovo(idx_keys)
    chimera_code = _code_for(c_den[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]

    c_ref = mychimera.chimera_reference(idx_keys)
    # NOTE(review): the reference result replaces the de novo code outright,
    # so a de novo failure followed by a reference success is treated as
    # PASS.  Behavior kept as-is -- confirm this is intended.
    chimera_code = _code_for(c_ref[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    if chimera_code == 'PASS':
        chimera_cluster_code = wait_for_cluster_to_finish(chimera_cluster_ids)
        if chimera_cluster_code[0] == 'SUCCESS':
            logger.info("Chimera checking finished successfully")
            _record("CHIMERA SUCCESS\n")
        else:
            logger.info("3-Chimera checking Failed")
            _record("3-CHIMERA ERROR: " + str(chimera_cluster_code[1]) +
                    " " + str(chimera_cluster_code[2]) + "\n")
            sys.exit("3-Chimera checking Failed")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        _record("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here: _code_for() never returns
        # anything besides PASS, NOREGION or FAIL.
        logger.info("1-Chimera checking Failed")
        _record("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)

    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        mychimera.write_chimeras_to_deleted_file(idx_keys)

        # should also recreate fasta
        # then read chimera files and place (or replace) any chimeric
        # read_id into the deleted file.
        mymblutils = MBLPipelineFastaUtils(idx_keys, mychimera.outdir)

        # Write new cleaned files that remove chimeras if appropriate
        # (these writers are in fasta_mbl_pipeline.py).  The cleaned files
        # are renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa    -- this file is for the uclust chimera script
        #   lane_key.deleted.txt -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()

        # write keys file for each lane_key - same fields as db table?
        #   (for easy writing)
        # write primers file for each lane_key
        #
        # Write new clean files to the database:
        #   rawseq table not used
        #   trimseq
        #   runkeys
        #   primers
        #   run primers
        mymblutils.write_clean_files_to_database()
def chimera(runobj):
    """Run de novo and reference chimera checking and record the outcome.

    The outcome is written to both the chimera status file and the run
    status file.  When checking passes, the chimeras are written to the
    deleted files and the cleaned fasta, names, uniques and abundance
    files are rewritten.

    Args:
        runobj: run object.  This function reads its
            ``run_status_file_name``, ``chimera_status_file_name`` and
            ``use_cluster`` attributes, and sets ``run_status_file_h`` and
            ``chimera_status_file_h`` to open file handles.  Both handles
            are left open on return.

    Raises:
        SystemExit: if the chimera check fails, or if the cluster jobs do
            not finish successfully.
    """

    def _code_for(status):
        # Anything other than SUCCESS or NOREGION is treated as a failure.
        if status == 'SUCCESS':
            return 'PASS'
        if status == 'NOREGION':
            return 'NOREGION'
        return 'FAIL'

    def _record(message):
        # Mirror the outcome into both status files (chimera file first).
        # Flush immediately: the handles stay open on runobj and the
        # failure paths call sys.exit() right after writing, so the status
        # must not be left sitting in a write buffer.
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(message)
            handle.flush()

    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # lets read the trim status file out here and keep those details out of
    # the Chimera code
    idx_keys = get_keys(runobj)

    # Open the run STATUS file in append mode because we may start the run
    # in the middle (say at the gast stage) and don't want to overwrite it.
    # If trimming is re-run we'll get two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    chimera_code = _code_for(c_den[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]  # add a list to a list
        logger.debug("chimera_cluster_ids: " + ' '.join(chimera_cluster_ids))
    logger.debug("Chimera DeNovo Code: " + chimera_code)

    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    # NOTE(review): the reference result replaces the de novo code outright,
    # so a de novo failure followed by a reference success is treated as
    # PASS.  Behavior kept as-is -- confirm this is intended.
    chimera_code = _code_for(c_ref[0])
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    if chimera_code == 'PASS':
        if runobj.use_cluster:
            chimera_cluster_code = wait_for_cluster_to_finish(
                chimera_cluster_ids)
            if chimera_cluster_code[0] == 'SUCCESS':
                logger.info("Chimera checking finished successfully")
                _record("CHIMERA SUCCESS\n")
            else:
                logger.info("3-Chimera checking Failed")
                _record("3-CHIMERA ERROR: " + str(chimera_cluster_code[1]) +
                        " " + str(chimera_cluster_code[2]) + "\n")
                sys.exit("3-Chimera checking Failed")
        else:
            chimera_cluster_code = ['SUCCESS', 'Not using cluster']
            logger.info("Chimera checking finished without using cluster")
            _record("CHIMERA SUCCESS--no cluster\n")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        _record("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here: _code_for() never returns
        # anything besides PASS, NOREGION or FAIL.
        logger.info("1-Chimera checking Failed")
        _record("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)

    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        logger.info("Writing Chimeras to deleted files")
        mychimera.write_chimeras_to_deleted_file()

        # should also recreate fasta
        # then read chimera files and place (or replace) any chimeric
        # read_id into the deleted file.
        mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)

        # Write new cleaned files that remove chimeras if appropriate
        # (these writers are in fasta_mbl_pipeline.py).  The cleaned files
        # are renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa    -- this file is for the uclust chimera script
        #   lane_key.deleted.txt -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()