def illumina_chimera_after_cluster(runobj):
    """Run the post-cluster chimera steps and log how long they took.

    Builds a ``Chimera`` object from *runobj* and calls, in order,
    ``illumina_rm_size_files()``, ``illumina_size_to_freq_in_chimer()`` and
    ``move_out_chimeric()`` on it.  The duration of the last two calls and of
    the whole function is logged at debug level.

    Args:
        runobj: run object handed unchanged to ``Chimera``.
    """
    # Taken at entry so the final message reports the duration of the whole
    # function, not just of the last step.
    total_start = time.time()

    mychimera = Chimera(runobj)
    mychimera.illumina_rm_size_files()

    start = time.time()
    mychimera.illumina_size_to_freq_in_chimer()
    logger.debug("illumina_size_to_freq_in_chimer time: %s",
                 time.time() - start)

    # The check_chimeric_stats() step ("Check chimeric statistics. If ref >
    # 15% and ratio ref to de-novo > 2 use only de-novo") is currently
    # disabled and therefore not called here.

    start = time.time()
    logger.debug("Creating nonchimeric files in %s", mychimera.indir)
    mychimera.move_out_chimeric()
    logger.debug("move_out_chimeric time: %s", time.time() - start)

    logger.debug("illumina_chimera_after_cluster time = %s",
                 time.time() - total_start)
def illumina_chimera_after_cluster(runobj):
    """Run the post-cluster chimera steps and print how long they took.

    Builds a ``Chimera`` object from *runobj* and calls, in order,
    ``illumina_rm_size_files()``, ``illumina_size_to_freq_in_chimer()`` and
    ``move_out_chimeric()`` on it.  The duration of the last two calls and of
    the whole function is printed to standard output.

    Args:
        runobj: run object handed unchanged to ``Chimera``.
    """
    # Taken at entry so the final message reports the duration of the whole
    # function, not just of the last step.
    total_start = time.time()

    mychimera = Chimera(runobj)
    mychimera.illumina_rm_size_files()

    # Single-argument print(...) is used throughout: it produces the same
    # output under Python 2 and is also valid syntax under Python 3.
    start = time.time()
    mychimera.illumina_size_to_freq_in_chimer()
    elapsed = time.time() - start
    print("illumina_size_to_freq_in_chimer time: %s" % elapsed)

    # The check_chimeric_stats() step ("Check chimeric statistics. If ref >
    # 15% and ratio ref to de-novo > 2 use only de-novo") is currently
    # disabled and therefore not called here.

    start = time.time()
    print("Creating nonchimeric files in %s" % mychimera.indir)
    mychimera.move_out_chimeric()
    elapsed = time.time() - start
    print("move_out_chimeric time: %s" % elapsed)

    total_elapsed = time.time() - total_start
    print("illumina_chimera_after_cluster time = %s" % str(total_elapsed))
def chimera(runobj):
    """Run de novo and reference chimera checking, then write cleaned files.

    Both checks are started through a ``Chimera`` object built from *runobj*.
    The first element of each result is mapped to a code: ``'SUCCESS'`` ->
    ``'PASS'``, ``'NOREGION'`` -> ``'NOREGION'``, anything else -> ``'FAIL'``.
    The code derived from the reference check is the one acted upon.

    On ``'PASS'`` the function waits for the collected cluster ids; when that
    reports ``'SUCCESS'`` it writes the chimeras to the deleted files,
    rewrites the cleaned fasta/names/uniques/abundance files and writes them
    to the database.

    The outcome is written to ``runobj.chimera_status_file_h`` (opened here in
    write mode) and to ``runobj.run_status_file_h`` (not opened here, so it
    must already be open).  Both handles are flushed after every status line
    and are left open.

    Args:
        runobj: run object; ``chimera_status_file_name`` and
            ``run_status_file_h`` are read from it and
            ``chimera_status_file_h`` is set on it.

    Raises:
        SystemExit: when the final code is ``'FAIL'`` or the cluster wait does
            not report ``'SUCCESS'``.
    """
    # Any status not listed here is treated as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    def report(line):
        """Write *line* to both status files and flush them."""
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(line)
            # Flush immediately so the status is on disk before sys.exit()
            # or before any long-running later stage.
            handle.flush()

    def fail(log_message, status_line, exit_message):
        """Log and record a failure, then terminate via sys.exit()."""
        logger.info(log_message)
        report(status_line)
        sys.exit(exit_message)

    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)
    mychimera = Chimera(runobj)
    chimera_cluster_ids = []

    c_den = mychimera.chimera_denovo(idx_keys)
    chimera_code = status_to_code.get(c_den[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]

    # NOTE(review): the reference result replaces the de novo code, so a
    # de novo failure followed by a reference success still ends as 'PASS'.
    # Kept as-is to preserve existing behavior -- confirm this is intended.
    c_ref = mychimera.chimera_reference(idx_keys)
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    # chimera_code is always one of 'FAIL', 'NOREGION' or 'PASS' here, so no
    # further fallback branch is needed.
    if chimera_code == 'FAIL':
        fail("1-Chimera checking Failed",
             "1-CHIMERA ERROR: \n",
             "1-Chimera Failed")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        report("CHIMERA CHECK NOT NEEDED\n")
    else:
        cluster_result = wait_for_cluster_to_finish(chimera_cluster_ids)
        if cluster_result[0] != 'SUCCESS':
            fail("3-Chimera checking Failed",
                 "3-CHIMERA ERROR: %s %s\n" % (cluster_result[1],
                                               cluster_result[2]),
                 "3-Chimera checking Failed")
        logger.info("Chimera checking finished successfully")
        report("CHIMERA SUCCESS\n")

    # Pause kept from the original implementation; its purpose is not
    # documented.
    sleep(2)

    if chimera_code != 'PASS':
        return

    # Read the chimera files and place (or replace) any chimeric read_id
    # into the deleted file.
    mychimera.write_chimeras_to_deleted_file(idx_keys)

    # Write new cleaned files that remove chimeras if appropriate (these
    # helpers are in fasta_mbl_pipeline.py).  The cleaned files are renamed
    # to the original names:
    #   lane_key.unique.fa
    #   lane_key.trimmed.fa
    #   lane_key.names
    #   lane_key.abund.fa    -- this file is for the uclust chimera script
    #   lane_key.deleted.txt -- no change in this file
    mymblutils = MBLPipelineFastaUtils(idx_keys, mychimera.outdir)
    # THE ORDER IS IMPORTANT HERE:
    mymblutils.write_clean_fasta_file()
    mymblutils.write_clean_names_file()
    mymblutils.write_clean_uniques_file()
    mymblutils.write_clean_abundance_file()

    # Write the new clean files to the database.
    mymblutils.write_clean_files_to_database()
def illumina_chimera(runobj):
    """Prepare Illumina input, wait for chimera checking, then clean up.

    Steps, in order:

    1. ``call_illumina_sed("from_frequency_to_size")`` on a ``Chimera`` built
       from *runobj* (per the printed message: replacing "frequency:" with
       ";size=" and capitalizing reads).
    2. Poll ``check_if_cluster_is_done(time_before)`` until it returns a true
       value, sleeping 1 second between polls when ``utils.is_local()`` is
       true and 120 seconds otherwise.
    3. ``check_if_chimera_dir_empty()``, ``illumina_rm_size_files()``,
       ``call_illumina_sed("from_size_to_frequency")`` and
       ``move_out_chimeric()``.

    Progress and timings are printed to standard output.

    Args:
        runobj: run object handed unchanged to ``Chimera``.
    """
    utils = PipelneUtils()
    mychimera = Chimera(runobj)

    # Single-argument print(...) is used throughout: it produces the same
    # output under Python 2 and is also valid syntax under Python 3.
    print("Preparing input files (replacing \"frequency:\" with \";size=\" and capitalize reads)")

    start = time.time()
    mychimera.call_illumina_sed("from_frequency_to_size")
    elapsed = time.time() - start
    print("call_illumina_sed from_frequency_to_size time: %s" % elapsed)

    # todo: use run_until_done_on_cluster from utils
    # Run after the cluster is done with its work:
    start = time.time()
    time_before = utils.get_time_now()
    print("time_before = %s" % time_before)
    print("Waiting for the cluster...")
    # NOTE(review): there is no timeout; this loops until the check succeeds.
    while True:
        if utils.is_local():
            sleep(1)
        else:
            sleep(120)
        cluster_done = mychimera.check_if_cluster_is_done(time_before)
        print("cluster_done = %s" % cluster_done)
        if cluster_done:
            break

    elapsed = time.time() - start
    print("Cluster is done with both chimera checkings in: %s" % elapsed)

    mychimera.check_if_chimera_dir_empty()
    mychimera.illumina_rm_size_files()

    start = time.time()
    mychimera.call_illumina_sed("from_size_to_frequency")
    elapsed = time.time() - start
    print("call_illumina_sed from_size_to_frequency time: %s" % elapsed)

    # The check_chimeric_stats() step ("Check chimeric statistics. If ref >
    # 15% and ratio ref to de-novo > 2 use only de-novo") is currently
    # disabled and therefore not called here.

    start = time.time()
    print("Creating nonchimeric files in %s" % mychimera.indir)
    mychimera.move_out_chimeric()
    elapsed = time.time() - start
    print("move_out_chimeric time: %s" % elapsed)
def chimera(runobj):
    """Run de novo and reference chimera checking, then write cleaned files.

    Both checks are started through a ``Chimera`` object built from *runobj*.
    The first element of each result is mapped to a code: ``'SUCCESS'`` ->
    ``'PASS'``, ``'NOREGION'`` -> ``'NOREGION'``, anything else -> ``'FAIL'``.
    The code derived from the reference check is the one acted upon.

    On ``'PASS'`` the function waits for the collected cluster ids when
    ``runobj.use_cluster`` is true (otherwise it records success without
    waiting), then writes the chimeras to the deleted files and rewrites the
    cleaned fasta/names/uniques/abundance files.

    The outcome is written to ``runobj.chimera_status_file_h`` (opened here in
    write mode) and ``runobj.run_status_file_h`` (opened here in append
    mode).  Both handles are flushed after every status line and are left
    open.

    Args:
        runobj: run object; ``run_status_file_name``,
            ``chimera_status_file_name`` and ``use_cluster`` are read from it
            and the two status file handles are set on it.

    Raises:
        SystemExit: when the final code is ``'FAIL'`` or the cluster wait does
            not report ``'SUCCESS'``.
    """
    # Any status not listed here is treated as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    def report(line):
        """Write *line* to both status files and flush them."""
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(line)
            # Flush immediately so the status is on disk before sys.exit()
            # or before any long-running later stage.
            handle.flush()

    def fail(log_message, status_line, exit_message):
        """Log and record a failure, then terminate via sys.exit()."""
        logger.info(log_message)
        report(status_line)
        sys.exit(exit_message)

    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)

    # Open the run STATUS file in append mode because the run may be started
    # in the middle (say at the gast stage) and must not be overwritten.  If
    # trimming is re-run there will be two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)
    chimera_cluster_ids = []

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    chimera_code = status_to_code.get(c_den[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]  # add a list to a list
        logger.debug("chimera_cluster_ids: %s", ' '.join(chimera_cluster_ids))
    logger.debug("Chimera DeNovo Code: %s", chimera_code)

    # NOTE(review): the reference result replaces the de novo code, so a
    # de novo failure followed by a reference success still ends as 'PASS'.
    # Kept as-is to preserve existing behavior -- confirm this is intended.
    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    # chimera_code is always one of 'FAIL', 'NOREGION' or 'PASS' here, so no
    # further fallback branch is needed.
    if chimera_code == 'FAIL':
        fail("1-Chimera checking Failed",
             "1-CHIMERA ERROR: \n",
             "1-Chimera Failed")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        report("CHIMERA CHECK NOT NEEDED\n")
    elif runobj.use_cluster:
        cluster_result = wait_for_cluster_to_finish(chimera_cluster_ids)
        if cluster_result[0] != 'SUCCESS':
            fail("3-Chimera checking Failed",
                 "3-CHIMERA ERROR: %s %s\n" % (cluster_result[1],
                                               cluster_result[2]),
                 "3-Chimera checking Failed")
        logger.info("Chimera checking finished successfully")
        report("CHIMERA SUCCESS\n")
    else:
        logger.info("Chimera checking finished without using cluster")
        report("CHIMERA SUCCESS--no cluster\n")

    # Pause kept from the original implementation; its purpose is not
    # documented.
    sleep(2)

    if chimera_code != 'PASS':
        return

    # Read the chimera files and place (or replace) any chimeric read_id
    # into the deleted file.
    logger.info("Writing Chimeras to deleted files")
    mychimera.write_chimeras_to_deleted_file()

    # Write new cleaned files that remove chimeras if appropriate (these
    # helpers are in fasta_mbl_pipeline.py).  The cleaned files are renamed
    # to the original names:
    #   lane_key.unique.fa
    #   lane_key.trimmed.fa
    #   lane_key.names
    #   lane_key.abund.fa    -- this file is for the uclust chimera script
    #   lane_key.deleted.txt -- no change in this file
    mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)
    # THE ORDER IS IMPORTANT HERE:
    mymblutils.write_clean_fasta_file()
    mymblutils.write_clean_names_file()
    mymblutils.write_clean_uniques_file()
    mymblutils.write_clean_abundance_file()
def illumina_chimera(runobj):
    """Prepare Illumina input, wait for chimera checking, then clean up.

    Calls ``call_illumina_sed("from_frequency_to_size")`` on a ``Chimera``
    built from *runobj*, waits for the cluster (local runs only pause one
    second; otherwise ``check_if_cluster_is_done`` is polled every 120
    seconds), then calls ``check_if_chimera_dir_empty()``,
    ``illumina_rm_size_files()``,
    ``call_illumina_sed("from_size_to_frequency")`` and
    ``move_out_chimeric()``.  Timings are logged at debug level.

    Args:
        runobj: run object handed unchanged to ``Chimera``.
    """
    def run_sed_timed(direction, report_format):
        """Run one call_illumina_sed pass and log its duration."""
        began = time.time()
        mychimera.call_illumina_sed(direction)
        logger.debug(report_format % (time.time() - began))

    utils = PipelneUtils()
    mychimera = Chimera(runobj)

    logger.debug(
        "Preparing input files (replacing \"frequency:\" with \";size=\" and capitalize reads)"
    )
    run_sed_timed("from_frequency_to_size",
                  "call_illumina_sed from_frequency_to_size time: %s")

    # todo: use run_until_done_on_cluster from utils
    # Run after the cluster is done with its work:
    wait_began = time.time()
    time_before = utils.get_time_now()
    logger.debug("time_before = %s" % time_before)
    logger.debug("Waiting for the cluster...")

    cluster_done = False
    while not cluster_done:
        if utils.is_local():
            # Local runs do not poll the cluster; pause briefly and go on.
            sleep(1)
            break
        sleep(120)
        cluster_done = mychimera.check_if_cluster_is_done(time_before)
        logger.debug("cluster_done = %s" % cluster_done)

    logger.debug("Cluster is done with both chimera checkings in: %s"
                 % (time.time() - wait_began))

    mychimera.check_if_chimera_dir_empty()
    mychimera.illumina_rm_size_files()

    run_sed_timed("from_size_to_frequency",
                  "call_illumina_sed from_size_to_frequency time: %s")

    # The check_chimeric_stats() step ("Check chimeric statistics. If ref >
    # 15% and ratio ref to de-novo > 2 use only de-novo") is currently
    # disabled and therefore not called here.

    move_began = time.time()
    logger.debug("Creating nonchimeric files in %s" % mychimera.indir)
    mychimera.move_out_chimeric()
    logger.debug("move_out_chimeric time: %s" % (time.time() - move_began))
def chimera(runobj):
    """Run de novo and reference chimera checking, then write cleaned files.

    Both checks are started through a ``Chimera`` object built from *runobj*.
    The first element of each result is mapped to a code: ``'SUCCESS'`` ->
    ``'PASS'``, ``'NOREGION'`` -> ``'NOREGION'``, anything else -> ``'FAIL'``.
    The code derived from the reference check is the one acted upon.

    On ``'PASS'`` the function waits for the collected cluster ids when
    ``runobj.use_cluster`` is true (otherwise it records success without
    waiting), then writes the chimeras to the deleted files and rewrites the
    cleaned fasta/names/uniques/abundance files.

    The outcome is written to ``runobj.chimera_status_file_h`` (opened here in
    write mode) and ``runobj.run_status_file_h`` (opened here in append
    mode).  Both handles are flushed after every status line and are left
    open.

    Args:
        runobj: run object; ``run_status_file_name``,
            ``chimera_status_file_name`` and ``use_cluster`` are read from it
            and the two status file handles are set on it.

    Raises:
        SystemExit: when the final code is ``'FAIL'`` or the cluster wait does
            not report ``'SUCCESS'``.
    """
    # Any status not listed here is treated as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    def report(line):
        """Write *line* to both status files and flush them."""
        for handle in (runobj.chimera_status_file_h,
                       runobj.run_status_file_h):
            handle.write(line)
            # Flush immediately so the status is on disk before sys.exit()
            # or before any long-running later stage.
            handle.flush()

    def fail(log_message, status_line, exit_message):
        """Log and record a failure, then terminate via sys.exit()."""
        logger.info(log_message)
        report(status_line)
        sys.exit(exit_message)

    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)

    # Open the run STATUS file in append mode because the run may be started
    # in the middle (say at the gast stage) and must not be overwritten.  If
    # trimming is re-run there will be two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)
    chimera_cluster_ids = []

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    chimera_code = status_to_code.get(c_den[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_den[2]  # add a list to a list
        logger.debug("chimera_cluster_ids: %s", ' '.join(chimera_cluster_ids))
    logger.debug("Chimera DeNovo Code: %s", chimera_code)

    # NOTE(review): the reference result replaces the de novo code, so a
    # de novo failure followed by a reference success still ends as 'PASS'.
    # Kept as-is to preserve existing behavior -- confirm this is intended.
    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")

    # chimera_code is always one of 'FAIL', 'NOREGION' or 'PASS' here, so no
    # further fallback branch is needed.
    if chimera_code == 'FAIL':
        fail("1-Chimera checking Failed",
             "1-CHIMERA ERROR: \n",
             "1-Chimera Failed")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        report("CHIMERA CHECK NOT NEEDED\n")
    elif runobj.use_cluster:
        cluster_result = wait_for_cluster_to_finish(chimera_cluster_ids)
        if cluster_result[0] != 'SUCCESS':
            fail("3-Chimera checking Failed",
                 "3-CHIMERA ERROR: %s %s\n" % (cluster_result[1],
                                               cluster_result[2]),
                 "3-Chimera checking Failed")
        logger.info("Chimera checking finished successfully")
        report("CHIMERA SUCCESS\n")
    else:
        logger.info("Chimera checking finished without using cluster")
        report("CHIMERA SUCCESS--no cluster\n")

    # Pause kept from the original implementation; its purpose is not
    # documented.
    sleep(2)

    if chimera_code != 'PASS':
        return

    # Read the chimera files and place (or replace) any chimeric read_id
    # into the deleted file.
    logger.info("Writing Chimeras to deleted files")
    mychimera.write_chimeras_to_deleted_file()

    # Write new cleaned files that remove chimeras if appropriate (these
    # helpers are in fasta_mbl_pipeline.py).  The cleaned files are renamed
    # to the original names:
    #   lane_key.unique.fa
    #   lane_key.trimmed.fa
    #   lane_key.names
    #   lane_key.abund.fa    -- this file is for the uclust chimera script
    #   lane_key.deleted.txt -- no change in this file
    mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)
    # THE ORDER IS IMPORTANT HERE:
    mymblutils.write_clean_fasta_file()
    mymblutils.write_clean_names_file()
    mymblutils.write_clean_uniques_file()
    mymblutils.write_clean_abundance_file()