Exemple #1
0
def illumina_chimera_after_cluster(runobj):
    """Post-process chimera results once the cluster jobs are finished.

    Builds a ``Chimera`` helper for ``runobj``, removes the size files,
    runs ``illumina_size_to_freq_in_chimer`` and finally calls
    ``move_out_chimeric`` to create the nonchimeric files in
    ``mychimera.indir``.  The duration of each timed step and of the whole
    function is written to the debug log.

    :param runobj: run description object handed to ``Chimera``.
    """
    # Taken before any work so the final log line covers the whole function
    # and not only the last step.
    total_start = time.time()
    mychimera = Chimera(runobj)

    mychimera.illumina_rm_size_files()
    start = time.time()
    mychimera.illumina_size_to_freq_in_chimer()
    logger.debug("illumina_size_to_freq_in_chimer time: %s",
                 time.time() - start)

    # NOTE: the optional mychimera.check_chimeric_stats() step ("if ref > 15%
    # and ratio ref to de-novo > 2 use only de-novo") is currently disabled.

    start = time.time()
    logger.debug("Creating nonchimeric files in %s", mychimera.indir)
    mychimera.move_out_chimeric()
    logger.debug("move_out_chimeric time: %s", time.time() - start)
    logger.debug("illumina_chimera_after_cluster time = %s",
                 time.time() - total_start)
Exemple #2
0
def illumina_chimera_after_cluster(runobj):
    """Post-process chimera results once the cluster jobs are finished.

    Builds a ``Chimera`` helper for ``runobj``, removes the size files,
    runs ``illumina_size_to_freq_in_chimer`` and finally calls
    ``move_out_chimeric`` to create the nonchimeric files in
    ``mychimera.indir``.  The duration of each timed step and of the whole
    function is printed to standard output.

    :param runobj: run description object handed to ``Chimera``.
    """
    # Taken before any work so the final line covers the whole function
    # and not only the last step.
    total_start = time.time()
    mychimera = Chimera(runobj)

    mychimera.illumina_rm_size_files()
    start = time.time()
    mychimera.illumina_size_to_freq_in_chimer()
    elapsed = time.time() - start
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("illumina_size_to_freq_in_chimer time: %s" % elapsed)

    # NOTE: the optional mychimera.check_chimeric_stats() step ("if ref > 15%
    # and ratio ref to de-novo > 2 use only de-novo") is currently disabled.

    start = time.time()
    print("Creating nonchimeric files in %s" % mychimera.indir)
    mychimera.move_out_chimeric()
    elapsed = time.time() - start
    print("move_out_chimeric time: %s" % elapsed)
    print("illumina_chimera_after_cluster time = %s"
          % (time.time() - total_start))
def chimera(runobj):
    """Run de-novo and reference chimera checking, then clean up the results.

    Both checks are started through ``Chimera(runobj)``.  Each returns a
    sequence whose first item is a status (``'SUCCESS'``, ``'NOREGION'`` or
    anything else, which is treated as a failure) and whose third item, on
    success, is a list of cluster job ids.

    When the outcome is ``PASS`` the function waits for the submitted cluster
    jobs, flags chimeric reads in the deleted file, rewrites the cleaned
    fasta / names / uniques / abundance files and writes them to the
    database.  The outcome is recorded in both the chimera status file and
    the run status file.

    :param runobj: run object; must provide ``chimera_status_file_name`` and
        an already open ``run_status_file_h`` (it is written to here but not
        opened here).
    :raises SystemExit: if a chimera check fails or the cluster jobs do not
        finish successfully.
    """
    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)

    mychimera = Chimera(runobj)

    # Any status other than SUCCESS or NOREGION counts as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    c_den = mychimera.chimera_denovo(idx_keys)
    denovo_code = status_to_code.get(c_den[0], 'FAIL')
    if denovo_code == 'PASS':
        chimera_cluster_ids += c_den[2]

    c_ref = mychimera.chimera_reference(idx_keys)
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    # A failed de-novo check must not be masked by the reference outcome
    # (previously the reference result silently replaced it).
    if denovo_code == 'FAIL':
        chimera_code = 'FAIL'
    # NOTE(review): for the non-failing combinations the reference outcome
    # still decides, as before -- e.g. de-novo SUCCESS with reference
    # NOREGION ends as NOREGION and the de-novo jobs are not waited for.
    # Confirm whether that combination can actually occur.

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")
    if chimera_code == 'PASS':
        chimera_cluster_code = wait_for_cluster_to_finish(chimera_cluster_ids)
        if chimera_cluster_code[0] == 'SUCCESS':
            logger.info("Chimera checking finished successfully")
            runobj.chimera_status_file_h.write("CHIMERA SUCCESS\n")
            runobj.run_status_file_h.write("CHIMERA SUCCESS\n")
        else:
            logger.info("3-Chimera checking Failed")
            error_line = ("3-CHIMERA ERROR: " + str(chimera_cluster_code[1])
                          + " " + str(chimera_cluster_code[2]) + "\n")
            runobj.chimera_status_file_h.write(error_line)
            runobj.run_status_file_h.write(error_line)
            sys.exit("3-Chimera checking Failed")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        runobj.chimera_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
        runobj.run_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here.
        logger.info("1-Chimera checking Failed")
        runobj.chimera_status_file_h.write("1-CHIMERA ERROR: \n")
        runobj.run_status_file_h.write("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)
    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        # Place (or replace) any chimeric read_id into the deleted file.
        mychimera.write_chimeras_to_deleted_file(idx_keys)

        mymblutils = MBLPipelineFastaUtils(idx_keys, mychimera.outdir)

        # Write new cleaned files that remove chimeras if appropriate
        # (implemented in fasta_mbl_pipeline.py).  The cleaned files are
        # renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa     -- this file is for the uclust chimera script
        #   lane_key.deleted.txt  -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()
        # TODO: write keys file for each lane_key (same fields as db table?)
        # TODO: write primers file for each lane_key

        # Write the new clean files to the database
        # (trimseq, runkeys, primers, run primers; rawseq table not used).
        mymblutils.write_clean_files_to_database()
Exemple #4
0
def illumina_chimera(runobj):
    """Run the Illumina chimera check and split off the chimeric reads.

    Steps, in order: rewrite the input files from "frequency:" to ";size="
    notation, start the "denovo" and "ref" chimera checks, poll until
    ``check_if_cluster_is_done`` reports completion (every second when
    running locally, every 120 seconds otherwise), call
    ``check_if_chimera_dir_empty``, remove the size files, convert back to
    frequency notation and create the nonchimeric files in
    ``mychimera.indir``.  Progress and step durations are printed to
    standard output.

    :param runobj: run description object handed to ``Chimera``.
    """
    utils = PipelneUtils()
    mychimera = Chimera(runobj)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("Preparing input files (replacing \"frequency:\" with \";size=\" and capitalize reads)")

    start = time.time()
    mychimera.call_illumina_sed("from_frequency_to_size")
    elapsed = time.time() - start
    print("call_illumina_sed from_frequency_to_size time: %s" % elapsed)

    print("START chimera checking")
    mychimera.chimera_checking("denovo")
    mychimera.chimera_checking("ref")

    # Everything below runs after the cluster is done with its work.
    # TODO: use run_until_done_on_cluster from utils
    start = time.time()
    time_before = utils.get_time_now()
    print("time_before = %s" % time_before)
    print("Waiting for the cluster...")
    while True:
        # Always pause before polling; a short pause is enough locally.
        if utils.is_local():
            sleep(1)
        else:
            sleep(120)
        cluster_done = mychimera.check_if_cluster_is_done(time_before)
        print("cluster_done = %s" % cluster_done)
        if cluster_done:
            break

    elapsed = time.time() - start
    print("Cluster is done with both chimera checkings in: %s" % elapsed)

    mychimera.check_if_chimera_dir_empty()

    mychimera.illumina_rm_size_files()

    start = time.time()
    mychimera.call_illumina_sed("from_size_to_frequency")
    elapsed = time.time() - start
    print("call_illumina_sed from_size_to_frequency time: %s" % elapsed)

    # NOTE: the optional mychimera.check_chimeric_stats() step ("if ref > 15%
    # and ratio ref to de-novo > 2 use only de-novo") is currently disabled.

    start = time.time()
    print("Creating nonchimeric files in %s" % mychimera.indir)
    mychimera.move_out_chimeric()
    elapsed = time.time() - start
    print("move_out_chimeric time: %s" % elapsed)
Exemple #5
0
def chimera(runobj):
    """Run de-novo and reference chimera checking, then clean up the results.

    Both checks are started through ``Chimera(runobj)``.  Each returns a
    sequence whose first item is a status (``'SUCCESS'``, ``'NOREGION'`` or
    anything else, which is treated as a failure) and whose third item, on
    success, is a list of cluster job ids.

    When the outcome is ``PASS`` the function waits for the cluster jobs
    (only if ``runobj.use_cluster`` is set), flags chimeric reads in the
    deleted files and rewrites the cleaned fasta / names / uniques /
    abundance files.  The outcome is recorded in both the chimera status
    file and the run status file, which is opened here in append mode.

    :param runobj: run object; must provide ``run_status_file_name``,
        ``chimera_status_file_name`` and ``use_cluster``.
    :raises SystemExit: if a chimera check fails or the cluster jobs do not
        finish successfully.
    """
    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)
    # Open the run STATUS file in append mode because the run may be started
    # in the middle (say at the gast stage) and must not be overwritten.
    # Re-running trimming therefore yields two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)

    # Any status other than SUCCESS or NOREGION counts as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    denovo_code = status_to_code.get(c_den[0], 'FAIL')
    if denovo_code == 'PASS':
        chimera_cluster_ids += c_den[2]   # add a list to a list
        logger.debug("chimera_cluster_ids: "+' '.join(chimera_cluster_ids))

    logger.debug("Chimera DeNovo Code: "+denovo_code)
    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    # A failed de-novo check must not be masked by the reference outcome
    # (previously the reference result silently replaced it).
    if denovo_code == 'FAIL':
        chimera_code = 'FAIL'
    # NOTE(review): for the non-failing combinations the reference outcome
    # still decides, as before -- e.g. de-novo SUCCESS with reference
    # NOREGION ends as NOREGION and the de-novo jobs are not waited for.
    # Confirm whether that combination can actually occur.

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")
    if chimera_code == 'PASS':
        if runobj.use_cluster:
            chimera_cluster_code = wait_for_cluster_to_finish(chimera_cluster_ids)
            if chimera_cluster_code[0] == 'SUCCESS':
                logger.info("Chimera checking finished successfully")
                runobj.chimera_status_file_h.write("CHIMERA SUCCESS\n")
                runobj.run_status_file_h.write("CHIMERA SUCCESS\n")
            else:
                logger.info("3-Chimera checking Failed")
                error_line = ("3-CHIMERA ERROR: "
                              + str(chimera_cluster_code[1]) + " "
                              + str(chimera_cluster_code[2]) + "\n")
                runobj.chimera_status_file_h.write(error_line)
                runobj.run_status_file_h.write(error_line)
                sys.exit("3-Chimera checking Failed")
        else:
            # Nothing to wait for when the checks did not go to a cluster.
            chimera_cluster_code = ['SUCCESS', 'Not using cluster']
            logger.info("Chimera checking finished without using cluster")
            runobj.chimera_status_file_h.write("CHIMERA SUCCESS--no cluster\n")
            runobj.run_status_file_h.write("CHIMERA SUCCESS--no cluster\n")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        runobj.chimera_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
        runobj.run_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here.
        logger.info("1-Chimera checking Failed")
        runobj.chimera_status_file_h.write("1-CHIMERA ERROR: \n")
        runobj.run_status_file_h.write("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)

    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        logger.info("Writing Chimeras to deleted files")
        # Place (or replace) any chimeric read_id into the deleted file.
        mychimera.write_chimeras_to_deleted_file()

        mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)

        # Write new cleaned files that remove chimeras if appropriate
        # (implemented in fasta_mbl_pipeline.py).  The cleaned files are
        # renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa     -- this file is for the uclust chimera script
        #   lane_key.deleted.txt  -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()
Exemple #6
0
def illumina_chimera(runobj):
    """Run the Illumina chimera check and split off the chimeric reads.

    Steps, in order: rewrite the input files from "frequency:" to ";size="
    notation, start the chimera checking, wait for it to finish (a single
    one-second pause when running locally, otherwise polling
    ``check_if_cluster_is_done`` every 120 seconds), call
    ``check_if_chimera_dir_empty``, remove the size files, convert back to
    frequency notation and create the nonchimeric files.  Progress and step
    durations go to the debug log.

    :param runobj: run description object handed to ``Chimera``.
    """
    pipeline_utils = PipelneUtils()
    checker = Chimera(runobj)

    logger.debug(
        "Preparing input files (replacing \"frequency:\" with \";size=\" and capitalize reads)"
    )

    t0 = time.time()
    checker.call_illumina_sed("from_frequency_to_size")
    logger.debug("call_illumina_sed from_frequency_to_size time: %s" %
                 (time.time() - t0))

    logger.debug("START chimera checking")
    checker.chimera_checking()

    # Everything below runs after the cluster is done with its work.
    # TODO: use run_until_done_on_cluster from utils
    wait_started = time.time()
    submitted_at = pipeline_utils.get_time_now()
    logger.debug("time_before = %s" % submitted_at)
    logger.debug("Waiting for the cluster...")
    while not pipeline_utils.is_local():
        sleep(120)
        finished = checker.check_if_cluster_is_done(submitted_at)
        logger.debug("cluster_done = %s" % finished)
        if finished:
            break
    else:
        # Reached only when is_local() is true: pause briefly, no polling.
        sleep(1)

    logger.debug("Cluster is done with both chimera checkings in: %s" %
                 (time.time() - wait_started))

    checker.check_if_chimera_dir_empty()
    checker.illumina_rm_size_files()

    t0 = time.time()
    checker.call_illumina_sed("from_size_to_frequency")
    logger.debug("call_illumina_sed from_size_to_frequency time: %s" %
                 (time.time() - t0))

    # NOTE: the optional checker.check_chimeric_stats() step ("if ref > 15%
    # and ratio ref to de-novo > 2 use only de-novo") is currently disabled.

    t0 = time.time()
    logger.debug("Creating nonchimeric files in %s" % checker.indir)
    checker.move_out_chimeric()
    logger.debug("move_out_chimeric time: %s" % (time.time() - t0))
Exemple #7
0
def chimera(runobj):
    """Run de-novo and reference chimera checking, then clean up the results.

    Both checks are started through ``Chimera(runobj)``.  Each returns a
    sequence whose first item is a status (``'SUCCESS'``, ``'NOREGION'`` or
    anything else, which is treated as a failure) and whose third item, on
    success, is a list of cluster job ids.

    When the outcome is ``PASS`` the function waits for the cluster jobs
    (only if ``runobj.use_cluster`` is set), flags chimeric reads in the
    deleted files and rewrites the cleaned fasta / names / uniques /
    abundance files.  The outcome is recorded in both the chimera status
    file and the run status file, which is opened here in append mode.

    :param runobj: run object; must provide ``run_status_file_name``,
        ``chimera_status_file_name`` and ``use_cluster``.
    :raises SystemExit: if a chimera check fails or the cluster jobs do not
        finish successfully.
    """
    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # Read the keys out here to keep those details out of the Chimera code.
    idx_keys = get_keys(runobj)
    # Open the run STATUS file in append mode because the run may be started
    # in the middle (say at the gast stage) and must not be overwritten.
    # Re-running trimming therefore yields two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)

    # Any status other than SUCCESS or NOREGION counts as a failure.
    status_to_code = {'SUCCESS': 'PASS', 'NOREGION': 'NOREGION'}

    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    denovo_code = status_to_code.get(c_den[0], 'FAIL')
    if denovo_code == 'PASS':
        chimera_cluster_ids += c_den[2]  # add a list to a list
        logger.debug("chimera_cluster_ids: " + ' '.join(chimera_cluster_ids))

    logger.debug("Chimera DeNovo Code: " + denovo_code)
    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    chimera_code = status_to_code.get(c_ref[0], 'FAIL')
    if chimera_code == 'PASS':
        chimera_cluster_ids += c_ref[2]

    # A failed de-novo check must not be masked by the reference outcome
    # (previously the reference result silently replaced it).
    if denovo_code == 'FAIL':
        chimera_code = 'FAIL'
    # NOTE(review): for the non-failing combinations the reference outcome
    # still decides, as before -- e.g. de-novo SUCCESS with reference
    # NOREGION ends as NOREGION and the de-novo jobs are not waited for.
    # Confirm whether that combination can actually occur.

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")
    if chimera_code == 'PASS':
        if runobj.use_cluster:
            chimera_cluster_code = wait_for_cluster_to_finish(
                chimera_cluster_ids)
            if chimera_cluster_code[0] == 'SUCCESS':
                logger.info("Chimera checking finished successfully")
                runobj.chimera_status_file_h.write("CHIMERA SUCCESS\n")
                runobj.run_status_file_h.write("CHIMERA SUCCESS\n")
            else:
                logger.info("3-Chimera checking Failed")
                error_line = ("3-CHIMERA ERROR: " +
                              str(chimera_cluster_code[1]) + " " +
                              str(chimera_cluster_code[2]) + "\n")
                runobj.chimera_status_file_h.write(error_line)
                runobj.run_status_file_h.write(error_line)
                sys.exit("3-Chimera checking Failed")
        else:
            # Nothing to wait for when the checks did not go to a cluster.
            chimera_cluster_code = ['SUCCESS', 'Not using cluster']
            logger.info("Chimera checking finished without using cluster")
            runobj.chimera_status_file_h.write("CHIMERA SUCCESS--no cluster\n")
            runobj.run_status_file_h.write("CHIMERA SUCCESS--no cluster\n")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        runobj.chimera_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
        runobj.run_status_file_h.write("CHIMERA CHECK NOT NEEDED\n")
    else:
        # chimera_code can only be 'FAIL' here.
        logger.info("1-Chimera checking Failed")
        runobj.chimera_status_file_h.write("1-CHIMERA ERROR: \n")
        runobj.run_status_file_h.write("1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    sleep(2)

    if chimera_code == 'PASS' and chimera_cluster_code[0] == 'SUCCESS':
        logger.info("Writing Chimeras to deleted files")
        # Place (or replace) any chimeric read_id into the deleted file.
        mychimera.write_chimeras_to_deleted_file()

        mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)

        # Write new cleaned files that remove chimeras if appropriate
        # (implemented in fasta_mbl_pipeline.py).  The cleaned files are
        # renamed to the original names:
        #   lane_key.unique.fa
        #   lane_key.trimmed.fa
        #   lane_key.names
        #   lane_key.abund.fa     -- this file is for the uclust chimera script
        #   lane_key.deleted.txt  -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()