Esempio n. 1
0
def illumina_chimera_after_cluster(runobj):
    """Run the post-cluster steps of the Illumina chimera workflow.

    Builds a ``Chimera`` helper for *runobj* and then, in order, calls
    ``illumina_rm_size_files()``, ``illumina_size_to_freq_in_chimer()`` and
    ``move_out_chimeric()``. The duration of the timed steps and of the
    whole function is reported at debug level.

    Args:
        runobj: run object handed to ``Chimera``.
    """
    total_start = time.time()
    mychimera = Chimera(runobj)

    mychimera.illumina_rm_size_files()

    start = time.time()
    mychimera.illumina_size_to_freq_in_chimer()
    logger.debug("illumina_size_to_freq_in_chimer time: %s",
                 time.time() - start)

    # NOTE: the check_chimeric_stats() step ("If ref > 15% and ratio ref to
    # de-novo > 2 use only de-novo") is currently disabled.

    start = time.time()
    logger.debug("Creating nonchimeric files in %s", mychimera.indir)
    mychimera.move_out_chimeric()
    logger.debug("move_out_chimeric time: %s", time.time() - start)

    # Report the duration of the whole function; previously this line
    # repeated the move_out_chimeric timing.
    logger.debug("illumina_chimera_after_cluster time = %s",
                 time.time() - total_start)
Esempio n. 2
0
def illumina_chimera(runobj):
    """Run the Illumina chimera check for *runobj* and post-process the result.

    Steps, in order:

    * ``call_illumina_sed("from_frequency_to_size")`` on the input files;
    * ``chimera_checking()``;
    * unless ``PipelneUtils.is_local()`` is true, poll
      ``check_if_cluster_is_done()`` every 120 seconds until it is truthy;
    * ``check_if_chimera_dir_empty()`` and ``illumina_rm_size_files()``;
    * ``call_illumina_sed("from_size_to_frequency")``;
    * ``move_out_chimeric()``.

    Timings of the individual steps are logged at debug level.

    Args:
        runobj: run object handed to ``Chimera``.
    """
    pipeline_utils = PipelneUtils()
    checker = Chimera(runobj)

    logger.debug(
        "Preparing input files (replacing \"frequency:\" with \";size=\" and capitalize reads)"
    )

    # NOTE: the illumina_freq_to_size_in_chg() variant of this step is
    # disabled; the sed-based call below is used.
    began = time.time()
    checker.call_illumina_sed("from_frequency_to_size")
    took = time.time() - began
    logger.debug("call_illumina_sed from_frequency_to_size time: %s" % took)

    logger.debug("START chimera checking")
    # NOTE: the separate chimera_checking("ref") call is disabled.
    checker.chimera_checking()

    # todo: use run_until_done_on_cluster from utils
    # Everything below runs after the cluster is done with its work.
    began = time.time()
    submitted_at = pipeline_utils.get_time_now()
    logger.debug("time_before = %s" % submitted_at)
    logger.debug("Waiting for the cluster...")
    while True:
        if pipeline_utils.is_local():
            # Local runs do not poll the cluster: pause briefly and go on.
            sleep(1)
            break

        sleep(120)
        finished = checker.check_if_cluster_is_done(submitted_at)
        logger.debug("cluster_done = %s" % finished)
        if finished:
            break

    took = time.time() - began
    logger.debug("Cluster is done with both chimera checkings in: %s" % took)

    checker.check_if_chimera_dir_empty()
    checker.illumina_rm_size_files()

    # NOTE: the illumina_size_to_freq_in_chimer() variant of this step is
    # disabled; the sed-based call below is used.
    began = time.time()
    checker.call_illumina_sed("from_size_to_frequency")
    took = time.time() - began
    logger.debug("call_illumina_sed from_size_to_frequency time: %s" % took)

    # NOTE: the check_chimeric_stats() step ("If ref > 15% and ratio ref to
    # de-novo > 2 use only de-novo") is disabled.

    began = time.time()
    logger.debug("Creating nonchimeric files in %s" % checker.indir)
    checker.move_out_chimeric()
    took = time.time() - began
    logger.debug("move_out_chimeric time: %s" % took)
Esempio n. 3
0
def _chimera_status_code(result):
    """Translate a chimera step result into 'PASS', 'NOREGION' or 'FAIL'.

    Only ``result[0]`` is inspected; anything other than 'SUCCESS' or
    'NOREGION' counts as a failure.
    """
    status = result[0]
    if status == 'SUCCESS':
        return 'PASS'
    if status == 'NOREGION':
        return 'NOREGION'
    return 'FAIL'


def _write_chimera_status(runobj, message):
    """Write *message* to the chimera and run status files and flush both."""
    for handle in (runobj.chimera_status_file_h, runobj.run_status_file_h):
        handle.write(message)
        # Flush so the status reaches disk even if we exit right afterwards.
        handle.flush()


def chimera(runobj):
    """Run de novo and reference chimera checking and clean the fasta files.

    Calls ``Chimera.chimera_denovo()`` and ``Chimera.chimera_reference()``,
    collects the cluster ids from the steps that report 'SUCCESS' and, when
    ``runobj.use_cluster`` is set, waits for them with
    ``wait_for_cluster_to_finish()``. The outcome is written to both
    ``runobj.chimera_status_file_h`` and ``runobj.run_status_file_h``. On
    overall success the chimeras are written to the deleted files and the
    cleaned fasta/names/uniques/abundance files are regenerated.

    The overall status is that of the reference step, except that a failed
    de novo step always makes the overall status 'FAIL' (it is no longer
    overwritten by the reference result).

    Side effects: opens ``runobj.run_status_file_name`` (append) and
    ``runobj.chimera_status_file_name`` (write) and stores the handles on
    *runobj*; they are flushed after each write but left open here.

    Args:
        runobj: run object; must provide ``run_status_file_name``,
            ``chimera_status_file_name`` and ``use_cluster``.

    Raises:
        SystemExit: if chimera checking failed or the cluster jobs did not
            finish successfully.
    """
    chimera_cluster_ids = []
    logger.debug("Starting Chimera Checker")
    # lets read the trim status file out here and keep those details out of
    # the Chimera code
    idx_keys = get_keys(runobj)
    # Open run STATUS File here.
    # Open in append mode because we may start the run in the middle,
    # say at the gast stage, and don't want to overwrite.
    # If we re-run trimming we'll get two trim status reports.
    runobj.run_status_file_h = open(runobj.run_status_file_name, "a")

    mychimera = Chimera(runobj)
    logger.debug("\nStarting DeNovo Chimera")
    c_den = mychimera.chimera_denovo()
    logger.debug("Ending DeNovo Chimera")
    denovo_code = _chimera_status_code(c_den)
    if denovo_code == 'PASS':
        chimera_cluster_ids.extend(c_den[2])
        # str() each id so non-string ids cannot break the debug message.
        logger.debug("chimera_cluster_ids: %s",
                     ' '.join(map(str, chimera_cluster_ids)))
    logger.debug("Chimera DeNovo Code: %s", denovo_code)

    logger.debug("\nStarting Reference Chimera")
    c_ref = mychimera.chimera_reference()
    chimera_code = _chimera_status_code(c_ref)
    if chimera_code == 'PASS':
        chimera_cluster_ids.extend(c_ref[2])
    if denovo_code == 'FAIL':
        # Do not let the reference result mask a de novo failure.
        chimera_code = 'FAIL'

    runobj.chimera_status_file_h = open(runobj.chimera_status_file_name, "w")
    if chimera_code == 'PASS':
        if runobj.use_cluster:
            chimera_cluster_code = wait_for_cluster_to_finish(
                chimera_cluster_ids)
            if chimera_cluster_code[0] != 'SUCCESS':
                logger.info("3-Chimera checking Failed")
                _write_chimera_status(
                    runobj,
                    "3-CHIMERA ERROR: " + str(chimera_cluster_code[1]) + " " +
                    str(chimera_cluster_code[2]) + "\n")
                sys.exit("3-Chimera checking Failed")
            logger.info("Chimera checking finished successfully")
            _write_chimera_status(runobj, "CHIMERA SUCCESS\n")
        else:
            logger.info("Chimera checking finished without using cluster")
            _write_chimera_status(runobj, "CHIMERA SUCCESS--no cluster\n")
    elif chimera_code == 'NOREGION':
        logger.info("No regions found that need chimera checking")
        _write_chimera_status(runobj, "CHIMERA CHECK NOT NEEDED\n")
    else:
        # _chimera_status_code only yields PASS/NOREGION/FAIL, so this is FAIL.
        logger.info("1-Chimera checking Failed")
        _write_chimera_status(runobj, "1-CHIMERA ERROR: \n")
        sys.exit("1-Chimera Failed")

    # Short pause kept from the original code; its purpose is not documented.
    sleep(2)

    # Every failure path above has already exited, so PASS here also means
    # the cluster wait (if any) succeeded.
    if chimera_code == 'PASS':
        logger.info("Writing Chimeras to deleted files")
        mychimera.write_chimeras_to_deleted_file()

        # should also recreate fasta
        # then read chimera files and place (or replace) any chimeric read_id
        # into the deleted file.

        mymblutils = MBLPipelineFastaUtils(idx_keys, runobj)

        # write new cleaned files that remove chimera if appropriate
        # these are in fasta_mbl_pipeline.py
        # the cleaned files are renamed to the original name:
        # lane_key.unique.fa
        # lane_key.trimmed.fa
        # lane_key.names        --
        # lane_key.abund.fa     -- this file is for the uclust chimera script
        # lane_key.deleted.txt  -- no change in this file
        # THE ORDER IS IMPORTANT HERE:
        mymblutils.write_clean_fasta_file()
        mymblutils.write_clean_names_file()
        mymblutils.write_clean_uniques_file()
        mymblutils.write_clean_abundance_file()