Example #1
0
def search_for_new(config, config_file, post_config_file, fetch_msg,
                   process_msg, store_msg, backup_msg, qseq, fastq,
                   remove_qseq, compress_fastq, casava):
    """Search for any new unreported directories.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if os.path.isdir(dname) and not any(d.startswith(dname) for d in reported):
            # Inject the run name into logging records so that it can be used,
            # for example, in the "Subject" of email notifications.
            def run_setter(record):
                record.extra['run'] = os.path.basename(dname)
            with logbook.Processor(run_setter):
                if casava and _is_finished_dumping_read_1(dname):
                    logger2.info("Generating fastq.gz files for read 1 of %s" % dname)
                    fastq_dir = None
                    _generate_fastq_with_casava(dname, config, r1=True)
                    _post_process_run(dname, config, config_file,
                                      fastq_dir, post_config_file,
                                      fetch_msg=True, process_msg=False,
                                      store_msg=store_msg, backup_msg=False)
    
                if _is_finished_dumping(dname):
                    logger2.info("The instrument has finished dumping on directory %s" % dname)
                    _update_reported(config["msg_db"], dname)
                    _process_samplesheets(dname, config)
                    if qseq:
                        logger2.info("Generating qseq files for %s" % dname)
                        _generate_qseq(get_qseq_dir(dname), config)

                    fastq_dir = None
                    if fastq:
                        logger2.info("Generating fastq files for %s" % dname)
                        fastq_dir = _generate_fastq(dname, config)
                        if remove_qseq:
                            _clean_qseq(get_qseq_dir(dname), fastq_dir)
                        _calculate_md5(fastq_dir)
                        if compress_fastq:
                            _compress_fastq(fastq_dir, config)
                    if casava:
                        logger2.info("Generating fastq.gz files for %s" % dname)
                        _generate_fastq_with_casava(dname, config)

                    _post_process_run(dname, config, config_file,
                                      fastq_dir, post_config_file,
                                      fetch_msg, process_msg, store_msg, backup_msg)

                    # Update the reported database after successful processing
                    _update_reported(config["msg_db"], dname)

                # Re-read the reported database to make sure it hasn't
                # changed while processing.
                reported = _read_reported(config["msg_db"])
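
All of the variants above rely on _read_reported and _update_reported to track which run directories have already been handled. Those helpers are not shown in the excerpts; the sketch below is one plausible implementation, assuming config["msg_db"] names a plain-text file with one reported run per line (directory path followed by a timestamp). The file layout and field order are assumptions, not the project's actual format.

import os
from datetime import datetime

def _read_reported(msg_db):
    """Return the run directories already recorded in the message database.

    Assumes one whitespace-separated record per line with the run directory
    as the first field (hypothetical layout).
    """
    reported = []
    if not os.path.exists(msg_db):
        return reported
    with open(msg_db) as in_handle:
        for line in in_handle:
            if line.strip():
                reported.append(line.split()[0])
    return reported

def _update_reported(msg_db, new_dname):
    """Append a processed run directory plus a timestamp to the message database."""
    with open(msg_db, "a") as out_handle:
        out_handle.write("%s\t%s\n" % (new_dname, datetime.now().isoformat()))
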
Example #2
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Search for any new directories that have not been reported.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if os.path.isdir(dname) and dname not in reported:
            if _is_finished_dumping(dname):
                log.info("The instrument has finished dumping on directory %s" % dname)
                _update_reported(config["msg_db"], dname)

                ss_file = samplesheet.run_has_samplesheet(dname, config)
                if ss_file:
                    out_file = os.path.join(dname, "run_info.yaml")
                    log.info("CSV Samplesheet %s found, converting to %s" %
                             (ss_file, out_file))
                    samplesheet.csv2yaml(ss_file, out_file)
                if qseq:
                    log.info("Generating qseq files for %s" % dname)
                    _generate_qseq(get_qseq_dir(dname), config)
                if fastq:
                    log.info("Generating fastq files for %s" % dname)
                    _generate_fastq(dname, config)

                store_files, process_files = _files_to_copy(dname)

                if process_msg:
                    finished_message(config["msg_process_tag"], dname,
                                     process_files, amqp_config)
                if store_msg:
                    finished_message(config["msg_store_tag"], dname,
                                     store_files, amqp_config)
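
finished_message is not defined in these excerpts. Presumably it publishes a payload describing the finished run to an AMQP broker under the given routing tag; the sketch below uses pika and assumes amqp_config carries "host", "userid", "password" and "exchange" keys, none of which is confirmed by the code above.

import json
import pika

def finished_message(tag, directory, files, amqp_config):
    """Publish a 'run finished' message to an AMQP broker (hypothetical sketch)."""
    payload = json.dumps({"directory": directory, "files": files})
    credentials = pika.PlainCredentials(amqp_config["userid"], amqp_config["password"])
    params = pika.ConnectionParameters(host=amqp_config["host"],
                                       credentials=credentials)
    connection = pika.BlockingConnection(params)
    try:
        channel = connection.channel()
        # Route by tag (e.g. config["msg_process_tag"] or config["msg_store_tag"]).
        channel.basic_publish(exchange=amqp_config["exchange"],
                              routing_key=tag,
                              body=payload)
    finally:
        connection.close()
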
Example #3
0
def search_for_new(config, config_file, post_config_file, process_msg,
                   store_msg, qseq, fastq):
    """Search for any new unreported directories.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if os.path.isdir(dname) and dname not in reported:
            if _is_finished_dumping(dname):
                # Inject the run name into logging records so that it can be
                # used, for example, in the "Subject" of email notifications.
                with logbook.Processor(lambda record: record.extra.__setitem__(
                        'run', os.path.basename(dname))):
                    logger2.info(
                        "The instrument has finished dumping on directory %s" %
                        dname)
                    _update_reported(config["msg_db"], dname)
                    _process_samplesheets(dname, config)
                    if qseq:
                        logger2.info("Generating qseq files for %s" % dname)
                        _generate_qseq(get_qseq_dir(dname), config)
                    fastq_dir = None
                    if fastq:
                        logger2.info("Generating fastq files for %s" % dname)
                        fastq_dir = _generate_fastq(dname, config)
                    _post_process_run(dname, config, config_file, fastq_dir,
                                      post_config_file, process_msg, store_msg)
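
Examples #1, #3 and #4 wrap processing in logbook.Processor so that every log record carries the run name. Isolated from the pipeline, the pattern looks like the sketch below; only the Processor usage mirrors the excerpts, while the handler and its format string are illustrative choices.

import os
import logbook

def demo_run_injection(dname):
    # Pull the injected value out of record.extra when formatting.
    handler = logbook.StderrHandler(
        format_string="[{record.extra[run]}] {record.level_name}: {record.message}")
    log = logbook.Logger("demo")

    def run_setter(record):
        record.extra['run'] = os.path.basename(dname)

    with handler.applicationbound():
        with logbook.Processor(run_setter):
            log.info("The instrument has finished dumping on directory %s" % dname)

Calling demo_run_injection("/runs/110101_HYPOTHETICAL_RUN") would prefix the message with the run name, which is how the run can end up in the "Subject" of an email notification handler.
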
Example #4
0
def search_for_new(config, amqp_config, post_config_file,
                   process_msg, store_msg, qseq, fastq):
    """Search for any new directories that have not been reported.
    """

    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if os.path.isdir(dname) and dname not in reported:
            if _is_finished_dumping(dname):
                # Inject the run name into logging records so that it can be
                # used, for example, in the "Subject" of email notifications.
                with logbook.Processor(lambda record: record.extra.__setitem__('run', os.path.basename(dname))):
                    log.info("The instrument has finished dumping on directory %s" % dname)
                    _update_reported(config["msg_db"], dname)
                    _process_samplesheets(dname, config)
                    if qseq:
                        log.info("Generating qseq files for %s" % dname)
                        _generate_qseq(get_qseq_dir(dname), config)
                    fastq_dir = None
                    if fastq:
                        log.info("Generating fastq files for %s" % dname)
                        fastq_dir = _generate_fastq(dname, config)
                    _post_process_run(dname, config, amqp_config,
                                      fastq_dir, post_config_file,
                                      process_msg, store_msg)
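
_is_finished_dumping gates every variant above but is not included in the excerpts. A common approach is to test for indicator files that the instrument writes when it stops copying data; the file names in this sketch are assumptions and differ between instruments and RTA versions.

import os

def _is_finished_dumping(dname):
    """Has the sequencer finished writing to this run directory? (illustrative)"""
    indicators = ["RTAComplete.txt",
                  "Basecalling_Netcopy_complete.txt"]
    return any(os.path.exists(os.path.join(dname, f)) for f in indicators)
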
Example #5
0
def search_for_new(config, amqp_config, process_msg, store_msg, qseq, fastq):
    """Search for any new directories that have not been reported.
    """
    reported = _read_reported(config["msg_db"])
    for dname in _get_directories(config):
        if os.path.isdir(dname) and dname not in reported:
            if _is_finished_dumping(dname):
                log.info(
                    "The instrument has finished dumping on directory %s" %
                    dname)
                _update_reported(config["msg_db"], dname)

                ss_file = samplesheet.run_has_samplesheet(dname, config)
                if ss_file:
                    out_file = os.path.join(dname, "run_info.yaml")
                    log.info("CSV Samplesheet %s found, converting to %s" %
                             (ss_file, out_file))
                    samplesheet.csv2yaml(ss_file, out_file)
                if qseq:
                    log.info("Generating qseq files for %s" % dname)
                    _generate_qseq(get_qseq_dir(dname), config)
                if fastq:
                    log.info("Generating fastq files for %s" % dname)
                    _generate_fastq(dname, config)

                store_files, process_files = _files_to_copy(dname)

                if process_msg:
                    finished_message(config["msg_process_tag"], dname,
                                     process_files, amqp_config)
                if store_msg:
                    finished_message(config["msg_store_tag"], dname,
                                     store_files, amqp_config)
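
samplesheet.csv2yaml (Examples #2 and #5) converts the instrument's CSV samplesheet into the run_info.yaml consumed downstream. Its real output schema is not visible here; this sketch only shows the mechanical CSV-to-YAML step, dumping one mapping per row with whatever column headers the file provides, and should not be read as the pipeline's actual conversion.

import csv
import yaml

def csv2yaml(in_file, out_file):
    """Convert a CSV samplesheet into a YAML file (illustrative only)."""
    with open(in_file) as in_handle:
        rows = [dict(row) for row in csv.DictReader(in_handle)]
    with open(out_file, "w") as out_handle:
        yaml.safe_dump(rows, out_handle, default_flow_style=False)
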
Example #6
0
def process_second_read(*args, **kwargs):
    """Processing to be performed after all reads have been sequences
    """
    dname, config = args[0:2]
    logger2.info("The instrument has finished dumping on directory %s" % dname)
    
    utils.touch_indicator_file(os.path.join(dname,"second_read_processing_started.txt"))
    _update_reported(config["msg_db"], dname)
    fastq_dir = None
    
    # Do bcl -> fastq conversion and demultiplexing using CASAVA 1.8+
    if kwargs.get("casava", False):
        logger2.info("Generating fastq.gz files for {:s}".format(dname))
        _generate_fastq_with_casava(dname, config)
    else:
        _process_samplesheets(dname, config)
        if kwargs.get("qseq", True):
            logger2.info("Generating qseq files for {:s}".format(dname))
            _generate_qseq(get_qseq_dir(dname), config)

        if kwargs.get("fastq", True):
            logger2.info("Generating fastq files for {:s}".format(dname))
            fastq_dir = _generate_fastq(dname, config)
            if kwargs.get("remove_qseq", False):
                _clean_qseq(get_qseq_dir(dname), fastq_dir)
            _calculate_md5(fastq_dir)
            
    # Call the post_processing method
    loc_args = args + (fastq_dir,)
    _post_process_run(*loc_args,
                      fetch_msg=kwargs.get("fetch_msg", True),
                      process_msg=kwargs.get("process_msg", True),
                      store_msg=kwargs.get("store_msg", True),
                      backup_msg=kwargs.get("backup_msg", False))

    # Update the reported database after successful processing
    _update_reported(config["msg_db"], dname)
    utils.touch_indicator_file(os.path.join(dname,"second_read_processing_completed.txt"))
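
utils.touch_indicator_file marks the start and end of second read processing so that reruns can tell whether a directory was already picked up. The helper is not shown; a minimal sketch, assuming it just drops a timestamp into the named file, could be:

import os
from datetime import datetime

def touch_indicator_file(path):
    """Create an indicator file with a timestamp, unless it already exists."""
    if not os.path.exists(path):
        with open(path, "w") as out_handle:
            out_handle.write("%s\n" % datetime.now().isoformat())
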
Example #7
0
                with open(rsync_out, 'a') as ro, open(rsync_err, 'a') as rerr:
                    try:
                        ro.write("-----------\n{}\n".format(" ".join(cl)))
                        rerr.write("-----------\n{}\n".format(" ".join(cl)))
                        subprocess.check_call(cl, stdout=ro, stderr=rerr)
                    except subprocess.CalledProcessError as e:
                        logger2.error("rsync transfer of Unaligned results FAILED: {}".format(e))


    else:
        _process_samplesheets(dname, config)
        if kwargs.get("qseq", True):
            logger2.info("Generating qseq files for {:s}".format(dname))
            _generate_qseq(get_qseq_dir(dname), config)

        if kwargs.get("fastq", True):
            logger2.info("Generating fastq files for {:s}".format(dname))
            fastq_dir = _generate_fastq(dname, config)
            if kwargs.get("remove_qseq", False):
                _clean_qseq(get_qseq_dir(dname), fastq_dir)

            _calculate_md5(fastq_dir)

    # Call the post_processing method
    loc_args = args + (fastq_dir,)
    _post_process_run(*loc_args, **{"fetch_msg": kwargs.get("fetch_msg", False),
                                    "process_msg": kwargs.get("process_msg", False),
                                    "store_msg": kwargs.get("store_msg", False),
                                    "backup_msg": kwargs.get("backup_msg", False),