def get_started_outdirs_from_db(testing=True, win=None):
    """FIXME:add-doc"""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)

    db = connection.gisds.runcomplete

    if win:
        epoch_present, epoch_back = generate_window(win)
        results = db.find({
            "analysis.Status": "STARTED",
            "timestamp": {
                "$gt": epoch_back,
                "$lt": epoch_present
            }
        })
    else:
        results = db.find({"analysis.Status": "STARTED"})

    # results is a pymongo.cursor.Cursor which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs", results.count())
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def instantiate_query(args):
    """
	Instantiates MongoDB query dictionary object
	"""
    instance = {}
    if args.status:
        instance["analysis.Status"] = args.status
    if args.mux:
        instance["analysis.per_mux_status.mux_id"] = args.mux
    if args.run:
        instance["run"] = {"$regex": "^" + args.run}
    if args.win:
        epoch_present, epoch_initial = generate_window(args.win)
    else:
        epoch_present, epoch_initial = generate_window(7)
    instance["timestamp"] = {"$gt": epoch_initial, "$lt": epoch_present}
    return instance
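
A minimal usage sketch for instantiate_query(); the argparse flags simply mirror the attributes the function reads and are an assumption about the surrounding CLI (generate_window() from the sketch above is assumed to be available):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--status')
parser.add_argument('--mux')
parser.add_argument('--run')
parser.add_argument('-w', '--win', type=int)
args = parser.parse_args(['--status', 'STARTED', '--win', '7'])

query = instantiate_query(args)
# query can be handed straight to pymongo, e.g.:
# connection.gisds.runcomplete.find(query)
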
def runs_from_db(connection, testing, win=14):
    """Get the runs from pipeline_run collections"""
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"run" : {"$regex" : "^NG00"},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("record: %s", record)
        if not record.get('analysis'):
            continue
        # Check if Novogene run_mode
        _, run_id, _ = get_machine_run_flowcell_id(run_number)
        if testing:
            rest_url = rest_services['run_details']['testing'].replace("run_num", run_id)
        else:
            rest_url = rest_services['run_details']['production'].replace("run_num", run_id)
        response = requests.get(rest_url)
        if response.status_code != requests.codes.ok:
            response.raise_for_status()
        rest_data = response.json()
        sg10k_lib_list = get_sg10_lib_list(rest_data)
        run_records = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.", \
                                    run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if not os.path.exists(out_dir):
                    logger.warning("Direcotry does not exists %s", out_dir)
                    continue
                downstream_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                    mux_status.get('DownstreamSubmission') == "TODO":
                    mux_info = (run_number, downstream_id, analysis_id, out_dir)
                    if mux_id in run_records:
                        logger.info("MUX %s from %s has been analyzed more than 1 time \
                            succeessfully, please check", mux_id, run_number)
                        del run_records[mux_id]
                    elif mux_id in sg10k_lib_list:
                        run_records[mux_id] = mux_info
        if run_records:
            yield run_records
Example #5
def check_mongo():
    """
    Instantiates MongoDB database object
    For Test Server, testing == True
    For Production Server, testing == False
    """
    warnings = ""
    epoch_present, epoch_window = generate_window(MAX_WINDOW)
    epoch_present, epoch_started = generate_window(MAX_RUN)
    del epoch_present

    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis.Status"] = "STARTED"
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        #        PrettyPrinter(indent=2).pprint(record)
        if record["analysis"][-1]["Status"] != "SUCCESS":
            warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                         str(record["run"]) + "\n")
            count_warnings += 1
    if count_warnings > 0:
        warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")

    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis"] = {"$exists": False}
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        #        PrettyPrinter(indent=2).pprint(record)
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(record["run"]) + "\n")
        count_warnings += 1
    if count_warnings > 0:
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")

    return warnings
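
MAX_WINDOW and MAX_RUN are module-level day counts that are not defined in this snippet. A hedged sketch of how check_mongo() might be driven, with assumed values and the send_mail() helper used elsewhere in these examples:

# Assumed day counts; the real values are not shown in the snippet.
MAX_WINDOW = 30   # how far back to query, in days
MAX_RUN = 3       # how long a run may legitimately stay in STARTED, in days

if __name__ == "__main__":
    report = check_mongo()
    if report:
        send_mail("bcl2fastq runs needing attention", report, toaddr='rpd', ccaddr=None)
    else:
        print("No warnings")
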
Example #7
def get_sample_info(child, rows, mux_analysis_list, mux_id, fastq_data_dir, \
    run_num_flowcell, sample_info):
    """Collects sample info from ELM JOSN
    """
    sample_cfg = {}
    site = get_site()
    ctime, _ = generate_window(1)
    _, _, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    mux_analysis_list.add(mux_id)
    sample_id = child['libraryId']
    sample_cfg['requestor'] = rows['requestor']
    sample_cfg['ctime'] = ctime
    sample_cfg['site'] = site
    try:
        sample_cfg['pipeline_name'] = legacy_mapper['pipeline_mapper'][
            child['Analysis']]
    except KeyError as e:
        sample_cfg['pipeline_name'] = child['Analysis']
        logger.warning(str(e) + " Pipeline not mapped to newer version")
        return sample_info
    pipeline_version = get_pipeline_version(child['pipeline_version'] \
        if 'pipeline_version' in rows else None)
    sample_cfg['pipeline_version'] = pipeline_version
    #sample_cfg['pipeline_params'] = 'params'
    ref_info = get_reference_info(child['Analysis'], \
        sample_cfg['pipeline_version'], child['genome'])
    if not ref_info:
        logger.info("ref_info not available")
        return sample_info
    cmdline_info = get_cmdline_info(child)
    sample_cfg['references_cfg'] = ref_info
    if cmdline_info:
        sample_cfg['cmdline'] = cmdline_info

    readunits_dict = {}
    status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'],\
        rows['laneId'])
    if status:
        ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],\
            rows['laneId'], None, fq1, fq2)
        k = key_for_readunit(ru)
        readunits_dict[k] = dict(ru._asdict())
        sample_cfg['readunits'] = readunits_dict
        if sample_info.get(sample_id, {}).get('readunits', {}):
            sample_info[sample_id]['readunits'].update(readunits_dict)
        else:
            sample_info[sample_id] = sample_cfg
    return sample_info
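
ReadUnit and key_for_readunit() come from the surrounding pipeline library and are not shown. Based on how they are called above (seven positional fields, ru._asdict()), a plausible sketch is the following; the field names themselves are assumptions:

from collections import namedtuple

# Field names inferred from the positional arguments used above; treat them
# as placeholders rather than the library's real definition.
ReadUnit = namedtuple('ReadUnit',
                      ['run_id', 'flowcell_id', 'library_id',
                       'lane_id', 'rg_id', 'fq1', 'fq2'])

def key_for_readunit(ru):
    """Hypothetical sketch: derive a stable dictionary key for a read unit."""
    return "{}.{}.{}".format(ru.run_id, ru.library_id, ru.lane_id)
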
Example #8
def runs_from_db(testing=True, win=34):
    """Get the runs from pipeline_run collections"""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(win)
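    # NOTE: epoch_present/epoch_back are computed above but unused; the query
    # below filters on hardcoded millisecond ctime bounds, which looks like a
    # debugging leftover.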
    results = db.find({"runs": {"$exists": True},
                       "ctime": {"$gt": 1470127013000, "$lt": 1470127093000}})
    # results is a pymongo.cursor.Cursor which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        for runs in record['runs']:
            if runs["status"] == "STARTED":
                test = (record['_id'], record['out_dir'], runs['start_time'])
                yield test
Example #9
def get_outdirs_from_db(testing=True, win=14):
    """FIXME:add-doc"""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)

    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)

    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def runs_from_db(db, mail_to, ccaddr, win=34):
    """Get the runs from pipeline_run collections"""
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis" : {"$exists": False},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs for last %s days", results.count(), win)
    mail = False
    subject = "Runs with missing ELM information"
    body = "Dear NGSP, " + "\n"
    body += subject + " for the following runs. Please include in the ELM." + "\n"
    for record in results:
        logger.debug("record: %s", record)
        _, runid, _ = get_machine_run_flowcell_id(record.get('run'))
        rest_data = get_rest_data(runid)
        if not rest_data.get('runId'):
            body += record.get('run')+ "\n"
            mail = True
    if mail:
        send_mail(subject, body, toaddr=mail_to, ccaddr=ccaddr)
def runs_from_db(db, days=75, win=34):
    """Get the runs from pipeline_run collections"""
    epoch_present, epoch_back = generate_window(win)
    results = db.find({
        "run": {
            "$regex": "^NG00"
        },
        "raw-delete": {
            "$exists": False
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    LOGGER.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        LOGGER.debug("record: %s", record)
        if not record.get('run'):
            LOGGER.critical("run is missing for DB-id %s", record['_id'])
            continue
        runid_and_flowcellid = record['run']
        if 'analysis' not in record:
            continue
        last_analysis = record['analysis'][-1]
        status = last_analysis.get("Status")
        end_time = last_analysis.get("end_time")
        if not status or not end_time:
            continue
        analysis_epoch_time = isoformat_to_epoch_time(end_time + "+08:00")
        epoch_time_now = isoformat_to_epoch_time(generate_timestamp() +
                                                 "+08:00")
        rd = relative_epoch_time(epoch_time_now, analysis_epoch_time)
        relative_days = rd.months * 30 + rd.days
        if status == 'SUCCESS' and relative_days > days:
            yield runid_and_flowcellid
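
isoformat_to_epoch_time() and relative_epoch_time() are likewise external helpers. Given the .months/.days access above, a plausible sketch, assuming python-dateutil and standard ISO-8601 strings, is:

from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta

def isoformat_to_epoch_time(iso_str):
    """Hypothetical sketch: parse an ISO-8601 timestamp with offset
    (e.g. '2016-08-02T10:00:00+08:00') into epoch seconds."""
    return datetime.fromisoformat(iso_str).timestamp()

def relative_epoch_time(epoch_now, epoch_then):
    """Hypothetical sketch: return a dateutil relativedelta between two epoch
    times, so callers can read .months and .days as done above."""
    return relativedelta(datetime.fromtimestamp(epoch_now, tz=timezone.utc),
                         datetime.fromtimestamp(epoch_then, tz=timezone.utc))
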
Example #12
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    #Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for analysis in record['analysis']:
            analysis_id = analysis.get("analysis_id")
            out_dir = analysis.get("out_dir")

            #Check if bcl2Fastq is completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                    continue

                #Check if downstream analysis has been started
                if not os.path.exists(os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    #generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    try:
                        with open(os.path.join(out_dir, "config_casava-1.8.2.txt"), "w") as f:
                            subprocess.check_call(config_cmd, stderr=subprocess.STDOUT, stdout=f)
                    except subprocess.CalledProcessError as e:
                        logger.fatal("The following command failed with return code %s: %s",
                                     e.returncode, ' '.join(config_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #Generate and Submit BWA and RNAseq mapping pipeline
                    if os.path.exists(os.path.join(out_dir, "samplesheet.csv")):
                        dirs = os.path.join(out_dir, "out")
                        samplesheet = os.path.join(out_dir, "samplesheet.csv")
                        submissionlog = os.path.join(out_dir, SUBMISSIONLOG)
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            dirs, BWA, run_number, out_dir, samplesheet, submissionlog)
                        cmd += " && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            RNA, run_number, out_dir, samplesheet, submissionlog)
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            #Remove config txt
                            os.remove(os.path.join(out_dir, "config_casava-1.8.2.txt"))
                        else:
                            try:
                                # write the submission command into the submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG), 'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal("The following command failed with return code %s: %s",
                                             e.returncode, ' '.join(cmd))
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                #send_status_mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id, os.path.join(out_dir, LOG_DIR_REL, "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1

                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s", run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id, os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number, out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
Example #13
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n',
                        "--no-mail",
                        action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():  # v is run
        results = db.find({"run": v})
        for record in results:
            if not 'analysis' in record:
                continue
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue

            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get('Status') == "SUCCESS":  # FIXME what if key is missing?
                            mux_id = d['mux_id']

                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                     .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"

                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"

            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)

            elif status == 'STARTED':
                analysis_id = last_analysis.get("analysis_id")
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id +
                                                              "+08:00")
                run_completion_time = timestamp / 1000
                rd = relative_epoch_time(run_completion_time,
                                         analysis_epoch_time)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"

    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print(
            "Skipping sending of email with subject '{}' and following body:".
            format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
Example #14
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status" : {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        print(run_number)
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.", \
                                    run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info("MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                                mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                    mux_status.get('DownstreamSubmission', None) == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            # Check if MUX has been analyzed successfully more than once
            if len(mux_list_success) > 1:
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)
    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict = get_lib_details(run_num_flowcell, mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline_paramas_dict is empty for run num %s", run_num_flowcell)
            continue
        for lib, lib_info in pipeline_params_dict.items():
            readunits_list = list()
            for outer_key in lib_info:
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
            lib_info['samples'] = {}
            lib_info['samples'][lib] = readunits_list
            if args.dry_run:
                logger.warning("Skipping job delegation for %s", \
                    lib)
                continue
            res = mongodb_insert_libjob(lib_info, connection)
            if not res:
                logger.critical("Skipping rest of analysis job submission" \
                     "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s", \
                    lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id, \
                    lib_info.ctime, connection)
                break
        if not args.dry_run and update_status:
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                logger.info("Update mongoDb runComplete for %s and runnumber is %s" \
                    "and id is %s and analysis_id %s", run_num_flowcell, mux_id, \
                    insert_id, analysis_id)
                res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id, \
                    insert_id, connection)
                if not res:
                    logger.critical("Skipping rest of analysis job submission for %s" \
                        "from %s", mux_id, run_num_flowcell)
                    subject = "Downstream delegator failed job submission for {}" \
                        .format(mux_id)
                    if args.testing:
                        subject += " (testing)"
                    body = "Downstream delegator failed to insert job submission for" \
                        "{}".format(mux_id)
                    send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                    update_status = False
                    break
    connection.close()
Example #15
def main():
    """main function"""

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-e',
                        "--wrapper-args",
                        nargs="*",
                        help="Extra arguments for bcl2fastq wrapper"
                        " (prefix leading dashes with X, e.g. X-n for -n)")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    bcl2fastq_wrapper = os.path.join(os.path.dirname(sys.argv[0]),
                                     "bcl2fastq.py")

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete

    # db query for jobs that are yet to be analysed in the epoch window
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "analysis": {
            "$exists": 0
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    # results is a pymongo.cursor.Cursor which works like an iterator, i.e. don't use len()
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("Processing record %s", record)
        cmd = [bcl2fastq_wrapper, "-r", run_number, "-v"]
        if args.testing:
            cmd.append("-t")
        if args.wrapper_args:
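            # e.g. ['X-n', 'X--foo'] -> ['-n', '--foo']; the leading X keeps
            # argparse from treating forwarded flags as this script's own
            # (example values here are hypothetical)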
            cmd.extend([x.lstrip('X') for x in args.wrapper_args])
        if args.dry_run:
            logger.warning("Skipped following run: %s", ' '.join(cmd))
            continue
        else:
            try:
                logger.info("Executing: %s", ' '.join(cmd))
                res = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                if res:
                    logger.info("bcl2fastq wrapper returned:\n%s",
                                res.decode().rstrip())
            except subprocess.CalledProcessError as e:
                logger.critical(
                    "The following command failed with"
                    " return code %s: %s", e.returncode, ' '.join(cmd))
                logger.critical("Full error message was: %s", e.stdout)
                if 'commlib error' in e.stdout.decode():
                    logger.critical(
                        "Looks like a qmaster problem (commlib error). Exiting"
                    )
                    break
                else:
                    logger.critical("Will keep going")
                # continue so that a failed run doesn't count,
                # i.e. args.break_after_first shouldn't be triggered
                continue

        if args.break_after_first:
            logger.info("Stopping after first sequencing run")
            break

    # close the connection to MongoDB
    connection.close()
    logger.info("Successful program exit")
def start_data_transfer(connection, mux, mux_info, site, mail_to):
    """ Data transfer from source to destination
    """
    run_number, downstream_id, analysis_id, bcl_path = mux_info
    fastq_src = os.path.join(bcl_path, "out", "Project_"+mux)
    bcl_dir = os.path.basename(bcl_path)
    if is_devel_version():
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \
            mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \
            mux, mux +"_multisample.yaml")
    else:
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \
            mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \
            mux, mux+ "_multisample.yaml")
    rsync_cmd = 'rsync -va %s %s' % (fastq_src, fastq_dest)
    if not os.path.exists(fastq_dest):
        try:
            os.makedirs(fastq_dest)
            logger.info("data transfer started for %s from %s", mux, run_number)
            st_time = generate_timestamp()
            update_downstream_mux(connection, run_number, analysis_id, downstream_id, \
                "COPYING_" + st_time)
            _ = subprocess.check_output(rsync_cmd, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            body = "The following command failed with return code {}: {}". \
                format(e.returncode, rsync_cmd)
            subject = "{} from {}: SG10K data transfer ({}) failed".format(mux, run_number, site)
            logger.fatal(body)
            logger.fatal("Output: %s", e.output.decode())
            logger.fatal("Exiting")
            #Send_mail
            send_mail(subject, body, toaddr=mail_to, ccaddr=None)
            # Flag the partially rsynced data as ERROR in the DB
            update_downstream_mux(connection, run_number, analysis_id, downstream_id, "ERROR")
            sys.exit(1)
        #Update the mongoDB for successful data transfer
        sample_info = get_mux_details(run_number, mux, fastq_dest)
        #Touch rsync complete file
        with open(os.path.join(fastq_dest, "rsync_complete.txt"), "w") as f:
            f.write("")
        with open(yaml_dest, 'w') as fh:
            yaml.dump(dict(sample_info), fh, default_flow_style=False)
        job = {}
        job['sample_cfg'] = {}
        for outer_key, outer_value in sample_info.items():
            ctime, _ = generate_window(1)
            job['sample_cfg'].update({outer_key:outer_value})
            job['site'] = site
            job['pipeline_name'] = 'custom/SG10K'
            job['pipeline_version'] = novogene_conf['PIPELINE_VERSION']
            job['ctime'] = ctime
            job['requestor'] = 'userrig'
            if is_devel_version():
                novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['devel'], \
                    mux)
            else:
                novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['production'],
                    mux)
            job['out_dir_override'] = novogene_outdir
        logger.info("Data transfer completed successfully for %s from %s", mux, run_number)
        job_id = insert_muxjob(connection, mux, job)
        update_downstream_mux(connection, run_number, analysis_id, downstream_id, job_id)
        subject = "{} from {}: SG10K data transfer ({}) completed".format(mux, run_number, site)
        body = "Data transfer successfully completed for {} from {}".format(mux, run_number)
        send_mail(subject, body, toaddr=mail_to, ccaddr=None)
        return True
    else:
        logger.critical("Mux %s from %s directory already exists under %s", mux, \
            run_number, fastq_dest)
        return False
Example #17
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    parser.add_argument('-s', "--site", help="site information")
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    if not args.site:
        site = 'NSCC'
    else:
        site = args.site
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "run": {
            "$exists": False
        },
        "site": site,
        "ctime": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs to start analysis", results.count())
    for record in results:
        start_analysis(record, args.testing, args.dry_run)
Example #18
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dryrun",
                        action='store_true',
                        help="Don't run anything")
    default = 84
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    default = 60
    parser.add_argument(
        '-d',
        '--days',
        type=int,
        default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    default = 60
    parser.add_argument(
        '-r',
        '--tardays',
        type=int,
        default=default,
        help="tar ball not older than days(default {})".format(default))
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "run": {
            "$regex": "^((?!NG00).)*$"
        },
        "raw-delete": {
            "$exists": False
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    LOGGER.info("Looping through %s jobs", results.count())
    trigger = 0
    for record in results:
        try:
            run_num = record['run']
        except KeyError:
            run_num = None
        if not record.get('deletion'):
            #Check run_status
            res = check_run_status(record, args.days)
            if res:
                LOGGER.info("Create tar ball %s ", run_num)
                if args.dryrun:
                    LOGGER.warning("Skipping Create tar ball %s ", run_num)
                    continue
                create_run_tar(db, run_num)
                trigger = 1
        elif record['deletion'].get('tar'):
            res = check_tar_status_and_delete(db,
                                              record,
                                              args.tardays,
                                              dryrun=args.dryrun)
            if res:
                trigger = 1
        if args.break_after_first and trigger == 1:
            LOGGER.info("Stopping after first run")
            break
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())

    for record in results:
        run_number = record['run']

        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']

            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue

            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning(
                        "mux_status is None or incomplete for run %s analysis %s."
                        " Requires fix in DB. Skipping entry for now.",
                        run_number, analysis_id)
                    continue

                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping SRA and STATS uploading",
                        mux_status['mux_id'], run_number)
                    continue

                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']

                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with StatsSubmission %s and ArchiveSubmission %s",
                        analysis_id, run_number, mux_status['mux_id'],
                        mux_status.get('StatsSubmission', None),
                        mux_status.get('ArchiveSubmission', None))
                    continue

                # Call STATS upload
                #
                if mux_status.get('StatsSubmission', None) == "TODO":
                    logger.info(
                        "Stats upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    StatsSubmission = "analysis.{}.per_mux_status.{}.StatsSubmission".format(
                        analysis_count, mux_count)

                    stats_upload_script_cmd = [
                        stats_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        stats_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(stats_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        StatsSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(stats_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Resetting to TODO")
                        StatsSubmission_status = "TODO"
                    try:
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    StatsSubmission: StatsSubmission_status,
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1

                # Call FASTQ upload
                #
                if mux_status.get('ArchiveSubmission', None) == "TODO":
                    logger.info(
                        "SRA upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    ArchiveSubmission = "analysis.{}.per_mux_status.{}.ArchiveSubmission".format(
                        analysis_count, mux_count)
                    archive_upload_script_cmd = [
                        archive_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        archive_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(archive_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        ArchiveSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(archive_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Resetting to TODO")
                        ArchiveSubmission_status = "TODO"
                    #update mongoDB
                    try:
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    ArchiveSubmission: ArchiveSubmission_status
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1

    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
Example #20
def get_lib_details(run_num_flowcell, mux_list, testing):
    """Lib info collection from ELM per run
    """
    _, run_num, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    # Call rest service to get component libraries
    if testing:
        print(run_num)
        rest_url = rest_services['run_details']['testing'].replace("run_num", run_num)
        logger.info("development server")
    else:
        rest_url = rest_services['run_details']['production'].replace("run_num", run_num)
        logger.info("production server")
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    rest_data = response.json()
    logger.debug("rest_data from %s: %s", rest_url, rest_data)
    sample_info = {}
    if rest_data.get('runId') is None:
        logger.info("JSON data is empty for run num %s", run_num)
        return sample_info
    for mux_id, out_dir in mux_list:
        fastq_data_dir = os.path.join(out_dir[0], 'out', "Project_"+mux_id)
        if os.path.exists(fastq_data_dir):
            for rows in rest_data['lanes']:
                if mux_id in rows['libraryId']:
                    if "MUX" in rows['libraryId']:
                        for child in rows['Children']:
                            if child['Analysis'] != "Sequence only":
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                sample = child['libraryId']
                                sample_dict['requestor'] = rows['requestor']
                                sample_dict['ctime'] = ctime
                                sample_dict['pipeline_name'] = child['Analysis']
                                if 'pipeline_version' in child:
                                    sample_dict['pipeline_version'] = child['pipeline_version']
                                else:
                                    sample_dict['pipeline_version'] = None
                                sample_dict['pipeline_params'] = 'params'
                                sample_dict['site'] = get_site()
                                out_dir = get_downstream_outdir(sample_dict['requestor'], \
                                    sample_dict['pipeline_version'], sample_dict['pipeline_name'])
                                sample_dict['out_dir'] = out_dir
                                readunits_dict = {}
                                status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'],\
                                    rows['laneId'])
                                if status:
                                    ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],\
                                        rows['laneId'], None, fq1, fq2)
                                    k = key_for_read_unit(ru)
                                    readunits_dict[k] = dict(ru._asdict())
                                    sample_dict['readunits'] = readunits_dict
                                    if sample_info.get(sample, {}).get('readunits'):
                                        sample_info[sample]['readunits'].update(readunits_dict)
                                    else:
                                        sample_info[sample] = sample_dict
                    else:
                        if rows['Analysis'] != "Sequence only":
                            sample = rows['libraryId']
                            status, fq1, fq2 = check_fastq(fastq_data_dir, rows['libraryId'], \
                                rows['laneId'])
                            if status:
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                readunits_dict = {}
                                ru = ReadUnit(run_num_flowcell, flowcellid, rows['libraryId'], \
                                    rows['laneId'], None, fq1, fq2)
                                k = key_for_read_unit(ru)
                                readunits_dict[k] = dict(ru._asdict())
                                sample_dict['readunits'] = readunits_dict
                                sample_info[sample] = sample_dict
    return sample_info
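
The function above relies on a check_fastq() helper that is not shown in this
example. A hypothetical sketch of what it might look like, assuming a
bcl2fastq-style "Sample_<libraryId>" directory layout and file-name pattern
(both assumptions, not taken from the actual pipeline):

import glob
import os

def check_fastq(fastq_data_dir, library_id, lane_id):
    """Return (found, fq1, fq2) for a library/lane under the demux output dir."""
    pattern = os.path.join(fastq_data_dir, "Sample_" + library_id,
                           "{}_*_L{:03d}_R1_*.fastq.gz".format(library_id, int(lane_id)))
    fq1_matches = sorted(glob.glob(pattern))
    if not fq1_matches:
        return False, None, None
    fq1 = fq1_matches[0]
    fq2 = fq1.replace("_R1_", "_R2_")
    return True, fq1, fq2 if os.path.exists(fq2) else None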
Example #21
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis" : {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue

            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue

                if mux_status.get('email_sent', None):
                    continue

                # for all others: send email and update db

                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']

                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)

                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue

                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")# mail_to already set

                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)

                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)


    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
Example #22
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n', "--no-mail", action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():# v is run
        results = db.find({"run": v})
        for record in results:
            if 'analysis' not in record:
                continue
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue

            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get('Status') == "SUCCESS":# FIXME what if key is missing?
                            mux_id = d['mux_id']

                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                     .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"

                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"

            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)

            elif status == 'STARTED':
                analysis_id = last_analysis.get("analysis_id")
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id+"+08:00")
                run_completion_time = timestamp/1000
                dt1 = datetime.datetime.fromtimestamp(run_completion_time)
                dt2 = datetime.datetime.fromtimestamp(analysis_epoch_time)
                rd = dateutil.relativedelta.relativedelta(dt1, dt2)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"

    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print("Skipping sending of email with subject '{}' and following body:".format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
Example #23
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-n',
        "--dryrun",
        action='store_true',
        help="Don't actually update DB (best used in conjunction with -v -v)")
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test-server. Don't do anything")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Exiting")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    #LOGGER.info("Database connection established")
    dbcol = connection.gisds.pipeline_runs
    site = get_site()
    epoch_now, epoch_then = generate_window(args.win)
    cursor = dbcol.find({
        "ctime": {
            "$gt": epoch_then,
            "$lt": epoch_now
        },
        "site": site
    })
    LOGGER.info("Looping through {} jobs".format(cursor.count()))
    for job in cursor:
        dbid = job['_id']
        # only set here to avoid code duplication below
        try:
            out_dir = job['execution']['out_dir']
        except KeyError:
            out_dir = None

        # no execution dict means start a new analysis
        if not job.get('execution'):
            LOGGER.info('Job {} to be started'.format(dbid))
            # determine out_dir and set in DB
            # out_dir_override will take precedence over generating out_dir with get_downstream_outdir function
            if job.get('out_dir_override'):
                out_dir = job.get('out_dir_override')
                if os.path.exists(out_dir):
                    mux = os.path.basename(out_dir)
                    if not args.dryrun:
                        LOGGER.critical(
                            "Analysis for {} already exists under {}. Please start the analysis manually"
                            .format(mux, out_dir))
                        res = dbcol.update_one(
                            {"_id": ObjectId(dbid)},
                            {"$set": {
                                "execution.status": "MANUAL"
                            }})
                        assert res.modified_count == 1, (
                            "Modified {} documents instead of 1".format(
                                res.modified_count))
                        sys.exit(1)
                #assert not os.path.exists(out_dir), ("Directory already exists {}").format(out_dir)
            else:
                out_dir = get_downstream_outdir(job['requestor'],
                                                job['pipeline_name'],
                                                job['pipeline_version'])
            # Note, since execution (key) exists, accidental double
            # starts are prevented even before start time etc is
            # logged via flagfiles.  No active logging here so that
            # flag files logging just works.

            if args.dryrun:
                LOGGER.info("Skipping dry run option")
                continue
            status = start_cmd_execution(job, site, out_dir, args.testing)
            if status:
                res = dbcol.update_one(
                    {"_id": ObjectId(dbid)},
                    {"$set": {
                        "execution.out_dir": out_dir
                    }})
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
            else:
                LOGGER.warning("Job {} could not be started".format(dbid))
        elif job['execution'].get('status') == "MANUAL":
            continue
        elif list_starterflags(out_dir):  # out_dir cannot be None, it is part of the execution dict
            LOGGER.info(
                'Job {} in {} started but not yet logged as such in DB'.format(
                    dbid, out_dir))

            matches = list_starterflags(out_dir)
            assert len(matches) == 1, (
                "Got several starter flags in {}".format(out_dir))
            sflag = StarterFlag(matches[0])
            assert sflag.dbid == str(dbid)
            set_started(dbcol,
                        sflag.dbid,
                        str(sflag.timestamp),
                        dryrun=args.dryrun)
            os.unlink(sflag.filename)

        elif job['execution'].get('status') in ['STARTED', 'RESTART']:
            LOGGER.info(
                'Job %s in %s set as re|started so checking on completion',
                dbid, out_dir)
            set_completion_if(dbcol, dbid, out_dir, dryrun=args.dryrun)

        else:
            # job complete
            LOGGER.debug('Job %s in %s should be completed', dbid, out_dir)
    LOGGER.info("Successful program exit")
Example #24
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "analysis.per_mux_status": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.", \
                                    run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                        mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                    mux_status.get('DownstreamSubmission') == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            #Check if MUX has been analyzed more than 1 time successfully
            if len(mux_list_success) > 1:
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(
                    mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)
    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict, mux_analysis_list = get_lib_details(run_num_flowcell, \
            mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline params is empty for run num %s",
                           run_num_flowcell)
            continue
        # Insert jobs into pipeline_runs collection
        for lib, lib_info in pipeline_params_dict.items():
            job = {}
            rd_list = {}
            job['sample_cfg'] = {}
            readunits_list = list()
            rd_list['samples'] = {}
            for outer_key, outer_value in lib_info.items():
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
                    job['sample_cfg'].update({outer_key: outer_value})
                if outer_key == 'references_cfg':
                    job['references_cfg'] = {}
                    job['references_cfg'] = outer_value
                elif outer_key == 'cmdline':
                    job['cmdline'] = {}
                    job['cmdline'] = outer_value
                elif outer_key != 'readunits':
                    job.update({outer_key: outer_value})
                else:
                    rd_list['samples'][lib] = readunits_list
                    job['sample_cfg'].update(rd_list)

            if args.dry_run:
                logger.warning("Skipping job delegation for %s", \
                    lib)
                continue
            res = mongodb_insert_libjob(job, connection)
            if not res:
                logger.critical("Skipping rest of analysis job submission" \
                     "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s", \
                    lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id, \
                    lib_info.ctime, connection)
                break
        # Update runcomplete collection for delegated jobs
        if not args.dry_run and update_status:
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                if mux_id in mux_analysis_list:
                    logger.info("Update mongoDb pipeline_runs for mux_id %s from run number %s" \
                        "and analysis_id is %s", mux_id, run_num_flowcell, analysis_id)
                    res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id, \
                        insert_id, connection)
                    if not res:
                        logger.critical("Skipping rest of analysis job submission for %s" \
                            "from %s", mux_id, run_num_flowcell)
                        subject = "Downstream delegator failed job submission for {}" \
                            .format(mux_id)
                        if args.testing:
                            subject += " (testing)"
                        body = "Downstream delegator failed to insert job submission for" \
                            "{}".format(mux_id)
                        send_mail(subject,
                                  body,
                                  toaddr='veeravallil',
                                  ccaddr=None)
                        update_status = False
                        break
    connection.close()
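
For orientation, a rough, made-up example of the job document that the loop above
assembles before mongodb_insert_libjob(). All values are placeholders; the field
names inside the readunit are guessed from the positional ReadUnit(...) call in
get_lib_details() and may differ from the real schema:

example_job = {
    "requestor": "someuser",
    "pipeline_name": "BWA-MEM",
    "pipeline_version": None,
    "pipeline_params": "params",
    "site": "GIS",
    "ctime": 1488148200000,
    "out_dir": "/path/to/downstream/outdir",
    "sample_cfg": {
        "readunits": {
            "readunit-key-1": {
                "run_id": "HS004-PE-R00123_ABCDEF",
                "flowcell_id": "ABCDEF",
                "library_id": "WHH123",
                "lane_id": "1",
                "rg_id": None,
                "fq1": "/path/to/WHH123_R1.fastq.gz",
                "fq2": "/path/to/WHH123_R2.fastq.gz",
            },
        },
        "samples": {"WHH123": ["readunit-key-1"]},
    },
}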
Example #25
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    #Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({
        "analysis.Status": "SUCCESS",
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        # Downstream analysis will not be initiated for Novogene (NG00*) runs
        if "NG00" in run_number:
            continue
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            analysis_id = analysis.get("analysis_id")

            #Check if bcl2Fastq is completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical(
                        "Following directory listed in DB doesn't exist: %s",
                        out_dir)
                    continue

                #Check if downstream analysis has been started
                if not os.path.exists(
                        os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL),
                                exist_ok=True)
                    #generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    config_file = os.path.join(out_dir,
                                               "config_casava-1.8.2.txt")
                    try:
                        # check_call (unlike call) raises CalledProcessError on
                        # failure; the command's output goes into the config file
                        with open(config_file, "w") as fh:
                            subprocess.check_call(config_cmd,
                                                  stderr=subprocess.STDOUT,
                                                  stdout=fh)
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(config_cmd))
                        logger.fatal("Check the output in %s", config_file)
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #generic sample sheet
                    samplesheet_cmd = 'cd {} && {} -r {}'.format(
                        out_dir, SAMPLESHEET, run_number)
                    try:
                        _ = subprocess.check_output(samplesheet_cmd,
                                                    shell=True)
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, samplesheet_cmd)
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #Generate and Submit BWA and RNAseq mapping pipeline
                    _, runid, _ = get_machine_run_flowcell_id(run_number)
                    generic_samplesheet = (os.path.join(
                        out_dir, runid + "_SampleSheet.csv"))
                    if os.path.exists(
                            os.path.join(out_dir, generic_samplesheet)):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(dirs, BWA, run_number, out_dir, os.path.join(out_dir, \
                                generic_samplesheet), os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(RNA, run_number, out_dir, os.path.join(out_dir, \
                                generic_samplesheet), os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            #Remove config txt
                            os.remove(
                                os.path.join(out_dir,
                                             "config_casava-1.8.2.txt"))
                        else:
                            try:
                                #analysis command goes into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG),
                                          'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal(
                                    "The following command failed with return code %s: %s",
                                    e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                #send_status_mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id, \
                                    os.path.join(out_dir, LOG_DIR_REL, "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1

                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id, \
                            os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number,
                             out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
Example #26
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue

            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with email_sent %s", analysis_id,
                        run_number, mux_status['mux_id'],
                        mux_status.get('email_sent', None))
                    continue

                if mux_status.get('email_sent', None):
                    continue

                # for all others: send email and update db

                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']

                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(
                        mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(
                        out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out',
                                          mux_status.get('mux_dir'))
                    summary = path_to_url(
                        os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"

                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal(
                            "conf info '%s' does not exist"
                            " under run directory.", confinfo)
                        continue

                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to,
                              ccaddr="rpd")  # mail_to already set

                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)

                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
Example #27
def main():
    """main function
    """
    bcl2fastq_qc_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl2fastq_qc.py"))
    assert os.path.exists(bcl2fastq_qc_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send email on detected failures")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    
    if not is_production_user():
        logger.warning("Not a production user. Skipping DB update")
        sys.exit(1)
        
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.Status": "SUCCESS", "analysis.QC_status" : {"$exists": 0},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for (analysis_count, analysis) in enumerate(record['analysis']):
            out_dir = analysis["out_dir"]
            analysis_id = analysis['analysis_id']
            #Check if bcl2Fastq is completed successfully
            if analysis['Status'] != "SUCCESS":
                logger.info("Analysis is not completed successfully under %s", out_dir)
                continue
            if not os.path.exists(out_dir):
                logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                continue
            if args.testing:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.test.txt")
            else:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.txt")

            if os.path.exists(bcl2fastq_qc_out):
                logger.critical("Refusing to overwrite existing file %s. Skipping QC check", bcl2fastq_qc_out)
                continue
                
            bcl2fastq_qc_cmd = [bcl2fastq_qc_script, '-d', out_dir]
            if args.no_mail:
                bcl2fastq_qc_cmd.append("--no-mail")
            if args.dry_run:
                logger.warning("Skipped following run: %s", out_dir)
                continue
            try:
                QC_status = "analysis.{}.QC_status".format(analysis_count)
                status = subprocess.check_output(bcl2fastq_qc_cmd, stderr=subprocess.STDOUT)
                if "QC_FAILED" in str(status):
                    db.update({"run": run_number, 'analysis.analysis_id' : analysis_id},
                        {"$set": {QC_status: "FAILED"}})
                    logger.info("Demux QC failed for run: %s", run_number)
                else:
                    db.update({"run": run_number, 'analysis.analysis_id' : analysis_id},
                        {"$set": {QC_status: "SUCCESS"}})
                    logger.info("Demux QC SUCCESS for run: %s", run_number)
                with open(bcl2fastq_qc_out, 'w') as fh:
                    fh.write(status.decode())
            except subprocess.CalledProcessError as e:
                logger.fatal("The following command failed with return code %s: %s",
                             e.returncode, ' '.join(bcl2fastq_qc_cmd))
                logger.fatal("Output: %s", e.output.decode())
                logger.fatal("Exiting")
    connection.close()
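
Several of these examples gate DB updates and emails on is_production_user(). Its
implementation is not shown; a plausible sketch, assuming the production account
name "userrig" used in the getpass-based checks earlier:

import getpass

def is_production_user():
    """True if the script runs under the production account."""
    return getpass.getuser() == "userrig"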