Example #1
def start_cmd_execution(record, site, out_dir, testing):
    """ Start the analysis
    """
    pipeline_params = ""
    extra_conf = " --extra-conf "
    extra_conf += "db-id:" + str(record['_id'])
    extra_conf += " requestor:" + record['requestor']
    # sample_cfg and references_cfg
    references_cfg = ""
    sample_cfg = ""
    for outer_key, outer_value in record.items():
        if outer_key == 'sample_cfg':
            LOGGER.info("write temp sample_config")
            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', prefix='sample_cfg_', \
                delete=False) as fh:
                sample_cfg = fh.name
                yaml.dump(outer_value, fh, default_flow_style=False)
        elif outer_key == 'references_cfg':
            LOGGER.info("write temp reference_config")
            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', prefix='references_cfg_', \
                delete=False) as fh:
                references_cfg = fh.name
                yaml.dump(outer_value, fh, default_flow_style=False)
        elif outer_key == 'cmdline':
            LOGGER.info("pipeline_cmd")
            for key, value in outer_value.items():
                pipeline_params += " --" + key + " " + value
    # pipeline path for production and testing
    if is_devel_version():
        pipeline_version = ""
    else:
        pipeline_version = record['pipeline_version'].split(".")[0]
    pipeline_path = get_pipeline_path(site, record['pipeline_name'], \
        pipeline_version)
    pipeline_script = os.path.join(pipeline_path, (os.path.split(pipeline_path)[-1] + ".py"))
    if not os.path.exists(pipeline_script):
        LOGGER.critical("There seems to be trouble in executing cmd_line for JobId: {}".format(str(record['_id'])))
    pipeline_cmd = pipeline_script + " --sample-cfg " + sample_cfg + " -o " + out_dir + " --db-logging y"
    if not sample_cfg:
        LOGGER.critical("Job doesn't have sample_cfg %s", str(record['_id']))
        sys.exit(1)
    if references_cfg:
        ref_params = " --references-cfg " + references_cfg
        pipeline_cmd += ref_params
    if pipeline_params:
        pipeline_cmd += pipeline_params
    if extra_conf:
        pipeline_cmd += extra_conf
    try:
        LOGGER.info(pipeline_cmd)
        _ = subprocess.check_output(pipeline_cmd, stderr=subprocess.STDOUT, shell=True)
        return True
    except subprocess.CalledProcessError as e:
        LOGGER.fatal("The following command failed with return code %s: %s",
            e.returncode, ' '.join(pipeline_cmd))
        LOGGER.fatal("Output: %s", e.output.decode())
        return False
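For orientation, here is a minimal sketch of the job record that start_cmd_execution consumes, inferred only from the keys accessed above; all concrete values are hypothetical placeholders:

record = {
    '_id': 'hypothetical-job-id',            # forwarded as db-id via --extra-conf
    'requestor': 'userrig',                  # forwarded as requestor via --extra-conf
    'pipeline_name': 'custom/SG10K',
    'pipeline_version': '1.0.0',             # only the major version is used outside devel
    'sample_cfg': {'samples': {}},           # dumped to a temporary sample_cfg_*.yaml
    'references_cfg': {'genome': 'hs37d5'},  # optional; dumped to a references_cfg_*.yaml
    'cmdline': {'name': 'demo'},             # each key/value becomes an extra --key value flag
}
start_cmd_execution(record, site='GIS', out_dir='/tmp/analysis_out', testing=False)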
def email_non_bcl(libraryId, runId):
    """send email for non-bcl libraries
    """
    if is_devel_version():
        toaddr = email_for_user()
    else:
        toaddr = "*****@*****.**"
    subject = "bcl2fastq conversion not required for {} from run {}.".format(
        libraryId, runId)
    body = subject + "\n" + "Kindly start custom analysis manually. Thanks."
    send_mail(subject, body, toaddr=toaddr, pass_exception=False)
Example #3
def get_pipeline_path(site, pipeline_name, pipeline_version):
    """ get the pipeline path
    """
    basedir_map = PRODUCTION_PIPELINE_VERSION
    if site not in basedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
    pipeline_path = os.path.join(basedir, pipeline_version, pipeline_name)
    return pipeline_path
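The nested layout of PRODUCTION_PIPELINE_VERSION is implied by the basedir_map[site]['devel'/'production'] lookups above; a hedged sketch with hypothetical paths:

PRODUCTION_PIPELINE_VERSION = {
    'GIS': {
        'devel': '/home/userrig/pipelines/devel',     # hypothetical path
        'production': '/mnt/projects/rpd/pipelines',  # hypothetical path
    },
}
# In production mode this resolves to <production basedir>/<pipeline_version>/<pipeline_name>
pipeline_path = get_pipeline_path('GIS', 'custom/SG10K', '2')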
Example #4
def email_qcfails(subject, body):
    """email qc failures
    """
    if is_devel_version():
        toaddr = email_for_user()
        ccaddr = None
    else:
        toaddr = config['email']
        ccaddr = "*****@*****.**"

    send_mail(subject, body, toaddr=toaddr, ccaddr=ccaddr,
              pass_exception=False)
Example #5
def get_pipeline_path(site, pipeline_name, pipeline_version):
    """ get the pipeline path
    """
    basedir_map = PIPELINE_PATH_BASE
    if site not in basedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
        pipeline_path = os.path.join(basedir, pipeline_name)
        return pipeline_path
    else:
        basedir = basedir_map[site]['production']
        pipeline_path = glob.glob(os.path.join(basedir, "*"+pipeline_version, pipeline_name))
        return pipeline_path[0]
Example #6
def get_bcl2fastq_outdir(runid_and_flowcellid):
    """where to write bcl2fastq output to
    """

    if is_devel_version():
        basedir = site_cfg['bcl2fastq_outdir_base']['devel']
    else:
        basedir = site_cfg['bcl2fastq_outdir_base']['production']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir,
        mid=machineid,
        rid=runid,
        fid=flowcellid,
        ts=generate_timestamp())
    return outdir
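A hedged usage sketch; the exact format of runid_and_flowcellid is whatever get_machine_run_flowcell_id() accepts, so the identifier below is purely illustrative:

outdir = get_bcl2fastq_outdir('HS001-PE-R00123_ABCDEFGXX')  # hypothetical run/flowcell id
# -> {basedir}/{machineid}/{runid}_{flowcellid}/bcl2fastq_{timestamp}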
Example #7
def get_downstream_outdir(requestor, pipeline_version, pipeline_name, site=None, basedir_map=OUTDIR_BASE, base_pipelinedir_map=PRODUCTION_PIPELINE_VERSION):
    """generate downstream output directory
    """
    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)
    if site not in base_pipelinedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
        if not pipeline_version:
            pipeline_version = base_pipelinedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
        if not pipeline_version:
            pipeline_version = os.readlink(base_pipelinedir_map[site]['production'])
    outdir = "{basedir}/{requestor}/{pversion}/{pname}/{ts}".format(
        basedir=basedir, requestor=requestor, pversion=pipeline_version, pname=pipeline_name,
        ts=generate_timestamp())
    return outdir
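A hedged usage sketch; passing None as pipeline_version picks up the site default (devel) or the version the production symlink points to, and the result follows the template in the format string above:

outdir = get_downstream_outdir('userrig', None, 'custom/SG10K', site='GIS')
# -> {basedir}/userrig/{pipeline_version}/custom/SG10K/{timestamp}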
Example #8
def get_bcl2fastq_outdir(runid_and_flowcellid, site=None, basedir_map=OUTDIR_BASE):
    """FIXME:add-doc
    """

    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)

    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir, mid=machineid, rid=runid, fid=flowcellid,
        ts=generate_timestamp())
    return outdir
Example #9
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    default = 34
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()
    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
        ccaddr = None
    else:
        mail_to = '*****@*****.**'
        ccaddr = "rpd"
    runs_from_db(db, mail_to, ccaddr, args.win)
Example #10
def main():

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    # only makes sense for production if run as cron
    if is_devel_version():
        k = 'devel'
    else:
        k = 'production'
    basedir = site_cfg['downstream_outdir_base'][k]
    dirglob = DOWNSTREAM_OUTDIR_TEMPLATE.format(
        basedir=basedir, user="******", pipelineversion="*", pipelinename="*", timestamp="*")
    logger.debug("dirglob is %s", dirglob)

    for flagfile in glob.glob(os.path.join(
            dirglob, WORKFLOW_COMPLETION_FLAGFILE)):
        flagdir = os.path.abspath(os.path.dirname(flagfile))

        logger.info("Starting staging out of %s", flagdir)
        try:
            s3prefix = S3_BUCKET
            s3prefix = os.path.join(s3prefix, os.path.normpath(os.path.join(os.path.relpath(flagdir, basedir), "..")))
            cmd = [STAGE_OUT_WORKER, '-p', s3prefix, '-r', flagdir]
            _ = subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            logger.fatal("%s failed with exit code %s: %s. Will try to continue", 
                         ' '.join(cmd), e.returncode, e.output)
            continue
Example #11
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dryrun",
                        action='store_true',
                        help="Don't run anything")
    default = 84
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    default = 60
    parser.add_argument(
        '-d',
        '--days',
        type=int,
        default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    default = 60
    parser.add_argument(
        '-r',
        '--tardays',
        type=int,
        default=default,
        help="tar ball not older than days(default {})".format(default))
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "run": {
            "$regex": "^((?!NG00).)*$"
        },
        "raw-delete": {
            "$exists": False
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    LOGGER.info("Looping through %s jobs", results.count())
    trigger = 0
    for record in results:
        try:
            run_num = record['run']
        except KeyError:
            run_num = None
        if not record.get('deletion'):
            #Check run_status
            res = check_run_status(record, args.days)
            if res:
                LOGGER.info("Create tar ball %s ", run_num)
                if args.dryrun:
                    LOGGER.warning("Skipping Create tar ball %s ", run_num)
                    continue
                create_run_tar(db, run_num)
                trigger = 1
        elif record['deletion'].get('tar'):
            res = check_tar_status_and_delete(db,
                                              record,
                                              args.tardays,
                                              dryrun=args.dryrun)
            if res:
                trigger = 1
        if args.break_after_first and trigger == 1:
            LOGGER.info("Stopping after first run")
            break
Example #12
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue

            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with email_sent %s", analysis_id,
                        run_number, mux_status['mux_id'],
                        mux_status.get('email_sent', None))
                    continue

                if mux_status.get('email_sent', None):
                    continue

                # for all others: send email and update db

                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']

                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(
                        mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(
                        out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out',
                                          mux_status.get('mux_dir'))
                    summary = path_to_url(
                        os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"

                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal(
                            "conf info '%s' does not exist"
                            " under run directory.", confinfo)
                        continue

                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to,
                              ccaddr="rpd")  # mail_to already set

                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)

                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
Example #13
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis" : {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue

            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue

                if mux_status.get('email_sent', None):
                    continue

                # for all others: send email and update db

                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']

                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)

                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue

                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")# mail_to already set

                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)

                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)


    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
Example #14
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = "NSCC"
    parser.add_argument('-s', "--site", default=default,
                        help="site information (default = {})".format(default),
                        choices=['NSCC', 'GIS'])
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = '*****@*****.**'
    run_records = runs_from_db(connection, args.testing, args.win)
    trigger = 0
    for run in run_records:
        for mux, mux_info in run.items():
            if args.dry_run:
                logger.warning("Skipping job delegation for %s from %s", mux, mux_info[0])
                continue
            #Check if mux data is getting transferred
            find = check_mux_data_transfer_status(connection, mux_info)
            if find:
                continue
            res = start_data_transfer(connection, mux, mux_info, args.site, mail_to)
            if res:
                trigger = 1
        if args.break_after_first and trigger == 1:
            logger.info("Stopping after first run")
            break
    # close the connection to MongoDB
    connection.close()
Example #15
def start_data_transfer(connection, mux, mux_info, site, mail_to):
    """ Data transfer from source to destination
    """
    run_number, downstream_id, analysis_id, bcl_path = mux_info
    fastq_src = os.path.join(bcl_path, "out", "Project_"+mux)
    bcl_dir = os.path.basename(bcl_path)
    if is_devel_version():
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \
            mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \
            mux, mux + "_multisample.yaml")
    else:
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \
            mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \
            mux, mux + "_multisample.yaml")
    rsync_cmd = 'rsync -va %s %s' % (fastq_src, fastq_dest)
    if not os.path.exists(fastq_dest):
        try:
            os.makedirs(fastq_dest)
            logger.info("data transfer started for %s from %s", mux, run_number)
            st_time = generate_timestamp()
            update_downstream_mux(connection, run_number, analysis_id, downstream_id, \
                "COPYING_" + st_time)
            _ = subprocess.check_output(rsync_cmd, shell=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            body = "The following command failed with return code {}: {}". \
                format(e.returncode, rsync_cmd)
            subject = "{} from {}: SG10K data transfer ({}) failed".format(mux, run_number, site)
            logger.fatal(body)
            logger.fatal("Output: %s", e.output.decode())
            logger.fatal("Exiting")
            # Send failure notification
            send_mail(subject, body, toaddr=mail_to, ccaddr=None)
            # Flag the partially transferred data as ERROR
            update_downstream_mux(connection, run_number, analysis_id, downstream_id, "ERROR")
            sys.exit(1)
        #Update the mongoDB for successful data transfer
        sample_info = get_mux_details(run_number, mux, fastq_dest)
        #Touch rsync complete file
        with open(os.path.join(fastq_dest, "rsync_complete.txt"), "w") as f:
            f.write("")
        with open(yaml_dest, 'w') as fh:
            yaml.dump(dict(sample_info), fh, default_flow_style=False)
        job = {}
        job['sample_cfg'] = {}
        for outer_key, outer_value in sample_info.items():
            ctime, _ = generate_window(1)
            job['sample_cfg'].update({outer_key:outer_value})
            job['site'] = site
            job['pipeline_name'] = 'custom/SG10K'
            job['pipeline_version'] = novogene_conf['PIPELINE_VERSION']
            job['ctime'] = ctime
            job['requestor'] = 'userrig'
            if is_devel_version():
                novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['devel'], \
                    mux)
            else:
                novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['production'],
                    mux)
            job['out_dir_override'] = novogene_outdir
        logger.info("Data transfer completed successfully for %s from %s", mux, run_number)
        job_id = insert_muxjob(connection, mux, job)
        update_downstream_mux(connection, run_number, analysis_id, downstream_id, job_id)
        subject = "{} from {}: SG10K data transfer ({}) completed".format(mux, run_number, site)
        body = "Data transfer successfully completed for {} from {}".format(mux, run_number)
        send_mail(subject, body, toaddr=mail_to, ccaddr=None)
        return True
    else:
        logger.critical("Mux %s from %s directory already exists under %s", mux, \
            run_number, fastq_dest)
        return False
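For orientation, a hedged sketch of the inputs start_data_transfer expects, based only on how mux_info is unpacked above; all names and paths are hypothetical:

mux_info = ('HS001-PE-R00123_ABCDEFGXX',  # run_number
            'downstream-0',               # downstream_id
            'analysis-0',                 # analysis_id
            '/path/to/bcl2fastq_out')     # bcl_path; FastQs expected under out/Project_<mux>
start_data_transfer(connection, 'MUX1234', mux_info, 'GIS', 'rpd')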
Example #16
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    default = 34
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    default = 75
    parser.add_argument(
        '-d',
        '--days',
        type=int,
        default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    parser.add_argument(
        '-t',
        "--testing",
        action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    run_records = runs_from_db(db, args.days, args.win)
    for run in run_records:
        if args.dry_run:
            LOGGER.info("Skipping dryrun option %s", run)
            continue
        purge(db, run, mail_to)
        if args.break_after_first:
            LOGGER.info("Stopping after first sequencing run")
            break