def get_started_outdirs_from_db(testing=True, win=None):
    """Yield the out_dir of every analysis on runs whose status is STARTED.

    Args:
        testing: use the MongoDB test server when True.
        win: optional look-back window in days; when given, only runs whose
            timestamp falls inside the window are considered.

    Exits the process (status 1) if no DB connection can be established.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if win:
        epoch_present, epoch_back = generate_window(win)
        results = db.find({
            "analysis.Status": "STARTED",
            "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
        })
    else:
        results = db.find({"analysis.Status": "STARTED"})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs", results.count())
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def instantiate_mongo(testing):
    """Return a handle on the gisds.runcomplete collection.

    testing == True selects the test server; False selects production.
    """
    conn = mongodb_conn(testing)
    return conn.gisds.runcomplete
def _format_and_print_job(job):
    """Pretty-print one accounting-log job with human-readable fields.

    Mutates *job* in place: cpu / ruWallClock seconds become HhMmSs
    strings, maxvmem bytes become GB, and submissionTime epoch becomes an
    ISO timestamp with ':' replaced by '-' (filesystem-safe).
    """
    job["cpu"] = strftime("%Hh%Mm%Ss", gmtime(job["cpu"]))
    job["maxvmem"] = str(job["maxvmem"] / pow(2, 30)) + " GB"
    job["ruWallClock"] = strftime("%Hh%Mm%Ss", gmtime(job["ruWallClock"]))
    job["submissionTime"] = str(datetime.fromtimestamp(
        job["submissionTime"]).isoformat()).replace(":", "-")
    PrettyPrinter(indent=2).pprint(job)


def main():
    """Filter accounting-log jobs by jobNo and/or owner and pretty-print them.

    The identical per-job formatting block was previously copy-pasted into
    all three filter branches; it now lives in _format_and_print_job.
    """
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-j", "--jobNo", nargs="*",
                          help="filter records by jobNo of jobs")
    instance.add_argument("-o", "--owner", nargs="*",
                          help="filter records by owner of jobs")
    args = instance.parse_args()
    if (not args.jobNo) and (args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if job["owner"] in args.owner:
                    _format_and_print_job(job)
    if (args.jobNo) and (not args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo}}):
            for job in document["jobs"]:
                if job["jobNo"] in args.jobNo:
                    _format_and_print_job(job)
    if args.jobNo and args.owner:
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo},
                 "jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if (job["jobNo"] in args.jobNo) and (job["owner"] in args.owner):
                    _format_and_print_job(job)
def check_mongo():
    """Scan recent runcomplete records and build a warning report string.

    Two passes over the [MAX_WINDOW .. MAX_RUN]-days-ago window:
    1. runs STARTED whose latest analysis is not SUCCESS,
    2. runs with no analysis at all.
    Each section lists the offending run ids followed by a count line.
    Returns the accumulated report ("" when nothing is wrong).
    """
    warnings = ""
    epoch_present, epoch_window = generate_window(MAX_WINDOW)
    epoch_present, epoch_started = generate_window(MAX_RUN)
    del epoch_present  # only the two look-back bounds are needed
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis.Status"] = "STARTED"
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        # only the most recent analysis entry decides the run's fate
        if record["analysis"][-1]["Status"] != "SUCCESS":
            warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                         str(record["run"]) + "\n")
            count_warnings += 1
    if count_warnings > 0:
        warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis"] = {"$exists": False}
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(record["run"]) + "\n")
        count_warnings += 1
    if count_warnings > 0:
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    return warnings
def runs_from_db(testing=True, win=34):
    """Yield STARTED runs from the pipeline_runs collection.

    Args:
        testing: use the MongoDB test server when True.
        win: look-back window in days.

    Yields:
        (record _id, out_dir, run start_time) tuples for every run entry
        with status STARTED.

    BUGFIX: the ctime query previously used hard-coded debug timestamps
    (1470127013000 .. 1470127093000), making the computed window — and the
    `win` parameter — a complete no-op. It now uses the generated window.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"runs": {"$exists": True},
                       "ctime": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        for runs in record['runs']:
            if runs["status"] == "STARTED":
                test = (record['_id'], record['out_dir'], runs['start_time'])
                yield test
def main():
    """Look up a MUX id and print the last non-FAILED out_dir recorded for it."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-m", "--mux", required=True,
                          help="MUX_ID to generate OUT_DIR")
    args = instance.parse_args()
    query = {"analysis.per_mux_status.mux_id": args.mux}
    for document in mongodb_conn(False).gisds.runcomplete.find(query):
        if "analysis" not in document:
            continue
        last_out_dir = ""
        for analysis in document["analysis"]:
            if analysis["Status"].upper() == "FAILED":
                continue
            if "per_mux_status" not in analysis:
                continue
            for mux in analysis["per_mux_status"]:
                if mux["mux_id"] == args.mux:
                    # later analyses overwrite earlier ones -> "last" wins
                    last_out_dir = analysis["out_dir"].replace("//", "/")
        print(last_out_dir)
def get_outdirs_from_db(testing=True, win=14):
    """Yield out_dir of every analysis of runs analysed in the last *win* days.

    Args:
        testing: use the MongoDB test server when True.
        win: look-back window in days.

    Exits the process (status 1) if no DB connection can be established.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def main():
    """main function

    Parses the look-back window / verbosity options, connects to the
    runcomplete collection and hands it to runs_from_db together with the
    notification addresses.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    default = 34
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    # devel/testing mails go to a person, production mails to the team list
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
        ccaddr = None
    else:
        mail_to = '*****@*****.**'
        ccaddr = "rpd"
    runs_from_db(db, mail_to, ccaddr, args.win)
def main():
    """Look up a MUX id and print the last non-FAILED out_dir recorded for it."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-m", "--mux", required=True,
                          help="MUX_ID to generate OUT_DIR")
    args = instance.parse_args()
    for document in mongodb_conn(False).gisds.runcomplete.find(
            {"analysis.per_mux_status.mux_id": args.mux}):
        if "analysis" in document:
            last_out_dir = ""
            for analysis in document["analysis"]:
                if analysis["Status"].upper() != "FAILED":
                    if "per_mux_status" in analysis:
                        for mux in analysis["per_mux_status"]:
                            if mux["mux_id"] == args.mux:
                                # normalise accidental double slashes in path
                                last_out_dir = analysis["out_dir"].replace(
                                    "//", "/")
            print(last_out_dir)
def main():
    """Resolve a MUX id to its OUT_DIR (-d) and/or to its libraries (-l)."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-d", "--dir",
                          help="specify one MUX_ID to generate OUT_DIR")
    instance.add_argument("-l", "--lib",
                          help="specify one MUX_ID to generate LIB")
    args = instance.parse_args()
    if args.dir:
        cursor = mongodb_conn(False).gisds.runcomplete.find(
            {"analysis.per_mux_status.mux_id": args.dir})
        for document in cursor:
            if "analysis" not in document:
                continue
            last_out_dir = ""
            for analysis in document["analysis"]:
                if analysis["Status"].upper() == "FAILED":
                    continue
                if "per_mux_status" not in analysis:
                    continue
                for mux in analysis["per_mux_status"]:
                    if mux["mux_id"] == args.dir:
                        # later analyses overwrite earlier ones -> "last" wins
                        last_out_dir = analysis["out_dir"].replace("//", "/")
            print(last_out_dir)
    if args.lib:
        for library in mux_to_lib(args.lib, testing=False):
            print(library)
def main():
    """main function

    Pushes a per_mux_status entry onto the matching analysis of a run.
    BUGFIX: the final else branch raised ValueError(args.status), but no
    --status option exists (AttributeError); it now reports args.mux_status.
    The three near-identical $push branches are consolidated into one
    update with a per-status payload.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID", required=True,)
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id / start time", required=True)
    parser.add_argument('-i', "--mux-id", help="mux-id", required=True)
    parser.add_argument('-d', "--mux-dir", help="mux-dir", required=True)
    parser.add_argument('-s', "--mux-status", help="Analysis status", required=True,
                        choices=['SUCCESS', 'FAILED', 'NOARCHIVE'])
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    run_number = args.runid.rstrip()
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    # Per-status payload pushed into analysis.$.per_mux_status (positional $
    # matches the analysis selected by analysis.analysis_id below).
    if args.mux_status == "SUCCESS":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "TODO",
                   "ArchiveSubmission": "TODO",
                   "DownstreamSubmission": "TODO",
                   "email_sent": False}
    elif args.mux_status == "FAILED":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "email_sent": False}
    elif args.mux_status == "NOARCHIVE":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "NOARCHIVE",
                   "ArchiveSubmission": "NOARCHIVE",
                   "DownstreamSubmission": "TODO",
                   "email_sent": True}
    else:
        # unreachable given argparse choices; was args.status (AttributeError)
        raise ValueError(args.mux_status)
    try:
        if not args.dry_run:
            db.update({"run": run_number,
                       'analysis.analysis_id': args.analysis_id},
                      {"$push": {"analysis.$.per_mux_status": mux_doc}})
    except pymongo.errors.OperationFailure:
        logger.fatal("mongoDB OperationFailure")
        sys.exit(0)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Creates tar balls for eligible runcomplete records and deletes expired
    tar balls, honouring --dryrun and --break-after-first.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dryrun", action='store_true',
                        help="Don't run anything")
    default = 84
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    default = 60
    parser.add_argument(
        '-d', '--days', type=int, default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    default = 60
    parser.add_argument(
        '-r', '--tardays', type=int, default=default,
        help="tar ball not older than days(default {})".format(default))
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    # devel/testing mails go to a person, production to the team alias
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    # only non-NG00 runs, not yet raw-deleted, inside the window
    results = db.find({
        "run": {"$regex": "^((?!NG00).)*$"},
        "raw-delete": {"$exists": False},
        "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
    })
    LOGGER.info("Looping through %s jobs", results.count())
    trigger = 0
    for record in results:
        try:
            run_num = record['run']
        except KeyError:
            run_num = None
        if not record.get('deletion'):
            #Check run_status
            res = check_run_status(record, args.days)
            if res:
                LOGGER.info("Create tar ball %s ", run_num)
                if args.dryrun:
                    LOGGER.warning("Skipping Create tar ball %s ", run_num)
                    continue
                create_run_tar(db, run_num)
                trigger = 1
        elif record['deletion'].get('tar'):
            res = check_tar_status_and_delete(db, record, args.tardays,
                                              dryrun=args.dryrun)
            if res:
                trigger = 1
        # stop after the first record that actually did something
        if args.break_after_first and trigger == 1:
            LOGGER.info("Stopping after first run")
            break
def main():
    """main function

    Records the status of a pipeline run in the pipeline_runs collection:
    STARTED pushes a new runs entry; SUCCESS/FAILED stamps an end_time on
    the entry matched by start_time.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-r", "--DBid", help="DB Id", required=True)
    parser.add_argument(
        "-s", "--status", help="Analysis status", required=True,
        choices=["STARTED", "SUCCESS", "FAILED", "ORPHAN"]
    )
    parser.add_argument("-st", "--start-time", help="Start time", required=True)
    parser.add_argument("-o", "--out", help="Analysis output directory")
    parser.add_argument("-t", "--test_server", action="store_true")
    parser.add_argument("-n", "--dry-run", action="store_true", help="Dry run")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="Increase verbosity")
    parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    _id = args.DBid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.pipeline_runs
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", _id, args.status)
    if args.status in ["STARTED"]:
        try:
            if not args.dry_run:
                db.update(
                    {"_id": ObjectId(_id)},
                    {"$push": {"runs": {"start_time": args.start_time,
                                        "status": args.status}}}
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # positional $ replaces the runs entry matched by start_time
                db.update(
                    {"_id": ObjectId(_id), "runs.start_time": args.start_time},
                    {"$set": {"runs.$": {"start_time": args.start_time,
                                         "end_time": end_time,
                                         "status": args.status}}},
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    else:
        # NOTE(review): ORPHAN is accepted by argparse but lands here — confirm
        raise ValueError(args.status)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Pushes a per_mux_status entry onto the matching analysis of a run.
    BUGFIX: the final else branch raised ValueError(args.status), but no
    --status option exists (AttributeError); it now reports args.mux_status.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID", required=True,)
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id / start time", required=True)
    parser.add_argument('-i', "--mux-id", help="mux-id", required=True)
    parser.add_argument('-d', "--mux-dir", help="mux-dir", required=True)
    parser.add_argument('-s', "--mux-status", help="Analysis status", required=True,
                        choices=['SUCCESS', 'FAILED', 'NOARCHIVE'])
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    run_number = args.runid.rstrip()
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    # Build the per-status payload once; a single $push then updates the
    # analysis selected by analysis.analysis_id (positional $ operator).
    if args.mux_status == "SUCCESS":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "TODO",
                   "ArchiveSubmission": "TODO",
                   "DownstreamSubmission": "TODO",
                   "email_sent": False}
    elif args.mux_status == "FAILED":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "email_sent": False}
    elif args.mux_status == "NOARCHIVE":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "NOARCHIVE",
                   "ArchiveSubmission": "NOARCHIVE",
                   "DownstreamSubmission": "TODO",
                   "email_sent": True}
    else:
        # unreachable given argparse choices; was args.status (AttributeError)
        raise ValueError(args.mux_status)
    try:
        if not args.dry_run:
            db.update({"run": run_number,
                       'analysis.analysis_id': args.analysis_id},
                      {"$push": {"analysis.$.per_mux_status": mux_doc}})
    except pymongo.errors.OperationFailure:
        logger.fatal("mongoDB OperationFailure")
        sys.exit(0)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Finds runcomplete records without an analysis inside the window and
    launches the bcl2fastq wrapper for each, aborting the loop on qmaster
    (commlib) errors.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-e', "--wrapper-args", nargs="*",
                        help="Extra arguments for bcl2fastq wrapper"
                        " (prefix leading dashes with X, e.g. X-n for -n)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    # wrapper lives next to this script
    bcl2fastq_wrapper = os.path.join(os.path.dirname(sys.argv[0]), "bcl2fastq.py")
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    # db query for jobs that are yet to be analysed in the epoch window
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "analysis": {"$exists": 0},
        "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
    })
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("Processing record %s", record)
        cmd = [bcl2fastq_wrapper, "-r", run_number, "-v"]
        if args.testing:
            cmd.append("-t")
        if args.wrapper_args:
            # X-prefix protects wrapper flags from argparse; strip it here
            cmd.extend([x.lstrip('X') for x in args.wrapper_args])
        if args.dry_run:
            logger.warning("Skipped following run: %s", ' '.join(cmd))
            continue
        else:
            try:
                logger.info("Executing: %s", ' '.join(cmd))
                res = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                if res:
                    logger.info("bcl2fastq wrapper returned:\n%s",
                                res.decode().rstrip())
            except subprocess.CalledProcessError as e:
                logger.critical(
                    "The following command failed with"
                    " return code %s: %s", e.returncode, ' '.join(cmd))
                logger.critical("Full error message was: %s", e.stdout)
                if 'commlib error' in e.stdout.decode():
                    # qmaster down -> no point trying further runs
                    logger.critical(
                        "Looks like a qmaster problem (commlib error). Exiting"
                    )
                    break
                else:
                    logger.critical("Will keep going")
                    # continue so that a failed run doesn't count,
                    # i.e. args.break_after_first shouldn't be trigger
                    continue
        if args.break_after_first:
            logger.info("Stopping after first sequencing run")
            break
    # close the connection to MongoDB
    connection.close()
    logger.info("Successful program exit")
def main():
    """main function

    Finds pipeline_runs records for the given site that have no run yet
    (inside the ctime window) and starts an analysis for each.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-s', "--site", help="site information")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    # default site when none given on the command line
    if not args.site:
        site = 'NSCC'
    else:
        site = args.site
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "run": {"$exists": False},
        "site": site,
        "ctime": {"$gt": epoch_back, "$lt": epoch_present}
    })
    logger.info("Found %s runs to start analysis", results.count())
    for record in results:
        start_analysis(record, args.testing, args.dry_run)
def main():
    """main function

    Triggers downstream mapping pipelines (BWA, RNAseq) for every run whose
    bcl2fastq analysis finished with SUCCESS inside the window.

    BUGFIXES:
    - `analysis_id` was referenced in both send_status_mail calls without
      ever being defined (NameError); it is now taken from the analysis
      record (assumes each analysis dict carries analysis_id — TODO confirm).
    - removed redundant `analysis = record['analysis']` that was immediately
      shadowed by the loop variable.
    - `' '.join(cmd)` was applied to a *string*, spacing out every character
      in the failure log; the command is now logged directly.
    - dropped no-op `.format()` calls on constant filenames.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    #Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            analysis_id = analysis.get("analysis_id")
            #Check if bcl2Fastq is completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                    continue
                #Check if downstream analysis has been started
                if not os.path.exists(os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    #generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    try:
                        f = open(os.path.join(out_dir, "config_casava-1.8.2.txt"), "w")
                        _ = subprocess.call(config_cmd, stderr=subprocess.STDOUT, stdout=f)
                    except subprocess.CalledProcessError as e:
                        logger.fatal("The following command failed with return code %s: %s",
                                     e.returncode, ' '.join(config_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #Generate and Submit BWA and RNAseq mapping pipeline
                    if os.path.exists(os.path.join(out_dir, "samplesheet.csv")):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            dirs, BWA, run_number, out_dir,
                            os.path.join(out_dir, "samplesheet.csv"),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            RNA, run_number, out_dir,
                            os.path.join(out_dir, "samplesheet.csv"),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            #Remove config txt
                            os.remove(os.path.join(out_dir, "config_casava-1.8.2.txt"))
                        else:
                            try:
                                #ananlysisReport into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG), 'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                # cmd is a shell string, log it as-is
                                logger.fatal("The following command failed with return code %s: %s",
                                             e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                #send_status_mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id,
                                                 os.path.join(out_dir, LOG_DIR_REL,
                                                              "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1
                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id,
                                         os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number, out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def main():
    """main function

    Records bcl2fastq analysis status in runcomplete: STARTED/SEQRUNFAILED
    pushes a new analysis entry; SUCCESS/FAILED replaces the entry matched
    by analysis_id and stamps an end_time. Each update asserts exactly one
    document was modified.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-r', "--runid",
        help="Run ID plus flowcell ID", required=True, )
    parser.add_argument(
        '-s', "--status",
        help="Analysis status", required=True,
        choices=['STARTED', 'SUCCESS', 'FAILED', 'SEQRUNFAILED', 'NON-BCL'])
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id", required=True)
    parser.add_argument('-o', "--out", help="Analysis output directory")
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    user_name = "userrig"
    run_number = args.runid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", run_number, args.status)
    if args.status in ["STARTED", "SEQRUNFAILED"]:
        try:
            if not args.dry_run:
                res = db.update_one({"run": run_number}, {
                    "$push": {
                        "analysis": {
                            "analysis_id": args.analysis_id,
                            "user_name": user_name,
                            "out_dir": args.out,
                            "Status": args.status,
                        }
                    }
                })
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
        except (pymongo.errors.OperationFailure, AssertionError) as e:
            logger.fatal(
                "MongoDB update failure while setting run %s analysis_id %s to %s",
                run_number, args.analysis_id, args.status)
            sys.exit(1)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # positional $ replaces the analysis entry matched by analysis_id
                res = db.update_one(
                    {
                        "run": run_number,
                        'analysis.analysis_id': args.analysis_id
                    }, {
                        "$set": {
                            "analysis.$": {
                                "analysis_id": args.analysis_id,
                                "end_time": end_time,
                                "user_name": user_name,
                                "out_dir": args.out,
                                "Status": args.status,
                            }
                        }
                    })
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
        except (pymongo.errors.OperationFailure, AssertionError) as e:
            logger.fatal(
                "MongoDB update failure while setting run %s analysis_id %s to %s",
                run_number, args.analysis_id, args.status)
            sys.exit(1)
    else:
        # NOTE(review): NON-BCL is accepted by argparse but lands here — confirm
        raise ValueError(args.status)
    # close the connection to MongoDB
    connection.close()
def main():
    """Command-line entry point: purge old bcl2fastq analysis directories.

    Looks up recent runs in MongoDB (window/age controlled by --win and
    --days) and calls purge() on each; supports dry-run and stop-after-first
    modes for testing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = 34
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    default = 75
    parser.add_argument(
        '-d', '--days', type=int, default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete

    # Mail recipient: individual account on devel/testing, group otherwise.
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'

    run_records = runs_from_db(db, args.days, args.win)
    for run in run_records:
        if args.dry_run:
            LOGGER.info("Skipping dryrun option %s", run)
            continue
        purge(db, run, mail_to)
        if args.break_after_first:
            LOGGER.info("Stopping after first sequencing run")
            break
def main():
    """Command-line entry point: run QC checks on completed bcl2fastq runs.

    Finds recent runs whose analysis succeeded but has no QC_status yet,
    runs the companion bcl2fastq_qc.py script on each output directory,
    stores the QC verdict back into MongoDB and writes the QC output to a
    file inside the output directory.
    """
    # The QC script is expected to live next to this script.
    bcl2fastq_qc_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl2fastq_qc.py"))
    assert os.path.exists(bcl2fastq_qc_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send email on detected failures")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping DB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    # Only runs with a successful analysis and no QC verdict yet.
    results = db.find({"analysis.Status": "SUCCESS",
                       "analysis.QC_status": {"$exists": 0},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        # NOTE(review): `analysis` is assigned here and immediately shadowed
        # by the loop variable below; this first assignment looks redundant.
        analysis = record['analysis']
        #for analysis in record['analysis']:
        for (analysis_count, analysis) in enumerate(record['analysis']):
            out_dir = analysis["out_dir"]
            analysis_id = analysis['analysis_id']
            # NOTE(review): `status` is reassigned below to the subprocess
            # output; this initial value is only read via analysis['Status'].
            status = analysis['Status']
            #Check if bcl2Fastq is completed successfully
            if analysis['Status'] != "SUCCESS":
                logger.info("Analysis is not completed successfully under %s", out_dir)
                continue
            if not os.path.exists(out_dir):
                logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                continue
            # Separate output file name for test server so production files
            # are never clobbered from a testing instance.
            if args.testing:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.test.txt")
            else:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.txt")
            if os.path.exists(bcl2fastq_qc_out):
                logger.critical("Refusing to overwrite existing file %s. Skipping QC check", bcl2fastq_qc_out)
                continue
            bcl2fastq_qc_cmd = [bcl2fastq_qc_script, '-d', out_dir]
            if args.no_mail:
                bcl2fastq_qc_cmd.append("--no-mail")
            if args.dry_run:
                logger.warning("Skipped following run: %s", out_dir)
                continue
            try:
                # Positional path into the analysis array for this entry,
                # e.g. "analysis.0.QC_status".
                QC_status = "analysis.{}.QC_status".format(analysis_count)
                status = subprocess.check_output(bcl2fastq_qc_cmd, stderr=subprocess.STDOUT)
                # NOTE(review): db.update is the legacy pymongo API
                # (deprecated in pymongo 3, removed in 4) — consider
                # update_one when the pymongo version is bumped.
                if "QC_FAILED" in str(status):
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "FAILED"}})
                    logger.info("Demux QC failed for run: %s", run_number)
                else:
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "SUCCESS"}})
                    logger.info("Demux QC SUCCESS for run: %s", run_number)
                # Persist the raw QC output alongside the run results.
                with open(bcl2fastq_qc_out, 'w') as fh:
                    fh.write(status.decode())
            except subprocess.CalledProcessError as e:
                logger.fatal("The following command failed with return code %s: %s",
                             e.returncode, ' '.join(bcl2fastq_qc_cmd))
                logger.fatal("Output: %s", e.output.decode())
                logger.fatal("Exiting")
    connection.close()
def main():
    """Command-line entry point: email bcl2fastq per-MUX results.

    Scans recent runs with an analysis entry, and for every MUX whose status
    email has not yet been sent, mails a FAILED notice (with log location) or
    a SUCCESS notice (with summary URL and FastQ location), then flags the
    MUX as notified in MongoDB.
    """
    # NOTE(review): these two upload-script paths are computed and asserted
    # but never used in this function — presumably copied from a sibling
    # script; confirm before removing.
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())

    # Mail recipient: individual account on devel/testing, group otherwise.
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with email_sent %s",
                        analysis_id, run_number, mux_status['mux_id'],
                        mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    continue
                # for all others: send email and update db
                # Positional path to this MUX's email_sent flag, e.g.
                # "analysis.0.per_mux_status.1.email_sent".
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(
                        mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(
                        out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(
                        os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal(
                            "conf info '%s' does not exist"
                            " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")  # mail_to already set
                    # In full production, additionally notify the requestor
                    # recorded in the run's conf.yaml.
                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """Command-line entry point: generate and mail a bcl2fastq status report.

    Collects all runs in the lookback window, orders them by timestamp, and
    builds a plain-text report of failed stats/archive submissions, failed
    analyses, and analyses stuck in STARTED for more than three days; the
    report is emailed (or printed with --no-mail).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n', "--no-mail", action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # Map timestamp -> run so the report can be ordered chronologically.
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():  # v is run
        results = db.find({"run": v})
        for record in results:
            if not 'analysis' in record:
                continue
            # Only the most recent analysis attempt is reported.
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue
            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get(
                                'Status') == "SUCCESS":  # FIXME what if key is missing?
                            mux_id = d['mux_id']
                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)
            elif status == 'STARTED':
                # Flag analyses that have been in STARTED for > 3 days.
                analysis_id = last_analysis.get("analysis_id")
                # analysis_id doubles as an ISO start time; +08:00 is the
                # local (SGT) offset.
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id + "+08:00")
                # NOTE(review): `timestamp` here is whatever value was left
                # by the *first* result loop (the last record scanned), not
                # necessarily run v's timestamp — looks like a latent bug;
                # confirm against the schema before relying on this delta.
                run_completion_time = timestamp / 1000
                rd = relative_epoch_time(run_completion_time, analysis_epoch_time)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"
    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print(
            "Skipping sending of email with subject '{}' and following body:".
            format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
def main():
    """Command-line entry point: generate and mail a bcl2fastq status report.

    Variant of the report generator that computes the STARTED-analysis age
    inline with dateutil.relativedelta instead of a helper.  Collects runs in
    the lookback window and reports failed submissions, failed analyses and
    long-running STARTED analyses by mail (or console with --no-mail).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n', "--no-mail", action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # Map timestamp -> run so the report can be ordered chronologically.
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():  # v is run
        results = db.find({"run": v})
        for record in results:
            if not 'analysis' in record:
                continue
            # Only the most recent analysis attempt is reported.
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue
            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get('Status') == "SUCCESS":  # FIXME what if key is missing?
                            mux_id = d['mux_id']
                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)
            elif status == 'STARTED':
                # Flag analyses that have been in STARTED for > 3 days.
                analysis_id = last_analysis.get("analysis_id")
                # analysis_id doubles as an ISO start time; +08:00 is the
                # local (SGT) offset.
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id+"+08:00")
                # NOTE(review): `timestamp` here is the leftover value from
                # the first result loop (last record scanned), not
                # necessarily run v's timestamp — looks like a latent bug;
                # confirm before relying on this delta.
                run_completion_time = timestamp/1000
                dt1 = datetime.datetime.fromtimestamp(run_completion_time)
                dt2 = datetime.datetime.fromtimestamp(analysis_epoch_time)
                rd = dateutil.relativedelta.relativedelta(dt1, dt2)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"
    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print("Skipping sending of email with subject '{}' and following body:".format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
def main():
    """Command-line entry point: trigger stats and SRA uploads for MUXes.

    Scans recent runs for successful MUXes whose StatsSubmission or
    ArchiveSubmission flag is TODO, invokes the corresponding upload script
    for each, and writes the outcome (SUCCESS, or back to TODO on failure)
    into MongoDB.
    """
    # Both upload scripts are expected to live next to this script.
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning(
                        "mux_status is None or incomplete for run %s analysis %s."
                        " Requires fix in DB. Skipping entry for now.",
                        run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping SRA and STATS uploading",
                        mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with StatsSubmission %s and ArchiveSubmission %s",
                        analysis_id, run_number, mux_status['mux_id'],
                        mux_status.get('StatsSubmission', None),
                        mux_status.get('ArchiveSubmission', None))
                    continue
                # Call STATS upload
                #
                if mux_status.get('StatsSubmission', None) == "TODO":
                    logger.info(
                        "Stats upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    # Positional path to this MUX's StatsSubmission flag,
                    # e.g. "analysis.0.per_mux_status.1.StatsSubmission".
                    StatsSubmission = "analysis.{}.per_mux_status.{}.StatsSubmission".format(
                        analysis_count, mux_count)
                    stats_upload_script_cmd = [
                        stats_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        stats_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(stats_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        StatsSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(stats_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        # Failed upload is re-queued by resetting to TODO.
                        logger.fatal("Resetting to TODO")
                        StatsSubmission_status = "TODO"
                    try:
                        # NOTE(review): legacy pymongo db.update API.
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    StatsSubmission: StatsSubmission_status,
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        # NOTE(review): exits 0 on a fatal DB error —
                        # callers cannot detect the failure; confirm intent.
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1
                # Call FASTQ upload
                #
                if mux_status.get('ArchiveSubmission', None) == "TODO":
                    logger.info(
                        "SRA upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    ArchiveSubmission = "analysis.{}.per_mux_status.{}.ArchiveSubmission".format(
                        analysis_count, mux_count)
                    archive_upload_script_cmd = [
                        archive_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        archive_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(archive_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        ArchiveSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(archive_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Resetting to TODO")
                        ArchiveSubmission_status = "TODO"
                    #update mongoDB
                    try:
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    ArchiveSubmission: ArchiveSubmission_status
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def main():
    """Command-line entry point: delegate downstream analysis jobs.

    Collects successful MUXes flagged DownstreamSubmission=TODO from recent
    runs, builds per-library pipeline parameters, inserts job documents into
    MongoDB and marks the MUXes as submitted; on insert failure the partial
    state is rolled back and an alert email is sent.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    # run_list: run -> [(mux_id, out_dir), ...] for jobs to delegate.
    # mongo_db_ref: run -> [(mux_id, positional DB path, analysis_id), ...]
    # used later to flag DownstreamSubmission in the run document.
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        print(run_number)
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.", \
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info("MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                                mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission', None) == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            #Check if MUX has been analyzed more then 1 time successfully
            if len(mux_list_success) > 1:
                # NOTE(review): implicit string concatenation yields
                # "...Pleasecheck the results." (missing space) — confirm
                # and fix the message text separately.
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)
    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict = get_lib_details(run_num_flowcell, mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline_paramas_dict is empty for run num %s", run_num_flowcell)
            continue
        for lib, lib_info in pipeline_params_dict.items():
            # Collect the read-unit keys for this library into the
            # samples mapping expected by the pipeline.
            readunits_list = list()
            for outer_key in lib_info:
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
            lib_info['samples'] = {}
            lib_info['samples'][lib] = readunits_list
            if args.dry_run:
                logger.warning("Skipping job delegation for %s", \
                    lib)
                continue
            res = mongodb_insert_libjob(lib_info, connection)
            if not res:
                # Insert failed: alert, roll back this MUX's job records
                # and stop submitting for this run.
                logger.critical("Skipping rest of analysis job submission" \
                    "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s", \
                    lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id, \
                    lib_info.ctime, connection)
                break
        if not args.dry_run and update_status:
            # All inserts succeeded: flag each MUX as submitted in the
            # run document.
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                logger.info("Update mongoDb runComplete for %s and runnumber is %s" \
                    "and id is %s and analysis_id %s", run_num_flowcell, mux_id, \
                    insert_id, analysis_id)
                res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id, \
                    insert_id, connection)
                if not res:
                    logger.critical("Skipping rest of analysis job submission for %s" \
                        "from %s", mux_id, run_num_flowcell)
                    subject = "Downstream delegator failed job submission for {}" \
                        .format(mux_id)
                    if args.testing:
                        subject += " (testing)"
                    body = "Downstream delegator failed to insert job submission for" \
                        "{}".format(mux_id)
                    send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                    update_status = False
                    break
    connection.close()
def main():
    """Command-line entry point: record a bcl2fastq analysis status in MongoDB.

    Older variant of the status-update script (legacy db.update API, user
    check via getpass).  STARTED/SEQRUNFAILED push a new analysis entry onto
    the run document; SUCCESS/FAILED overwrite the matching entry in place.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID",
                        required=True,)
    parser.add_argument('-s', "--status",
                        help="Analysis status",
                        required=True,
                        choices=['STARTED', 'SUCCESS', 'FAILED', 'SEQRUNFAILED'])
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id", required=True)
    parser.add_argument('-o', "--out",
                        help="Analysis output directory")
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        # Deliberate soft skip for non-production users (exit 0).
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    run_number = args.runid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", run_number, args.status)

    if args.status in ["STARTED", "SEQRUNFAILED"]:
        try:
            if not args.dry_run:
                db.update({"run": run_number},
                          {"$push": {
                              "analysis": {
                                  "analysis_id": args.analysis_id,
                                  "user_name": user_name,
                                  "out_dir": args.out,
                                  "Status": args.status,
                              }}})
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            # FIX: exit non-zero on a fatal DB failure (was sys.exit(0),
            # which misreported success to callers; the newer variant of
            # this script exits 1 here).
            sys.exit(1)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # Positional operator `$` overwrites the matched analysis
                # entry with this analysis_id.
                db.update({"run": run_number,
                           'analysis.analysis_id': args.analysis_id},
                          {"$set": {
                              "analysis.$": {
                                  "analysis_id": args.analysis_id,
                                  "end_time": end_time,
                                  "user_name": user_name,
                                  "out_dir": args.out,
                                  "Status": args.status,
                              }}})
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            # FIX: exit non-zero on a fatal DB failure (was sys.exit(0)).
            sys.exit(1)
    else:
        raise ValueError(args.status)

    # close the connection to MongoDB
    connection.close()
def main():
    """Send bcl2fastq completion/failure e-mails for recent runs.

    CLI entry point: scans runcomplete records with an analysis entry inside
    the lookback window and, for each MUX whose e-mail has not yet been sent,
    mails a FAILED or SUCCESS notification and marks email_sent in MongoDB.
    Only runs as the production user; exits 0 otherwise.
    """
    # Sanity check: companion upload scripts must sit next to this script.
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo cursor; count() queries the server for the total.
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        # Positional indexes (analysis_count/mux_count) are needed to build
        # the dotted update path for email_sent below.
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    # Already notified for this MUX.
                    continue
                # for all others: send email and update db
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        # Without conf.yaml we cannot determine the requestor,
                        # so no mail is sent and email_sent stays unset.
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")# mail_to already set
                    if not args.testing and not is_devel_version():
                        # Production only: additionally notify the requestor.
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """Delegate MUX data transfers for recent runs.

    CLI entry point: fetches run records from MongoDB within the lookback
    window and, for each MUX not already being transferred, kicks off a data
    transfer to the selected site (NSCC or GIS). Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = "NSCC"
    parser.add_argument('-s', "--site", default=default,
                        help="site information (default = {})".format(default),
                        choices=['NSCC', 'GIS'])
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
    else:
        mail_to = '*****@*****.**'

    # run_records: iterable of {mux: mux_info} dicts; mux_info[0] is used for
    # logging — presumably the run id, TODO confirm against runs_from_db.
    run_records = runs_from_db(connection, args.testing, args.win)
    trigger = 0
    for run in run_records:
        for mux, mux_info in run.items():
            if args.dry_run:
                logger.warning("Skipping job delegation for %s from %s",
                               mux, mux_info[0])
                continue
            #Check if mux data is getting transferred
            find = check_mux_data_transfer_status(connection, mux_info)
            if find:
                # Transfer already in progress; leave it alone.
                continue
            res = start_data_transfer(connection, mux, mux_info, args.site, mail_to)
            if res:
                trigger = 1
        # Honour -1 only once a transfer has actually been started.
        if args.break_after_first and trigger == 1:
            logger.info("Stopping after first run")
            break

    # close the connection to MongoDB
    connection.close()
def main():
    """Delegate downstream analysis jobs for successfully demultiplexed MUXes.

    CLI entry point: collects MUXes whose bcl2fastq status is SUCCESS and
    whose DownstreamSubmission is TODO, builds per-library job documents from
    get_lib_details(), inserts them into the pipeline_runs collection and
    then flags the corresponding runcomplete entries. Failures roll back the
    inserted MUX jobs and notify by mail. Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    # run_list: run_number -> [(mux_id, out_dir), ...] candidates to submit.
    # mongo_db_ref: run_number -> [(mux_id, dotted-update-path, analysis_id)].
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.",
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                        mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                # Dotted path used later by mongodb_update_runcomplete to set
                # this exact per_mux_status entry's DownstreamSubmission.
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission') == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            #Check if MUX has been analyzed more then 1 time successfully
            if len(mux_list_success) > 1:
                # NOTE(review): "Please" and "check" concatenate without a
                # space in the mail body — preserved as-is (runtime string).
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(
                    mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)

    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict, mux_analysis_list = get_lib_details(run_num_flowcell,
                                                                  mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline params is empty for run num %s", run_num_flowcell)
            continue
        # Insert jobs into pipeline_runs collection
        for lib, lib_info in pipeline_params_dict.items():
            # Build the job document; 'readunits' is folded into
            # job['sample_cfg'] both as the raw dict and as rd_list['samples'].
            job = {}
            rd_list = {}
            job['sample_cfg'] = {}
            job['sample_cfg'] = {}
            readunits_list = list()
            rd_list['samples'] = {}
            for outer_key, outer_value in lib_info.items():
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
                    job['sample_cfg'].update({outer_key: outer_value})
                if outer_key == 'references_cfg':
                    job['references_cfg'] = {}
                    job['references_cfg'] = outer_value
                elif outer_key == 'cmdline':
                    job['cmdline'] = {}
                    job['cmdline'] = outer_value
                elif outer_key != 'readunits':
                    job.update({outer_key: outer_value})
                else:
                    # readunits branch: register this lib's readunit keys.
                    rd_list['samples'][lib] = readunits_list
                    job['sample_cfg'].update(rd_list)
            if args.dry_run:
                logger.warning("Skipping job delegation for %s",
                               lib)
                continue
            res = mongodb_insert_libjob(job, connection)
            if not res:
                # Insert failed: mail, roll back this MUX's jobs and stop
                # submitting for this run. NOTE(review): several of these
                # message pairs concatenate without a separating space.
                logger.critical("Skipping rest of analysis job submission"
                                "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s",
                               lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id,
                                      lib_info.ctime, connection)
                break
        # Update runcomplete collection for delegated jobs
        if not args.dry_run and update_status:
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                if mux_id in mux_analysis_list:
                    logger.info("Update mongoDb pipeline_runs for mux_id %s from run number %s"
                                "and analysis_id is %s", mux_id, run_num_flowcell, analysis_id)
                    res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id,
                                                     insert_id, connection)
                    if not res:
                        logger.critical("Skipping rest of analysis job submission for %s"
                                        "from %s", mux_id, run_num_flowcell)
                        subject = "Downstream delegator failed job submission for {}" \
                            .format(mux_id)
                        if args.testing:
                            subject += " (testing)"
                        body = "Downstream delegator failed to insert job submission for" \
                            "{}".format(mux_id)
                        send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                        update_status = False
                        break
    connection.close()
def main():
    """Drive pipeline_runs jobs through their lifecycle.

    CLI entry point: for each recent pipeline_runs document at this site,
    either starts a new analysis (no 'execution' dict yet), picks up a
    starter flag left by a started-but-unlogged job, checks a STARTED/RESTART
    job for completion, or leaves MANUAL/completed jobs alone.
    Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-n', "--dryrun", action='store_true',
        help="Don't actually update DB (best used in conjunction with -v -v)")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server. Don't do anything")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level (WARNING by default).
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Exiting")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    #LOGGER.info("Database connection established")
    dbcol = connection.gisds.pipeline_runs
    site = get_site()
    epoch_now, epoch_then = generate_window(args.win)
    cursor = dbcol.find({"ctime": {"$gt": epoch_then, "$lt": epoch_now},
                         "site": site})
    LOGGER.info("Looping through {} jobs".format(cursor.count()))
    for job in cursor:
        dbid = job['_id']
        # only set here to avoid code duplication below
        try:
            out_dir = job['execution']['out_dir']
        except KeyError:
            out_dir = None
        # no execution dict means start a new analysis
        if not job.get('execution'):
            LOGGER.info('Job {} to be started'.format(dbid))
            # determine out_dir and set in DB
            # out_dir_override will take precedence over generating out_dir
            # with the get_downstream_outdir function
            if job.get('out_dir_override'):
                out_dir = job.get('out_dir_override')
                if os.path.exists(out_dir):
                    # Overridden directory already exists: mark the job for
                    # manual intervention (unless dry-running) and stop.
                    mux = os.path.basename(out_dir)
                    if not args.dryrun:
                        LOGGER.critical(
                            "Analysis for {} already exists under {}. Please start the analysis manually"
                            .format(mux, out_dir))
                        res = dbcol.update_one(
                            {"_id": ObjectId(dbid)},
                            {"$set": {"execution.status": "MANUAL"}})
                        assert res.modified_count == 1, (
                            "Modified {} documents instead of 1".format(
                                res.modified_count))
                    sys.exit(1)
                #assert not os.path.exists(out_dir), ("Direcotry already exists {}").format(out_dir)
            else:
                out_dir = get_downstream_outdir(job['requestor'],
                                                job['pipeline_name'],
                                                job['pipeline_version'])
            # Note, since execution (key) exists, accidental double
            # starts are prevented even before start time etc is
            # logged via flagfiles. No active logging here so that
            # flag files logging just works.
            if args.dryrun:
                LOGGER.info("Skipping dry run option")
                continue
            status = start_cmd_execution(job, site, out_dir, args.testing)
            if status:
                res = dbcol.update_one(
                    {"_id": ObjectId(dbid)},
                    {"$set": {"execution.out_dir": out_dir}})
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
            else:
                LOGGER.warning("Job {} could not be started".format(dbid))
        elif job['execution'].get('status') == "MANUAL":
            # Needs human intervention; never touched automatically.
            continue
        elif list_starterflags(
                out_dir):  # out_dir cannot be none because it's part of execution dict
            LOGGER.info(
                'Job {} in {} started but not yet logged as such in DB'.format(
                    dbid, out_dir))
            matches = list_starterflags(out_dir)
            assert len(matches) == 1, (
                "Got several starter flags in {}".format(out_dir))
            sflag = StarterFlag(matches[0])
            assert sflag.dbid == str(dbid)
            set_started(dbcol, sflag.dbid, str(sflag.timestamp),
                        dryrun=args.dryrun)
            # Flag consumed; remove it so it is not picked up twice.
            os.unlink(sflag.filename)
        elif job['execution'].get('status') in ['STARTED', 'RESTART']:
            LOGGER.info(
                'Job %s in %s set as re|started so checking on completion',
                dbid, out_dir)
            set_completion_if(dbcol, dbid, out_dir, dryrun=args.dryrun)
        else:
            # job complete
            LOGGER.debug('Job %s in %s should be completed', dbid, out_dir)
    LOGGER.info("Successful program exit")
def main():
    """Trigger downstream BWA and RNAseq mapping pipelines for recent runs.

    CLI entry point: finds runcomplete records whose bcl2fastq analysis
    finished with SUCCESS inside the lookback window and, for every analysis
    output directory not yet processed (no config_casava-1.8.2.txt marker),
    generates a casava config file plus a generic samplesheet and submits the
    BWA and RNAseq mapping pipelines. Submission failures are mailed via
    send_status_mail and abort the script with exit code 1.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    # Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    for record in results:
        run_number = record['run']
        # Downstream analysis will not be initiated for Novogene (NG00*) runs
        if "NG00" in run_number:
            continue
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            # FIX: analysis_id was referenced in the send_status_mail error
            # paths below but never defined (NameError at runtime); take it
            # from the analysis record.
            analysis_id = analysis.get("analysis_id")
            # Check if bcl2fastq completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical(
                        "Following directory listed in DB doesn't exist: %s",
                        out_dir)
                    continue
                # Presence of the casava config marks an already-triggered dir
                if not os.path.exists(
                        os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    # generate config file
                    # FIX: subprocess.call() never raises CalledProcessError,
                    # so the previous try/except around it was dead code;
                    # check the return code explicitly instead. Also use a
                    # context manager so the file handle is not leaked.
                    config_cmd = [CONFIG, '-r', run_number]
                    with open(os.path.join(out_dir, "config_casava-1.8.2.txt"),
                              "w") as f:
                        retcode = subprocess.call(config_cmd,
                                                  stderr=subprocess.STDOUT,
                                                  stdout=f)
                    if retcode:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            retcode, ' '.join(config_cmd))
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # generic sample sheet
                    samplesheet_cmd = 'cd {} && {} -r {}'.format(
                        out_dir, SAMPLESHEET, run_number)
                    try:
                        _ = subprocess.check_output(samplesheet_cmd, shell=True)
                    except subprocess.CalledProcessError as e:
                        # FIX: was ' '.join(samplesheet_cmd), which joins the
                        # *characters* of the command string with spaces.
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, samplesheet_cmd)
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # Generate and submit BWA and RNAseq mapping pipelines
                    _, runid, _ = get_machine_run_flowcell_id(run_number)
                    generic_samplesheet = os.path.join(
                        out_dir, runid + "_SampleSheet.csv")
                    # NOTE: generic_samplesheet is already absolute, so the
                    # extra os.path.join(out_dir, ...) is a no-op; kept so the
                    # commands are byte-identical to before.
                    if os.path.exists(os.path.join(out_dir, generic_samplesheet)):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(dirs, BWA, run_number, out_dir,
                                    os.path.join(out_dir, generic_samplesheet),
                                    os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(RNA, run_number, out_dir,
                                    os.path.join(out_dir, generic_samplesheet),
                                    os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            # Remove config txt so this dir is retried next time
                            os.remove(os.path.join(out_dir,
                                                   "config_casava-1.8.2.txt"))
                        else:
                            try:
                                # analysisReport into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG),
                                          'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal(
                                    "The following command failed with return code %s: %s",
                                    e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                # send_status_mail
                                send_status_mail(
                                    PIPELINE_NAME, False, analysis_id,
                                    os.path.join(out_dir, LOG_DIR_REL,
                                                 "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1
                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id,
                                         os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s",
                             run_number, out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def _format_and_print_job(job):
    """Convert a raw accounting job dict to human-readable form and print it.

    Mutates *job* in place: cpu and ruWallClock (seconds) become HhMmSs
    strings, maxvmem (bytes) becomes a "<n> GB" string, and submissionTime
    (epoch seconds) becomes an ISO timestamp with ':' replaced by '-'.
    """
    job["cpu"] = strftime("%Hh%Mm%Ss", gmtime(job["cpu"]))
    job["maxvmem"] = str(job["maxvmem"] / pow(2, 30)) + " GB"
    job["ruWallClock"] = strftime("%Hh%Mm%Ss", gmtime(job["ruWallClock"]))
    job["submissionTime"] = str(
        datetime.fromtimestamp(
            job["submissionTime"]).isoformat()).replace(":", "-")
    PrettyPrinter(indent=2).pprint(job)


def main():
    """
    Main function

    Queries the accountinglogs collection on the production MongoDB server
    and pretty-prints jobs filtered by job number and/or owner. The three
    branches differ only in the query and the per-job filter; the (identical)
    formatting code is factored out into _format_and_print_job.
    """
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-j", "--jobNo", nargs="*",
                          help="filter records by jobNo of jobs")
    instance.add_argument("-o", "--owner", nargs="*",
                          help="filter records by owner of jobs")
    args = instance.parse_args()

    # Owner-only filter
    if (not args.jobNo) and (args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.owner": {"$in": args.owner}}):
            # A document matches if ANY of its jobs matches, so re-filter
            # per job before printing.
            for job in document["jobs"]:
                if job["owner"] in args.owner:
                    _format_and_print_job(job)

    # Job-number-only filter
    if (args.jobNo) and (not args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo}}):
            for job in document["jobs"]:
                if job["jobNo"] in args.jobNo:
                    _format_and_print_job(job)

    # Both filters: job must match jobNo AND owner
    if args.jobNo and args.owner:
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo},
                 "jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if (job["jobNo"] in args.jobNo) and (job["owner"] in args.owner):
                    _format_and_print_job(job)