def get_started_outdirs_from_db(testing=True, win=None):
    """Yield the out_dir of every analysis on runs whose status is STARTED.

    Args:
        testing: use the MongoDB test server when True.
        win: optional look-back window in days; when given, only runs whose
            timestamp falls inside the window are considered.

    Exits the process (status 1) if no DB connection can be established.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if win:
        epoch_present, epoch_back = generate_window(win)
        results = db.find({
            "analysis.Status": "STARTED",
            "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
        })
    else:
        results = db.find({"analysis.Status": "STARTED"})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs", results.count())
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def instantiate_mongo(testing):
    """Return a handle on the gisds.runcomplete collection.

    testing == True selects the test server; False selects production.
    """
    conn = mongodb_conn(testing)
    return conn.gisds.runcomplete
def _format_and_print_job(job):
    """Pretty-print one accounting-log job with human-readable fields.

    Mutates *job* in place: cpu / ruWallClock seconds become HhMmSs
    strings, maxvmem bytes become GB, and submissionTime epoch becomes an
    ISO timestamp with ':' replaced by '-' (filesystem-safe).
    """
    job["cpu"] = strftime("%Hh%Mm%Ss", gmtime(job["cpu"]))
    job["maxvmem"] = str(job["maxvmem"] / pow(2, 30)) + " GB"
    job["ruWallClock"] = strftime("%Hh%Mm%Ss", gmtime(job["ruWallClock"]))
    job["submissionTime"] = str(datetime.fromtimestamp(
        job["submissionTime"]).isoformat()).replace(":", "-")
    PrettyPrinter(indent=2).pprint(job)


def main():
    """Filter accounting-log jobs by jobNo and/or owner and pretty-print them.

    The identical per-job formatting block was previously copy-pasted into
    all three filter branches; it now lives in _format_and_print_job.
    """
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-j", "--jobNo", nargs="*",
                          help="filter records by jobNo of jobs")
    instance.add_argument("-o", "--owner", nargs="*",
                          help="filter records by owner of jobs")
    args = instance.parse_args()
    if (not args.jobNo) and (args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if job["owner"] in args.owner:
                    _format_and_print_job(job)
    if (args.jobNo) and (not args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo}}):
            for job in document["jobs"]:
                if job["jobNo"] in args.jobNo:
                    _format_and_print_job(job)
    if args.jobNo and args.owner:
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo},
                 "jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if (job["jobNo"] in args.jobNo) and (job["owner"] in args.owner):
                    _format_and_print_job(job)
def check_mongo():
    """Scan recent runcomplete records and build a warning report string.

    Two passes over the [MAX_WINDOW .. MAX_RUN]-days-ago window:
    1. runs STARTED whose latest analysis is not SUCCESS,
    2. runs with no analysis at all.
    Each section lists the offending run ids followed by a count line.
    Returns the accumulated report ("" when nothing is wrong).
    """
    warnings = ""
    epoch_present, epoch_window = generate_window(MAX_WINDOW)
    epoch_present, epoch_started = generate_window(MAX_RUN)
    del epoch_present  # only the two look-back bounds are needed
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis.Status"] = "STARTED"
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        # only the most recent analysis entry decides the run's fate
        if record["analysis"][-1]["Status"] != "SUCCESS":
            warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                         str(record["run"]) + "\n")
            count_warnings += 1
    if count_warnings > 0:
        warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis"] = {"$exists": False}
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(record["run"]) + "\n")
        count_warnings += 1
    if count_warnings > 0:
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    return warnings
def runs_from_db(testing=True, win=34):
    """Yield STARTED runs from the pipeline_runs collection.

    Args:
        testing: use the MongoDB test server when True.
        win: look-back window in days.

    Yields:
        (record _id, out_dir, run start_time) tuples for every run entry
        with status STARTED.

    BUGFIX: the ctime query previously used hard-coded debug timestamps
    (1470127013000 .. 1470127093000), making the computed window — and the
    `win` parameter — a complete no-op. It now uses the generated window.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"runs": {"$exists": True},
                       "ctime": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        for runs in record['runs']:
            if runs["status"] == "STARTED":
                test = (record['_id'], record['out_dir'], runs['start_time'])
                yield test
def main():
    """Look up a MUX id and print the last non-FAILED out_dir recorded for it."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-m", "--mux", required=True,
                          help="MUX_ID to generate OUT_DIR")
    args = instance.parse_args()
    query = {"analysis.per_mux_status.mux_id": args.mux}
    for document in mongodb_conn(False).gisds.runcomplete.find(query):
        if "analysis" not in document:
            continue
        last_out_dir = ""
        for analysis in document["analysis"]:
            if analysis["Status"].upper() == "FAILED":
                continue
            if "per_mux_status" not in analysis:
                continue
            for mux in analysis["per_mux_status"]:
                if mux["mux_id"] == args.mux:
                    # later analyses overwrite earlier ones -> "last" wins
                    last_out_dir = analysis["out_dir"].replace("//", "/")
        print(last_out_dir)
def get_outdirs_from_db(testing=True, win=14):
    """Yield out_dir of every analysis of runs analysed in the last *win* days.

    Args:
        testing: use the MongoDB test server when True.
        win: look-back window in days.

    Exits the process (status 1) if no DB connection can be established.
    """
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def main():
    """main function

    Parses the look-back window / verbosity options, connects to the
    runcomplete collection and hands it to runs_from_db together with the
    notification addresses.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    default = 34
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    # devel/testing mails go to a person, production mails to the team list
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
        ccaddr = None
    else:
        mail_to = '*****@*****.**'
        ccaddr = "rpd"
    runs_from_db(db, mail_to, ccaddr, args.win)
def main():
    """Look up a MUX id and print the last non-FAILED out_dir recorded for it."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-m", "--mux", required=True,
                          help="MUX_ID to generate OUT_DIR")
    args = instance.parse_args()
    for document in mongodb_conn(False).gisds.runcomplete.find(
            {"analysis.per_mux_status.mux_id": args.mux}):
        if "analysis" in document:
            last_out_dir = ""
            for analysis in document["analysis"]:
                if analysis["Status"].upper() != "FAILED":
                    if "per_mux_status" in analysis:
                        for mux in analysis["per_mux_status"]:
                            if mux["mux_id"] == args.mux:
                                # normalise accidental double slashes in path
                                last_out_dir = analysis["out_dir"].replace(
                                    "//", "/")
            print(last_out_dir)
def main():
    """Resolve a MUX id to its OUT_DIR (-d) and/or to its libraries (-l)."""
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-d", "--dir",
                          help="specify one MUX_ID to generate OUT_DIR")
    instance.add_argument("-l", "--lib",
                          help="specify one MUX_ID to generate LIB")
    args = instance.parse_args()
    if args.dir:
        cursor = mongodb_conn(False).gisds.runcomplete.find(
            {"analysis.per_mux_status.mux_id": args.dir})
        for document in cursor:
            if "analysis" not in document:
                continue
            last_out_dir = ""
            for analysis in document["analysis"]:
                if analysis["Status"].upper() == "FAILED":
                    continue
                if "per_mux_status" not in analysis:
                    continue
                for mux in analysis["per_mux_status"]:
                    if mux["mux_id"] == args.dir:
                        # later analyses overwrite earlier ones -> "last" wins
                        last_out_dir = analysis["out_dir"].replace("//", "/")
            print(last_out_dir)
    if args.lib:
        for library in mux_to_lib(args.lib, testing=False):
            print(library)
def main():
    """main function

    Pushes a per_mux_status entry onto the matching analysis of a run.
    BUGFIX: the final else branch raised ValueError(args.status), but no
    --status option exists (AttributeError); it now reports args.mux_status.
    The three near-identical $push branches are consolidated into one
    update with a per-status payload.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID", required=True,)
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id / start time", required=True)
    parser.add_argument('-i', "--mux-id", help="mux-id", required=True)
    parser.add_argument('-d', "--mux-dir", help="mux-dir", required=True)
    parser.add_argument('-s', "--mux-status", help="Analysis status", required=True,
                        choices=['SUCCESS', 'FAILED', 'NOARCHIVE'])
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    run_number = args.runid.rstrip()
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    # Per-status payload pushed into analysis.$.per_mux_status (positional $
    # matches the analysis selected by analysis.analysis_id below).
    if args.mux_status == "SUCCESS":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "TODO",
                   "ArchiveSubmission": "TODO",
                   "DownstreamSubmission": "TODO",
                   "email_sent": False}
    elif args.mux_status == "FAILED":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "email_sent": False}
    elif args.mux_status == "NOARCHIVE":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "NOARCHIVE",
                   "ArchiveSubmission": "NOARCHIVE",
                   "DownstreamSubmission": "TODO",
                   "email_sent": True}
    else:
        # unreachable given argparse choices; was args.status (AttributeError)
        raise ValueError(args.mux_status)
    try:
        if not args.dry_run:
            db.update({"run": run_number,
                       'analysis.analysis_id': args.analysis_id},
                      {"$push": {"analysis.$.per_mux_status": mux_doc}})
    except pymongo.errors.OperationFailure:
        logger.fatal("mongoDB OperationFailure")
        sys.exit(0)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Creates tar balls for eligible runcomplete records and deletes expired
    tar balls, honouring --dryrun and --break-after-first.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dryrun", action='store_true',
                        help="Don't run anything")
    default = 84
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    default = 60
    parser.add_argument(
        '-d', '--days', type=int, default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    default = 60
    parser.add_argument(
        '-r', '--tardays', type=int, default=default,
        help="tar ball not older than days(default {})".format(default))
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    # devel/testing mails go to a person, production to the team alias
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    # only non-NG00 runs, not yet raw-deleted, inside the window
    results = db.find({
        "run": {"$regex": "^((?!NG00).)*$"},
        "raw-delete": {"$exists": False},
        "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
    })
    LOGGER.info("Looping through %s jobs", results.count())
    trigger = 0
    for record in results:
        try:
            run_num = record['run']
        except KeyError:
            run_num = None
        if not record.get('deletion'):
            #Check run_status
            res = check_run_status(record, args.days)
            if res:
                LOGGER.info("Create tar ball %s ", run_num)
                if args.dryrun:
                    LOGGER.warning("Skipping Create tar ball %s ", run_num)
                    continue
                create_run_tar(db, run_num)
                trigger = 1
        elif record['deletion'].get('tar'):
            res = check_tar_status_and_delete(db, record, args.tardays,
                                              dryrun=args.dryrun)
            if res:
                trigger = 1
        # stop after the first record that actually did something
        if args.break_after_first and trigger == 1:
            LOGGER.info("Stopping after first run")
            break
def main():
    """main function

    Records the status of a pipeline run in the pipeline_runs collection:
    STARTED pushes a new runs entry; SUCCESS/FAILED stamps an end_time on
    the entry matched by start_time.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-r", "--DBid", help="DB Id", required=True)
    parser.add_argument(
        "-s", "--status", help="Analysis status", required=True,
        choices=["STARTED", "SUCCESS", "FAILED", "ORPHAN"]
    )
    parser.add_argument("-st", "--start-time", help="Start time", required=True)
    parser.add_argument("-o", "--out", help="Analysis output directory")
    parser.add_argument("-t", "--test_server", action="store_true")
    parser.add_argument("-n", "--dry-run", action="store_true", help="Dry run")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="Increase verbosity")
    parser.add_argument("-q", "--quiet", action="count", default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    _id = args.DBid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.pipeline_runs
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", _id, args.status)
    if args.status in ["STARTED"]:
        try:
            if not args.dry_run:
                db.update(
                    {"_id": ObjectId(_id)},
                    {"$push": {"runs": {"start_time": args.start_time,
                                        "status": args.status}}}
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # positional $ replaces the runs entry matched by start_time
                db.update(
                    {"_id": ObjectId(_id), "runs.start_time": args.start_time},
                    {"$set": {"runs.$": {"start_time": args.start_time,
                                         "end_time": end_time,
                                         "status": args.status}}},
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    else:
        # NOTE(review): ORPHAN is accepted by argparse but lands here — confirm
        raise ValueError(args.status)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Pushes a per_mux_status entry onto the matching analysis of a run.
    BUGFIX: the final else branch raised ValueError(args.status), but no
    --status option exists (AttributeError); it now reports args.mux_status.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID", required=True,)
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id / start time", required=True)
    parser.add_argument('-i', "--mux-id", help="mux-id", required=True)
    parser.add_argument('-d', "--mux-dir", help="mux-dir", required=True)
    parser.add_argument('-s', "--mux-status", help="Analysis status", required=True,
                        choices=['SUCCESS', 'FAILED', 'NOARCHIVE'])
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    run_number = args.runid.rstrip()
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    # Build the per-status payload once; a single $push then updates the
    # analysis selected by analysis.analysis_id (positional $ operator).
    if args.mux_status == "SUCCESS":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "TODO",
                   "ArchiveSubmission": "TODO",
                   "DownstreamSubmission": "TODO",
                   "email_sent": False}
    elif args.mux_status == "FAILED":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "email_sent": False}
    elif args.mux_status == "NOARCHIVE":
        mux_doc = {"mux_id": args.mux_id,
                   "mux_dir": args.mux_dir,
                   "Status": args.mux_status,
                   "StatsSubmission": "NOARCHIVE",
                   "ArchiveSubmission": "NOARCHIVE",
                   "DownstreamSubmission": "TODO",
                   "email_sent": True}
    else:
        # unreachable given argparse choices; was args.status (AttributeError)
        raise ValueError(args.mux_status)
    try:
        if not args.dry_run:
            db.update({"run": run_number,
                       'analysis.analysis_id': args.analysis_id},
                      {"$push": {"analysis.$.per_mux_status": mux_doc}})
    except pymongo.errors.OperationFailure:
        logger.fatal("mongoDB OperationFailure")
        sys.exit(0)
    # close the connection to MongoDB
    connection.close()
def main():
    """main function

    Finds runcomplete records without an analysis inside the window and
    launches the bcl2fastq wrapper for each, aborting the loop on qmaster
    (commlib) errors.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-e', "--wrapper-args", nargs="*",
                        help="Extra arguments for bcl2fastq wrapper"
                        " (prefix leading dashes with X, e.g. X-n for -n)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    # wrapper lives next to this script
    bcl2fastq_wrapper = os.path.join(os.path.dirname(sys.argv[0]), "bcl2fastq.py")
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    # db query for jobs that are yet to be analysed in the epoch window
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "analysis": {"$exists": 0},
        "timestamp": {"$gt": epoch_back, "$lt": epoch_present}
    })
    # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len()
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("Processing record %s", record)
        cmd = [bcl2fastq_wrapper, "-r", run_number, "-v"]
        if args.testing:
            cmd.append("-t")
        if args.wrapper_args:
            # X-prefix protects wrapper flags from argparse; strip it here
            cmd.extend([x.lstrip('X') for x in args.wrapper_args])
        if args.dry_run:
            logger.warning("Skipped following run: %s", ' '.join(cmd))
            continue
        else:
            try:
                logger.info("Executing: %s", ' '.join(cmd))
                res = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                if res:
                    logger.info("bcl2fastq wrapper returned:\n%s",
                                res.decode().rstrip())
            except subprocess.CalledProcessError as e:
                logger.critical(
                    "The following command failed with"
                    " return code %s: %s", e.returncode, ' '.join(cmd))
                logger.critical("Full error message was: %s", e.stdout)
                if 'commlib error' in e.stdout.decode():
                    # qmaster down -> no point trying further runs
                    logger.critical(
                        "Looks like a qmaster problem (commlib error). Exiting"
                    )
                    break
                else:
                    logger.critical("Will keep going")
                    # continue so that a failed run doesn't count,
                    # i.e. args.break_after_first shouldn't be trigger
                    continue
        if args.break_after_first:
            logger.info("Stopping after first sequencing run")
            break
    # close the connection to MongoDB
    connection.close()
    logger.info("Successful program exit")
def main():
    """main function

    Finds pipeline_runs records for the given site that have no run yet
    (inside the ctime window) and starts an analysis for each.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-s', "--site", help="site information")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help=
        "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    # default site when none given on the command line
    if not args.site:
        site = 'NSCC'
    else:
        site = args.site
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({
        "run": {"$exists": False},
        "site": site,
        "ctime": {"$gt": epoch_back, "$lt": epoch_present}
    })
    logger.info("Found %s runs to start analysis", results.count())
    for record in results:
        start_analysis(record, args.testing, args.dry_run)
def main():
    """main function

    Triggers downstream mapping pipelines (BWA, RNAseq) for every run whose
    bcl2fastq analysis finished with SUCCESS inside the window.

    BUGFIXES:
    - `analysis_id` was referenced in both send_status_mail calls without
      ever being defined (NameError); it is now taken from the analysis
      record (assumes each analysis dict carries analysis_id — TODO confirm).
    - removed redundant `analysis = record['analysis']` that was immediately
      shadowed by the loop variable.
    - `' '.join(cmd)` was applied to a *string*, spacing out every character
      in the failure log; the command is now logged directly.
    - dropped no-op `.format()` calls on constant filenames.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # script -vv -> DEBUG ... script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    #Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            analysis_id = analysis.get("analysis_id")
            #Check if bcl2Fastq is completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                    continue
                #Check if downstream analysis has been started
                if not os.path.exists(os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    #generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    try:
                        f = open(os.path.join(out_dir, "config_casava-1.8.2.txt"), "w")
                        _ = subprocess.call(config_cmd, stderr=subprocess.STDOUT, stdout=f)
                    except subprocess.CalledProcessError as e:
                        logger.fatal("The following command failed with return code %s: %s",
                                     e.returncode, ' '.join(config_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #Generate and Submit BWA and RNAseq mapping pipeline
                    if os.path.exists(os.path.join(out_dir, "samplesheet.csv")):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            dirs, BWA, run_number, out_dir,
                            os.path.join(out_dir, "samplesheet.csv"),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            RNA, run_number, out_dir,
                            os.path.join(out_dir, "samplesheet.csv"),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            #Remove config txt
                            os.remove(os.path.join(out_dir, "config_casava-1.8.2.txt"))
                        else:
                            try:
                                #ananlysisReport into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG), 'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                # cmd is a shell string, log it as-is
                                logger.fatal("The following command failed with return code %s: %s",
                                             e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                #send_status_mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id,
                                                 os.path.join(out_dir, LOG_DIR_REL,
                                                              "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1
                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id,
                                         os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number, out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def main():
    """main function

    Records bcl2fastq analysis status in runcomplete: STARTED/SEQRUNFAILED
    pushes a new analysis entry; SUCCESS/FAILED replaces the entry matched
    by analysis_id and stamps an end_time. Each update asserts exactly one
    document was modified.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-r', "--runid",
        help="Run ID plus flowcell ID", required=True, )
    parser.add_argument(
        '-s', "--status",
        help="Analysis status", required=True,
        choices=['STARTED', 'SUCCESS', 'FAILED', 'SEQRUNFAILED', 'NON-BCL'])
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id", required=True)
    parser.add_argument('-o', "--out", help="Analysis output directory")
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()
    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    user_name = "userrig"
    run_number = args.runid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", run_number, args.status)
    if args.status in ["STARTED", "SEQRUNFAILED"]:
        try:
            if not args.dry_run:
                res = db.update_one({"run": run_number}, {
                    "$push": {
                        "analysis": {
                            "analysis_id": args.analysis_id,
                            "user_name": user_name,
                            "out_dir": args.out,
                            "Status": args.status,
                        }
                    }
                })
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
        except (pymongo.errors.OperationFailure, AssertionError) as e:
            logger.fatal(
                "MongoDB update failure while setting run %s analysis_id %s to %s",
                run_number, args.analysis_id, args.status)
            sys.exit(1)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # positional $ replaces the analysis entry matched by analysis_id
                res = db.update_one(
                    {
                        "run": run_number,
                        'analysis.analysis_id': args.analysis_id
                    }, {
                        "$set": {
                            "analysis.$": {
                                "analysis_id": args.analysis_id,
                                "end_time": end_time,
                                "user_name": user_name,
                                "out_dir": args.out,
                                "Status": args.status,
                            }
                        }
                    })
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
        except (pymongo.errors.OperationFailure, AssertionError) as e:
            logger.fatal(
                "MongoDB update failure while setting run %s analysis_id %s to %s",
                run_number, args.analysis_id, args.status)
            sys.exit(1)
    else:
        # NOTE(review): NON-BCL is accepted by argparse but lands here — confirm
        raise ValueError(args.status)
    # close the connection to MongoDB
    connection.close()
def main():
    """Command-line entry point: purge old bcl2fastq analysis directories.

    Looks up recent runs in MongoDB (window/age controlled by --win and
    --days) and calls purge() on each; supports dry-run and stop-after-first
    modes for testing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = 34
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    default = 75
    parser.add_argument(
        '-d', '--days', type=int, default=default,
        help="Bcl analysis not older than days(default {})".format(default))
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete

    # Mail recipient: individual account on devel/testing, group otherwise.
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'

    run_records = runs_from_db(db, args.days, args.win)
    for run in run_records:
        if args.dry_run:
            LOGGER.info("Skipping dryrun option %s", run)
            continue
        purge(db, run, mail_to)
        if args.break_after_first:
            LOGGER.info("Stopping after first sequencing run")
            break
def main():
    """Command-line entry point: run QC checks on completed bcl2fastq runs.

    Finds recent runs whose analysis succeeded but has no QC_status yet,
    runs the companion bcl2fastq_qc.py script on each output directory,
    stores the QC verdict back into MongoDB and writes the QC output to a
    file inside the output directory.
    """
    # The QC script is expected to live next to this script.
    bcl2fastq_qc_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl2fastq_qc.py"))
    assert os.path.exists(bcl2fastq_qc_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send email on detected failures")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping DB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    # Only runs with a successful analysis and no QC verdict yet.
    results = db.find({"analysis.Status": "SUCCESS",
                       "analysis.QC_status": {"$exists": 0},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        # NOTE(review): `analysis` is assigned here and immediately shadowed
        # by the loop variable below; this first assignment looks redundant.
        analysis = record['analysis']
        #for analysis in record['analysis']:
        for (analysis_count, analysis) in enumerate(record['analysis']):
            out_dir = analysis["out_dir"]
            analysis_id = analysis['analysis_id']
            # NOTE(review): `status` is reassigned below to the subprocess
            # output; this initial value is only read via analysis['Status'].
            status = analysis['Status']
            #Check if bcl2Fastq is completed successfully
            if analysis['Status'] != "SUCCESS":
                logger.info("Analysis is not completed successfully under %s", out_dir)
                continue
            if not os.path.exists(out_dir):
                logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                continue
            # Separate output file name for test server so production files
            # are never clobbered from a testing instance.
            if args.testing:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.test.txt")
            else:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.txt")
            if os.path.exists(bcl2fastq_qc_out):
                logger.critical("Refusing to overwrite existing file %s. Skipping QC check", bcl2fastq_qc_out)
                continue
            bcl2fastq_qc_cmd = [bcl2fastq_qc_script, '-d', out_dir]
            if args.no_mail:
                bcl2fastq_qc_cmd.append("--no-mail")
            if args.dry_run:
                logger.warning("Skipped following run: %s", out_dir)
                continue
            try:
                # Positional path into the analysis array for this entry,
                # e.g. "analysis.0.QC_status".
                QC_status = "analysis.{}.QC_status".format(analysis_count)
                status = subprocess.check_output(bcl2fastq_qc_cmd, stderr=subprocess.STDOUT)
                # NOTE(review): db.update is the legacy pymongo API
                # (deprecated in pymongo 3, removed in 4) — consider
                # update_one when the pymongo version is bumped.
                if "QC_FAILED" in str(status):
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "FAILED"}})
                    logger.info("Demux QC failed for run: %s", run_number)
                else:
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "SUCCESS"}})
                    logger.info("Demux QC SUCCESS for run: %s", run_number)
                # Persist the raw QC output alongside the run results.
                with open(bcl2fastq_qc_out, 'w') as fh:
                    fh.write(status.decode())
            except subprocess.CalledProcessError as e:
                logger.fatal("The following command failed with return code %s: %s",
                             e.returncode, ' '.join(bcl2fastq_qc_cmd))
                logger.fatal("Output: %s", e.output.decode())
                logger.fatal("Exiting")
    connection.close()
def main():
    """Command-line entry point: email bcl2fastq per-MUX results.

    Scans recent runs with an analysis entry, and for every MUX whose status
    email has not yet been sent, mails a FAILED notice (with log location) or
    a SUCCESS notice (with summary URL and FastQ location), then flags the
    MUX as notified in MongoDB.
    """
    # NOTE(review): these two upload-script paths are computed and asserted
    # but never used in this function — presumably copied from a sibling
    # script; confirm before removing.
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())

    # Mail recipient: individual account on devel/testing, group otherwise.
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with email_sent %s",
                        analysis_id, run_number, mux_status['mux_id'],
                        mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    continue
                # for all others: send email and update db
                # Positional path to this MUX's email_sent flag, e.g.
                # "analysis.0.per_mux_status.1.email_sent".
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(
                        mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(
                        out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(
                        os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal(
                            "conf info '%s' does not exist"
                            " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")  # mail_to already set
                    # In full production, additionally notify the requestor
                    # recorded in the run's conf.yaml.
                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """Command-line entry point: generate and mail a bcl2fastq status report.

    Collects all runs in the lookback window, orders them by timestamp, and
    builds a plain-text report of failed stats/archive submissions, failed
    analyses, and analyses stuck in STARTED for more than three days; the
    report is emailed (or printed with --no-mail).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n', "--no-mail", action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # Map timestamp -> run so the report can be ordered chronologically.
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():  # v is run
        results = db.find({"run": v})
        for record in results:
            if not 'analysis' in record:
                continue
            # Only the most recent analysis attempt is reported.
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue
            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get(
                                'Status') == "SUCCESS":  # FIXME what if key is missing?
                            mux_id = d['mux_id']
                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)
            elif status == 'STARTED':
                # Flag analyses that have been in STARTED for > 3 days.
                analysis_id = last_analysis.get("analysis_id")
                # analysis_id doubles as an ISO start time; +08:00 is the
                # local (SGT) offset.
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id + "+08:00")
                # NOTE(review): `timestamp` here is whatever value was left
                # by the *first* result loop (the last record scanned), not
                # necessarily run v's timestamp — looks like a latent bug;
                # confirm against the schema before relying on this delta.
                run_completion_time = timestamp / 1000
                rd = relative_epoch_time(run_completion_time, analysis_epoch_time)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"
    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print(
            "Skipping sending of email with subject '{}' and following body:".
            format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
def main():
    """Command-line entry point: generate and mail a bcl2fastq status report.

    Variant of the report generator that computes the STARTED-analysis age
    inline with dateutil.relativedelta instead of a helper.  Collects runs in
    the lookback window and reports failed submissions, failed analyses and
    long-running STARTED analyses by mail (or console with --no-mail).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('-n', "--no-mail", action='store_true',
                        help="Don't mail. Just print to console")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # Map timestamp -> run so the report can be ordered chronologically.
    runs = {}
    extra_text = ""
    for record in results:
        run_number = record['run']
        timestamp = record['timestamp']
        runs[timestamp] = run_number
    od = collections.OrderedDict(sorted(runs.items()))
    logger.info("Found %s runs", results.count())
    extra_text = "Found {} runs. \n".format(results.count())
    for _, v in od.items():  # v is run
        results = db.find({"run": v})
        for record in results:
            if not 'analysis' in record:
                continue
            # Only the most recent analysis attempt is reported.
            last_analysis = record['analysis'][-1]
            status = last_analysis.get("Status")
            if not status:
                continue
            if status == 'SUCCESS':
                if last_analysis.get("per_mux_status"):
                    mux = last_analysis.get("per_mux_status")
                    for d in mux:
                        if d is None:
                            logger.warning("Skipping empty per_mux_status for run %s." \
                                "Needs fix in DB", v)
                            continue
                        if d.get('Status') == "SUCCESS":  # FIXME what if key is missing?
                            mux_id = d['mux_id']
                            stats_submission = d['StatsSubmission']
                            if stats_submission == "FAILED":
                                extra_text += "StatsSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
                            archive_submission = d['ArchiveSubmission']
                            if archive_submission == "FAILED":
                                extra_text += "ArchiveSubmission for mux_id {} from run {} " \
                                    "has FAILED and out_dir is {} \n" \
                                    .format(mux_id, v, last_analysis.get("out_dir"))
                                extra_text += "\n"
            elif status == 'FAILED':
                extra_text += "Analysis for run {} has failed. \n".format(v)
                extra_text += "Analysis_id is {} and out_dir is {} \n" \
                    .format(last_analysis.get("analysis_id"), \
                    last_analysis.get("out_dir"))
                extra_text += "\n"
                extra_text += "---------------------------------------------------\n"
                logger.info("Analysis for run %s has failed ", v)
            elif status == 'STARTED':
                # Flag analyses that have been in STARTED for > 3 days.
                analysis_id = last_analysis.get("analysis_id")
                # analysis_id doubles as an ISO start time; +08:00 is the
                # local (SGT) offset.
                analysis_epoch_time = isoformat_to_epoch_time(analysis_id+"+08:00")
                # NOTE(review): `timestamp` here is the leftover value from
                # the first result loop (last record scanned), not
                # necessarily run v's timestamp — looks like a latent bug;
                # confirm before relying on this delta.
                run_completion_time = timestamp/1000
                dt1 = datetime.datetime.fromtimestamp(run_completion_time)
                dt2 = datetime.datetime.fromtimestamp(analysis_epoch_time)
                rd = dateutil.relativedelta.relativedelta(dt1, dt2)
                if rd.days > 3:
                    extra_text += "Analysis for run {} was started {} days ago. "\
                        "Please check. \n".format(v, rd.days)
                    extra_text += "Analysis_id is {} and out_dir is {} \n" \
                        .format(last_analysis.get("analysis_id"), \
                        last_analysis.get("out_dir"))
                    extra_text += "\n"
                    extra_text += "---------------------------------------------------\n"
    extra_text += "Report generation is completed"
    subject = "Report generation for bcl2fastq"
    if args.testing:
        subject = "Testing:" + subject
    if args.no_mail:
        print("Skipping sending of email with subject '{}' and following body:".format(subject))
        print(extra_text)
    else:
        send_mail(subject, extra_text)
    logger.info("Report generation is completed")
def main():
    """Command-line entry point: trigger stats and SRA uploads for MUXes.

    Scans recent runs for successful MUXes whose StatsSubmission or
    ArchiveSubmission flag is TODO, invokes the corresponding upload script
    for each, and writes the outcome (SUCCESS, or back to TODO on failure)
    into MongoDB.
    """
    # Both upload scripts are expected to live next to this script.
    stats_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({
        "analysis": {
            "$exists": True
        },
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning(
                        "mux_status is None or incomplete for run %s analysis %s."
                        " Requires fix in DB. Skipping entry for now.",
                        run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping SRA and STATS uploading",
                        mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if args.dry_run:
                    logger.warning(
                        "Skipping analysis %s run %s MUX %s"
                        " with StatsSubmission %s and ArchiveSubmission %s",
                        analysis_id, run_number, mux_status['mux_id'],
                        mux_status.get('StatsSubmission', None),
                        mux_status.get('ArchiveSubmission', None))
                    continue
                # Call STATS upload
                #
                if mux_status.get('StatsSubmission', None) == "TODO":
                    logger.info(
                        "Stats upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    # Positional path to this MUX's StatsSubmission flag,
                    # e.g. "analysis.0.per_mux_status.1.StatsSubmission".
                    StatsSubmission = "analysis.{}.per_mux_status.{}.StatsSubmission".format(
                        analysis_count, mux_count)
                    stats_upload_script_cmd = [
                        stats_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        stats_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(stats_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        StatsSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(stats_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        # Failed upload is re-queued by resetting to TODO.
                        logger.fatal("Resetting to TODO")
                        StatsSubmission_status = "TODO"
                    try:
                        # NOTE(review): legacy pymongo db.update API.
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    StatsSubmission: StatsSubmission_status,
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        # NOTE(review): exits 0 on a fatal DB error —
                        # callers cannot detect the failure; confirm intent.
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1
                # Call FASTQ upload
                #
                if mux_status.get('ArchiveSubmission', None) == "TODO":
                    logger.info(
                        "SRA upload for %s from %s and analysis_id is %s",
                        mux_id, run_number, analysis_id)
                    ArchiveSubmission = "analysis.{}.per_mux_status.{}.ArchiveSubmission".format(
                        analysis_count, mux_count)
                    archive_upload_script_cmd = [
                        archive_upload_script, '-o', out_dir, '-m', mux_id
                    ]
                    if args.testing:
                        archive_upload_script_cmd.append("-t")
                    try:
                        _ = subprocess.check_output(archive_upload_script_cmd,
                                                    stderr=subprocess.STDOUT)
                        ArchiveSubmission_status = "SUCCESS"
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(archive_upload_script_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Resetting to TODO")
                        ArchiveSubmission_status = "TODO"
                    #update mongoDB
                    try:
                        db.update(
                            {
                                "run": run_number,
                                'analysis.analysis_id': analysis_id
                            }, {
                                "$set": {
                                    ArchiveSubmission: ArchiveSubmission_status
                                }
                            })
                    except pymongo.errors.OperationFailure:
                        logger.fatal("MongoDB OperationFailure")
                        sys.exit(0)
                    num_triggers += 1
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def main():
    """Command-line entry point: delegate downstream analysis jobs.

    Collects successful MUXes flagged DownstreamSubmission=TODO from recent
    runs, builds per-library pipeline parameters, inserts job documents into
    MongoDB and marks the MUXes as submitted; on insert failure the partial
    state is rolled back and an alert email is sent.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())
    # run_list: run -> [(mux_id, out_dir), ...] for jobs to delegate.
    # mongo_db_ref: run -> [(mux_id, positional DB path, analysis_id), ...]
    # used later to flag DownstreamSubmission in the run document.
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        print(run_number)
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.", \
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info("MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                                mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission', None) == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            #Check if MUX has been analyzed more then 1 time successfully
            if len(mux_list_success) > 1:
                # NOTE(review): implicit string concatenation yields
                # "...Pleasecheck the results." (missing space) — confirm
                # and fix the message text separately.
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)
    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict = get_lib_details(run_num_flowcell, mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline_paramas_dict is empty for run num %s", run_num_flowcell)
            continue
        for lib, lib_info in pipeline_params_dict.items():
            # Collect the read-unit keys for this library into the
            # samples mapping expected by the pipeline.
            readunits_list = list()
            for outer_key in lib_info:
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
            lib_info['samples'] = {}
            lib_info['samples'][lib] = readunits_list
            if args.dry_run:
                logger.warning("Skipping job delegation for %s", \
                    lib)
                continue
            res = mongodb_insert_libjob(lib_info, connection)
            if not res:
                # Insert failed: alert, roll back this MUX's job records
                # and stop submitting for this run.
                logger.critical("Skipping rest of analysis job submission" \
                    "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s", \
                    lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id, \
                    lib_info.ctime, connection)
                break
        if not args.dry_run and update_status:
            # All inserts succeeded: flag each MUX as submitted in the
            # run document.
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                logger.info("Update mongoDb runComplete for %s and runnumber is %s" \
                    "and id is %s and analysis_id %s", run_num_flowcell, mux_id, \
                    insert_id, analysis_id)
                res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id, \
                    insert_id, connection)
                if not res:
                    logger.critical("Skipping rest of analysis job submission for %s" \
                        "from %s", mux_id, run_num_flowcell)
                    subject = "Downstream delegator failed job submission for {}" \
                        .format(mux_id)
                    if args.testing:
                        subject += " (testing)"
                    body = "Downstream delegator failed to insert job submission for" \
                        "{}".format(mux_id)
                    send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                    update_status = False
                    break
    connection.close()
def main():
    """Command-line entry point: record a bcl2fastq analysis status in MongoDB.

    Older variant of the status-update script (legacy db.update API, user
    check via getpass).  STARTED/SEQRUNFAILED push a new analysis entry onto
    the run document; SUCCESS/FAILED overwrite the matching entry in place.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID",
                        required=True,)
    parser.add_argument('-s', "--status",
                        help="Analysis status",
                        required=True,
                        choices=['STARTED', 'SUCCESS', 'FAILED', 'SEQRUNFAILED'])
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id", required=True)
    parser.add_argument('-o', "--out",
                        help="Analysis output directory")
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        # Deliberate soft skip for non-production users (exit 0).
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    run_number = args.runid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", run_number, args.status)

    if args.status in ["STARTED", "SEQRUNFAILED"]:
        try:
            if not args.dry_run:
                db.update({"run": run_number},
                          {"$push": {
                              "analysis": {
                                  "analysis_id": args.analysis_id,
                                  "user_name": user_name,
                                  "out_dir": args.out,
                                  "Status": args.status,
                              }}})
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            # FIX: exit non-zero on a fatal DB failure (was sys.exit(0),
            # which misreported success to callers; the newer variant of
            # this script exits 1 here).
            sys.exit(1)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                # Positional operator `$` overwrites the matched analysis
                # entry with this analysis_id.
                db.update({"run": run_number,
                           'analysis.analysis_id': args.analysis_id},
                          {"$set": {
                              "analysis.$": {
                                  "analysis_id": args.analysis_id,
                                  "end_time": end_time,
                                  "user_name": user_name,
                                  "out_dir": args.out,
                                  "Status": args.status,
                              }}})
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            # FIX: exit non-zero on a fatal DB failure (was sys.exit(0)).
            sys.exit(1)
    else:
        raise ValueError(args.status)

    # close the connection to MongoDB
    connection.close()
def main():
    """Send bcl2fastq completion/failure e-mails for recent runs.

    CLI entry point: scans runcomplete records with an analysis entry inside
    the lookback window and, for each MUX whose e-mail has not yet been sent,
    mails a FAILED or SUCCESS notification and marks email_sent in MongoDB.
    Only runs as the production user; exits 0 otherwise.
    """
    # Sanity check: companion upload scripts must sit next to this script.
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo cursor; count() queries the server for the total.
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        # Positional indexes (analysis_count/mux_count) are needed to build
        # the dotted update path for email_sent below.
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    # Already notified for this MUX.
                    continue
                # for all others: send email and update db
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed. ",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        # Without conf.yaml we cannot determine the requestor,
                        # so no mail is sent and email_sent stays unset.
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")# mail_to already set
                    if not args.testing and not is_devel_version():
                        # Production only: additionally notify the requestor.
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)

    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """Delegate MUX data transfers for recent runs.

    CLI entry point: fetches run records from MongoDB within the lookback
    window and, for each MUX not already being transferred, kicks off a data
    transfer to the selected site (NSCC or GIS). Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = "NSCC"
    parser.add_argument('-s', "--site", default=default,
                        help="site information (default = {})".format(default),
                        choices=['NSCC', 'GIS'])
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'# domain added in mail function
    else:
        mail_to = '*****@*****.**'

    # run_records: iterable of {mux: mux_info} dicts; mux_info[0] is used for
    # logging — presumably the run id, TODO confirm against runs_from_db.
    run_records = runs_from_db(connection, args.testing, args.win)
    trigger = 0
    for run in run_records:
        for mux, mux_info in run.items():
            if args.dry_run:
                logger.warning("Skipping job delegation for %s from %s",
                               mux, mux_info[0])
                continue
            #Check if mux data is getting transferred
            find = check_mux_data_transfer_status(connection, mux_info)
            if find:
                # Transfer already in progress; leave it alone.
                continue
            res = start_data_transfer(connection, mux, mux_info, args.site, mail_to)
            if res:
                trigger = 1
        # Honour -1 only once a transfer has actually been started.
        if args.break_after_first and trigger == 1:
            logger.info("Stopping after first run")
            break

    # close the connection to MongoDB
    connection.close()
def main():
    """Delegate downstream analysis jobs for successfully demultiplexed MUXes.

    CLI entry point: collects MUXes whose bcl2fastq status is SUCCESS and
    whose DownstreamSubmission is TODO, builds per-library job documents from
    get_lib_details(), inserts them into the pipeline_runs collection and
    then flags the corresponding runcomplete entries. Failures roll back the
    inserted MUX jobs and notify by mail. Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument(
        '-t', "--testing", action='store_true',
        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    # run_list: run_number -> [(mux_id, out_dir), ...] candidates to submit.
    # mongo_db_ref: run_number -> [(mux_id, dotted-update-path, analysis_id)].
    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.",
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info(
                        "MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                        mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                # Dotted path used later by mongodb_update_runcomplete to set
                # this exact per_mux_status entry's DownstreamSubmission.
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission') == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            #Check if MUX has been analyzed more then 1 time successfully
            if len(mux_list_success) > 1:
                # NOTE(review): "Please" and "check" concatenate without a
                # space in the mail body — preserved as-is (runtime string).
                body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \
                    + "delegator is skipping the downstream analysis under {}. Please" \
                    "check the results.".format(mux_list_success)
                subject = "Downstream delegator skipped job submission for {}".format(
                    mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)

    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict, mux_analysis_list = get_lib_details(run_num_flowcell,
                                                                  mux_list, args.testing)
        if not bool(pipeline_params_dict):
            logger.warning("pipeline params is empty for run num %s", run_num_flowcell)
            continue
        # Insert jobs into pipeline_runs collection
        for lib, lib_info in pipeline_params_dict.items():
            # Build the job document; 'readunits' is folded into
            # job['sample_cfg'] both as the raw dict and as rd_list['samples'].
            job = {}
            rd_list = {}
            job['sample_cfg'] = {}
            job['sample_cfg'] = {}
            readunits_list = list()
            rd_list['samples'] = {}
            for outer_key, outer_value in lib_info.items():
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
                    job['sample_cfg'].update({outer_key: outer_value})
                if outer_key == 'references_cfg':
                    job['references_cfg'] = {}
                    job['references_cfg'] = outer_value
                elif outer_key == 'cmdline':
                    job['cmdline'] = {}
                    job['cmdline'] = outer_value
                elif outer_key != 'readunits':
                    job.update({outer_key: outer_value})
                else:
                    # readunits branch: register this lib's readunit keys.
                    rd_list['samples'][lib] = readunits_list
                    job['sample_cfg'].update(rd_list)
            if args.dry_run:
                logger.warning("Skipping job delegation for %s",
                               lib)
                continue
            res = mongodb_insert_libjob(job, connection)
            if not res:
                # Insert failed: mail, roll back this MUX's jobs and stop
                # submitting for this run. NOTE(review): several of these
                # message pairs concatenate without a separating space.
                logger.critical("Skipping rest of analysis job submission"
                                "for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for" \
                    "{}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for" \
                    "{}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s",
                               lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id,
                                      lib_info.ctime, connection)
                break
        # Update runcomplete collection for delegated jobs
        if not args.dry_run and update_status:
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                if mux_id in mux_analysis_list:
                    logger.info("Update mongoDb pipeline_runs for mux_id %s from run number %s"
                                "and analysis_id is %s", mux_id, run_num_flowcell, analysis_id)
                    res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id,
                                                     insert_id, connection)
                    if not res:
                        logger.critical("Skipping rest of analysis job submission for %s"
                                        "from %s", mux_id, run_num_flowcell)
                        subject = "Downstream delegator failed job submission for {}" \
                            .format(mux_id)
                        if args.testing:
                            subject += " (testing)"
                        body = "Downstream delegator failed to insert job submission for" \
                            "{}".format(mux_id)
                        send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                        update_status = False
                        break
    connection.close()
def main():
    """Drive pipeline_runs jobs through their lifecycle.

    CLI entry point: for each recent pipeline_runs document at this site,
    either starts a new analysis (no 'execution' dict yet), picks up a
    starter flag left by a started-but-unlogged job, checks a STARTED/RESTART
    job for completion, or leaves MANUAL/completed jobs alone.
    Requires a production user.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-n', "--dryrun", action='store_true',
        help="Don't actually update DB (best used in conjunction with -v -v)")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server. Don't do anything")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level (WARNING by default).
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Exiting")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    #LOGGER.info("Database connection established")
    dbcol = connection.gisds.pipeline_runs
    site = get_site()
    epoch_now, epoch_then = generate_window(args.win)
    cursor = dbcol.find({"ctime": {"$gt": epoch_then, "$lt": epoch_now},
                         "site": site})
    LOGGER.info("Looping through {} jobs".format(cursor.count()))
    for job in cursor:
        dbid = job['_id']
        # only set here to avoid code duplication below
        try:
            out_dir = job['execution']['out_dir']
        except KeyError:
            out_dir = None
        # no execution dict means start a new analysis
        if not job.get('execution'):
            LOGGER.info('Job {} to be started'.format(dbid))
            # determine out_dir and set in DB
            # out_dir_override will take precedence over generating out_dir
            # with the get_downstream_outdir function
            if job.get('out_dir_override'):
                out_dir = job.get('out_dir_override')
                if os.path.exists(out_dir):
                    # Overridden directory already exists: mark the job for
                    # manual intervention (unless dry-running) and stop.
                    mux = os.path.basename(out_dir)
                    if not args.dryrun:
                        LOGGER.critical(
                            "Analysis for {} already exists under {}. Please start the analysis manually"
                            .format(mux, out_dir))
                        res = dbcol.update_one(
                            {"_id": ObjectId(dbid)},
                            {"$set": {"execution.status": "MANUAL"}})
                        assert res.modified_count == 1, (
                            "Modified {} documents instead of 1".format(
                                res.modified_count))
                    sys.exit(1)
                #assert not os.path.exists(out_dir), ("Direcotry already exists {}").format(out_dir)
            else:
                out_dir = get_downstream_outdir(job['requestor'],
                                                job['pipeline_name'],
                                                job['pipeline_version'])
            # Note, since execution (key) exists, accidental double
            # starts are prevented even before start time etc is
            # logged via flagfiles. No active logging here so that
            # flag files logging just works.
            if args.dryrun:
                LOGGER.info("Skipping dry run option")
                continue
            status = start_cmd_execution(job, site, out_dir, args.testing)
            if status:
                res = dbcol.update_one(
                    {"_id": ObjectId(dbid)},
                    {"$set": {"execution.out_dir": out_dir}})
                assert res.modified_count == 1, (
                    "Modified {} documents instead of 1".format(
                        res.modified_count))
            else:
                LOGGER.warning("Job {} could not be started".format(dbid))
        elif job['execution'].get('status') == "MANUAL":
            # Needs human intervention; never touched automatically.
            continue
        elif list_starterflags(
                out_dir):  # out_dir cannot be none because it's part of execution dict
            LOGGER.info(
                'Job {} in {} started but not yet logged as such in DB'.format(
                    dbid, out_dir))
            matches = list_starterflags(out_dir)
            assert len(matches) == 1, (
                "Got several starter flags in {}".format(out_dir))
            sflag = StarterFlag(matches[0])
            assert sflag.dbid == str(dbid)
            set_started(dbcol, sflag.dbid, str(sflag.timestamp),
                        dryrun=args.dryrun)
            # Flag consumed; remove it so it is not picked up twice.
            os.unlink(sflag.filename)
        elif job['execution'].get('status') in ['STARTED', 'RESTART']:
            LOGGER.info(
                'Job %s in %s set as re|started so checking on completion',
                dbid, out_dir)
            set_completion_if(dbcol, dbid, out_dir, dryrun=args.dryrun)
        else:
            # job complete
            LOGGER.debug('Job %s in %s should be completed', dbid, out_dir)
    LOGGER.info("Successful program exit")
def main():
    """Trigger downstream BWA and RNAseq mapping pipelines for recent runs.

    CLI entry point: finds runcomplete records whose bcl2fastq analysis
    finished with SUCCESS inside the lookback window and, for every analysis
    output directory not yet processed (no config_casava-1.8.2.txt marker),
    generates a casava config file plus a generic samplesheet and submits the
    BWA and RNAseq mapping pipelines. Submission failures are mailed via
    send_status_mail and abort the script with exit code 1.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument(
        '-w', '--win', type=int, default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level:
    # script -vv -> DEBUG; -v -> INFO; (none) -> WARNING; -q -> ERROR;
    # -qq -> CRITICAL; -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    # Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    for record in results:
        run_number = record['run']
        # Downstream analysis will not be initiated for Novogene (NG00*) runs
        if "NG00" in run_number:
            continue
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            # FIX: analysis_id was referenced in the send_status_mail error
            # paths below but never defined (NameError at runtime); take it
            # from the analysis record.
            analysis_id = analysis.get("analysis_id")
            # Check if bcl2fastq completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical(
                        "Following directory listed in DB doesn't exist: %s",
                        out_dir)
                    continue
                # Presence of the casava config marks an already-triggered dir
                if not os.path.exists(
                        os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    # generate config file
                    # FIX: subprocess.call() never raises CalledProcessError,
                    # so the previous try/except around it was dead code;
                    # check the return code explicitly instead. Also use a
                    # context manager so the file handle is not leaked.
                    config_cmd = [CONFIG, '-r', run_number]
                    with open(os.path.join(out_dir, "config_casava-1.8.2.txt"),
                              "w") as f:
                        retcode = subprocess.call(config_cmd,
                                                  stderr=subprocess.STDOUT,
                                                  stdout=f)
                    if retcode:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            retcode, ' '.join(config_cmd))
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # generic sample sheet
                    samplesheet_cmd = 'cd {} && {} -r {}'.format(
                        out_dir, SAMPLESHEET, run_number)
                    try:
                        _ = subprocess.check_output(samplesheet_cmd, shell=True)
                    except subprocess.CalledProcessError as e:
                        # FIX: was ' '.join(samplesheet_cmd), which joins the
                        # *characters* of the command string with spaces.
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, samplesheet_cmd)
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # Generate and submit BWA and RNAseq mapping pipelines
                    _, runid, _ = get_machine_run_flowcell_id(run_number)
                    generic_samplesheet = os.path.join(
                        out_dir, runid + "_SampleSheet.csv")
                    # NOTE: generic_samplesheet is already absolute, so the
                    # extra os.path.join(out_dir, ...) is a no-op; kept so the
                    # commands are byte-identical to before.
                    if os.path.exists(os.path.join(out_dir, generic_samplesheet)):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(dirs, BWA, run_number, out_dir,
                                    os.path.join(out_dir, generic_samplesheet),
                                    os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(RNA, run_number, out_dir,
                                    os.path.join(out_dir, generic_samplesheet),
                                    os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            # Remove config txt so this dir is retried next time
                            os.remove(os.path.join(out_dir,
                                                   "config_casava-1.8.2.txt"))
                        else:
                            try:
                                # analysisReport into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG),
                                          'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal(
                                    "The following command failed with return code %s: %s",
                                    e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                # send_status_mail
                                send_status_mail(
                                    PIPELINE_NAME, False, analysis_id,
                                    os.path.join(out_dir, LOG_DIR_REL,
                                                 "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1
                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id,
                                         os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s",
                             run_number, out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def _format_and_print_job(job):
    """Convert a raw accounting job dict to human-readable form and print it.

    Mutates *job* in place: cpu and ruWallClock (seconds) become HhMmSs
    strings, maxvmem (bytes) becomes a "<n> GB" string, and submissionTime
    (epoch seconds) becomes an ISO timestamp with ':' replaced by '-'.
    """
    job["cpu"] = strftime("%Hh%Mm%Ss", gmtime(job["cpu"]))
    job["maxvmem"] = str(job["maxvmem"] / pow(2, 30)) + " GB"
    job["ruWallClock"] = strftime("%Hh%Mm%Ss", gmtime(job["ruWallClock"]))
    job["submissionTime"] = str(
        datetime.fromtimestamp(
            job["submissionTime"]).isoformat()).replace(":", "-")
    PrettyPrinter(indent=2).pprint(job)


def main():
    """
    Main function

    Queries the accountinglogs collection on the production MongoDB server
    and pretty-prints jobs filtered by job number and/or owner. The three
    branches differ only in the query and the per-job filter; the (identical)
    formatting code is factored out into _format_and_print_job.
    """
    instance = ArgumentParser(description=__doc__)
    instance.add_argument("-j", "--jobNo", nargs="*",
                          help="filter records by jobNo of jobs")
    instance.add_argument("-o", "--owner", nargs="*",
                          help="filter records by owner of jobs")
    args = instance.parse_args()

    # Owner-only filter
    if (not args.jobNo) and (args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.owner": {"$in": args.owner}}):
            # A document matches if ANY of its jobs matches, so re-filter
            # per job before printing.
            for job in document["jobs"]:
                if job["owner"] in args.owner:
                    _format_and_print_job(job)

    # Job-number-only filter
    if (args.jobNo) and (not args.owner):
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo}}):
            for job in document["jobs"]:
                if job["jobNo"] in args.jobNo:
                    _format_and_print_job(job)

    # Both filters: job must match jobNo AND owner
    if args.jobNo and args.owner:
        for document in mongodb_conn(False).gisds.accountinglogs.find(
                {"jobs.jobNo": {"$in": args.jobNo},
                 "jobs.owner": {"$in": args.owner}}):
            for job in document["jobs"]:
                if (job["jobNo"] in args.jobNo) and (job["owner"] in args.owner):
                    _format_and_print_job(job)