def get_started_outdirs_from_db(testing=True, win=None):
    """Yield the output directories of runs that have at least one analysis
    with status STARTED, optionally restricted to a window of `win` days."""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if win:
        epoch_present, epoch_back = generate_window(win)
        results = db.find({"analysis.Status": "STARTED",
                           "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    else:
        results = db.find({"analysis.Status": "STARTED"})
    # results is a pymongo.cursor.Cursor, which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs", results.count())
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def instantiate_query(args):
    """Instantiates a MongoDB query dictionary from the parsed command-line args"""
    instance = {}
    if args.status:
        instance["analysis.Status"] = args.status
    if args.mux:
        instance["analysis.per_mux_status.mux_id"] = args.mux
    if args.run:
        instance["run"] = {"$regex": "^" + args.run}
    if args.win:
        epoch_present, epoch_initial = generate_window(args.win)
    else:
        epoch_present, epoch_initial = generate_window(7)
    instance["timestamp"] = {"$gt": epoch_initial, "$lt": epoch_present}
    return instance
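# generate_window() and mongodb_conn() are imported from elsewhere in this
# codebase and are not defined in this section. The sketch below is only an
# assumption of what generate_window() returns, inferred from how it is used
# here: a (present, past) pair of epoch values, assumed to be in milliseconds
# to match the timestamp/1000 conversion used in the reporting scripts later
# in this section. The real helper may differ.
from datetime import datetime, timedelta

def generate_window(days=14):
    """Return (epoch_present, epoch_back) in epoch milliseconds for a look-back window
    of `days` days (assumed behaviour, for illustration only)."""
    now = datetime.now()
    then = now - timedelta(days=days)
    return int(now.timestamp() * 1000), int(then.timestamp() * 1000)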
def runs_from_db(connection, testing, win=14):
    """Get the Novogene runs from the runcomplete collection"""
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"run": {"$regex": "^NG00"},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("record: %s", record)
        if not record.get('analysis'):
            continue
        # Check if Novogene run_mode
        _, run_id, _ = get_machine_run_flowcell_id(run_number)
        if testing:
            rest_url = rest_services['run_details']['testing'].replace("run_num", run_id)
        else:
            rest_url = rest_services['run_details']['production'].replace("run_num", run_id)
        response = requests.get(rest_url)
        if response.status_code != requests.codes.ok:
            response.raise_for_status()
        rest_data = response.json()
        sg10k_lib_list = get_sg10_lib_list(rest_data)
        run_records = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.",
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if not os.path.exists(out_dir):
                    logger.warning("Directory does not exist: %s", out_dir)
                    continue
                downstream_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission') == "TODO":
                    mux_info = (run_number, downstream_id, analysis_id, out_dir)
                    if mux_id in run_records:
                        logger.info("MUX %s from %s has been analyzed more than once"
                                    " successfully, please check", mux_id, run_number)
                        del run_records[mux_id]
                    elif mux_id in sg10k_lib_list:
                        run_records[mux_id] = mux_info
        if run_records:
            yield run_records
def check_mongo():
    """Check the production MongoDB (runcomplete collection) for runs that have
    been in STARTED state, or without any analysis, for too long and return the
    collected warnings as a string."""
    warnings = ""
    epoch_present, epoch_window = generate_window(MAX_WINDOW)
    epoch_present, epoch_started = generate_window(MAX_RUN)
    del epoch_present
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis.Status"] = "STARTED"
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        if record["analysis"][-1]["Status"] != "SUCCESS":
            warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                         str(record["run"]) + "\n")
            count_warnings += 1
    if count_warnings > 0:
        warnings += ("[started >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    query = {}
    query["timestamp"] = {"$gte": epoch_window, "$lte": epoch_started}
    query["analysis"] = {"$exists": False}
    mongo = mongodb_conn(False).gisds.runcomplete.find(query)
    count_warnings = 0
    for record in mongo:
        # PrettyPrinter(indent=2).pprint(record)
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(record["run"]) + "\n")
        count_warnings += 1
    if count_warnings > 0:
        warnings += ("[no analysis >= " + str(MAX_RUN) + " days]:\t" +
                     str(count_warnings) + "\n\n")
    return warnings
def get_sample_info(child, rows, mux_analysis_list, mux_id, fastq_data_dir,
                    run_num_flowcell, sample_info):
    """Collect sample info from the ELM JSON"""
    sample_cfg = {}
    site = get_site()
    ctime, _ = generate_window(1)
    _, _, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    mux_analysis_list.add(mux_id)
    sample_id = child['libraryId']
    sample_cfg['requestor'] = rows['requestor']
    sample_cfg['ctime'] = ctime
    sample_cfg['site'] = site
    try:
        sample_cfg['pipeline_name'] = legacy_mapper['pipeline_mapper'][child['Analysis']]
    except KeyError as e:
        sample_cfg['pipeline_name'] = child['Analysis']
        logger.warning(str(e) + " Pipeline not mapped to newer version")
        return sample_info
    pipeline_version = get_pipeline_version(child['pipeline_version']
                                            if 'pipeline_version' in rows else None)
    sample_cfg['pipeline_version'] = pipeline_version
    #sample_cfg['pipeline_params'] = 'params'
    ref_info = get_reference_info(child['Analysis'],
                                  sample_cfg['pipeline_version'], child['genome'])
    if not ref_info:
        logger.info("ref_info not available")
        return sample_info
    cmdline_info = get_cmdline_info(child)
    sample_cfg['references_cfg'] = ref_info
    if cmdline_info:
        sample_cfg['cmdline'] = cmdline_info
    readunits_dict = {}
    status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'], rows['laneId'])
    if status:
        ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],
                      rows['laneId'], None, fq1, fq2)
        k = key_for_readunit(ru)
        readunits_dict[k] = dict(ru._asdict())
        sample_cfg['readunits'] = readunits_dict
    if sample_info.get(sample_id, {}).get('readunits', {}):
        sample_info[sample_id]['readunits'].update(readunits_dict)
    else:
        sample_info[sample_id] = sample_cfg
    return sample_info
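# ReadUnit, key_for_readunit() and check_fastq() above come from the pipeline's
# shared modules and are not shown in this section. The following is a minimal
# sketch of the read-unit record, with field names inferred from the positional
# call sites; the actual namedtuple and key scheme in the codebase may differ,
# so treat both as assumptions for illustration only.
from collections import namedtuple

ReadUnit = namedtuple('ReadUnit', ['run_id', 'flowcell_id', 'library_id',
                                   'lane_id', 'rg_id', 'fq1', 'fq2'])

def key_for_readunit(ru):
    """Build a stable dictionary key for a read unit (assumed scheme)."""
    return "{}.{}.{}".format(ru.run_id, ru.library_id, ru.lane_id)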
def runs_from_db(testing=True, win=34):
    """Get the started runs from the pipeline_runs collection"""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.pipeline_runs
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"runs": {"$exists": True},
                       "ctime": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor, which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        for runs in record['runs']:
            if runs["status"] == "STARTED":
                yield (record['_id'], record['out_dir'], runs['start_time'])
def get_outdirs_from_db(testing=True, win=14):
    """Yield the output directories of all analyses recorded in the last `win` days."""
    connection = mongodb_conn(testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    # results is a pymongo.cursor.Cursor, which works like an iterator, i.e. don't use len()
    logger.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        logger.debug("record: %s", record)
        #run_number = record['run']
        # we might have several analysis runs:
        for analysis in record['analysis']:
            yield analysis["out_dir"]
def runs_from_db(db, mail_to, ccaddr, win=34):
    """Find recent runs without an analysis entry and mail the ones missing ELM information"""
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis": {"$exists": False},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs for last %s days", results.count(), win)
    mail = False
    subject = "Runs with missing ELM information"
    body = "Dear NGSP, " + "\n"
    body += subject + " for the following runs. Please include in the ELM." + "\n"
    for record in results:
        logger.debug("record: %s", record)
        _, runid, _ = get_machine_run_flowcell_id(record.get('run'))
        rest_data = get_rest_data(runid)
        if not rest_data.get('runId'):
            body += record.get('run') + "\n"
            mail = True
    if mail:
        send_mail(subject, body, toaddr=mail_to, ccaddr=ccaddr)
def runs_from_db(db, days=75, win=34):
    """Yield Novogene runs (NG00*) that have not been raw-deleted and whose last
    analysis completed successfully more than `days` days ago."""
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"run": {"$regex": "^NG00"},
                       "raw-delete": {"$exists": False},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    LOGGER.info("Found %d runs for last %s days", results.count(), win)
    for record in results:
        LOGGER.debug("record: %s", record)
        if not record.get('run'):
            LOGGER.critical("run is missing for DB-id %s", record['_id'])
            continue
        runid_and_flowcellid = record['run']
        if 'analysis' not in record:
            continue
        last_analysis = record['analysis'][-1]
        status = last_analysis.get("Status")
        end_time = last_analysis.get("end_time")
        if not status or not end_time:
            continue
        analysis_epoch_time = isoformat_to_epoch_time(end_time + "+08:00")
        epoch_time_now = isoformat_to_epoch_time(generate_timestamp() + "+08:00")
        rd = relative_epoch_time(epoch_time_now, analysis_epoch_time)
        relative_days = rd.months * 30 + rd.days
        if status == 'SUCCESS' and relative_days > days:
            yield runid_and_flowcellid
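# isoformat_to_epoch_time(), relative_epoch_time() and generate_timestamp() used
# above are defined elsewhere in this codebase; the sketch below is an assumption
# of their behaviour, kept consistent with the inline datetime/relativedelta
# computation used in the reporting script later in this section.
import datetime
import dateutil.parser
import dateutil.relativedelta

def isoformat_to_epoch_time(iso_str):
    """Convert an ISO-8601 timestamp string (with UTC offset) to epoch seconds."""
    return dateutil.parser.parse(iso_str).timestamp()

def relative_epoch_time(epoch_now, epoch_then):
    """Return a relativedelta between two epoch-second values (now - then)."""
    dt_now = datetime.datetime.fromtimestamp(epoch_now)
    dt_then = datetime.datetime.fromtimestamp(epoch_then)
    return dateutil.relativedelta.relativedelta(dt_now, dt_then)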
def main(): """main function""" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-1', "--break-after-first", action='store_true', help="Only process first run returned") parser.add_argument('-n', "--dry-run", action='store_true', help="Don't run anything") parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test-server") default = 14 parser.add_argument('-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose) #Check if pipeline scripts are available assert os.path.exists(BWA) assert os.path.exists(RNA) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) num_triggers = 0 results = db.find({"analysis.Status": "SUCCESS", "timestamp": {"$gt": epoch_back, "$lt": epoch_present}}) logger.info("Found %s runs", results.count()) for record in results: run_number = record['run'] analysis = record['analysis'] for analysis in record['analysis']: out_dir = analysis.get("out_dir") #Check if bcl2Fastq is completed successfully if 'Status' in analysis and analysis.get("Status") == "SUCCESS": if not os.path.exists(out_dir): logger.critical("Following directory listed in DB doesn't exist: %s", out_dir) continue #Check if downstream analysis has been started if not os.path.exists(os.path.join(out_dir, "config_casava-1.8.2.txt".format())): logger.info("Start the downstream analysis at %s", out_dir) os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True) #generate config file config_cmd = [CONFIG, '-r', run_number] try: f = open(os.path.join(out_dir, "config_casava-1.8.2.txt".format()), "w") _ = subprocess.call(config_cmd, stderr=subprocess.STDOUT, stdout=f) except subprocess.CalledProcessError as e: logger.fatal("The following command failed with return code %s: %s", e.returncode, ' '.join(config_cmd)) logger.fatal("Output: %s", e.output.decode()) logger.fatal("Exiting") sys.exit(1) #Generate and Submit BWA and RNAseq mapping pipeline if os.path.exists(os.path.join(out_dir, "samplesheet.csv".format())): dirs = os.path.join(out_dir, "out") cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(dirs, BWA, run_number, out_dir, os.path.join(out_dir, "samplesheet.csv".format()), os.path.join(out_dir, SUBMISSIONLOG)) cmd += "&& {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(RNA, run_number, out_dir, os.path.join(out_dir, "samplesheet.csv".format()), os.path.join(out_dir, SUBMISSIONLOG)) if args.dry_run: logger.warning("Skipped following run: %s", cmd) #Remove config txt os.remove(os.path.join(out_dir, "config_casava-1.8.2.txt".format())) else: try: #ananlysisReport into submission log with open(os.path.join(out_dir, SUBMISSIONLOG), 'w') as fh: fh.write(cmd) _ = subprocess.check_output(cmd, shell=True) except subprocess.CalledProcessError as e: 
logger.fatal("The following command failed with return code %s: %s", e.returncode, ' '.join(cmd)) logger.fatal("Output: %s", e.output.decode()) logger.fatal("Exiting") #send_status_mail send_status_mail(PIPELINE_NAME, False, analysis_id, os.path.join(out_dir, LOG_DIR_REL, "mapping_submission.log")) sys.exit(1) num_triggers += 1 if args.break_after_first: logger.info("Stopping after first sequencing run") sys.exit(0) else: #send_status_mail logger.info("samplesheet.csv missing for %s under %s", run_number, out_dir) send_status_mail(PIPELINE_NAME, False, analysis_id, os.path.abspath(out_dir)) elif analysis.get("Status") == "FAILED": logger.debug("BCL2FASTQ FAILED for %s under %s", run_number, out_dir) # close the connection to MongoDB connection.close() logger.info("%s dirs with triggers", num_triggers)
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test server") parser.add_argument('-n', "--no-mail", action='store_true', help="Don't mail. Just print to console") default = 14 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}}) runs = {} extra_text = "" for record in results: run_number = record['run'] timestamp = record['timestamp'] runs[timestamp] = run_number od = collections.OrderedDict(sorted(runs.items())) logger.info("Found %s runs", results.count()) extra_text = "Found {} runs. \n".format(results.count()) for _, v in od.items(): # v is run results = db.find({"run": v}) for record in results: if not 'analysis' in record: continue last_analysis = record['analysis'][-1] status = last_analysis.get("Status") if not status: continue if status == 'SUCCESS': if last_analysis.get("per_mux_status"): mux = last_analysis.get("per_mux_status") for d in mux: if d is None: logger.warning("Skipping empty per_mux_status for run %s." \ "Needs fix in DB", v) continue if d.get( 'Status' ) == "SUCCESS": # FIXME what if key is missing? mux_id = d['mux_id'] stats_submission = d['StatsSubmission'] if stats_submission == "FAILED": extra_text += "StatsSubmission for mux_id {} from run {} " \ "has FAILED and out_dir is {} \n" \ .format(mux_id, v, last_analysis.get("out_dir")) extra_text += "\n" archive_submission = d['ArchiveSubmission'] if archive_submission == "FAILED": extra_text += "ArchiveSubmission for mux_id {} from run {} " \ "has FAILED and out_dir is {} \n" \ .format(mux_id, v, last_analysis.get("out_dir")) extra_text += "\n" elif status == 'FAILED': extra_text += "Analysis for run {} has failed. \n".format(v) extra_text += "Analysis_id is {} and out_dir is {} \n" \ .format(last_analysis.get("analysis_id"), \ last_analysis.get("out_dir")) extra_text += "\n" extra_text += "---------------------------------------------------\n" logger.info("Analysis for run %s has failed ", v) elif status == 'STARTED': analysis_id = last_analysis.get("analysis_id") analysis_epoch_time = isoformat_to_epoch_time(analysis_id + "+08:00") run_completion_time = timestamp / 1000 rd = relative_epoch_time(run_completion_time, analysis_epoch_time) if rd.days > 3: extra_text += "Analysis for run {} was started {} days ago. "\ "Please check. 
\n".format(v, rd.days) extra_text += "Analysis_id is {} and out_dir is {} \n" \ .format(last_analysis.get("analysis_id"), \ last_analysis.get("out_dir")) extra_text += "\n" extra_text += "---------------------------------------------------\n" extra_text += "Report generation is completed" subject = "Report generation for bcl2fastq" if args.testing: subject = "Testing:" + subject if args.no_mail: print( "Skipping sending of email with subject '{}' and following body:". format(subject)) print(extra_text) else: send_mail(subject, extra_text) logger.info("Report generation is completed")
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-n', "--dry-run", action='store_true', help="Don't run anything") parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)") default = 14 parser.add_argument('-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose) user_name = getpass.getuser() if user_name != "userrig": logger.warning("Not a production user. Skipping MongoDB update") sys.exit(0) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) results = db.find({"analysis.per_mux_status" : {"$exists": True}, "timestamp": {"$gt": epoch_back, "$lt": epoch_present}}) logger.info("Found %s runs", results.count()) run_list = {} mongo_db_ref = {} for record in results: run_number = record['run'] print(run_number) mux_list = {} for (analysis_count, analysis) in enumerate(record['analysis']): analysis_id = analysis['analysis_id'] per_mux_status = analysis.get("per_mux_status", None) if per_mux_status is None: continue for (mux_count, mux_status) in enumerate(per_mux_status): # sanity checks against corrupted DB entries if mux_status is None or mux_status.get('mux_id') is None: logger.warning("mux_status is None or incomplete for run %s analysis %s." " Requires fix in DB. Skipping entry for now.", \ run_number, analysis_id) continue if mux_status.get('Status', None) != "SUCCESS": logger.info("MUX %s from %s is not SUCCESS. Skipping downstream analysis", mux_status['mux_id'], run_number) continue mux_id = mux_status['mux_id'] out_dir = analysis['out_dir'] mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format( analysis_count, mux_count) if mux_status.get('Status') == "SUCCESS" and \ mux_status.get('DownstreamSubmission', None) == "TODO": mongo_list = (mux_id, mux_db_id, analysis_id) mongo_db_ref.setdefault(run_number, []).append(mongo_list) mux_list.setdefault(mux_id, []).append(out_dir) for mux_id, out_dir in mux_list.items(): mux_list_success = mux_list[mux_id] #Check if MUX has been analyzed more then 1 time successfully if len(mux_list_success) > 1: body = "{} has been analyzed more than 1 time successfully..".format(mux_id) \ + "delegator is skipping the downstream analysis under {}. 
Please" \ "check the results.".format(mux_list_success) subject = "Downstream delegator skipped job submission for {}".format(mux_id) if args.testing: subject += " (testing)" send_mail(subject, body, toaddr='veeravallil', ccaddr=None) continue mux_info = (mux_id, out_dir) run_list.setdefault(run_number, []).append(mux_info) for run_num_flowcell, mux_list in run_list.items(): update_status = True pipeline_params_dict = get_lib_details(run_num_flowcell, mux_list, args.testing) if not bool(pipeline_params_dict): logger.warning("pipeline_paramas_dict is empty for run num %s", run_num_flowcell) continue for lib, lib_info in pipeline_params_dict.items(): readunits_list = list() for outer_key in lib_info: if outer_key == 'readunits': for inner_key in lib_info[outer_key]: readunits_list.append(inner_key) lib_info['samples'] = {} lib_info['samples'][lib] = readunits_list if args.dry_run: logger.warning("Skipping job delegation for %s", \ lib) continue res = mongodb_insert_libjob(lib_info, connection) if not res: logger.critical("Skipping rest of analysis job submission" \ "for %s from %s", lib, lib_info.run_id) subject = "Downstream delegator failed job submission for" \ "{}".format(lib) if args.testing: subject += " (testing)" body = "Downstream delegator failed to insert job submission for" \ "{}".format(lib) send_mail(subject, body, toaddr='veeravallil', ccaddr=None) update_status = False logger.warning("Clean up the database for mux %s from run %s and ctime %s", \ lib_info.mux_id, lib_info.run_id, lib_info.ctime) mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id, \ lib_info.ctime, connection) break if not args.dry_run and update_status: value = mongo_db_ref[run_num_flowcell] for mux_id, insert_id, analysis_id in value: logger.info("Update mongoDb runComplete for %s and runnumber is %s" \ "and id is %s and analysis_id %s", run_num_flowcell, mux_id, \ insert_id, analysis_id) res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id, \ insert_id, connection) if not res: logger.critical("Skipping rest of analysis job submission for %s" \ "from %s", mux_id, run_num_flowcell) subject = "Downstream delegator failed job submission for {}" \ .format(mux_id) if args.testing: subject += " (testing)" body = "Downstream delegator failed to insert job submission for" \ "{}".format(mux_id) send_mail(subject, body, toaddr='veeravallil', ccaddr=None) update_status = False break connection.close()
def main(): """main function""" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-1', "--break-after-first", action='store_true', help="Only process first run returned") parser.add_argument('-n', "--dry-run", action='store_true', help="Don't run anything") parser.add_argument( '-t', "--testing", action='store_true', help= "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)") parser.add_argument('-e', "--wrapper-args", nargs="*", help="Extra arguments for bcl2fastq wrapper" " (prefix leading dashes with X, e.g. X-n for -n)") default = 14 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) bcl2fastq_wrapper = os.path.join(os.path.dirname(sys.argv[0]), "bcl2fastq.py") connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete # db query for jobs that are yet to be analysed in the epoch window epoch_present, epoch_back = generate_window(args.win) results = db.find({ "analysis": { "$exists": 0 }, "timestamp": { "$gt": epoch_back, "$lt": epoch_present } }) # results is a pymongo.cursor.Cursor which works like an iterator i.e. dont use len() logger.info("Found %s runs", results.count()) for record in results: run_number = record['run'] logger.debug("Processing record %s", record) cmd = [bcl2fastq_wrapper, "-r", run_number, "-v"] if args.testing: cmd.append("-t") if args.wrapper_args: cmd.extend([x.lstrip('X') for x in args.wrapper_args]) if args.dry_run: logger.warning("Skipped following run: %s", ' '.join(cmd)) continue else: try: logger.info("Executing: %s", ' '.join(cmd)) res = subprocess.check_output(cmd, stderr=subprocess.STDOUT) if res: logger.info("bcl2fastq wrapper returned:\n%s", res.decode().rstrip()) except subprocess.CalledProcessError as e: logger.critical( "The following command failed with" " return code %s: %s", e.returncode, ' '.join(cmd)) logger.critical("Full error message was: %s", e.stdout) if 'commlib error' in e.stdout.decode(): logger.critical( "Looks like a qmaster problem (commlib error). Exiting" ) break else: logger.critical("Will keep going") # continue so that a failed run doesn't count, # i.e. args.break_after_first shouldn't be trigger continue if args.break_after_first: logger.info("Stopping after first sequencing run") break # close the connection to MongoDB connection.close() logger.info("Successful program exit")
def start_data_transfer(connection, mux, mux_info, site, mail_to): """ Data transfer from source to destination """ run_number, downstream_id, analysis_id, bcl_path = mux_info fastq_src = os.path.join(bcl_path, "out", "Project_"+mux) bcl_dir = os.path.basename(bcl_path) if is_devel_version(): fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \ mux, run_number, bcl_dir) yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'], \ mux, mux +"_multisample.yaml") else: fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \ mux, run_number, bcl_dir) yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'], \ mux, mux+ "_multisample.yaml") rsync_cmd = 'rsync -va %s %s' % (fastq_src, fastq_dest) if not os.path.exists(fastq_dest): try: os.makedirs(fastq_dest) logger.info("data transfer started for %s from %s", mux, run_number) st_time = generate_timestamp() update_downstream_mux(connection, run_number, analysis_id, downstream_id, \ "COPYING_" + st_time) _ = subprocess.check_output(rsync_cmd, shell=True, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: body = "The following command failed with return code {}: {}". \ format(e.returncode, rsync_cmd) subject = "{} from {}: SG10K data transfer ({}) failed".format(mux, run_number, site) logger.fatal(body) logger.fatal("Output: %s", e.output.decode()) logger.fatal("Exiting") #Send_mail send_mail(subject, body, toaddr=mail_to, ccaddr=None) #Delete the partial info being rsync update_downstream_mux(connection, run_number, analysis_id, downstream_id, "ERROR") sys.exit(1) #Update the mongoDB for successful data transfer sample_info = get_mux_details(run_number, mux, fastq_dest) #Touch rsync complete file with open(os.path.join(fastq_dest, "rsync_complete.txt"), "w") as f: f.write("") with open(yaml_dest, 'w') as fh: yaml.dump(dict(sample_info), fh, default_flow_style=False) job = {} job['sample_cfg'] = {} for outer_key, outer_value in sample_info.items(): ctime, _ = generate_window(1) job['sample_cfg'].update({outer_key:outer_value}) job['site'] = site job['pipeline_name'] = 'custom/SG10K' job['pipeline_version'] = novogene_conf['PIPELINE_VERSION'] job['ctime'] = ctime job['requestor'] = 'userrig' if is_devel_version(): novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['devel'], \ mux) else: novogene_outdir = os.path.join(novogene_conf['NOVOGENE_OUTDIR'][site]['production'], mux) job['out_dir_override'] = novogene_outdir logger.info("Data transfer completed successfully for %s from %s", mux, run_number) job_id = insert_muxjob(connection, mux, job) update_downstream_mux(connection, run_number, analysis_id, downstream_id, job_id) subject = "{} from {}: SG10K data transfer ({}) completed".format(mux, run_number, site) body = "Data transfer successfully completed for {} from {}".format(mux, run_number) send_mail(subject, body, toaddr=mail_to, ccaddr=None) return True else: logger.critical("Mux %s from %s directory already exists under %s", mux, \ run_number, fastq_dest) return False
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-n', "--dry-run", action='store_true', help="Don't run anything") parser.add_argument('-s', "--site", help="site information") parser.add_argument( '-t', "--testing", action='store_true', help= "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)") default = 14 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) if not is_production_user(): logger.warning("Not a production user. Skipping MongoDB update") sys.exit(1) if not args.site: site = 'NSCC' else: site = args.site connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.pipeline_runs epoch_present, epoch_back = generate_window(args.win) results = db.find({ "run": { "$exists": False }, "site": site, "ctime": { "$gt": epoch_back, "$lt": epoch_present } }) logger.info("Found %s runs to start analysis", results.count()) for record in results: start_analysis(record, args.testing, args.dry_run)
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-1', "--break-after-first", action='store_true', help="Only process first run returned") parser.add_argument('-n', "--dryrun", action='store_true', help="Don't run anything") default = 84 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) default = 60 parser.add_argument( '-d', '--days', type=int, default=default, help="Bcl analysis not older than days(default {})".format(default)) default = 60 parser.add_argument( '-r', '--tardays', type=int, default=default, help="tar ball not older than days(default {})".format(default)) parser.add_argument( '-t', "--testing", action='store_true', help= "Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)") parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) if not is_production_user(): LOGGER.warning("Not a production user. Skipping archival steps") sys.exit(1) if is_devel_version() or args.testing: mail_to = 'veeravallil' # domain added in mail function else: mail_to = 'rpd' connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) results = db.find({ "run": { "$regex": "^((?!NG00).)*$" }, "raw-delete": { "$exists": False }, "timestamp": { "$gt": epoch_back, "$lt": epoch_present } }) LOGGER.info("Looping through %s jobs", results.count()) trigger = 0 for record in results: try: run_num = record['run'] except KeyError: run_num = None if not record.get('deletion'): #Check run_status res = check_run_status(record, args.days) if res: LOGGER.info("Create tar ball %s ", run_num) if args.dryrun: LOGGER.warning("Skipping Create tar ball %s ", run_num) continue create_run_tar(db, run_num) trigger = 1 elif record['deletion'].get('tar'): res = check_tar_status_and_delete(db, record, args.tardays, dryrun=args.dryrun) if res: trigger = 1 if args.break_after_first and trigger == 1: LOGGER.info("Stopping after first run") break
def main(): """main function """ stats_upload_script = os.path.abspath( os.path.join(os.path.dirname(sys.argv[0]), "bcl_stats_upload.py")) assert os.path.exists(stats_upload_script) archive_upload_script = os.path.abspath( os.path.join(os.path.dirname(sys.argv[0]), "sra_fastq_upload.py")) assert os.path.exists(archive_upload_script) parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test server") default = 14 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run") parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) if not is_production_user(): logger.warning("Not a production user. Skipping MongoDB update") sys.exit(1) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) num_triggers = 0 results = db.find({ "analysis": { "$exists": True }, "timestamp": { "$gt": epoch_back, "$lt": epoch_present } }) logger.info("Found %s runs", results.count()) for record in results: run_number = record['run'] for (analysis_count, analysis) in enumerate(record['analysis']): analysis_id = analysis['analysis_id'] per_mux_status = analysis.get("per_mux_status", None) if per_mux_status is None: continue for (mux_count, mux_status) in enumerate(per_mux_status): # sanity checks against corrupted DB entries if mux_status is None or mux_status.get('mux_id') is None: logger.warning( "mux_status is None or incomplete for run %s analysis %s." " Requires fix in DB. Skipping entry for now.", run_number, analysis_id) continue if mux_status.get('Status', None) != "SUCCESS": logger.info( "MUX %s from %s is not SUCCESS. 
Skipping SRA and STATS uploading", mux_status['mux_id'], run_number) continue mux_id = mux_status['mux_id'] out_dir = analysis['out_dir'] if args.dry_run: logger.warning( "Skipping analysis %s run %s MUX %s" " with StatsSubmission %s and ArchiveSubmission %s", analysis_id, run_number, mux_status['mux_id'], mux_status.get('StatsSubmission', None), mux_status.get('ArchiveSubmission', None)) continue # Call STATS upload # if mux_status.get('StatsSubmission', None) == "TODO": logger.info( "Stats upload for %s from %s and analysis_id is %s", mux_id, run_number, analysis_id) StatsSubmission = "analysis.{}.per_mux_status.{}.StatsSubmission".format( analysis_count, mux_count) stats_upload_script_cmd = [ stats_upload_script, '-o', out_dir, '-m', mux_id ] if args.testing: stats_upload_script_cmd.append("-t") try: _ = subprocess.check_output(stats_upload_script_cmd, stderr=subprocess.STDOUT) StatsSubmission_status = "SUCCESS" except subprocess.CalledProcessError as e: logger.fatal( "The following command failed with return code %s: %s", e.returncode, ' '.join(stats_upload_script_cmd)) logger.fatal("Output: %s", e.output.decode()) logger.fatal("Resetting to TODO") StatsSubmission_status = "TODO" try: db.update( { "run": run_number, 'analysis.analysis_id': analysis_id }, { "$set": { StatsSubmission: StatsSubmission_status, } }) except pymongo.errors.OperationFailure: logger.fatal("MongoDB OperationFailure") sys.exit(0) num_triggers += 1 # Call FASTQ upload # if mux_status.get('ArchiveSubmission', None) == "TODO": logger.info( "SRA upload for %s from %s and analysis_id is %s", mux_id, run_number, analysis_id) ArchiveSubmission = "analysis.{}.per_mux_status.{}.ArchiveSubmission".format( analysis_count, mux_count) archive_upload_script_cmd = [ archive_upload_script, '-o', out_dir, '-m', mux_id ] if args.testing: archive_upload_script_cmd.append("-t") try: _ = subprocess.check_output(archive_upload_script_cmd, stderr=subprocess.STDOUT) ArchiveSubmission_status = "SUCCESS" except subprocess.CalledProcessError as e: logger.fatal( "The following command failed with return code %s: %s", e.returncode, ' '.join(archive_upload_script_cmd)) logger.fatal("Output: %s", e.output.decode()) logger.fatal("Resetting to TODO") ArchiveSubmission_status = "TODO" #update mongoDB try: db.update( { "run": run_number, 'analysis.analysis_id': analysis_id }, { "$set": { ArchiveSubmission: ArchiveSubmission_status } }) except pymongo.errors.OperationFailure: logger.fatal("MongoDB OperationFailure") sys.exit(0) num_triggers += 1 # close the connection to MongoDB connection.close() logger.info("%s dirs with triggers", num_triggers)
def get_lib_details(run_num_flowcell, mux_list, testing): """Lib info collection from ELM per run """ _, run_num, flowcellid = get_machine_run_flowcell_id(run_num_flowcell) # Call rest service to get component libraries if testing: print(run_num) rest_url = rest_services['run_details']['testing'].replace("run_num", run_num) logger.info("development server") else: rest_url = rest_services['run_details']['production'].replace("run_num", run_num) logger.info("production server") response = requests.get(rest_url) if response.status_code != requests.codes.ok: response.raise_for_status() rest_data = response.json() logger.debug("rest_data from %s: %s", rest_url, rest_data) sample_info = {} if rest_data.get('runId') is None: logger.info("JSON data is empty for run num %s", run_num) return sample_info for mux_id, out_dir in mux_list: fastq_data_dir = os.path.join(out_dir[0], 'out', "Project_"+mux_id) if os.path.exists(fastq_data_dir): for rows in rest_data['lanes']: if mux_id in rows['libraryId']: if "MUX" in rows['libraryId']: for child in rows['Children']: if child['Analysis'] != "Sequence only": ctime, _ = generate_window(1) sample_dict = {} sample = child['libraryId'] sample_dict['requestor'] = rows['requestor'] sample_dict['ctime'] = ctime sample_dict['pipeline_name'] = child['Analysis'] if 'pipeline_version' in rows: sample_dict['pipeline_version'] = child['pipeline_version'] else: sample_dict['pipeline_version'] = None sample_dict['pipeline_params'] = 'params' sample_dict['site'] = get_site() out_dir = get_downstream_outdir(sample_dict['requestor'], \ sample_dict['pipeline_version'], sample_dict['pipeline_name']) sample_dict['out_dir'] = out_dir readunits_dict = {} status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'],\ rows['laneId']) if status: ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],\ rows['laneId'], None, fq1, fq2) k = key_for_read_unit(ru) readunits_dict[k] = dict(ru._asdict()) sample_dict['readunits'] = readunits_dict if sample_info.get(sample, {}).get('readunits'): sample_info[sample]['readunits'].update(readunits_dict) else: sample_info[sample] = sample_dict else: if rows['Analysis'] != "Sequence only": sample = rows['libraryId'] status, fq1, fq2 = check_fastq(fastq_data_dir, rows['libraryId'], \ rows['laneId']) if status: ctime, _ = generate_window(1) sample_dict = {} readunits_dict = {} ru = ReadUnit(run_num_flowcell, flowcellid, rows['libraryId'], \ rows['laneId'], None, fq1, fq2) k = key_for_read_unit(ru) readunits_dict[k] = dict(ru._asdict()) sample_dict['readunits'] = readunits_dict sample_info[sample] = sample_dict return sample_info
def main(): """main function """ stats_upload_script = os.path.abspath(os.path.join( os.path.dirname(sys.argv[0]), "bcl_stats_upload.py")) assert os.path.exists(stats_upload_script) archive_upload_script = os.path.abspath(os.path.join( os.path.dirname(sys.argv[0]), "sra_fastq_upload.py")) assert os.path.exists(archive_upload_script) parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test server") default = 14 parser.add_argument('-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-n', "--dry-run", action='store_true', help="Dry run") parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose) user_name = getpass.getuser() if user_name != "userrig": logger.warning("Not a production user. Skipping sending of emails") sys.exit(0) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) num_emails = 0 results = db.find({"analysis" : {"$exists": True}, "timestamp": {"$gt": epoch_back, "$lt": epoch_present}}) logger.info("Found %s runs", results.count()) if is_devel_version() or args.testing: mail_to = 'veeravallil'# domain added in mail function else: #mail_to = '*****@*****.**' mail_to = '*****@*****.**' for record in results: run_number = record['run'] #print(run_number) for (analysis_count, analysis) in enumerate(record['analysis']): analysis_id = analysis['analysis_id'] per_mux_status = analysis.get("per_mux_status", None) if per_mux_status is None: continue for (mux_count, mux_status) in enumerate(per_mux_status): if args.dry_run: logger.warning("Skipping analysis %s run %s MUX %s" " with email_sent %s", analysis_id, run_number, mux_status['mux_id'], mux_status.get('email_sent', None)) continue if mux_status.get('email_sent', None): continue # for all others: send email and update db email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format( analysis_count, mux_count) mux_id = mux_status['mux_id'] out_dir = analysis['out_dir'] if mux_status.get('Status', None) == "FAILED": logger.info("bcl2fastq for MUX %s from %s failed. 
", mux_status['mux_id'], run_number) subject = 'bcl2fastq: ' + mux_id body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number) body += "\n\nPlease check the logs under {}".format(out_dir + "/logs") send_mail(subject, body, mail_to, ccaddr="rpd") num_emails += 1 update_mongodb_email(db, run_number, analysis_id, email_sent_query, True) elif mux_status.get('Status', None) == "SUCCESS": muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir')) summary = path_to_url(os.path.join(muxdir, 'html/index.html')) body = "bcl2fastq for {} from {} successfully completed.".format( mux_id, run_number) body += "\n\nA summary can be found at {}".format(summary) body += "\n\nFastQ files are located in {}".format(muxdir) body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)" confinfo = os.path.join(out_dir, 'conf.yaml') #print(body) if not os.path.exists(confinfo): logger.fatal("conf info '%s' does not exist" " under run directory.", confinfo) continue subject = 'bcl2fastq' if args.testing: subject += ' testing' if is_devel_version(): subject += ' devel' subject += ': ' + mux_id send_mail(subject, body, mail_to, ccaddr="rpd")# mail_to already set if not args.testing and not is_devel_version(): requestor = get_requestor(mux_id, confinfo) if requestor is not None: #requestor = "rpd" #subject += " (instead of requestor)" #send_mail(subject, body, requestor, ccaddr="rpd") send_mail(subject, body, requestor) num_emails += 1 update_mongodb_email(db, run_number, analysis_id, email_sent_query, True) # close the connection to MongoDB connection.close() logger.info("%d emails sent", num_emails)
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test server") parser.add_argument('-n', "--no-mail", action='store_true', help="Don't mail. Just print to console") default = 14 parser.add_argument('-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/ # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4 # script -vv -> DEBUG # script -v -> INFO # script -> WARNING # script -q -> ERROR # script -qq -> CRITICAL # script -qqq -> no logging at all logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) db = connection.gisds.runcomplete epoch_present, epoch_back = generate_window(args.win) results = db.find({"timestamp": {"$gt": epoch_back, "$lt": epoch_present}}) runs = {} extra_text = "" for record in results: run_number = record['run'] timestamp = record['timestamp'] runs[timestamp] = run_number od = collections.OrderedDict(sorted(runs.items())) logger.info("Found %s runs", results.count()) extra_text = "Found {} runs. \n".format(results.count()) for _, v in od.items():# v is run results = db.find({"run": v}) for record in results: if not 'analysis' in record: continue last_analysis = record['analysis'][-1] status = last_analysis.get("Status") if not status: continue if status == 'SUCCESS': if last_analysis.get("per_mux_status"): mux = last_analysis.get("per_mux_status") for d in mux: if d is None: logger.warning("Skipping empty per_mux_status for run %s." \ "Needs fix in DB", v) continue if d.get('Status') == "SUCCESS":# FIXME what if key is missing? mux_id = d['mux_id'] stats_submission = d['StatsSubmission'] if stats_submission == "FAILED": extra_text += "StatsSubmission for mux_id {} from run {} " \ "has FAILED and out_dir is {} \n" \ .format(mux_id, v, last_analysis.get("out_dir")) extra_text += "\n" archive_submission = d['ArchiveSubmission'] if archive_submission == "FAILED": extra_text += "ArchiveSubmission for mux_id {} from run {} " \ "has FAILED and out_dir is {} \n" \ .format(mux_id, v, last_analysis.get("out_dir")) extra_text += "\n" elif status == 'FAILED': extra_text += "Analysis for run {} has failed. \n".format(v) extra_text += "Analysis_id is {} and out_dir is {} \n" \ .format(last_analysis.get("analysis_id"), \ last_analysis.get("out_dir")) extra_text += "\n" extra_text += "---------------------------------------------------\n" logger.info("Analysis for run %s has failed ", v) elif status == 'STARTED': analysis_id = last_analysis.get("analysis_id") analysis_epoch_time = isoformat_to_epoch_time(analysis_id+"+08:00") run_completion_time = timestamp/1000 dt1 = datetime.datetime.fromtimestamp(run_completion_time) dt2 = datetime.datetime.fromtimestamp(analysis_epoch_time) rd = dateutil.relativedelta.relativedelta(dt1, dt2) if rd.days > 3: extra_text += "Analysis for run {} was started {} days ago. "\ "Please check. 
\n".format(v, rd.days) extra_text += "Analysis_id is {} and out_dir is {} \n" \ .format(last_analysis.get("analysis_id"), \ last_analysis.get("out_dir")) extra_text += "\n" extra_text += "---------------------------------------------------\n" extra_text += "Report generation is completed" subject = "Report generation for bcl2fastq" if args.testing: subject = "Testing:" + subject if args.no_mail: print("Skipping sending of email with subject '{}' and following body:".format(subject)) print(extra_text) else: send_mail(subject, extra_text) logger.info("Report generation is completed")
def main(): """main function """ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( '-n', "--dryrun", action='store_true', help="Don't actually update DB (best used in conjunction with -v -v)") parser.add_argument('-t', "--testing", action='store_true', help="Use MongoDB test-server. Don't do anything") default = 14 parser.add_argument( '-w', '--win', type=int, default=default, help="Number of days to look back (default {})".format(default)) parser.add_argument('-v', '--verbose', action='count', default=0, help="Increase verbosity") parser.add_argument('-q', '--quiet', action='count', default=0, help="Decrease verbosity") args = parser.parse_args() # Repeateable -v and -q for setting logging level. # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose) if not is_production_user(): LOGGER.warning("Not a production user. Exiting") sys.exit(1) connection = mongodb_conn(args.testing) if connection is None: sys.exit(1) #LOGGER.info("Database connection established") dbcol = connection.gisds.pipeline_runs site = get_site() epoch_now, epoch_then = generate_window(args.win) cursor = dbcol.find({ "ctime": { "$gt": epoch_then, "$lt": epoch_now }, "site": site }) LOGGER.info("Looping through {} jobs".format(cursor.count())) for job in cursor: dbid = job['_id'] # only set here to avoid code duplication below try: out_dir = job['execution']['out_dir'] except KeyError: out_dir = None # no execution dict means start a new analysis if not job.get('execution'): LOGGER.info('Job {} to be started'.format(dbid)) # determine out_dir and set in DB # out_dir_override will take precedence over generating out_dir with get_downstream_outdir function if job.get('out_dir_override'): out_dir = job.get('out_dir_override') if os.path.exists(out_dir): mux = os.path.basename(out_dir) if not args.dryrun: LOGGER.critical( "Analysis for {} already exists under {}. Please start the analysis manually" .format(mux, out_dir)) res = dbcol.update_one( {"_id": ObjectId(dbid)}, {"$set": { "execution.status": "MANUAL" }}) assert res.modified_count == 1, ( "Modified {} documents instead of 1".format( res.modified_count)) sys.exit(1) #assert not os.path.exists(out_dir), ("Direcotry already exists {}").format(out_dir) else: out_dir = get_downstream_outdir(job['requestor'], job['pipeline_name'], job['pipeline_version']) # Note, since execution (key) exists, accidental double # starts are prevented even before start time etc is # logged via flagfiles. No active logging here so that # flag files logging just works. 
if args.dryrun: LOGGER.info("Skipping dry run option") continue status = start_cmd_execution(job, site, out_dir, args.testing) if status: res = dbcol.update_one( {"_id": ObjectId(dbid)}, {"$set": { "execution.out_dir": out_dir }}) assert res.modified_count == 1, ( "Modified {} documents instead of 1".format( res.modified_count)) else: LOGGER.warning("Job {} could not be started".format(dbid)) elif job['execution'].get('status') == "MANUAL": continue elif list_starterflags( out_dir ): # out_dir cannot be none because it's part of execution dict LOGGER.info( 'Job {} in {} started but not yet logged as such in DB'.format( dbid, out_dir)) matches = list_starterflags(out_dir) assert len(matches) == 1, ( "Got several starter flags in {}".format(out_dir)) sflag = StarterFlag(matches[0]) assert sflag.dbid == str(dbid) set_started(dbcol, sflag.dbid, str(sflag.timestamp), dryrun=args.dryrun) os.unlink(sflag.filename) elif job['execution'].get('status') in ['STARTED', 'RESTART']: LOGGER.info( 'Job %s in %s set as re|started so checking on completion', dbid, out_dir) set_completion_if(dbcol, dbid, out_dir, dryrun=args.dryrun) else: # job complete LOGGER.debug('Job %s in %s should be completed', dbid, out_dir) LOGGER.info("Successful program exit")
def main():
    """main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.per_mux_status": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    run_list = {}
    mongo_db_ref = {}
    for record in results:
        run_number = record['run']
        mux_list = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.",
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    logger.info("MUX %s from %s is not SUCCESS. Skipping downstream analysis",
                                mux_status['mux_id'], run_number)
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                mux_db_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                if mux_status.get('Status') == "SUCCESS" and \
                   mux_status.get('DownstreamSubmission') == "TODO":
                    mongo_list = (mux_id, mux_db_id, analysis_id)
                    mongo_db_ref.setdefault(run_number, []).append(mongo_list)
                    mux_list.setdefault(mux_id, []).append(out_dir)
        for mux_id, out_dir in mux_list.items():
            mux_list_success = mux_list[mux_id]
            # Check if the MUX has been analyzed successfully more than once
            if len(mux_list_success) > 1:
                body = ("{} has been analyzed successfully more than once. The delegator is "
                        "skipping the downstream analysis under {}. Please check the "
                        "results.".format(mux_id, mux_list_success))
                subject = "Downstream delegator skipped job submission for {}".format(mux_id)
                if args.testing:
                    subject += " (testing)"
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                continue
            mux_info = (mux_id, out_dir)
            run_list.setdefault(run_number, []).append(mux_info)

    for run_num_flowcell, mux_list in run_list.items():
        update_status = True
        pipeline_params_dict, mux_analysis_list = get_lib_details(run_num_flowcell,
                                                                  mux_list, args.testing)
        if not pipeline_params_dict:
            logger.warning("pipeline params is empty for run num %s", run_num_flowcell)
            continue
        # Insert jobs into pipeline_runs collection
        for lib, lib_info in pipeline_params_dict.items():
            job = {}
            rd_list = {}
            job['sample_cfg'] = {}
            readunits_list = list()
            rd_list['samples'] = {}
            for outer_key, outer_value in lib_info.items():
                if outer_key == 'readunits':
                    for inner_key in lib_info[outer_key]:
                        readunits_list.append(inner_key)
                    job['sample_cfg'].update({outer_key: outer_value})
                if outer_key == 'references_cfg':
                    job['references_cfg'] = outer_value
                elif outer_key == 'cmdline':
                    job['cmdline'] = outer_value
                elif outer_key != 'readunits':
                    job.update({outer_key: outer_value})
                else:
                    rd_list['samples'][lib] = readunits_list
                    job['sample_cfg'].update(rd_list)
            if args.dry_run:
                logger.warning("Skipping job delegation for %s", lib)
                continue
            res = mongodb_insert_libjob(job, connection)
            if not res:
                logger.critical("Skipping rest of analysis job submission"
                                " for %s from %s", lib, lib_info.run_id)
                subject = "Downstream delegator failed job submission for {}".format(lib)
                if args.testing:
                    subject += " (testing)"
                body = "Downstream delegator failed to insert job submission for {}".format(lib)
                send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                update_status = False
                logger.warning("Clean up the database for mux %s from run %s and ctime %s",
                               lib_info.mux_id, lib_info.run_id, lib_info.ctime)
                mongodb_remove_muxjob(lib_info.mux_id, lib_info.run_id,
                                      lib_info.ctime, connection)
                break
        # Update runcomplete collection for delegated jobs
        if not args.dry_run and update_status:
            value = mongo_db_ref[run_num_flowcell]
            for mux_id, insert_id, analysis_id in value:
                if mux_id in mux_analysis_list:
                    logger.info("Update MongoDB pipeline_runs for mux_id %s from run number %s"
                                " and analysis_id %s", mux_id, run_num_flowcell, analysis_id)
                    res = mongodb_update_runcomplete(run_num_flowcell, analysis_id, mux_id,
                                                     insert_id, connection)
                    if not res:
                        logger.critical("Skipping rest of analysis job submission for %s"
                                        " from %s", mux_id, run_num_flowcell)
                        subject = "Downstream delegator failed job submission for {}".format(mux_id)
                        if args.testing:
                            subject += " (testing)"
                        body = "Downstream delegator failed to insert job submission for {}".format(mux_id)
                        send_mail(subject, body, toaddr='veeravallil', ccaddr=None)
                        update_status = False
                        break
    connection.close()
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    # Check that the pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({"analysis.Status": "SUCCESS",
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    for record in results:
        run_number = record['run']
        # Downstream analysis is not initiated for Novogene (NG00*) runs
        if "NG00" in run_number:
            continue
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")
            analysis_id = analysis.get("analysis_id")
            # Check if bcl2fastq completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                    continue
                # Check if downstream analysis has already been started
                if not os.path.exists(os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL), exist_ok=True)
                    # generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    try:
                        with open(os.path.join(out_dir, "config_casava-1.8.2.txt"), "w") as f:
                            _ = subprocess.call(config_cmd, stderr=subprocess.STDOUT, stdout=f)
                    except subprocess.CalledProcessError as e:
                        logger.fatal("The following command failed with return code %s: %s",
                                     e.returncode, ' '.join(config_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # generic sample sheet
                    samplesheet_cmd = 'cd {} && {} -r {}'.format(out_dir, SAMPLESHEET, run_number)
                    try:
                        _ = subprocess.check_output(samplesheet_cmd, shell=True)
                    except subprocess.CalledProcessError as e:
                        logger.fatal("The following command failed with return code %s: %s",
                                     e.returncode, samplesheet_cmd)
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    # Generate and submit the BWA and RNAseq mapping pipelines
                    _, runid, _ = get_machine_run_flowcell_id(run_number)
                    generic_samplesheet = os.path.join(out_dir, runid + "_SampleSheet.csv")
                    if os.path.exists(os.path.join(out_dir, generic_samplesheet)):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            dirs, BWA, run_number, out_dir,
                            os.path.join(out_dir, generic_samplesheet),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += " && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}".format(
                            RNA, run_number, out_dir,
                            os.path.join(out_dir, generic_samplesheet),
                            os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            # Remove config txt
                            os.remove(os.path.join(out_dir, "config_casava-1.8.2.txt"))
                        else:
                            try:
                                # write the submission command into the submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG), 'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal("The following command failed with return code %s: %s",
                                             e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                # send status mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id,
                                                 os.path.join(out_dir, LOG_DIR_REL,
                                                              "mapping_submission.log"))
                                sys.exit(1)
                        num_triggers += 1
                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        # send status mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id,
                                         os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number, out_dir)

    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)
def main():
    """main function
    """
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    continue
                # for all others: send email and update db
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed.",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")  # mail_to already set
                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id, email_sent_query, True)

    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """main function
    """
    bcl2fastq_qc_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl2fastq_qc.py"))
    assert os.path.exists(bcl2fastq_qc_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send email on detected failures")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping DB update")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"analysis.Status": "SUCCESS",
                       "analysis.QC_status": {"$exists": 0},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    for record in results:
        run_number = record['run']
        #for analysis in record['analysis']:
        for (analysis_count, analysis) in enumerate(record['analysis']):
            out_dir = analysis["out_dir"]
            analysis_id = analysis['analysis_id']
            status = analysis['Status']
            # Check if bcl2fastq completed successfully
            if analysis['Status'] != "SUCCESS":
                logger.info("Analysis is not completed successfully under %s", out_dir)
                continue
            if not os.path.exists(out_dir):
                logger.critical("Following directory listed in DB doesn't exist: %s", out_dir)
                continue
            if args.testing:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.test.txt")
            else:
                bcl2fastq_qc_out = os.path.join(out_dir, "bcl2fastq_qc.txt")
            if os.path.exists(bcl2fastq_qc_out):
                logger.critical("Refusing to overwrite existing file %s. Skipping QC check",
                                bcl2fastq_qc_out)
                continue
            bcl2fastq_qc_cmd = [bcl2fastq_qc_script, '-d', out_dir]
            if args.no_mail:
                bcl2fastq_qc_cmd.append("--no-mail")
            if args.dry_run:
                logger.warning("Skipped following run: %s", out_dir)
                continue
            try:
                QC_status = "analysis.{}.QC_status".format(analysis_count)
                status = subprocess.check_output(bcl2fastq_qc_cmd, stderr=subprocess.STDOUT)
                if "QC_FAILED" in str(status):
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "FAILED"}})
                    logger.info("Demux QC failed for run: %s", run_number)
                else:
                    db.update({"run": run_number, 'analysis.analysis_id': analysis_id},
                              {"$set": {QC_status: "SUCCESS"}})
                    logger.info("Demux QC SUCCESS for run: %s", run_number)
                with open(bcl2fastq_qc_out, 'w') as fh:
                    fh.write(status.decode())
            except subprocess.CalledProcessError as e:
                logger.fatal("The following command failed with return code %s: %s",
                             e.returncode, ' '.join(bcl2fastq_qc_cmd))
                logger.fatal("Output: %s", e.output.decode())
                logger.fatal("Exiting")
    connection.close()
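# ---------------------------------------------------------------------------
# Note: every main() above maps repeatable -v/-q flags onto a logging level via
# logging.WARN + 10*quiet - 10*verbose. The snippet below is a self-contained
# sketch of just that pattern, added for illustration only; it is not part of
# the pipeline scripts.
# ---------------------------------------------------------------------------
import argparse
import logging


def verbosity_to_level(argv=None):
    """Sketch: translate counted -v/-q flags into a stdlib logging level."""
    parser = argparse.ArgumentParser(description="verbosity demo")
    parser.add_argument('-v', '--verbose', action='count', default=0)
    parser.add_argument('-q', '--quiet', action='count', default=0)
    args = parser.parse_args(argv)
    # WARNING by default; each -v lowers the threshold by 10, each -q raises it by 10
    return logging.WARN + 10*args.quiet - 10*args.verbose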