def start_cmd_execution(record, site, out_dir, testing):
    """Start the analysis"""
    pipeline_params = " "
    extra_conf = " --extra-conf "
    extra_conf += "db-id:" + str(record['_id'])
    extra_conf += " requestor:" + record['requestor']
    # sample_cfg and references_cfg
    references_cfg = ""
    sample_cfg = ""
    for outer_key, outer_value in record.items():
        if outer_key == 'sample_cfg':
            LOGGER.info("write temp sample_config")
            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml',
                                             prefix='sample_cfg_', delete=False) as fh:
                sample_cfg = fh.name
                yaml.dump(outer_value, fh, default_flow_style=False)
        elif outer_key == 'references_cfg':
            LOGGER.info("write temp reference_config")
            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml',
                                             prefix='references_cfg_', delete=False) as fh:
                references_cfg = fh.name
                yaml.dump(outer_value, fh, default_flow_style=False)
        elif outer_key == 'cmdline':
            LOGGER.info("pipeline_cmd")
            for key, value in outer_value.items():
                pipeline_params += " --" + key + " " + value
    # pipeline path for production and testing
    if is_devel_version():
        pipeline_version = ""
    else:
        pipeline_version = record['pipeline_version'].split(".")[0]
    pipeline_path = get_pipeline_path(site, record['pipeline_name'],
                                      pipeline_version)
    pipeline_script = os.path.join(pipeline_path,
                                   os.path.split(pipeline_path)[-1] + ".py")
    # os.path.join() always returns a non-empty string, so check for existence
    if not os.path.exists(pipeline_script):
        LOGGER.critical("There seems to be trouble in executing cmd_line for "
                        "JobId: %s", str(record['_id']))
    pipeline_cmd = pipeline_script + " --sample-cfg " + sample_cfg + \
        " -o " + out_dir + " --db-logging y"
    if not sample_cfg:
        LOGGER.critical("Job doesn't have sample_cfg %s", str(record['_id']))
        sys.exit(1)
    if references_cfg:
        ref_params = " --references-cfg " + references_cfg
        pipeline_cmd += ref_params
    if pipeline_params:
        pipeline_cmd += pipeline_params
    if extra_conf:
        pipeline_cmd += extra_conf
    try:
        LOGGER.info(pipeline_cmd)
        _ = subprocess.check_output(pipeline_cmd, stderr=subprocess.STDOUT, shell=True)
        return True
    except subprocess.CalledProcessError as e:
        # pipeline_cmd is already a string; ' '.join() would space out every character
        LOGGER.fatal("The following command failed with return code %s: %s",
                     e.returncode, pipeline_cmd)
        LOGGER.fatal("Output: %s", e.output.decode())
        return False
def email_non_bcl(libraryId, runId):
    """send email for non-bcl libraries"""
    if is_devel_version():
        toaddr = email_for_user()
    else:
        toaddr = "*****@*****.**"
    subject = "bcl2fastq conversion not required for {} from run {}.".format(
        libraryId, runId)
    body = subject + "\n" + "Kindly start custom analysis manually. Thanks."
    send_mail(subject, body, toaddr=toaddr, pass_exception=False)
def get_pipeline_path(site, pipeline_name, pipeline_version):
    """get the pipeline path"""
    basedir_map = PRODUCTION_PIPELINE_VERSION
    if site not in basedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
    pipeline_path = os.path.join(basedir, pipeline_version, pipeline_name)
    return pipeline_path
def email_qcfails(subject, body):
    """email qc failures"""
    if is_devel_version():
        toaddr = email_for_user()
        ccaddr = None
    else:
        toaddr = config['email']
        ccaddr = "*****@*****.**"
    send_mail(subject, body, toaddr=toaddr, ccaddr=ccaddr, pass_exception=False)
def get_pipeline_path(site, pipeline_name, pipeline_version):
    """get the pipeline path"""
    basedir_map = PIPELINE_PATH_BASE
    if site not in basedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
        pipeline_path = os.path.join(basedir, pipeline_name)
        return pipeline_path
    else:
        basedir = basedir_map[site]['production']
        pipeline_path = glob.glob(os.path.join(
            basedir, "*" + pipeline_version, pipeline_name))
        return pipeline_path[0]
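# Usage sketch (values hypothetical; PIPELINE_PATH_BASE is assumed to map a
# site to {'devel': ..., 'production': ...} base directories as used above).
# In production the version is resolved by glob, so "2.0" would match e.g. a
# "pipelines-2.0" directory:
#
#   path = get_pipeline_path('GIS', 'custom/SG10K', '2.0')
#   # devel:      <devel_basedir>/custom/SG10K
#   # production: first match of <prod_basedir>/*2.0/custom/SG10K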
def get_bcl2fastq_outdir(runid_and_flowcellid):
    """where to write bcl2fastq output to"""
    if is_devel_version():
        basedir = site_cfg['bcl2fastq_outdir_base']['devel']
    else:
        basedir = site_cfg['bcl2fastq_outdir_base']['production']
    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)
    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir, mid=machineid, rid=runid, fid=flowcellid,
        ts=generate_timestamp())
    return outdir
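# Example of the resulting layout (run id hypothetical): if
# get_machine_run_flowcell_id() splits "HS001-PE-R00123_AHXXXXXXXX" into
# machine "HS001-PE", run "R00123" and flowcell "AHXXXXXXXX", the function
# returns a path of the form
#
#   <basedir>/HS001-PE/R00123_AHXXXXXXXX/bcl2fastq_<timestamp>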
def get_downstream_outdir(requestor, pipeline_version, pipeline_name,
                          site=None, basedir_map=OUTDIR_BASE,
                          base_pipelinedir_map=PRODUCTION_PIPELINE_VERSION):
    """generate downstream output directory"""
    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)
    if site not in base_pipelinedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
        if not pipeline_version:
            pipeline_version = base_pipelinedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
        if not pipeline_version:
            pipeline_version = os.readlink(base_pipelinedir_map[site]['production'])
    outdir = "{basedir}/{requestor}/{pversion}/{pname}/{ts}".format(
        basedir=basedir, requestor=requestor, pversion=pipeline_version,
        pname=pipeline_name, ts=generate_timestamp())
    return outdir
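# Example (values hypothetical): with requestor "userrig", pipeline name
# "custom/SG10K" and version "2.0" the template above yields
#
#   <basedir>/userrig/2.0/custom/SG10K/<timestamp>
#
# Note that in production a missing pipeline_version is resolved by reading
# the symlink at base_pipelinedir_map[site]['production'] via os.readlink().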
def get_bcl2fastq_outdir(runid_and_flowcellid, site=None, basedir_map=OUTDIR_BASE):
    """where to write bcl2fastq output to"""
    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)
    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir, mid=machineid, rid=runid, fid=flowcellid,
        ts=generate_timestamp())
    return outdir
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    default = 34
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
        ccaddr = None
    else:
        mail_to = '*****@*****.**'
        ccaddr = "rpd"
    runs_from_db(db, mail_to, ccaddr, args.win)
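# Minimal standalone sketch of the repeatable -v/-q arithmetic used by the
# main() functions in this listing: logging.WARNING is 30, each -q adds 10
# and each -v subtracts 10.
import logging

def log_level(verbose=0, quiet=0):
    """Map repeatable -v/-q counts to a stdlib logging level."""
    return logging.WARNING + 10 * quiet - 10 * verbose

assert log_level(verbose=2) == logging.DEBUG     # script -vv
assert log_level(verbose=1) == logging.INFO      # script -v
assert log_level() == logging.WARNING            # script
assert log_level(quiet=1) == logging.ERROR       # script -q
assert log_level(quiet=2) == logging.CRITICAL    # script -qq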
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    # only makes sense for production if run as cron
    if is_devel_version():
        k = 'devel'
    else:
        k = 'production'
    basedir = site_cfg['downstream_outdir_base'][k]
    dirglob = DOWNSTREAM_OUTDIR_TEMPLATE.format(
        basedir=basedir, user="******", pipelineversion="*",
        pipelinename="*", timestamp="*")
    logger.debug("dirglob is %s", dirglob)
    for flagfile in glob.glob(os.path.join(dirglob, WORKFLOW_COMPLETION_FLAGFILE)):
        # 'flagdir' rather than 'dir', to avoid shadowing the builtin
        flagdir = os.path.abspath(os.path.dirname(flagfile))
        logger.info("Starting staging out of %s", flagdir)
        try:
            s3prefix = os.path.join(
                S3_BUCKET,
                os.path.normpath(os.path.join(os.path.relpath(flagdir, basedir), "..")))
            cmd = [STAGE_OUT_WORKER, '-p', s3prefix, '-r', flagdir]
            _ = subprocess.check_output(cmd)
        except subprocess.CalledProcessError as e:
            logger.fatal("%s failed with exit code %s: %s. Will try to continue",
                         ' '.join(cmd), e.returncode, e.output)
            continue
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dryrun", action='store_true',
                        help="Don't run anything")
    default = 84
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    default = 60
    parser.add_argument('-d', '--days', type=int, default=default,
                        help="Bcl analysis not older than days (default {})".format(default))
    default = 60
    parser.add_argument('-r', '--tardays', type=int, default=default,
                        help="tar ball not older than days (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    results = db.find({"run": {"$regex": "^((?!NG00).)*$"},
                       "raw-delete": {"$exists": False},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    LOGGER.info("Looping through %s jobs", results.count())
    trigger = 0
    for record in results:
        try:
            run_num = record['run']
        except KeyError:
            run_num = None
        if not record.get('deletion'):
            # check run status
            res = check_run_status(record, args.days)
            if res:
                LOGGER.info("Create tar ball %s", run_num)
                if args.dryrun:
                    LOGGER.warning("Skipping Create tar ball %s", run_num)
                    continue
                create_run_tar(db, run_num)
                trigger = 1
        elif record['deletion'].get('tar'):
            res = check_tar_status_and_delete(db, record, args.tardays,
                                              dryrun=args.dryrun)
            if res:
                trigger = 1
        if args.break_after_first and trigger == 1:
            LOGGER.info("Stopping after first run")
            break
def main():
    """main function"""
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(1)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    continue
                # for all others: send email and update db
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed.",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(
                        mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(
                        out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")  # mail_to already set
                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """main function"""
    stats_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "bcl_stats_upload.py"))
    assert os.path.exists(stats_upload_script)
    archive_upload_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "sra_fastq_upload.py"))
    assert os.path.exists(archive_upload_script)

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping sending of emails")
        sys.exit(0)

    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_emails = 0
    results = db.find({"analysis": {"$exists": True},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %s runs", results.count())

    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        #mail_to = '*****@*****.**'
        mail_to = '*****@*****.**'

    for record in results:
        run_number = record['run']
        #print(run_number)
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                if args.dry_run:
                    logger.warning("Skipping analysis %s run %s MUX %s"
                                   " with email_sent %s",
                                   analysis_id, run_number, mux_status['mux_id'],
                                   mux_status.get('email_sent', None))
                    continue
                if mux_status.get('email_sent', None):
                    continue
                # for all others: send email and update db
                email_sent_query = "analysis.{}.per_mux_status.{}.email_sent".format(
                    analysis_count, mux_count)
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if mux_status.get('Status', None) == "FAILED":
                    logger.info("bcl2fastq for MUX %s from %s failed.",
                                mux_status['mux_id'], run_number)
                    subject = 'bcl2fastq: ' + mux_id
                    body = "bcl2fastq for {} from {} failed.".format(mux_id, run_number)
                    body += "\n\nPlease check the logs under {}".format(out_dir + "/logs")
                    send_mail(subject, body, mail_to, ccaddr="rpd")
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
                elif mux_status.get('Status', None) == "SUCCESS":
                    muxdir = os.path.join(out_dir, 'out', mux_status.get('mux_dir'))
                    summary = path_to_url(os.path.join(muxdir, 'html/index.html'))
                    body = "bcl2fastq for {} from {} successfully completed.".format(
                        mux_id, run_number)
                    body += "\n\nA summary can be found at {}".format(summary)
                    body += "\n\nFastQ files are located in {}".format(muxdir)
                    body += "\n\nData can also be downloaded from GIS-SRA (once archival is complete)"
                    confinfo = os.path.join(out_dir, 'conf.yaml')
                    #print(body)
                    if not os.path.exists(confinfo):
                        logger.fatal("conf info '%s' does not exist"
                                     " under run directory.", confinfo)
                        continue
                    subject = 'bcl2fastq'
                    if args.testing:
                        subject += ' testing'
                    if is_devel_version():
                        subject += ' devel'
                    subject += ': ' + mux_id
                    send_mail(subject, body, mail_to, ccaddr="rpd")  # mail_to already set
                    if not args.testing and not is_devel_version():
                        requestor = get_requestor(mux_id, confinfo)
                        if requestor is not None:
                            #requestor = "rpd"
                            #subject += " (instead of requestor)"
                            #send_mail(subject, body, requestor, ccaddr="rpd")
                            send_mail(subject, body, requestor)
                    num_emails += 1
                    update_mongodb_email(db, run_number, analysis_id,
                                         email_sent_query, True)
    # close the connection to MongoDB
    connection.close()
    logger.info("%d emails sent", num_emails)
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = "NSCC"
    parser.add_argument('-s', "--site", default=default,
                        help="site information (default = {})".format(default),
                        choices=['NSCC', 'GIS'])
    default = 14
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not is_production_user():
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = '*****@*****.**'
    run_records = runs_from_db(connection, args.testing, args.win)
    trigger = 0
    for run in run_records:
        for mux, mux_info in run.items():
            if args.dry_run:
                logger.warning("Skipping job delegation for %s from %s",
                               mux, mux_info[0])
                continue
            # skip if mux data transfer is already in progress
            find = check_mux_data_transfer_status(connection, mux_info)
            if find:
                continue
            res = start_data_transfer(connection, mux, mux_info, args.site, mail_to)
            if res:
                trigger = 1
        if args.break_after_first and trigger == 1:
            logger.info("Stopping after first run")
            break
    # close the connection to MongoDB
    connection.close()
def start_data_transfer(connection, mux, mux_info, site, mail_to):
    """Data transfer from source to destination"""
    run_number, downstream_id, analysis_id, bcl_path = mux_info
    fastq_src = os.path.join(bcl_path, "out", "Project_" + mux)
    bcl_dir = os.path.basename(bcl_path)
    if is_devel_version():
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'],
                                  mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['devel'],
                                 mux, mux + "_multisample.yaml")
    else:
        fastq_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'],
                                  mux, run_number, bcl_dir)
        yaml_dest = os.path.join(novogene_conf['FASTQ_DEST'][site]['production'],
                                 mux, mux + "_multisample.yaml")
    rsync_cmd = 'rsync -va %s %s' % (fastq_src, fastq_dest)
    if not os.path.exists(fastq_dest):
        try:
            os.makedirs(fastq_dest)
            logger.info("data transfer started for %s from %s", mux, run_number)
            st_time = generate_timestamp()
            update_downstream_mux(connection, run_number, analysis_id,
                                  downstream_id, "COPYING_" + st_time)
            _ = subprocess.check_output(rsync_cmd, shell=True,
                                        stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            body = "The following command failed with return code {}: {}".format(
                e.returncode, rsync_cmd)
            subject = "{} from {}: SG10K data transfer ({}) failed".format(
                mux, run_number, site)
            logger.fatal(body)
            logger.fatal("Output: %s", e.output.decode())
            logger.fatal("Exiting")
            # send email and mark the partially rsynced mux as ERROR
            send_mail(subject, body, toaddr=mail_to, ccaddr=None)
            update_downstream_mux(connection, run_number, analysis_id,
                                  downstream_id, "ERROR")
            sys.exit(1)
        # update MongoDB after successful data transfer
        sample_info = get_mux_details(run_number, mux, fastq_dest)
        # touch rsync complete file
        with open(os.path.join(fastq_dest, "rsync_complete.txt"), "w") as f:
            f.write("")
        with open(yaml_dest, 'w') as fh:
            yaml.dump(dict(sample_info), fh, default_flow_style=False)
        job = {}
        job['sample_cfg'] = {}
        # compute ctime once, outside the loop (also avoids a NameError below
        # if sample_info were empty)
        ctime, _ = generate_window(1)
        for outer_key, outer_value in sample_info.items():
            job['sample_cfg'].update({outer_key: outer_value})
        job['site'] = site
        job['pipeline_name'] = 'custom/SG10K'
        job['pipeline_version'] = novogene_conf['PIPELINE_VERSION']
        job['ctime'] = ctime
        job['requestor'] = 'userrig'
        if is_devel_version():
            novogene_outdir = os.path.join(
                novogene_conf['NOVOGENE_OUTDIR'][site]['devel'], mux)
        else:
            novogene_outdir = os.path.join(
                novogene_conf['NOVOGENE_OUTDIR'][site]['production'], mux)
        job['out_dir_override'] = novogene_outdir
        logger.info("Data transfer completed successfully for %s from %s",
                    mux, run_number)
        job_id = insert_muxjob(connection, mux, job)
        update_downstream_mux(connection, run_number, analysis_id,
                              downstream_id, job_id)
        subject = "{} from {}: SG10K data transfer ({}) completed".format(
            mux, run_number, site)
        body = "Data transfer successfully completed for {} from {}".format(
            mux, run_number)
        send_mail(subject, body, toaddr=mail_to, ccaddr=None)
        return True
    else:
        logger.critical("Mux %s from %s directory already exists under %s",
                        mux, run_number, fastq_dest)
        return False
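# Design note on the transfer above: 'rsync -va <src> <dest>' copies the whole
# Project_<mux> directory into fastq_dest, and the empty rsync_complete.txt
# written afterwards acts as a completion flag, so downstream pollers can tell
# a finished transfer apart from one still in flight (COPYING_<timestamp> state).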
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Don't run anything")
    default = 34
    parser.add_argument('-w', '--win', type=int, default=default,
                        help="Number of days to look back (default {})".format(default))
    default = 75
    parser.add_argument('-d', '--days', type=int, default=default,
                        help="Bcl analysis not older than days (default {})".format(default))
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test-server here and when calling bcl2fastq wrapper (-t)")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    LOGGER.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not is_production_user():
        LOGGER.warning("Not a production user. Skipping archival steps")
        sys.exit(1)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    if is_devel_version() or args.testing:
        mail_to = 'veeravallil'  # domain added in mail function
    else:
        mail_to = 'rpd'
    run_records = runs_from_db(db, args.days, args.win)
    for run in run_records:
        if args.dry_run:
            LOGGER.info("Skipping %s because of the dry-run option", run)
            continue
        purge(db, run, mail_to)
        if args.break_after_first:
            LOGGER.info("Stopping after first sequencing run")
            break