Example #1
def run_folder_for_run_id(runid_and_flowcellid, site=None, basedir_map=SEQDIR_BASE):
    """runid has to contain flowcell id

    AKA $RAWSEQDIR

    >>> run_folder_for_run_id('HS004-PE-R00139_BC6A7HANXX')
    '/mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX'

    MiSeq runs (machine id MS00*) get an extra MiSeqOutput subdirectory.
    """

    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError("Unknown site: {}".format(site))
    basedir = basedir_map[site]

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    if machineid.startswith('MS00'):
        # FIXME untested and unclear for NSCC
        rundir = "{}/{}/MiSeqOutput/{}_{}".format(basedir, machineid, runid, flowcellid)
    else:
        rundir = "{}/{}/{}_{}".format(basedir, machineid, runid, flowcellid)

    return rundir
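
The helpers above come from the pipeline's own library. A minimal sketch of get_machine_run_flowcell_id, consistent with the docstring (the machine id is the token before the first '-', the flowcell id follows the underscore); the real helper may validate more strictly:

def get_machine_run_flowcell_id(runid_and_flowcellid):
    """Split e.g. 'HS004-PE-R00139_BC6A7HANXX' into
    ('HS004', 'HS004-PE-R00139', 'BC6A7HANXX'). Sketch only."""
    runid, flowcellid = runid_and_flowcellid.split("_", 1)
    machineid = runid.split("-", 1)[0]
    return machineid, runid, flowcellid

# With a hypothetical basedir_map, the docstring example resolves as:
# run_folder_for_run_id('HS004-PE-R00139_BC6A7HANXX', site='GIS',
#                       basedir_map={'GIS': '/mnt/seq/userrig'})
# -> '/mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX'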
Example #2
def run_folder_for_run_id(runid_and_flowcellid):
    """runid has to contain flowcell id

    AKA $RAWSEQDIR

    >>> run_folder_for_run_id('HS004-PE-R00139_BC6A7HANXX')
    '/mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX'

    MiSeq runs (machine id MS00*) get an extra MiSeqOutput subdirectory.
    """

    basedir = site_cfg['bcl2fastq_seqdir_base']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    if machineid.startswith('MS00'):  # FIXME needs proper cfg handling
        # FIXME untested and unclear for NSCC
        rundir = "{}/{}/MiSeqOutput/{}_{}".format(basedir, machineid, runid,
                                                  flowcellid)
    else:
        if machineid.startswith('NG0'):  # FIXME needs proper cfg handling
            basedir = basedir.replace("userrig", "novogene")
        rundir = "{}/{}/{}_{}".format(basedir, machineid, runid, flowcellid)

    return rundir
Example #3
def runs_from_db(connection, testing, win=14):
    """Get the runs from pipeline_run collections"""
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"run" : {"$regex" : "^NG00"},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs", results.count())
    for record in results:
        run_number = record['run']
        logger.debug("record: %s", record)
        if not record.get('analysis'):
            continue
        # Check if Novogene run_mode
        _, run_id, _ = get_machine_run_flowcell_id(run_number)
        if testing:
            rest_url = rest_services['run_details']['testing'].replace("run_num", run_id)
        else:
            rest_url = rest_services['run_details']['production'].replace("run_num", run_id)
        response = requests.get(rest_url)
        if response.status_code != requests.codes.ok:
            response.raise_for_status()
        rest_data = response.json()
        sg10k_lib_list = get_sg10_lib_list(rest_data)
        run_records = {}
        for (analysis_count, analysis) in enumerate(record['analysis']):
            analysis_id = analysis['analysis_id']
            per_mux_status = analysis.get("per_mux_status", None)
            if per_mux_status is None:
                continue
            for (mux_count, mux_status) in enumerate(per_mux_status):
                # sanity checks against corrupted DB entries
                if mux_status is None or mux_status.get('mux_id') is None:
                    logger.warning("mux_status is None or incomplete for run %s analysis %s."
                                   " Requires fix in DB. Skipping entry for now.",
                                   run_number, analysis_id)
                    continue
                if mux_status.get('Status', None) != "SUCCESS":
                    continue
                mux_id = mux_status['mux_id']
                out_dir = analysis['out_dir']
                if not os.path.exists(out_dir):
                    logger.warning("Directory does not exist: %s", out_dir)
                    continue
                downstream_id = "analysis.{}.per_mux_status.{}.DownstreamSubmission".format(
                    analysis_count, mux_count)
                # Status == SUCCESS was already ensured above
                if mux_status.get('DownstreamSubmission') == "TODO":
                    mux_info = (run_number, downstream_id, analysis_id, out_dir)
                    if mux_id in run_records:
                        logger.info("MUX %s from %s has been analyzed successfully "
                                    "more than once; please check", mux_id, run_number)
                        del run_records[mux_id]
                    elif mux_id in sg10k_lib_list:
                        run_records[mux_id] = mux_info
        if run_records:
            yield run_records
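
generate_window bounds the MongoDB timestamp query in several of these examples. A minimal sketch, assuming the runcomplete collection stores epoch milliseconds (the real helper may use different units):

import time

def generate_window(days=14):
    """Return (epoch_present, epoch_back) spanning the last `days` days,
    in epoch milliseconds. Sketch under the millisecond assumption."""
    epoch_present = int(time.time() * 1000)
    epoch_back = epoch_present - days * 24 * 60 * 60 * 1000
    return epoch_present, epoch_back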
Example #4
def get_sample_info(child, rows, mux_analysis_list, mux_id, fastq_data_dir,
                    run_num_flowcell, sample_info):
    """Collect sample info from the ELM JSON
    """
    sample_cfg = {}
    site = get_site()
    ctime, _ = generate_window(1)
    _, _, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    mux_analysis_list.add(mux_id)
    sample_id = child['libraryId']
    sample_cfg['requestor'] = rows['requestor']
    sample_cfg['ctime'] = ctime
    sample_cfg['site'] = site
    try:
        sample_cfg['pipeline_name'] = legacy_mapper['pipeline_mapper'][
            child['Analysis']]
    except KeyError as e:
        sample_cfg['pipeline_name'] = child['Analysis']
        logger.warning("%s: pipeline not mapped to newer version", e)
        return sample_info
    pipeline_version = get_pipeline_version(
        child['pipeline_version'] if 'pipeline_version' in child else None)
    sample_cfg['pipeline_version'] = pipeline_version
    #sample_cfg['pipeline_params'] = 'params'
    ref_info = get_reference_info(child['Analysis'],
                                  sample_cfg['pipeline_version'], child['genome'])
    if not ref_info:
        logger.info("ref_info not available")
        return sample_info
    cmdline_info = get_cmdline_info(child)
    sample_cfg['references_cfg'] = ref_info
    if cmdline_info:
        sample_cfg['cmdline'] = cmdline_info

    readunits_dict = {}
    status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'],
                                   rows['laneId'])
    if status:
        ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],
                      rows['laneId'], None, fq1, fq2)
        k = key_for_readunit(ru)
        readunits_dict[k] = dict(ru._asdict())
        sample_cfg['readunits'] = readunits_dict
        if sample_info.get(sample_id, {}).get('readunits', {}):
            sample_info[sample_id]['readunits'].update(readunits_dict)
        else:
            sample_info[sample_id] = sample_cfg
    return sample_info
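
ReadUnit and key_for_readunit are imported from the pipeline library; the field order can be read off the ReadUnit(...) call above, and the field names off how read units are consumed in Example #9 below. A minimal sketch; the real key derivation may differ:

import hashlib
from collections import namedtuple

ReadUnit = namedtuple('ReadUnit', ['run_id', 'flowcell_id', 'library_id',
                                   'lane_id', 'rg_id', 'fq1', 'fq2'])

def key_for_readunit(ru):
    """Stable dictionary key for a read unit (sketch)."""
    fields = [str(ru.run_id), str(ru.flowcell_id),
              str(ru.library_id), str(ru.lane_id)]
    return hashlib.md5('-'.join(fields).encode()).hexdigest()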
Example #5
def runs_from_db(db, mail_to, ccaddr, win=34):
    """Get the runs from pipeline_run collections"""
    epoch_present, epoch_back = generate_window(win)
    results = db.find({"analysis" : {"$exists": False},
                       "timestamp": {"$gt": epoch_back, "$lt": epoch_present}})
    logger.info("Found %d runs for last %s days", results.count(), win)
    mail = False
    subject = "Runs with missing ELM information"
    body = "Dear NGSP, " + "\n"
    body += subject + " for the following runs. Please include in the ELM." + "\n"
    for record in results:
        logger.debug("record: %s", record)
        _, runid, _ = get_machine_run_flowcell_id(record.get('run'))
        rest_data = get_rest_data(runid)
        if not rest_data.get('runId'):
            body += record.get('run')+ "\n"
            mail = True
    if mail:
        send_mail(subject, body, toaddr=mail_to, ccaddr=ccaddr)
Example #6
def get_bcl2fastq_outdir(runid_and_flowcellid):
    """where to write bcl2fastq output to
    """

    if is_devel_version():
        basedir = site_cfg['bcl2fastq_outdir_base']['devel']
    else:
        basedir = site_cfg['bcl2fastq_outdir_base']['production']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir,
        mid=machineid,
        rid=runid,
        fid=flowcellid,
        ts=generate_timestamp())
    return outdir
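
generate_timestamp only has to produce a filesystem-safe, sortable tag for the bcl2fastq_<ts> directory name. A minimal sketch; the real helper may use another format or sub-second precision:

from datetime import datetime

def generate_timestamp():
    """Sortable, filesystem-safe timestamp, e.g. '2017-06-04T09-45-12'."""
    return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")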
Example #7
def get_bcl2fastq_outdir(runid_and_flowcellid, site=None, basedir_map=OUTDIR_BASE):
    """Return the bcl2fastq output directory for a run folder
    """

    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError("Unknown site: {}".format(site))

    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir, mid=machineid, rid=runid, fid=flowcellid,
        ts=generate_timestamp())
    return outdir
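
In this variant basedir_map is keyed by site and then by devel/production. A hypothetical OUTDIR_BASE illustrating the expected shape (the paths are invented; the real map lives in the pipeline config):

OUTDIR_BASE = {
    'GIS': {'devel': '/mnt/projects/rpd/testing/output/bcl2fastq',
            'production': '/mnt/projects/userrig/output/bcl2fastq'},
}

# get_bcl2fastq_outdir('HS004-PE-R00139_BC6A7HANXX', site='GIS',
#                      basedir_map=OUTDIR_BASE)
# -> '<basedir>/HS004/HS004-PE-R00139_BC6A7HANXX/bcl2fastq_<timestamp>'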
Example #8
def get_lib_details(run_num_flowcell, mux_list, testing):
    """Lib info collection from ELM per run
    """
    _, run_num, _ = get_machine_run_flowcell_id(run_num_flowcell)
    # Call rest service to get component libraries
    if testing:
        rest_url = rest_services['run_details']['testing'].replace(
            "run_num", run_num)
        logger.info("development server")
    else:
        rest_url = rest_services['run_details']['production'].replace(
            "run_num", run_num)
        logger.info("production server")
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    rest_data = response.json()
    logger.debug("rest_data from %s: %s", rest_url, rest_data)
    sample_info = {}
    mux_analysis_list = set()
    if rest_data.get('runId') is None:
        logger.info("JSON data is empty for run num %s", run_num)
        return sample_info, mux_analysis_list
    for mux_id, out_dir in mux_list:
        fastq_data_dir = os.path.join(out_dir[0], 'out', "Project_" + mux_id)
        if os.path.exists(fastq_data_dir):
            for rows in rest_data['lanes']:
                if mux_id in rows['libraryId']:
                    logger.info("Checking the pipeline params for %s from run number %s", \
                        rows['libraryId'], run_num)
                    if "MUX" in rows['libraryId']:
                        for child in rows['Children']:
                            if child['Analysis'] != "Sequence only":
                                sample_info = get_sample_info(child, rows, mux_analysis_list, \
                                    mux_id, fastq_data_dir, run_num_flowcell, sample_info)
                    else:
                        if rows['Analysis'] != "Sequence only":
                            sample_info = get_sample_info(rows, rows, mux_analysis_list, \
                                mux_id, fastq_data_dir, run_num_flowcell, sample_info)
    return sample_info, mux_analysis_list
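
check_fastq reports whether FASTQs exist for a library and lane and returns them as (status, fq1, fq2). A minimal sketch, assuming standard bcl2fastq output naming (<lib>_<barcode>_L00<lane>_R1_001.fastq.gz); the real helper may apply extra sanity checks:

import os
import glob

def check_fastq(fastq_data_dir, library_id, lane_id):
    """Locate the R1 (and optional R2) FASTQs for a library and lane.
    Sketch only; returns (status, fq1, fq2)."""
    sample_dir = os.path.join(fastq_data_dir, "Sample_{}".format(library_id))
    pat = os.path.join(sample_dir,
                       "{}_*_L00{}_R{{}}_*.fastq.gz".format(library_id, lane_id))
    fq1 = sorted(glob.glob(pat.format(1)))
    fq2 = sorted(glob.glob(pat.format(2)))
    if not fq1:
        return False, None, None
    return True, fq1[0], (fq2[0] if fq2 else None)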
Example #9
def get_mux_details(run_number, mux_id, fastq_dest):
    """Fastq details etc for a MUX
    """
    sample_list = glob.glob(os.path.join(fastq_dest, "*"+ mux_id, 'Sample_*'))
    _, run_id, flowcell_id = get_machine_run_flowcell_id(run_number)
    readunits_dict = {}
    samples_dict = {}
    for sample_dir in sample_list:
        readunits = readunits_for_sampledir(sample_dir)
        # insert run id and flowcell id which can't be inferred from filename
        for ru in readunits.values():
            ru['run_id'] = run_id
            ru['flowcell_id'] = flowcell_id
        lib_ids = [ru['library_id'] for ru in readunits.values()]
        assert len(set(lib_ids)) == 1
        sample_name = lib_ids[0]
        assert sample_name not in samples_dict
        samples_dict[sample_name] = list(readunits.keys())
        for k, v in readunits.items():
            assert k not in readunits_dict
            readunits_dict[k] = v
    return {'samples': samples_dict,
            'readunits': readunits_dict}
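
readunits_for_sampledir (imported from the pipeline library) parses read units out of the FASTQ names in one Sample_* directory. For orientation, a hypothetical result for a one-sample, one-lane MUX, matching how get_mux_details consumes it:

# Hypothetical shapes for illustration only:
# readunits_for_sampledir('.../Project_MUX1234/Sample_WHH100') ->
#   {'WHH100-1': {'library_id': 'WHH100', 'lane_id': '1', 'rg_id': None,
#                 'fq1': '.../WHH100_..._L001_R1_001.fastq.gz',
#                 'fq2': '.../WHH100_..._L001_R2_001.fastq.gz'}}
# get_mux_details() then injects 'run_id' and 'flowcell_id' and returns
#   {'samples': {'WHH100': ['WHH100-1']},
#    'readunits': {'WHH100-1': {...}}}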
Example #10
def get_lib_details(run_num_flowcell, mux_list, testing):
    """Lib info collection from ELM per run
    """
    _, run_num, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    # Call rest service to get component libraries
    if testing:
        rest_url = rest_services['run_details']['testing'].replace("run_num", run_num)
        logger.info("development server")
    else:
        rest_url = rest_services['run_details']['production'].replace("run_num", run_num)
        logger.info("production server")
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    rest_data = response.json()
    logger.debug("rest_data from %s: %s", rest_url, rest_data)
    sample_info = {}
    if rest_data.get('runId') is None:
        logger.info("JSON data is empty for run num %s", run_num)
        return sample_info
    for mux_id, out_dir in mux_list:
        fastq_data_dir = os.path.join(out_dir[0], 'out', "Project_"+mux_id)
        if os.path.exists(fastq_data_dir):
            for rows in rest_data['lanes']:
                if mux_id in rows['libraryId']:
                    if "MUX" in rows['libraryId']:
                        for child in rows['Children']:
                            if child['Analysis'] != "Sequence only":
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                sample = child['libraryId']
                                sample_dict['requestor'] = rows['requestor']
                                sample_dict['ctime'] = ctime
                                sample_dict['pipeline_name'] = child['Analysis']
                                if 'pipeline_version' in child:
                                    sample_dict['pipeline_version'] = child['pipeline_version']
                                else:
                                    sample_dict['pipeline_version'] = None
                                sample_dict['pipeline_params'] = 'params'
                                sample_dict['site'] = get_site()
                                out_dir = get_downstream_outdir(sample_dict['requestor'], \
                                    sample_dict['pipeline_version'], sample_dict['pipeline_name'])
                                sample_dict['out_dir'] = out_dir
                                readunits_dict = {}
                                status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'],\
                                    rows['laneId'])
                                if status:
                                    ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'],\
                                        rows['laneId'], None, fq1, fq2)
                                    k = key_for_read_unit(ru)
                                    readunits_dict[k] = dict(ru._asdict())
                                    sample_dict['readunits'] = readunits_dict
                                    if sample_info.get(sample, {}).get('readunits'):
                                        sample_info[sample]['readunits'].update(readunits_dict)
                                    else:
                                        sample_info[sample] = sample_dict
                    else:
                        if rows['Analysis'] != "Sequence only":
                            sample = rows['libraryId']
                            status, fq1, fq2 = check_fastq(fastq_data_dir, rows['libraryId'], \
                                rows['laneId'])
                            if status:
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                readunits_dict = {}
                                ru = ReadUnit(run_num_flowcell, flowcellid, rows['libraryId'], \
                                    rows['laneId'], None, fq1, fq2)
                                k = key_for_read_unit(ru)
                                readunits_dict[k] = dict(ru._asdict())
                                sample_dict['readunits'] = readunits_dict
                                sample_info[sample] = sample_dict
    return sample_info
Example #11
def main():
    """
    The main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--force-overwrite",
                        action="store_true",
                        help="Force overwriting of output files")
    parser.add_argument("-r", "--rundir",
                        dest="rundir",
                        required=True,
                        help="rundir, e.g. /mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX")
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument("-o", "--outdir",
                        required=True,
                        dest="outdir",
                        help="Output directory")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    (rundir, outdir, runinfo) = getdirs(args)
    samplesheet_csv = os.path.join(outdir, SAMPLESHEET_CSV)
    usebases_cfg = os.path.join(outdir, USEBASES_CFG)
    muxinfo_cfg = os.path.join(outdir, MUXINFO_CFG)
    for f in [samplesheet_csv, usebases_cfg, muxinfo_cfg]:
        if not args.force_overwrite and os.path.exists(f):
            logger.fatal("Refusing to overwrite existing file %s", f)
            sys.exit(1)

    _, run_num, flowcellid = get_machine_run_flowcell_id(rundir)
    logger.info("Querying ELM for %s", run_num)

    if args.test_server:
        rest_url = rest_services['run_details']['testing'].replace("run_num", run_num)
        logger.info("development server")
    else:
        rest_url = rest_services['run_details']['production'].replace("run_num", run_num)
        logger.info("production server")
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    rest_data = response.json()
    logger.debug("rest_data from %s: %s", rest_url, rest_data)
    run_id = rest_data['runId']
    #counter = 0
    if rest_data['runPass'] != 'Pass':
        logger.warning("Skipping non-passed run")
        # NOTE: exit 0 and missing output files is the upstream signal for a failed run
        sys.exit(0)

    # this is the master samplesheet
    logger.info("Writing to %s", samplesheet_csv)
    # keys: lanes, values are barcode lens in lane (always two tuples, -1 if not present)
    barcode_lens = {}
    mux_units = dict()

    with open(samplesheet_csv, 'w') as fh_out:
        fh_out.write(SAMPLESHEET_HEADER + '\n')
        for rows in rest_data['lanes']:
            if rows['lanePass'] != 'Pass':
                continue
            BCL_Mismatch = []
            if 'requestor' in rows:
                requestor = rows['requestor']
            else:
                requestor = None
            if "MUX" in rows['libraryId']:
                # multiplexed
                #counter = 0
                for child in rows['Children']:
                    #counter += 1
                    #id = 'S' + str(counter)
                    if 'BCL_Mismatch' in child:
                        BCL_Mismatch.append(child['BCL_Mismatch'])
                        # older samples have no values and that's okay

                    if "-" in child['barcode']:
                        # dual index
                        index = child['barcode'].split('-')
                        sample = rows['laneId']+',Sample_'+child['libraryId']+','+ \
                            child['libraryId']+'-'+child['barcode']+',,,,'+ index[0] +',,'+ \
                            index[1] + ',' +'Project_'+rows['libraryId']+','+child['libtech']
                        index_lens = (len((index[0])), len((index[1])))
                    else:
                        sample = rows['laneId']+',Sample_'+child['libraryId']+','+ \
                            child['libraryId']+'-'+child['barcode']+',,,,'+child['barcode']+',,,'\
                            +'Project_'+rows['libraryId']+','+child['libtech']
                        index_lens = (len(child['barcode']), -1)

                    barcode_lens.setdefault(rows['laneId'], []).append(index_lens)
                    fh_out.write(sample+ '\n')

            else:# non-multiplexed
                sample = rows['laneId']+',Sample_'+rows['libraryId']+','+rows['libraryId']+ \
                    '-NoIndex'+',,,,,,,'+'Project_'+rows['libraryId']+','+rows['libtech']
                index_lens = (-1, -1)
                barcode_lens.setdefault(rows['laneId'], []).append(index_lens)
                fh_out.write(sample + '\n')

            #Barcode mismatch has to be the same for all the libraries in one MUX.
            #Otherwise default mismatch value to be used
            if len(set(BCL_Mismatch)) == 1:
                barcode_mismatches = BCL_Mismatch[0]
            else:
                barcode_mismatches = DEFAULT_BARCODE_MISMATCHES
            mu = MuxUnit._make([run_id, flowcellid, rows['libraryId'], [rows['laneId']], \
                'Project_'+ rows['libraryId'], barcode_mismatches, requestor])
            # merge lane into existing mux if needed
            if mu.mux_id in mux_units:
                mu_orig = mux_units[mu.mux_id]
                assert mu.barcode_mismatches == mu_orig.barcode_mismatches
                assert len(mu.lane_ids) == 1  # a list by design, but with just one element
                # list.extend() mutates in place (and returns None), so the
                # stored mux unit picks up the extra lane directly
                mu_orig.lane_ids.extend(mu.lane_ids)
            else:
                mux_units[mu.mux_id] = mu

    logger.info("Writing to %s", usebases_cfg)
    usebases = generate_usebases(barcode_lens, runinfo)
    with open(usebases_cfg, 'w') as fh:
        fh.write(yaml.dump(dict(usebases=usebases), default_flow_style=True))

    logger.info("Writing to %s", muxinfo_cfg)
    with open(muxinfo_cfg, 'w') as fh:
        fh.write(yaml.dump([dict(mu._asdict()) for mu in mux_units.values()], \
            default_flow_style=True))
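
MuxUnit is another pipeline namedtuple; the field order can be read directly off the MuxUnit._make([...]) call above. A minimal sketch:

from collections import namedtuple

# Field order matches the _make() call above; sketch only.
MuxUnit = namedtuple('MuxUnit', ['run_id', 'flowcell_id', 'mux_id',
                                 'lane_ids', 'mux_dir',
                                 'barcode_mismatches', 'requestor'])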
Example #12
def main():
    """
    The main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--force-overwrite",
                        action="store_true",
                        help="Force overwriting of output files")
    parser.add_argument(
        "-r",
        "--rundir",
        dest="rundir",
        required=True,
        help="rundir, e.g. /mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX")
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument("-o",
                        "--outdir",
                        required=True,
                        dest="outdir",
                        help="Output directory")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    (rundir, outdir, runinfo) = getdirs(args)
    samplesheet_csv = os.path.join(outdir, SAMPLESHEET_CSV)
    muxinfo_cfg = os.path.join(outdir, MUXINFO_CFG)
    for f in [samplesheet_csv, muxinfo_cfg]:
        if not args.force_overwrite and os.path.exists(f):
            logger.fatal("Refusing to overwrite existing file %s", f)
            sys.exit(1)
    _, run_num, flowcellid = get_machine_run_flowcell_id(rundir)
    logger.info("Querying ELM for %s", run_num)
    rest_data = get_rest_data(run_num, args.test_server)
    status_cfg = os.path.join(outdir, STATUS_CFG)
    assert rest_data['runId'], (
        "Rest data from ELM does not have runId {}".format(run_num))
    if rest_data['runPass'] != 'Pass':
        logger.warning("Skipping non-passed run")
        with open(status_cfg, 'w') as fh_out:
            fh_out.write("SEQRUNFAILED")
        sys.exit(0)
    status = generate_samplesheet(rest_data, flowcellid, outdir, runinfo)
    if not status:
        with open(status_cfg, 'w') as fh_out:
            fh_out.write("NON-BCL")
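
get_rest_data wraps the same ELM REST call that Examples #8 and #10 make inline. A minimal sketch built from that inline pattern (the single-argument call in Example #5 presumably defaults to the production server):

import requests

# rest_services: the same URL-template dict the other examples import
def get_rest_data(run_num, test_server=False):
    """Fetch run details from the ELM REST service (sketch; mirrors the
    inline requests.get() pattern used in the other examples)."""
    key = 'testing' if test_server else 'production'
    rest_url = rest_services['run_details'][key].replace("run_num", run_num)
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    return response.json()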
Example #13
def main():
    """
    The main function
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--force-overwrite",
                        action="store_true",
                        help="Force overwriting of output files")
    parser.add_argument(
        "-r",
        "--rundir",
        dest="rundir",
        required=True,
        help="rundir, e.g. /mnt/seq/userrig/HS004/HS004-PE-R00139_BC6A7HANXX")
    parser.add_argument('-t', "--test-server", action='store_true')
    parser.add_argument("-o",
                        "--outdir",
                        required=True,
                        dest="outdir",
                        help="Output directory")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    (rundir, outdir, runinfo) = getdirs(args)
    samplesheet_csv = os.path.join(outdir, SAMPLESHEET_CSV)
    usebases_cfg = os.path.join(outdir, USEBASES_CFG)
    muxinfo_cfg = os.path.join(outdir, MUXINFO_CFG)
    for f in [samplesheet_csv, usebases_cfg, muxinfo_cfg]:
        if not args.force_overwrite and os.path.exists(f):
            logger.fatal("Refusing to overwrite existing file %s", f)
            sys.exit(1)

    _, run_num, flowcellid = get_machine_run_flowcell_id(rundir)
    logger.info("Querying ELM for %s", run_num)

    rest_data = get_rest_data(run_num, args.test_server)
    assert rest_data['runId'], (
        "Rest data from ELM does not have runId {}".format(run_num))

    run_id = rest_data['runId']
    #counter = 0
    if rest_data['runPass'] != 'Pass':
        logger.warning("Skipping non-passed run")
        # NOTE: exit 0 and missing output files is the upstream signal for a failed run
        sys.exit(0)

    # this is the master samplesheet
    logger.info("Writing to %s", samplesheet_csv)
    # keys: lanes, values are barcode lens in lane (always two tuples, -1 if not present)
    barcode_lens = {}
    mux_units = dict()

    with open(samplesheet_csv, 'w') as fh_out:
        fh_out.write(SAMPLESHEET_HEADER + '\n')
        for rows in rest_data['lanes']:
            if rows['lanePass'] != 'Pass':
                continue
            BCL_Mismatch = []
            if 'requestor' in rows:
                requestor = rows['requestor']
            else:
                requestor = None
            if "MUX" in rows['libraryId']:
                # multiplexed
                #counter = 0
                for child in rows['Children']:
                    #counter += 1
                    #id = 'S' + str(counter)
                    if 'BCL_Mismatch' in child:
                        BCL_Mismatch.append(child['BCL_Mismatch'])
                        # older samples have no values and that's okay

                    if "-" in child['barcode']:
                        # dual index
                        index = child['barcode'].split('-')
                        sample = rows['laneId']+',Sample_'+child['libraryId']+','+ \
                            child['libraryId']+'-'+child['barcode']+',,,,'+ index[0] +',,'+ \
                            index[1] + ',' +'Project_'+rows['libraryId']+','+child['libtech']
                        index_lens = (len((index[0])), len((index[1])))
                    else:
                        sample = rows['laneId']+',Sample_'+child['libraryId']+','+ \
                            child['libraryId']+'-'+child['barcode']+',,,,'+child['barcode']+',,,'\
                            +'Project_'+rows['libraryId']+','+child['libtech']
                        index_lens = (len(child['barcode']), -1)

                    barcode_lens.setdefault(rows['laneId'],
                                            []).append(index_lens)
                    fh_out.write(sample + '\n')

            else:  # non-multiplexed
                sample = rows['laneId']+',Sample_'+rows['libraryId']+','+rows['libraryId']+ \
                    '-NoIndex'+',,,,,,,'+'Project_'+rows['libraryId']+','+rows['libtech']
                index_lens = (-1, -1)
                barcode_lens.setdefault(rows['laneId'], []).append(index_lens)
                fh_out.write(sample + '\n')

            #Barcode mismatch has to be the same for all the libraries in one MUX.
            #Otherwise default mismatch value to be used
            if len(set(BCL_Mismatch)) == 1:
                barcode_mismatches = BCL_Mismatch[0]
            else:
                barcode_mismatches = DEFAULT_BARCODE_MISMATCHES
            mu = MuxUnit._make([run_id, flowcellid, rows['libraryId'], [rows['laneId']], \
                'Project_'+ rows['libraryId'], barcode_mismatches, requestor])
            # merge lane into existing mux if needed
            if mu.mux_id in mux_units:
                mu_orig = mux_units[mu.mux_id]
                assert mu.barcode_mismatches == mu_orig.barcode_mismatches
                assert len(mu.lane_ids) == 1  # a list by design, but with just one element
                # list.extend() mutates in place (and returns None), so the
                # stored mux unit picks up the extra lane directly
                mu_orig.lane_ids.extend(mu.lane_ids)
            else:
                mux_units[mu.mux_id] = mu

    logger.info("Writing to %s", usebases_cfg)
    usebases = generate_usebases(barcode_lens, runinfo)
    with open(usebases_cfg, 'w') as fh:
        fh.write(yaml.dump(dict(usebases=usebases), default_flow_style=True))

    logger.info("Writing to %s", muxinfo_cfg)
    with open(muxinfo_cfg, 'w') as fh:
        fh.write(yaml.dump([dict(mu._asdict()) for mu in mux_units.values()], \
            default_flow_style=True))
Example #14
def main():
    """main function
    """

    # FIXME ugly and code duplication in bcl2fastq_dbupdate.py
    mongo_status_script = os.path.abspath(
        os.path.join(os.path.dirname(sys.argv[0]), "mongo_status.py"))
    assert os.path.exists(mongo_status_script)

    default_parser = default_argparser(CFG_DIR,
                                       allow_missing_cfgfile=True,
                                       allow_missing_outdir=True,
                                       default_db_logging=True)
    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                     parents=[default_parser])
    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument('-r',
                        "--runid",
                        help="Run ID plus flowcell ID (clashes with -d)")
    parser.add_argument(
        '-d',
        "--rundir",
        help=
        "BCL input directory (clashes with -r; you also probably want to disable logging)"
    )
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('--no-archive',
                        action='store_true',
                        help="Don't archive this analysis")
    parser.add_argument(
        '-l',
        '--lanes',
        type=int,
        nargs="*",
        help="Limit run to given lane/s (multiple lanes separated by space)")
    parser.add_argument(
        '-i',
        '--mismatches',
        type=int,
        help="Max. number of allowed barcode mismatches (0<=x<=2);"
        " setting a value here overrides the default settings read from ELM")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if args.mismatches is not None:
        if args.mismatches > 2 or args.mismatches < 0:
            logger.fatal("Number of mismatches must be between 0-2")
            sys.exit(1)

    lane_info = ''
    lane_nos = []
    if args.lanes:
        lane_info = '--tiles '
        for lane in args.lanes:
            if lane > 8 or lane < 1:
                logger.fatal("Lane number must be between 1-8")
                sys.exit(1)
            else:
                lane_info += 's_{}'.format(lane) + ','
        lane_info = lane_info.rstrip()
        lane_info = lane_info[:-1]
        lane_nos = list(args.lanes)

    if args.runid and args.rundir:
        logger.fatal(
            "Cannot use run-id and input directory arguments simultaneously")
        sys.exit(1)
    elif args.runid:
        rundir = run_folder_for_run_id(args.runid)
    elif args.rundir:
        rundir = os.path.abspath(args.rundir)
    else:
        logger.fatal("Need either run-id or input directory")
        sys.exit(1)
    if not os.path.exists(rundir):
        logger.fatal("Expected run directory %s does not exist", rundir)
        sys.exit(1)
    logger.info("Rundir is %s", rundir)

    if not args.outdir:
        outdir = get_bcl2fastq_outdir(args.runid)
        args.outdir = outdir
    else:
        outdir = args.outdir
    if os.path.exists(outdir):
        logger.fatal("Output directory %s already exists", outdir)
        sys.exit(1)
    # create now so that generate_bcl2fastq_cfg.py can run
    os.makedirs(outdir)

    # catch cases where rundir was user provided and looks weird
    try:
        _, runid, flowcellid = get_machine_run_flowcell_id(rundir)
        run_num = runid + "_" + flowcellid
    except Exception:
        run_num = "UNKNOWN-" + rundir.split("/")[-1]

    # call generate_bcl2fastq_cfg
    #
    # FIXME ugly assumes same directory (just like import above). better to import and run main()?
    generate_bcl2fastq = os.path.join(os.path.dirname(sys.argv[0]),
                                      "generate_bcl2fastq_cfg.py")
    assert os.path.exists(generate_bcl2fastq)
    cmd = [generate_bcl2fastq, '-r', rundir, '-o', outdir]
    if args.testing:
        cmd.append("-t")
    logger.debug("Executing %s", ' '.join(cmd))
    try:
        res = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logger.fatal("The following command failed with return code %s: %s",
                     e.returncode, ' '.join(cmd))
        logger.fatal("Output: %s", e.output.decode())
        logger.fatal("Exiting")
        os.rmdir(outdir)
        sys.exit(1)
    # generate_bcl2fastq is normally quiet. if there's output, make caller aware of it
    # use sys instead of logger to avoid double logging
    if res:
        sys.stderr.write(res.decode())

    # just created files
    muxinfo_cfg = os.path.join(outdir, MUXINFO_CFG)
    status_cfg = os.path.join(outdir, STATUS_CFG)

    # NOTE: signal for failed runs is exit 0 from generate_bcl2fastq and missing output files
    #
    if not os.path.exists(muxinfo_cfg):
        # check status: seqrunfailed or non-bcl run
        with open(status_cfg, 'r') as fh:
            status = fh.read().strip()
        update_run_status(mongo_status_script, run_num, outdir, status,
                          args.testing)
        sys.exit(0)

    # turn arguments into cfg_dict that gets merged into pipeline config
    cfg_dict = {
        'rundir': rundir,
        'lanes_arg': lane_info,
        'no_archive': args.no_archive,
        'run_num': run_num
    }

    mux_units = get_mux_units_from_cfgfile(muxinfo_cfg, lane_nos)
    if args.mismatches is not None:
        mux_units = [
            mu._replace(barcode_mismatches=args.mismatches) for mu in mux_units
        ]
    os.unlink(muxinfo_cfg)

    cfg_dict['units'] = dict()
    for mu in mux_units:
        # special case: mux split across multiple lanes. make lanes a list
        # and add in extra lanes if needed.
        k = mu.mux_dir
        mu_dict = dict(mu._asdict())
        cfg_dict['units'][k] = mu_dict

    # create mongodb update command, used later, after submission
    mongo_update_cmd = "{} -r {} -s STARTED".format(mongo_status_script,
                                                    cfg_dict['run_num'])
    mongo_update_cmd += " -a $ANALYSIS_ID -o {}".format(
        outdir)  # set in run.sh
    if args.testing:
        mongo_update_cmd += " -t"

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        logger_cmd=mongo_update_cmd,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))

    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
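
update_run_status shells out to the same mongo_status.py script used for the STARTED update. A plausible sketch, with flag names taken from the mongo_update_cmd string built above; the real helper may differ:

import subprocess

def update_run_status(mongo_status_script, run_num, outdir, status, testing):
    """Record a run's analysis status in MongoDB via mongo_status.py.
    Sketch; the flags mirror mongo_update_cmd above."""
    cmd = [mongo_status_script, '-r', run_num, '-s', status, '-o', outdir]
    if testing:
        cmd.append('-t')
    subprocess.check_call(cmd)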
Example #15
def main():
    """main function
    """

    # FIXME ugly and code duplication in bcl2fastq_dbupdate.py
    mongo_status_script = os.path.abspath(os.path.join(
        os.path.dirname(sys.argv[0]), "mongo_status.py"))
    assert os.path.exists(mongo_status_script)

    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()))
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID (clashes with -d)")
    parser.add_argument('-d', "--rundir",
                        help="BCL input directory (clashes with -r)")
    parser.add_argument('-o', "--outdir",
                        help="Output directory (must not exist; required if called by user)")
    parser.add_argument('-t', "--testing", action='store_true',
                        help="Use MongoDB test server")
    parser.add_argument('--no-archive', action='store_true',
                        help="Don't archive this analysis")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send mail on completion")
    site = get_site()
    default = DEFAULT_SLAVE_Q.get(site, None)
    parser.add_argument('-w', '--slave-q', default=default,
                        help="Queue to use for slave jobs (default: {})".format(default))
    default = DEFAULT_MASTER_Q.get(site, None)
    parser.add_argument('-m', '--master-q', default=default,
                        help="Queue to use for master job (default: {})".format(default))
    parser.add_argument('-l', '--lanes', type=int, nargs="*",
                        help="Limit run to given lane/s (multiple lanes separated by space)")
    parser.add_argument('-i', '--mismatches', type=int,
                        help="Max. number of allowed barcode mismatches (0<=x<=2);"
                        " setting a value here overrides the default settings read from ELM")
    parser.add_argument('-n', '--no-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")


    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    aux_logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if args.mismatches is not None:
        if args.mismatches > 2 or args.mismatches < 0:
            logger.fatal("Number of mismatches must be between 0-2")
            sys.exit(1)

    lane_info = ''
    lane_nos = []
    if args.lanes:
        lane_info = '--tiles '
        for lane in args.lanes:
            if lane > 8 or lane < 1:
                logger.fatal("Lane number must be between 1-8")
                sys.exit(1)
            else:
                lane_info += 's_{}'.format(lane)+','
        lane_info = lane_info.rstrip()
        lane_info = lane_info[:-1]
        lane_nos = list(args.lanes)


    if args.runid and args.rundir:
        logger.fatal("Cannot use run-id and input directory arguments simultaneously")
        sys.exit(1)
    elif args.runid:
        rundir = run_folder_for_run_id(args.runid)
    elif args.rundir:
        rundir = os.path.abspath(args.rundir)
    else:
        logger.fatal("Need either run-id or input directory")
        sys.exit(1)
    if not os.path.exists(rundir):
        logger.fatal("Expected run directory {} does not exist".format(rundir))
        sys.exit(1)
    logger.info("Rundir is {}".format(rundir))

    if not args.outdir:
        outdir = get_bcl2fastq_outdir(args.runid)
    else:
        outdir = args.outdir
    if os.path.exists(outdir):
        logger.fatal("Output directory %s already exists", outdir)
        sys.exit(1)
    # create now so that generate_bcl2fastq_cfg.py can run
    os.makedirs(outdir)
    # catch cases where rundir was user provided and looks weird
    try:
        _, runid, flowcellid = get_machine_run_flowcell_id(rundir)
        run_num = runid + "_" + flowcellid
    except Exception:
        run_num = "UNKNOWN-" + rundir.split("/")[-1]


    # call generate_bcl2fastq_cfg
    #
    # FIXME ugly assumes same directory (just like import above). better to import and run main()?
    generate_bcl2fastq = os.path.join(
        os.path.dirname(sys.argv[0]), "generate_bcl2fastq_cfg.py")
    assert os.path.exists(generate_bcl2fastq)
    cmd = [generate_bcl2fastq, '-r', rundir, '-o', outdir]
    if args.testing:
        cmd.append("-t")
    logger.debug("Executing {}".format(' ' .join(cmd)))
    try:
        res = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logger.fatal("The following command failed with return code {}: {}".format(
            e.returncode, ' '.join(cmd)))
        logger.fatal("Output: {}".format(e.output.decode()))
        logger.fatal("Exiting")
        sys.exit(1)
    # generate_bcl2fastq is normally quiet. if there's output, make caller aware of it
    # use sys instead of logger to avoid double logging
    if res:
        sys.stderr.write(res.decode())

    # just created files
    muxinfo_cfg = os.path.join(outdir, MUXINFO_CFG)
    samplesheet_csv = os.path.join(outdir, SAMPLESHEET_CSV)
    usebases_cfg = os.path.join(outdir, USEBASES_CFG)

    # NOTE: signal for failed runs is exit 0 from generate_bcl2fastq and missing output files
    #
    if any([not os.path.exists(x) for x in [muxinfo_cfg, samplesheet_csv, usebases_cfg]]):
        # one missing means all should be missing
        assert all([not os.path.exists(x) for x in [muxinfo_cfg, samplesheet_csv, usebases_cfg]])
        seqrunfailed(mongo_status_script, run_num, outdir, args.testing)
        sys.exit(0)


    # turn arguments into user_data that gets merged into pipeline config
    user_data = {'rundir': rundir,
                 'lanes_arg': lane_info,
                 'samplesheet_csv': samplesheet_csv,
                 'no_archive': args.no_archive,
                 'mail_on_completion': not args.no_mail,
                 'run_num': run_num}


    usebases_arg = ''
    with open(usebases_cfg, 'r') as stream:
        try:
            d = yaml.safe_load(stream)
            assert 'usebases' in d
            assert len(d) == 1  # make sure usebases is the only key
            for ub in d['usebases']:
                #print (ub)
                usebases_arg += '--use-bases-mask {} '.format(ub)
            #user_data = {'usebases_arg' : usebases_arg}
        except yaml.YAMLError as exc:
            logger.fatal(exc)
            raise
    user_data['usebases_arg'] = usebases_arg
    os.unlink(usebases_cfg)


    mux_units = get_mux_units_from_cfgfile(muxinfo_cfg, lane_nos)
    if args.mismatches is not None:
        mux_units = [mu._replace(barcode_mismatches=args.mismatches)
                     for mu in mux_units]
    os.unlink(muxinfo_cfg)


    user_data['units'] = dict()
    for mu in mux_units:
        # special case: mux split across multiple lanes. make lanes a list
        # and add in extra lanes if needed.
        k = mu.mux_dir
        mu_dict = dict(mu._asdict())
        user_data['units'][k] = mu_dict

    # create mongodb update command, used later, after queueing
    mongo_update_cmd = "{} -r {} -s STARTED".format(mongo_status_script, user_data['run_num'])
    mongo_update_cmd += " -a $ANALYSIS_ID -o {}".format(outdir)# set in run.sh
    if args.testing:
        mongo_update_cmd += " -t"

    # NOTE: bcl2fastq has a special run template, so we need to
    # interfere with the default pipeline_handler. plenty of
    # opportunity to shoot yourself in the foot

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME, PIPELINE_BASEDIR, outdir, user_data,
        site=site, master_q=args.master_q, slave_q=args.slave_q)
    # use local run template
    pipeline_handler.run_template = os.path.join(
        PIPELINE_BASEDIR, "run.template.{}.sh".format(pipeline_handler.site))
    assert os.path.exists(pipeline_handler.run_template)
    pipeline_handler.setup_env()
    # final mongo update line in run_out
    tmp_run_out = pipeline_handler.run_out + ".tmp"
    with open(pipeline_handler.run_out) as fh_in, \
         open(tmp_run_out, 'w') as fh_out:
        for line in fh_in:
            line = line.replace("@MONGO_UPDATE_CMD@", mongo_update_cmd)
            fh_out.write(line)
    shutil.move(tmp_run_out, pipeline_handler.run_out)
    pipeline_handler.submit(args.no_run)
Example #16
def main():
    """main function"""

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-o", "--out_dir", required=True, help="out_dir")
    parser.add_argument("-m", "--mux_id", required=True, help="mux_id")
    parser.add_argument('-t',
                        "--test-server",
                        action='store_true',
                        help="Use test-server for stats uploading")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if not os.path.exists(args.out_dir):
        logger.fatal("out_dir %s does not exist", args.out_dir)
        sys.exit(1)
    logger.info("out_dir is %s", args.out_dir)

    confinfo = os.path.join(args.out_dir, 'conf.yaml')
    if not os.path.exists(confinfo):
        logger.fatal("conf info '%s' does not exist under run directory",
                     confinfo)
        sys.exit(1)
    if args.test_server:
        rest_url = rest_services['stats_upload']['testing']
        logger.info("send status to development server")
    else:
        rest_url = rest_services['stats_upload']['production']
        logger.info("send status to production server")

    with open(confinfo) as fh_cfg:
        yaml_data = yaml.safe_load(fh_cfg)
        assert "run_num" in yaml_data
        runid_with_flowcellid = yaml_data["run_num"]
        _, runid, _ = get_machine_run_flowcell_id(runid_with_flowcellid)
        assert "modules" in yaml_data
        soft_ver = yaml_data["modules"].get('bcl2fastq')
        if not soft_ver:
            logger.fatal("No bcl2fastq software version listed in %s",
                         confinfo)
            sys.exit(1)
        assert "units" in yaml_data
        if "Project_" + args.mux_id not in yaml_data["units"]:
            logger.fatal("mux_id %s does not exist in conf.yaml under %s",
                         args.mux_id, args.out_dir)
            sys.exit(1)

        for k, v in yaml_data["units"].items():
            if k == "Project_{}".format(args.mux_id):
                data = {}
                mux_dir = v.get('mux_dir')
                index_html_path = glob.glob(
                    os.path.join(args.out_dir, "out", mux_dir,
                                 "html/*/all/all/all/lane.html"))
                index_html = index_html_path[0] if index_html_path else None
                # FIXME should use the snakemake trigger to decide if complete
                if index_html and os.path.exists(index_html):
                    logger.info("Uploading stats for completed bcl2fastq %s",
                                mux_dir)
                    data['path'] = index_html
                    data['software'] = soft_ver
                    data['runid'] = runid
                    test_json = json.dumps(data)
                    data_json = test_json.replace("\\", "")
                    headers = {'content-type': 'application/json'}
                    response = requests.post(rest_url,
                                             data=data_json,
                                             headers=headers)
                    # Response Code is 201 for STATs posting
                    if response.status_code == 201:
                        logger.info("Uploading %s completed successfully",
                                    index_html)
                        logger.info("JSON request was %s", data_json)
                        logger.info("Response was %s", response.status_code)
                    else:
                        logger.error("Uploading %s failed", index_html)
                        sys.exit(1)
                else:
                    logger.info(
                        "Skipping incomplete (html missing) bcl2fastq in %s",
                        mux_dir)
Example #17
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1',
                        "--break-after-first",
                        action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n',
                        "--dry-run",
                        action='store_true',
                        help="Don't run anything")
    parser.add_argument('-t',
                        "--testing",
                        action='store_true',
                        help="Use MongoDB test-server")
    default = 14
    parser.add_argument(
        '-w',
        '--win',
        type=int,
        default=default,
        help="Number of days to look back (default {})".format(default))
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    #Check if pipeline scripts are available
    assert os.path.exists(BWA)
    assert os.path.exists(RNA)
    connection = mongodb_conn(args.testing)
    if connection is None:
        sys.exit(1)
    db = connection.gisds.runcomplete
    epoch_present, epoch_back = generate_window(args.win)
    num_triggers = 0
    results = db.find({
        "analysis.Status": "SUCCESS",
        "timestamp": {
            "$gt": epoch_back,
            "$lt": epoch_present
        }
    })
    logger.info("Found %s runs", results.count())
    for record in results:
        run_number = record['run']
        # Downstream analysis will not be initiated for Novogene (NG00*) runs
        if "NG00" in run_number:
            continue
        for analysis in record['analysis']:
            out_dir = analysis.get("out_dir")

            #Check if bcl2Fastq is completed successfully
            if 'Status' in analysis and analysis.get("Status") == "SUCCESS":
                if not os.path.exists(out_dir):
                    logger.critical(
                        "Following directory listed in DB doesn't exist: %s",
                        out_dir)
                    continue

                #Check if downstream analysis has been started
                if not os.path.exists(
                        os.path.join(out_dir, "config_casava-1.8.2.txt")):
                    logger.info("Start the downstream analysis at %s", out_dir)
                    os.makedirs(os.path.join(out_dir, LOG_DIR_REL),
                                exist_ok=True)
                    #generate config file
                    config_cmd = [CONFIG, '-r', run_number]
                    try:
                        # check_call (unlike call) actually raises
                        # CalledProcessError, which the handler below expects
                        with open(os.path.join(out_dir,
                                               "config_casava-1.8.2.txt"),
                                  "w") as f:
                            subprocess.check_call(config_cmd,
                                                  stderr=subprocess.STDOUT,
                                                  stdout=f)
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(config_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #generic sample sheet
                    samplesheet_cmd = 'cd {} && {} -r {}'.format(
                        out_dir, SAMPLESHEET, run_number)
                    try:
                        _ = subprocess.check_output(samplesheet_cmd,
                                                    shell=True)
                    except subprocess.CalledProcessError as e:
                        logger.fatal(
                            "The following command failed with return code %s: %s",
                            e.returncode, ' '.join(samplesheet_cmd))
                        logger.fatal("Output: %s", e.output.decode())
                        logger.fatal("Exiting")
                        sys.exit(1)
                    #Generate and Submit BWA and RNAseq mapping pipeline
                    _, runid, _ = get_machine_run_flowcell_id(run_number)
                    generic_samplesheet = (os.path.join(
                        out_dir, runid + "_SampleSheet.csv"))
                    if os.path.exists(
                            os.path.join(out_dir, generic_samplesheet)):
                        dirs = os.path.join(out_dir, "out")
                        cmd = "cd {} && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(dirs, BWA, run_number, out_dir, os.path.join(out_dir, \
                                generic_samplesheet), os.path.join(out_dir, SUBMISSIONLOG))
                        cmd += " && {} -r {} -f {} -s {} -j 0 -p Production -c 5 >> {}" \
                            .format(RNA, run_number, out_dir, os.path.join(out_dir, \
                                generic_samplesheet), os.path.join(out_dir, SUBMISSIONLOG))
                        if args.dry_run:
                            logger.warning("Skipped following run: %s", cmd)
                            #Remove config txt
                            os.remove(
                                os.path.join(out_dir,
                                             "config_casava-1.8.2.txt"))
                        else:
                            try:
                                #analysisReport into submission log
                                with open(os.path.join(out_dir, SUBMISSIONLOG),
                                          'w') as fh:
                                    fh.write(cmd)
                                _ = subprocess.check_output(cmd, shell=True)
                            except subprocess.CalledProcessError as e:
                                logger.fatal(
                                    "The following command failed with return code %s: %s",
                                    e.returncode, cmd)
                                logger.fatal("Output: %s", e.output.decode())
                                logger.fatal("Exiting")
                                #send_status_mail
                                send_status_mail(PIPELINE_NAME, False, analysis_id, \
                                    os.path.join(out_dir, LOG_DIR_REL, "mapping_submission.log"))
                                sys.exit(1)
                            num_triggers += 1

                        if args.break_after_first:
                            logger.info("Stopping after first sequencing run")
                            sys.exit(0)
                    else:
                        #send_status_mail
                        logger.info("samplesheet.csv missing for %s under %s",
                                    run_number, out_dir)
                        send_status_mail(PIPELINE_NAME, False, analysis_id, \
                            os.path.abspath(out_dir))
            elif analysis.get("Status") == "FAILED":
                logger.debug("BCL2FASTQ FAILED for %s under %s", run_number,
                             out_dir)
    # close the connection to MongoDB
    connection.close()
    logger.info("%s dirs with triggers", num_triggers)