Example #1
    def __init__(
            self,
            script_name,  # used as logging prefix. can be dummy
            pipeline_name,
            pipeline_version,
            submitter,
            site,
            instance_id,
            log_path,  # main logging file
            elm_units):
        """FIXME:add-doc"""
        assert isinstance(elm_units, list)
        # FIXME: hardcoded fallback until RPD_ELMLOGDIR is reliably set in the environment
        elmlogdir = os.getenv('RPD_ELMLOGDIR', "/home/douj/project/elm_logs")

        pipelogdir = os.path.join(elmlogdir, pipeline_name)
        assert os.path.exists(pipelogdir), (
            "pipeline log dir {} doesn't exist".format(pipelogdir))

        # the timestamp is just a way to make the file name unique
        logfile = os.path.join(pipelogdir, generate_timestamp() + ".log")
        assert not os.path.exists(logfile)
        self.logfile = logfile

        # only used as logging prefix (not even parsed by ELM)
        self.script_name = script_name

        # json-like values; plain dict preserves insertion order on Python 3.7+
        self.fields = dict()
        # caller provided
        self.fields['pipeline_name'] = pipeline_name
        self.fields['pipeline_version'] = pipeline_version
        self.fields['site'] = site
        self.fields['instance_id'] = instance_id
        self.fields['submitter'] = submitter
        self.fields['log_path'] = log_path
        # internally computed
        self.fields['status_id'] = None

        self.elm_units = elm_units
Example #2
    def __init__(self,
                 script_name,  # used as logging prefix. can be dummy
                 pipeline_name,
                 pipeline_version,
                 submitter,
                 site,
                 instance_id,
                 log_path,  # main logging file
                 elm_units):
        """FIXME:add-doc"""

        assert isinstance(elm_units, list)

        elmlogdir = os.getenv('RPD_ELMLOGDIR')
        assert elmlogdir, ("RPD_ELMLOGDIR undefined")

        pipelogdir = os.path.join(elmlogdir, pipeline_name)
        assert os.path.exists(pipelogdir), (
            "pipeline log dir {} doesn't exist".format(pipelogdir))

        # the timestamp is just a way to make the file name unique
        logfile = os.path.join(pipelogdir, generate_timestamp() + ".log")
        assert not os.path.exists(logfile)
        self.logfile = logfile

        # only used as logging prefix (not even parsed by ELM)
        self.script_name = script_name

        # json-like values; plain dict preserves insertion order on Python 3.7+
        self.fields = dict()
        # caller provided
        self.fields['pipeline_name'] = pipeline_name
        self.fields['pipeline_version'] = pipeline_version
        self.fields['site'] = site
        self.fields['instance_id'] = instance_id
        self.fields['submitter'] = submitter
        self.fields['log_path'] = log_path
        # internally computed
        self.fields['status_id'] = None

        self.elm_units = elm_units
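
The enclosing class of this __init__ is not named in either example above. Purely for illustration, assuming a class called ElmLogging (hypothetical name), construction would look roughly like this; every argument value below is a placeholder:

elm_logger = ElmLogging(
    script_name="run_bcl2fastq.py",   # only a logging prefix, can be dummy
    pipeline_name="bcl2fastq",
    pipeline_version="2.17",
    submitter="userrig",
    site="GIS",                       # placeholder site key
    instance_id="run123",
    log_path="/path/to/main.log",     # main logging file
    elm_units=[])                     # must be a list, per the assert
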
Example #3
def get_downstream_outdir(requestor, pipeline_version, pipeline_name,
                          site=None, basedir_map=OUTDIR_BASE,
                          base_pipelinedir_map=PRODUCTION_PIPELINE_VERSION):
    """generate downstream output directory
    """
    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)
    if site not in base_pipelinedir_map:
        raise ValueError(site)
    if is_devel_version():
        basedir = basedir_map[site]['devel']
        if not pipeline_version:
            pipeline_version = base_pipelinedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']
        if not pipeline_version:
            pipeline_version = os.readlink(base_pipelinedir_map[site]['production'])
    outdir = "{basedir}/{requestor}/{pversion}/{pname}/{ts}".format(
        basedir=basedir, requestor=requestor, pversion=pipeline_version, pname=pipeline_name,
        ts=generate_timestamp())
    return outdir
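
A hedged usage sketch for get_downstream_outdir(), assuming OUTDIR_BASE and PRODUCTION_PIPELINE_VERSION map each site key to 'devel' and 'production' entries as the lookups above require; the requestor, version, pipeline name and site key are all placeholders:

outdir = get_downstream_outdir("userrig", "v1.0", "Mapping", site="GIS")
# expected shape: "<basedir>/userrig/v1.0/Mapping/<timestamp>"
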
Example #4
def get_bcl2fastq_outdir(runid_and_flowcellid, site=None, basedir_map=OUTDIR_BASE):
    """FIXME:add-doc
    """

    if not site:
        site = get_site()
    if site not in basedir_map:
        raise ValueError(site)

    if is_devel_version():
        basedir = basedir_map[site]['devel']
    else:
        basedir = basedir_map[site]['production']

    machineid, runid, flowcellid = get_machine_run_flowcell_id(
        runid_and_flowcellid)

    outdir = "{basedir}/{mid}/{rid}_{fid}/bcl2fastq_{ts}".format(
        basedir=basedir, mid=machineid, rid=runid, fid=flowcellid,
        ts=generate_timestamp())
    return outdir
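
get_machine_run_flowcell_id() is external to this example; the unpacking above implies it splits a composite "run id plus flowcell id" string into machine, run and flowcell parts. A hedged usage sketch with a made-up composite id:

# hypothetical composite id; the real naming scheme is not documented here
outdir = get_bcl2fastq_outdir("HS001-PE-R00123_AH1234BCXX", site="GIS")
# expected shape: "<basedir>/<machineid>/<runid>_<flowcellid>/bcl2fastq_<timestamp>"
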
Example #5
def seqrunfailed(mongo_status_script, run_num, outdir, testing):
    """FIXME:add-doc
    """
    logger.info("Setting analysis for {} to {}".format(run_num, "SEQRUNFAILED"))
    analysis_id = generate_timestamp()
    mongo_update_cmd = [mongo_status_script, "-r", run_num, "-s", "SEQRUNFAILED"]
    mongo_update_cmd.extend(["-a", analysis_id, "-o", outdir])
    if testing:
        mongo_update_cmd.append("-t")
    try:
        _ = subprocess.check_output(mongo_update_cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logger.fatal("The following command failed with return code {}: {}".format(
            e.returncode, ' '.join(mongo_update_cmd)))
        logger.fatal("Output: {}".format(e.output.decode()))
        logger.fatal("Exiting")
        sys.exit(1)

    flagfile = os.path.join(outdir, "SEQRUNFAILED")
    logger.info("Creating flag file {}".format(flagfile))
    with open(flagfile, 'w') as _:
        pass
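
generate_timestamp() appears throughout these examples as a cheap, unique, filesystem-safe id, but its definition is not included. A minimal sketch of what such a helper could look like; this is an assumption, not the actual implementation:

from datetime import datetime

def generate_timestamp():
    """Sketch only: microsecond-resolution, filesystem-safe timestamp"""
    # avoid ':' so the value is safe in file and directory names
    return datetime.now().strftime("%Y-%m-%dT%H-%M-%S.%f")
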
Example #6
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-r", "--DBid", help="DB Id", required=True)
    parser.add_argument(
        "-s", "--status", help="Analysis status", required=True, choices=["STARTED", "SUCCESS", "FAILED", "ORPHAN"]
    )
    parser.add_argument("-st", "--start-time", help="Start time", required=True)
    parser.add_argument("-o", "--out", help="Analysis output directory")
    parser.add_argument("-t", "--test_server", action="store_true")
    parser.add_argument("-n", "--dry-run", action="store_true", help="Dry run")
    parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
    parser.add_argument("-q", "--quiet", action="count", default=0, help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARNING + 10 * args.quiet - 10 * args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)
    _id = args.DBid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.pipeline_runs
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", _id, args.status)
    if args.status in ["STARTED"]:
        try:
            if not args.dry_run:
                db.update(
                    {"_id": ObjectId(_id)}, {"$push": {"runs": {"start_time": args.start_time, "status": args.status}}}
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                db.update(
                    {"_id": ObjectId(_id), "runs.start_time": args.start_time},
                    {"$set": {"runs.$": {"start_time": args.start_time, "end_time": end_time, "status": args.status}}},
                )
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)
    else:
        raise ValueError(args.status)

    # close the connection to MongoDB
    connection.close()
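
Note that Collection.update(), as used above, was deprecated in pymongo 3.0 and removed in 4.0. Since update() without multi=True modifies a single document, update_one() is the drop-in equivalent on current drivers; the same two updates would read:

# modern pymongo (>= 3.0) equivalents of the update() calls above
db.update_one({"_id": ObjectId(_id)},
              {"$push": {"runs": {"start_time": args.start_time,
                                  "status": args.status}}})
db.update_one({"_id": ObjectId(_id), "runs.start_time": args.start_time},
              {"$set": {"runs.$": {"start_time": args.start_time,
                                   "end_time": end_time,
                                   "status": args.status}}})
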
Example #7
def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-r', "--runid",
                        help="Run ID plus flowcell ID", required=True,)
    parser.add_argument('-s', "--status",
                        help="Analysis status", required=True,
                        choices=['STARTED', 'SUCCESS', 'FAILED', 'SEQRUNFAILED'])
    parser.add_argument('-a', "--analysis-id",
                        help="Analysis id", required=True)
    parser.add_argument('-o', "--out",
                        help="Analysis output directory")
    parser.add_argument('-t', "--test_server", action='store_true')
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARNING + 10*args.quiet - 10*args.verbose)

    user_name = getpass.getuser()
    if user_name != "userrig":
        logger.warning("Not a production user. Skipping MongoDB update")
        sys.exit(0)

    run_number = args.runid
    connection = mongodb_conn(args.test_server)
    if connection is None:
        sys.exit(1)
    logger.info("Database connection established")
    db = connection.gisds.runcomplete
    logger.debug("DB %s", db)
    logger.info("Status for %s is %s", run_number, args.status)
    if args.status in ["STARTED", "SEQRUNFAILED"]:
        try:
            if not args.dry_run:
                db.update({"run": run_number},
                          {"$push":
                           {"analysis": {
                               "analysis_id" : args.analysis_id,
                               "user_name" : user_name,
                               "out_dir" : args.out,
                               "Status" :  args.status,
                           }}})

        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)

    elif args.status in ["SUCCESS", "FAILED"]:
        end_time = generate_timestamp()
        logger.info("Setting timestamp to %s", end_time)
        try:
            if not args.dry_run:
                db.update({"run": run_number, 'analysis.analysis_id' : args.analysis_id},
                          {"$set":
                           {"analysis.$": {
                               "analysis_id" : args.analysis_id,
                               "end_time" : end_time,
                               "user_name" : user_name,
                               "out_dir" : args.out,
                               "Status" :  args.status,
                           }}})
        except pymongo.errors.OperationFailure:
            logger.fatal("mongoDB OperationFailure")
            sys.exit(0)

    else:
        raise ValueError(args.status)

    # close the connection to MongoDB
    connection.close()
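
Both MongoDB scripts rely on a mongodb_conn() helper that is not part of these examples. A minimal sketch under the assumption that it wraps pymongo.MongoClient and returns None on failure, which is what the "if connection is None" checks above expect; the host names are placeholders:

import pymongo

def mongodb_conn(use_test_server):
    """Sketch only; the real helper is not shown in these examples"""
    # placeholder host names; the real servers are site-specific
    host = "test-mongodb-host" if use_test_server else "prod-mongodb-host"
    try:
        connection = pymongo.MongoClient(host)
        # MongoClient connects lazily, so force a round trip to surface errors
        connection.admin.command('ping')
        return connection
    except pymongo.errors.ConnectionFailure:
        return None
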
# Log directory relative to outdir
LOG_DIR_REL = "mapping_logs"
# Submission log relative to outdir
SUBMISSIONLOG = os.path.join(LOG_DIR_REL, "mapping_submission.log")
# same as folder name. also used for cluster job names
PIPELINE_NAME = "Mapping"
# bcl2fastq config generator
CONFIG = "/home/userrig/Solexa/bcl2fastq2-v2.17/"
CONFIG += "generateBCL2FASTQ2.17config.sh"
# BWA mapping pipeline config generator
BWA = "/home/userrig/pipelines/NewBwaMappingPipelineMem/"
BWA += "generateBwa0.7.5aconfigurationV217V2.sh"
# RNA-seq (Tophat/Cufflinks) mapping pipeline config generator
RNA = "/home/userrig/pipelines/NewRNAseqTophatCufflinksPipeline/"
RNA += "generateTophatCufflinksconfigurationV217V2.sh"
# analysis id: the timestamp doubles as a unique identifier
analysis_id = generate_timestamp()
# global logger
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
    '[{asctime}] {levelname:8s} {filename} {message}', style='{'))
logger.addHandler(handler)



def main():
    """main function"""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('-1', "--break-after-first", action='store_true',
                        help="Only process first run returned")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")