Example #1
        def put(self, run_id, action):
            """
            Delete / resume / pause a run
            """
            parser = reqparse.RequestParser()
            parser.add_argument('user', type=str, default=None)
            # reqparse does not support argparse's 'store_true' action;
            # flask_restful.inputs.boolean parses boolean request values
            parser.add_argument('force', type=inputs.boolean, default=False)
            args = parser.parse_args()
            arg_user = auth_get_username(request.authorization,
                                         args.get('user'))
            arg_force = args.get('force')
            errors = ''

            query = {'run_id': run_id}
            keys = {'work_dir': 1, 'output_dir': 1, 'status': 1, 'user': 1}
            pipeline = db.pipelines.find_one(query, keys)

            if not pipeline:
                errors = 'ERROR: Run ID %s not found' % str(run_id)
            elif pipeline['user'] != arg_user:
                errors = 'ERROR: cannot modify pipeline %s: permission denied' % str(
                    run_id)
            elif action.lower() == "delete":
                if pipeline['status'] != JOB_STATUS.FAILED:
                    errors = 'ERROR: Run status is %s: cannot delete' % pipeline[
                        'status'].upper()
                else:
                    db.steps.delete_many({'run_id': run_id})
                    db.pipelines.find_one_and_delete({'run_id': run_id})
                    errors = pm.delete_pipeline(run_id, arg_user,
                                                pipeline.get('output_dir'),
                                                pipeline.get('work_dir'))
            elif action.lower() == "resume":
                if pipeline['status'] != JOB_STATUS.INTERRUPTED:
                    errors = 'ERROR: Run status is %s: cannot resume' % pipeline[
                        'status'].upper()
                else:
                    db.pipelines.update_one(
                        {'run_id': run_id},
                        {'$set': {
                            'status': JOB_STATUS.QUEUED
                        }})
                    errors = pm.resume_pipeline(run_id, arg_user,
                                                pipeline.get('work_dir'))
            elif action.lower() == "pause":
                if pipeline['status'] != JOB_STATUS.RUNNING:
                    errors = 'ERROR: Run status is %s: cannot pause' % pipeline[
                        'status'].upper()
                else:
                    errors = pm.pause_pipeline(run_id, arg_user)
            else:
                errors = "Uknown action requested: %s" % str(action)

            if errors:
                ut.pretty_print(errors)
                return errors, 400
            else:
                # send an empty body with HTTP 200; a bare `return 200` would
                # put the integer 200 in the response body instead
                return '', 200
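For context, a client-side sketch of calling this handler follows; the URL route and the basic-auth credentials are assumptions, since neither the resource registration nor auth_get_username is shown in these examples.

# Hypothetical client call for the PUT handler above; the route and the
# credentials are assumptions, not taken from these examples.
import requests

SERVICE_ROOT_URL = 'https://localhost:5000'   # placeholder service address
run_id = 42                                   # placeholder run ID

response = requests.put('%s/pipelines/%s/pause' % (SERVICE_ROOT_URL, run_id),
                        auth=('someuser', 'somepassword'),  # feeds request.authorization
                        verify=False)
print(response.status_code, response.text)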
Example #2
def validate_token(token):
    """
    Send request to the server to check if the token is valid
    """

    valid = False
    if token:
        address = "%s/verifytoken" % (cfg.SERVICE_ROOT_URL)
        ut.pretty_print("Sending request to...%s" % address)
        response = requests.get(address, data={"token": token, "username": getpass.getuser()}, verify=False)
        valid = response.json()['valid']
    if not valid:
        ut.pretty_print("User %s not authenticated" %getpass.getuser())
    return valid
Example #3
def verify_token(username, token):
    """
    Verify validity of token
    """
    s = TimedJWSSerializer(app.config['SECRET_KEY'])

    try:
        ut.pretty_print("Trying to load the token")
        data = s.loads(token)
    except SignatureExpired:
        ut.pretty_print("ERROR: Expired Token")
        return False
    except BadSignature:
        ut.pretty_print("ERROR: Invalid Token")
        return False
    else:
        ut.pretty_print("Token successfully loaded")
        stored = db.sessions.find_one(
            filter={'username': data['username']}, sort=[('_id', -1)])

        if not stored:
            return False
        # round-trip through json_util to coerce BSON types to plain Python
        result = json_util.loads(json_util.dumps(stored))

        return pwd_context.verify(
            data['password'],
            result['password_hash']) and data['username'] == username
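generate_token is called by the login handler in Examples #11 and #12 but never defined in this listing. A minimal sketch of what it could look like, assuming the session document stores the username plus a passlib hash of the password, mirroring exactly the fields verify_token reads back:

# Hypothetical counterpart to verify_token; not taken from the source code.
# The payload fields ('username', 'password') and the stored 'password_hash'
# mirror what verify_token reads above. Note the payload is signed, not
# encrypted, so the password is recoverable from the token itself.
def generate_token(username, password):
    s = TimedJWSSerializer(app.config['SECRET_KEY'])
    token = s.dumps({'username': username, 'password': password})
    db.sessions.insert_one({'username': username,
                            'password_hash': pwd_context.hash(password)})  # 'encrypt' in older passlib
    return token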
Example #4
def verify_token(username, token):
    """
    Verify validity of token
    """
    s = TimedJWSSerializer(app.config['SECRET_KEY'])

    try:
        ut.pretty_print("Trying to load the token")
        data = s.loads(token)
    except SignatureExpired:
        ut.pretty_print("ERROR: Expired Token")
        return False
    except BadSignature:
        ut.pretty_print("ERROR: Invalid Token")
        return False
    else:
        ut.pretty_print("Token successfully loaded")
        stored = db.sessions.find_one(filter={'username': data['username']},
                                      sort=[('_id', -1)])

        if not stored:
            return False
        # round-trip through json_util to coerce BSON types to plain Python
        result = json_util.loads(json_util.dumps(stored))

        return pwd_context.verify(
            data['password'],
            result['password_hash']) and data['username'] == username
Example #5
        def put(self, run_id, action):
            """
            Delete / resume / pause a run
            """
            parser = reqparse.RequestParser()
            parser.add_argument('user',  type=str, default=None)
            # reqparse does not support argparse's 'store_true' action;
            # flask_restful.inputs.boolean parses boolean request values
            parser.add_argument('force', type=inputs.boolean, default=False)
            args      = parser.parse_args()
            arg_user  = auth_get_username(request.authorization, args.get('user'))
            arg_force = args.get('force')
            errors = ''

            query = {'run_id' : run_id}
            keys  = {'work_dir':1, 'output_dir':1, 'status':1, 'user':1}
            pipeline = db.pipelines.find_one(query, keys)

            if not pipeline:
                errors = 'ERROR: Run ID %s not found' % str(run_id)
            elif pipeline['user'] != arg_user:
                errors = 'ERROR: cannot modify pipeline %s: permission denied' % str(run_id)
            elif action.lower() == "delete":
                if pipeline['status'] != JOB_STATUS.FAILED:
                    errors = 'ERROR: Run status is %s: cannot delete' % pipeline['status'].upper()
                else:
                    db.steps.delete_many({'run_id': run_id})
                    db.pipelines.find_one_and_delete({'run_id': run_id})
                    errors = pm.delete_pipeline(run_id, arg_user,
                                                pipeline.get('output_dir'), pipeline.get('work_dir')
                                               )
            elif action.lower() == "resume":
                if pipeline['status'] != JOB_STATUS.INTERRUPTED:
                    errors = 'ERROR: Run status is %s: cannot resume' % pipeline['status'].upper()
                else:
                    db.pipelines.update_one({'run_id': run_id},
                                            {'$set': {'status': JOB_STATUS.QUEUED}})
                    errors = pm.resume_pipeline(run_id, arg_user, pipeline.get('work_dir'))
            elif action.lower() == "pause":
                if pipeline['status'] != JOB_STATUS.RUNNING:
                    errors = 'ERROR: Run status is %s: cannot pause' % pipeline['status'].upper()
                else:
                    errors = pm.pause_pipeline(run_id, arg_user)
            else:
                errors = "Uknown action requested: %s" % str(action)

            if errors:
                ut.pretty_print(errors)
                return errors, 400
            else:
                # send an empty body with HTTP 200; a bare `return 200` would
                # put the integer 200 in the response body instead
                return '', 200
Example #6
def install_secret_key(app):
    """
    Configure the SECRET_KEY from a file in the instance directory.
    If the file does not exist, create it with a random key.
    """
    keyfile = os.path.join(KEYDIR, 'flask_key')
    try:
        if not os.path.exists(keyfile):
            ut.pretty_print('No key file found: creating it')
            if not os.path.exists(KEYDIR):
                os.makedirs(KEYDIR, mode=0o700)
            with open(keyfile, 'wb') as fh:
                key = os.urandom(256)
                fh.write(key)
                app.config['SECRET_KEY'] = key
        else:
            with open(keyfile, 'rb') as fh:
                app.config['SECRET_KEY'] = fh.read()
    except Exception as e:
        raise Exception("Unable to install flask key: %s" % e)
Example #7
def install_secret_key(app):
    """
    Configure the SECRET_KEY from a file in the instance directory.
    If the file does not exist, create it with a random key.
    """
    keyfile = os.path.join(KEYDIR, "flask_key")
    try:
        if not os.path.exists(keyfile):
            ut.pretty_print("No key file found: creating it")
            if not os.path.exists(KEYDIR):
                os.makedirs(KEYDIR, mode=0o700)
            with open(keyfile, "wb") as fh:
                key = os.urandom(256)
                fh.write(key)
                app.config["SECRET_KEY"] = key
        else:
            with open(keyfile, "rb") as fh:
                app.config["SECRET_KEY"] = fh.read()
    except Exception as e:
        raise Exception("Unable to install flask key: %s" % e)
Example #8
        def put(self):
            """
            Queue the specific pipeline
            """
            data   = request.get_json(force=True)
            config = data.get('config')
            user   = auth_get_username(request.authorization, data.get('user'))

            errors = None # Pipeline.validate_config(config, user)
            if not errors:
                config = Pipeline.load_cfg(config)
                # Get id from DB
                db_info = dbmodel.PipelineDb(config['name'], config, Pipeline.ordered_steps(config), user)
                config['run_id'] = db_info.run_id

                ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" % (config['label'], config['run_id'], user))
                return pm.add_pipeline(config, user)
            else:
                return errors, 400
Example #9
        def put(self):
            """
            Queue the specific pipeline
            """
            data = request.get_json(force=True)
            config = data.get('config')
            user = auth_get_username(request.authorization, data.get('user'))

            errors = None  # Pipeline.validate_config(config, user)
            if not errors:
                config = Pipeline.load_cfg(config)
                # Get id from DB
                db_info = dbmodel.PipelineDb(config['name'], config,
                                             Pipeline.ordered_steps(config),
                                             user)
                config['run_id'] = db_info.run_id

                ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" %
                                (config['label'], config['run_id'], user))
                return pm.add_pipeline(config, user)
            else:
                return errors, 400
Example #10
def get_new_token(user):
    """
    Send request to the server to get a new token
    """

    pwrd = getpass.getpass("Please enter your password: ")
    address = "%s/login" % (cfg.SERVICE_ROOT_URL)
    data = {'user': user, 'password': pwrd}
    ut.pretty_print("Sending request to...%s" % address)
    response = requests.post(address, data=data, verify=False)
    if response.json().get('token'):
        ut.pretty_print("user %s successsfully authenticated" % user)
        token = response.json()['token']
        save_token(token)
    else:
        ut.pretty_print("Invalid username or password")
        token = None
    return token
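save_token (used above) and read_token (used in Examples #19 and #20) are also not defined in this listing. A plausible sketch, assuming the token is cached in a dot-file under the user's home directory; the file name is illustrative:

# Hypothetical token cache helpers; the file location is an assumption.
import os

TOKEN_FILE = os.path.join(os.path.expanduser('~'), '.nespipe_token')

def save_token(token):
    with open(TOKEN_FILE, 'w') as fh:
        fh.write(token)

def read_token():
    if not os.path.exists(TOKEN_FILE):
        return None
    with open(TOKEN_FILE) as fh:
        return fh.read().strip()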
Example #11
    def post(self):
        """
        Authenticate the user and return an API token
        """
        user = request.form['user']
        password = request.form['password']

        # ip = request.remote_addr

        ut.pretty_print("Checking user %s" % user)

        # TODO : Provide proper code for special users wilee and demux
        if ((cfg.ACME_DEV or cfg.ACME_PROD)
                and (user == 'wilee' or user == 'demux')):
            return {'token': generate_token(user, password)}
        else:
            try:
                # Make sure the password is not empty to prevent LDAP anonymous bind to succeed
                if not password:
                    raise ldap.LDAPError("Empty password for user %s!" % user)

                #check if the LDAP binding works
                ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT,
                                ldap.OPT_X_TLS_NEVER)
                conn = ldap.initialize(cfg.LDAPS_ADDRESS)
                conn.set_option(ldap.OPT_REFERRALS, 0)
                conn.set_option(ldap.OPT_PROTOCOL_VERSION, 3)
                conn.set_option(ldap.OPT_X_TLS_CACERTFILE,
                                cfg.search_cfg_file('certificate_file.cer'))
                conn.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
                conn.set_option(ldap.OPT_X_TLS_DEMAND, True)

                bind = conn.simple_bind_s("%s@%s" % (user, cfg.COMPANY_ADDRESS),
                                          password)
                ut.pretty_print("Bind success!!")
                #then create a session
                return {'token': generate_token(user, password)}
            except ldap.LDAPError as e:
                ut.pretty_print("ERROR: authentication error: %s" % e)
                return "Authentication error", 401
Example #12
    def post(self):
        """
        Queue the specific pipeline type
        """
        user = request.form['user']
        password = request.form['password']

        # ip = request.remote_addr

        ut.pretty_print("Checking user %s" % user)

        # TODO : Provide proper code for special users wilee and demux
        if ((cfg.ACME_DEV or cfg.ACME_PROD)
                and (user == 'wilee' or user == 'demux')):
            return {'token': generate_token(user, password)}
        else:
            try:
                # Make sure the password is not empty to prevent LDAP anonymous bind to succeed
                if not password:
                    raise ldap.LDAPError("Empty password for user %s!" % user)

                #check if the LDAP binding works
                ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT,
                                ldap.OPT_X_TLS_NEVER)
                conn = ldap.initialize(cfg.LDAPS_ADDRESS)
                conn.set_option(ldap.OPT_REFERRALS, 0)
                conn.set_option(ldap.OPT_PROTOCOL_VERSION, 3)
                conn.set_option(ldap.OPT_X_TLS_CACERTFILE,
                                cfg.search_cfg_file('certificate_file.cer'))
                conn.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
                conn.set_option(ldap.OPT_X_TLS_DEMAND, True)

                bind = conn.simple_bind_s("%s@%s" % (user, cfg.COMPANY_ADDRESS),
                                          password)
                ut.pretty_print("Bind success!!")
                #then create a session
                return {'token': generate_token(user, password)}
            except ldap.LDAPError as e:
                ut.pretty_print("ERROR: authentication error: %s" % e)
                return "Authentication error", 401
Example #13
    def __init__(self, cfg, user='******', db=True, schedname="SCHED_CONDOR"):
        """
        Read in the pipeline graph and load the configuration.
        """
        self.all_ok = True
        self.user = user
        self.status = JOB_STATUS.QUEUED
        self.lock = ''

        self.completed = []
        self.running = {}
        self.outputs = {}
        self.schedname = schedname
        db_model_name = "MONGO_DB" if db else "STUB_DB"

        # Load configuration
        self.one_step = False
        try:
            self.cfg = Pipeline.load_cfg(cfg)
        except Exception as e1:
            print('Failed to load config as pipeline (error=%s). Trying as step' % e1)
            try:
                self.cfg = Step.load_cfg(cfg)
                self.step = Step.load_step(self.cfg)
                self.one_step = True
            except Exception as e2:
                raise Exception("Unable to load config file %s:\n" \
                                "pipeline load: %s\n" \
                                "step load: %s" % (cfg, e1, e2))

        # Set all additional information
        self.run_id = self.cfg.get('run_id')
        if self.one_step:
            self.name  = self.step.name
            self.label = self.step.name
            self.project_name = self.cfg.get('project_name', '')
            self.description  = self.cfg.get('description', '')
            self.output_dir   = self.step.output_dir
            self.ordered      = [self.step.name]
        else:
            self.name  = self.cfg['name']
            self.label = self.cfg['label']
            self.project_name = self.cfg['config']['pipeline'].get('project_name', '')
            self.description  = self.cfg['config']['pipeline'].get('description', '')
            self.output_dir   = self.cfg['config']['pipeline']['output_dir']
            if not self.output_dir.startswith('/scratch'):
                self.cfg['dag']['nodes'][FINAL_STEP] = 'utils.Finalize' #TODO: Make it work for one_step as well
            self.ordered      = Pipeline.ordered_steps(self.cfg)


        self.sys_path = self.cfg.get('sys_path')
        if self.sys_path:
            sys.path.insert(0, self.sys_path)

        self.dag = self.create_dag(self.cfg, one_step=self.one_step)

        self.meta = {
            'pipeline': {
                'label': self.label,
                'project_name': self.project_name,
                'descr': self.description,
                'run_id': self.run_id
            },
            'steps': {},
            'job' : {}
        }

        self.db = db_models[db_model_name](self.name, self.cfg, self.ordered, self.user, output_dir=self.output_dir)
        if hasattr(self.db, 'run_id'):
            self.run_id = self.db.run_id
            self.cfg['run_id'] = self.run_id

        # Define the output directories
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir, 0o775)

        # Use default output dir under /scratch/cgi/nespipe (linked to user-defined dir.)
        # if: a) this run is using the db (so we have a run ID); b) it is not a demux. run;
        # and c) the user-defined directory is not already under /scratch
        if self.run_id and not (self.name == 'demultiplexing'):
            dirname = '%s_%d' % (self.name, self.db.run_id)
            self.output_dir = os.path.join(self.output_dir, dirname)
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir, 0o775)
            # In case of /scratch, do not create an additional sub-directory
            if self.output_dir.startswith('/scratch'):
                self.work_dir = self.output_dir
            else:
                self.work_dir = os.path.join(WORK_DIR, self.user, dirname)
                if not os.path.exists(self.work_dir):
                    os.makedirs(self.work_dir, 0o775)
                symlink = os.path.join(self.output_dir, 'work_area')
                if not os.path.exists(symlink):
                    os.symlink(self.work_dir, symlink)
        else:
            self.work_dir = self.output_dir

        ut.pretty_print('Output directories: output_dir=%s, work_dir=%s' % (self.output_dir, self.work_dir))
        self.db.update_pipeline(self.run_id, {'output_dir': self.output_dir,
                                              'work_dir':   self.work_dir })
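A minimal construction sketch for this initializer; the enclosing class name (Pipeline here) and the config path are assumptions, since the class statement itself is not shown in this listing.

# Hypothetical usage of the constructor above; names are assumptions.
p = Pipeline('/path/to/pipeline_config.json',
             user='alice',
             db=False,                 # selects the STUB_DB model instead of Mongo
             schedname='SCHED_CONDOR')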
Example #14
    def load_cfg(cls, cfg):
        """
        Return the json cfg.
        Expects as input one of: a file path, a JSON string, or a dictionary.
        """

        cfg_load = None
        try:
            if isinstance(cfg, dict):
                cfg_load = copy.deepcopy(cfg)
            elif isinstance(cfg, basestring):
                if os.path.exists(cfg):
                    with open(cfg) as fh:
                        cfg_load = json.load(fh)
                        if 'sys_path' not in cfg_load:
                            cfg_load['sys_path'] = os.path.dirname(os.path.realpath(cfg))
                else:
                    # not an existing file: treat the string as JSON text
                    cfg_load = json.loads(cfg)
        except Exception as e:
            raise Exception("Unable to load config file %s: %s" % (cfg, e))
        else:
            # load the spec_type or spec_file into the json_spec,
            # if they exist
            cfg_data = { 'config' : {'steps': {}, 'pipeline' : {'project_name' : '', 'description' : '', 'output_dir': ''}}}
            ut.dict_update(cfg_data, cfg_load)

            if 'sys_path' in cfg_data:
                sys.path.insert(0, cfg_data['sys_path'])

            pipeline_to_load = cfg_data['dag'].pop("load") if "load" in cfg_data['dag'] else None
            if pipeline_to_load:
                try:
                    if os.path.exists(pipeline_to_load):
                        spec_file = pipeline_to_load
                    else:
                        if pipeline_to_load in pipeline_names:
                            spec_file = pipeline_names[pipeline_to_load]
                        else:
                            raise Exception("Pipeline %s not found in list of pipelines: [%s]"
                                            % (pipeline_to_load, ','.join(pipeline_names)))

                    with open(spec_file) as fh:
                        ut.pretty_print("Loading pipeline spec from %s" % spec_file)
                        spec = json.load(fh)
                        stepobjs = Pipeline.create_steps(spec)
                        steps_defaults = {}
                        for step in stepobjs:
                            step_default = stepobjs[step].keys_values(['params', 'requirements'])
                            if step_default:
                                steps_defaults[step] = step_default

                        spec.setdefault('config', {})
                        spec['config'].setdefault('pipeline', {})
                        spec['config'].setdefault('steps', {})
                        ut.dict_update(spec['config']['steps'], steps_defaults, replace=False)
                        ut.dict_update(spec['config'], cfg_data.get('config', ''))
                        cfg_data = spec
                except:
                    raise


            if cfg_data.get('config', {}).get('pipeline', {}).get('refgenome',{}):
                key_refgenome = cfg_data['config']['pipeline'].pop('refgenome')
                try:
                    ref_genomes = Pipeline.get_refgenomes(cfg_data)
                    if key_refgenome in ref_genomes:
                        # set refgenome parameters in each step (update config if already exists)
                        for step in ref_genomes[key_refgenome]:
                            if step in cfg_data['config']['steps']:
                                cfg_data['config']['steps'][step].update(ref_genomes[key_refgenome][step])
                            else:
                                cfg_data['config']['steps'][step] = ref_genomes[key_refgenome][step]
                    else:
                        raise Exception("unable to load ref genome paths for %s " % key_refgenome)
                except Exception:
                    raise

            if 'sys_path' in cfg_data:
                del sys.path[0]

            return cfg_data
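For reference, an illustrative dictionary input that this method would accept; the key layout mirrors what the code above reads, but every concrete value is invented.

# Illustrative config for load_cfg; all concrete values below are made up.
cfg = {
    'name': 'example_pipeline',
    'label': 'Example pipeline',
    'dag': {
        # 'load': 'known_pipeline_name',   # optional: merge in a named spec
        'nodes': {'step1': 'utils.SomeStep'},
    },
    'config': {
        'pipeline': {
            'project_name': 'demo',
            'description': 'example run',
            'output_dir': '/tmp/example_out',
        },
        'steps': {'step1': {'some_param': 1}},
    },
}
cfg_data = Pipeline.load_cfg(cfg)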
Example #15
        def post(self, run_id):
            """
            Pushes files into iRODS
            """

            data = request.get_json(force=True)

            runmeta = data.get('meta')
            selection = data.get('selection')
            user = auth_get_username(request.authorization, data.get('user'))

            npdis = dbmodel.get_npdi_projects()
            npdi = runmeta.get('Project NPDI ID', '')
            study_nickname = runmeta.get('Study nickname',
                                         'Required field missing')
            if (npdi + study_nickname) not in npdis:
                return {
                    'pipeline': {
                        'Project': '%s (%s)' % (npdi, study_nickname)
                    }
                }, 400

            run = db.pipelines.find_one({'run_id': run_id}, {
                'meta': 1,
                'run_id': 1
            })

            steps_names = list(selection.keys())  # explicit list for Python 3 compatibility
            steps = list(
                db.steps.find(
                    {
                        "run_id": run_id,
                        "name": {
                            '$in': steps_names
                        },
                        "jobs": {
                            "$elemMatch": {
                                "outputs": {
                                    "$exists": True
                                }
                            }
                        }
                    }, {
                        "name": 1,
                        "jobs": 1,
                        "outputs.output_dir": 1,
                        "step_config": 1
                    }))

            outputs = {}
            for step in steps:
                if step.get('step_config', {}):
                    s = Step.load_step(step['step_config'])
                    output_files = {}
                    for job_id, job in enumerate(step['jobs']):
                        for key in job['outputs']:
                            if key in s.keys(key_groups='outputs',
                                             key_filter={'type': 'file'}):
                                for i, filename in enumerate(
                                        job['outputs'][key]):
                                    filemeta = {
                                        'step': step['name'],
                                        'job_id': job_id
                                    }
                                    ext = os.path.splitext(
                                        filename)[1][1:].upper()
                                    # 'meta_key' avoids shadowing the outer
                                    # 'key' loop over job['outputs']
                                    for meta_key in job.get('meta', {}):
                                        meta = job['meta'][meta_key]
                                        if meta_key == 'sample_id':
                                            okey = 'Operational sample accession'
                                        else:
                                            okey = meta_key

                                        if isinstance(meta, list):
                                            filemeta[okey] = meta[i]
                                        else:
                                            filemeta[okey] = meta

                                    filemeta[
                                        'File type'] = 'Processed data file'
                                    filemeta['File format'] = ext

                                    output_files[filename] = filemeta

                    if output_files:
                        outputs[step['name']] = output_files

            input_files = []
            meta_data = []
            for step_name, step_selection in selection.items():
                for filepath in step_selection:
                    input_files.append(filepath)

                    filemeta = outputs[step_name][filepath]
                    filemeta.update(runmeta)
                    meta_data.append(filemeta)

            cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
            cfg['config']['steps']['irods_mvtolz'] = {
                'input_files': input_files,
                'meta_data': meta_data
            }
            cfg['config']['steps']['irods_monitorlz'] = {
                'prun_id': run['run_id']
            }

            cfg['config']['pipeline']['project_name'] = run['meta'][
                'project_name']
            cfg['config']['pipeline'][
                'description'] = 'Archive data for run %s' % run['run_id']
            cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'

            # Get id from DB
            db_info = dbmodel.PipelineDb(cfg['name'], cfg,
                                         Pipeline.ordered_steps(cfg), user)
            cfg['run_id'] = db_info.run_id

            ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" %
                            (cfg['label'], cfg['run_id'], user))
            return pm.add_pipeline(cfg, user)
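To make the expected request body concrete, an illustrative client call for this endpoint follows; the route is an assumption (the resource registration is not shown here) and every field value is invented, though the field names come from the handler above.

# Hypothetical client call; the route and all values are illustrative only.
import requests

SERVICE_ROOT_URL = 'https://localhost:5000'   # placeholder
run_id = 42                                   # placeholder

payload = {
    'user': 'alice',
    'meta': {'Project NPDI ID': 'NPDI-0001',       # checked against get_npdi_projects()
             'Study nickname': 'demo-study'},
    'selection': {                                 # step name -> selected output files
        'some_step': ['/path/to/output/file1.vcf'],
    },
}
response = requests.post('%s/pipelines/%s/archive' % (SERVICE_ROOT_URL, run_id),
                         json=payload, verify=False)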
Example #16
    def __init__(self, cfg, user='******', db=True, schedname="SCHED_CONDOR"):
        """
        Read in the pipeline graph and load the configuration.
        """
        self.all_ok = True
        self.user = user
        self.status = JOB_STATUS.QUEUED
        self.lock = ''

        self.completed = []
        self.running = {}
        self.outputs = {}
        self.schedname = schedname
        db_model_name = "MONGO_DB" if db else "STUB_DB"

        # Load configuration
        self.one_step = False
        try:
            self.cfg = Pipeline.load_cfg(cfg)
        except Exception as e1:
            print(
                'Failed to load config as pipeline (error=%s). Trying as step'
                % e1)
            try:
                self.cfg = Step.load_cfg(cfg)
                self.step = Step.load_step(self.cfg)
                self.one_step = True
            except Exception as e2:
                raise Exception("Unable to load config file %s:\n" \
                                "pipeline load: %s\n" \
                                "step load: %s" % (cfg, e1, e2))

        # Set all additional information
        self.run_id = self.cfg.get('run_id')
        if self.one_step:
            self.name = self.step.name
            self.label = self.step.name
            self.project_name = self.cfg.get('project_name', '')
            self.description = self.cfg.get('description', '')
            self.output_dir = self.step.output_dir
            self.ordered = [self.step.name]
        else:
            self.name = self.cfg['name']
            self.label = self.cfg['label']
            self.project_name = self.cfg['config']['pipeline'].get(
                'project_name', '')
            self.description = self.cfg['config']['pipeline'].get(
                'description', '')
            self.output_dir = self.cfg['config']['pipeline']['output_dir']
            if not self.output_dir.startswith('/scratch'):
                self.cfg['dag']['nodes'][
                    FINAL_STEP] = 'utils.Finalize'  #TODO: Make it work for one_step as well
            self.ordered = Pipeline.ordered_steps(self.cfg)

        self.sys_path = self.cfg.get('sys_path')
        if self.sys_path:
            sys.path.insert(0, self.sys_path)

        self.dag = self.create_dag(self.cfg, one_step=self.one_step)

        self.meta = {
            'pipeline': {
                'label': self.label,
                'project_name': self.project_name,
                'descr': self.description,
                'run_id': self.run_id
            },
            'steps': {},
            'job': {}
        }

        self.db = db_models[db_model_name](self.name,
                                           self.cfg,
                                           self.ordered,
                                           self.user,
                                           output_dir=self.output_dir)
        if hasattr(self.db, 'run_id'):
            self.run_id = self.db.run_id
            self.cfg['run_id'] = self.run_id

        # Define the output directories
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir, 0o775)

        # Use default output dir under /scratch/cgi/nespipe (linked to user-defined dir.)
        # if: a) this run is using the db (so we have a run ID); b) it is not a demux. run;
        # and c) the user-defined directory is not already under /scratch
        if self.run_id and not (self.name == 'demultiplexing'):
            dirname = '%s_%d' % (self.name, self.db.run_id)
            self.output_dir = os.path.join(self.output_dir, dirname)
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir, 0o775)
            # In case of /scratch, do not create an additional sub-directory
            if self.output_dir.startswith('/scratch'):
                self.work_dir = self.output_dir
            else:
                self.work_dir = os.path.join(WORK_DIR, self.user, dirname)
                if not os.path.exists(self.work_dir):
                    os.makedirs(self.work_dir, 0o775)
                symlink = os.path.join(self.output_dir, 'work_area')
                if not os.path.exists(symlink):
                    os.symlink(self.work_dir, symlink)
        else:
            self.work_dir = self.output_dir

        ut.pretty_print('Output directories: output_dir=%s, work_dir=%s' %
                        (self.output_dir, self.work_dir))
        self.db.update_pipeline(self.run_id, {
            'output_dir': self.output_dir,
            'work_dir': self.work_dir
        })
Example #17
    def load_cfg(cls, cfg):
        """
        Return the json cfg.
        Expects as input one of: a file path, a JSON string, or a dictionary.
        """

        cfg_load = None
        try:
            if isinstance(cfg, dict):
                cfg_load = copy.deepcopy(cfg)
            elif isinstance(cfg, basestring):
                if os.path.exists(cfg):
                    with open(cfg) as fh:
                        cfg_load = json.load(fh)
                        if 'sys_path' not in cfg_load:
                            cfg_load['sys_path'] = os.path.dirname(
                                os.path.realpath(cfg))
                else:
                    # not an existing file: treat the string as JSON text
                    cfg_load = json.loads(cfg)
        except Exception as e:
            raise Exception("Unable to load config file %s: %s" % (cfg, e))
        else:
            # load the spec_type or spec_file into the json_spec,
            # if they exist
            cfg_data = {
                'config': {
                    'steps': {},
                    'pipeline': {
                        'project_name': '',
                        'description': '',
                        'output_dir': ''
                    }
                }
            }
            ut.dict_update(cfg_data, cfg_load)

            if 'sys_path' in cfg_data:
                sys.path.insert(0, cfg_data['sys_path'])

            pipeline_to_load = cfg_data['dag'].pop(
                "load") if "load" in cfg_data['dag'] else None
            if pipeline_to_load:
                try:
                    if os.path.exists(pipeline_to_load):
                        spec_file = pipeline_to_load
                    else:
                        if pipeline_to_load in pipeline_names:
                            spec_file = pipeline_names[pipeline_to_load]
                        else:
                            raise Exception(
                                "Pipeline %s not found in list of pipelines: [%s]"
                                % (pipeline_to_load, ','.join(pipeline_names)))

                    with open(spec_file) as fh:
                        ut.pretty_print("Loading pipeline spec from %s" %
                                        spec_file)
                        spec = json.load(fh)
                        stepobjs = Pipeline.create_steps(spec)
                        steps_defaults = {}
                        for step in stepobjs:
                            step_default = stepobjs[step].keys_values(
                                ['params', 'requirements'])
                            if step_default:
                                steps_defaults[step] = step_default

                        spec.setdefault('config', {})
                        spec['config'].setdefault('pipeline', {})
                        spec['config'].setdefault('steps', {})
                        ut.dict_update(spec['config']['steps'],
                                       steps_defaults,
                                       replace=False)
                        ut.dict_update(spec['config'],
                                       cfg_data.get('config', ''))
                        cfg_data = spec
                except:
                    raise

            if cfg_data.get('config', {}).get('pipeline',
                                              {}).get('refgenome', {}):
                key_refgenome = cfg_data['config']['pipeline'].pop('refgenome')
                try:
                    ref_genomes = Pipeline.get_refgenomes(cfg_data)
                    if key_refgenome in ref_genomes:
                        # set refgenome parameters in each step (update config if already exists)
                        for step in ref_genomes[key_refgenome]:
                            if step in cfg_data['config']['steps']:
                                cfg_data['config']['steps'][step].update(
                                    ref_genomes[key_refgenome][step])
                            else:
                                cfg_data['config']['steps'][
                                    step] = ref_genomes[key_refgenome][step]
                    else:
                        raise Exception(
                            "unable to load ref genome paths for %s " %
                            key_refgenome)
                except Exception:
                    raise

            if 'sys_path' in cfg_data:
                del sys.path[0]

            return cfg_data
Example #18
        def post(self, run_id):
            """
            Pushes files into iRODS
            """

            data = request.get_json(force=True)

            runmeta   = data.get('meta')
            selection = data.get('selection')
            user      = auth_get_username(request.authorization, data.get('user'))

            npdis = dbmodel.get_npdi_projects()
            npdi = runmeta.get('Project NPDI ID', '')
            study_nickname = runmeta.get('Study nickname', 'Required field missing')
            if (npdi + study_nickname) not in npdis:
                return {'pipeline': {
                            'Project': '%s (%s)' %(npdi, study_nickname)
                        }}, 400

            run = db.pipelines.find_one({'run_id': run_id}, {'meta':1, 'run_id':1})

            steps_names = list(selection.keys())  # explicit list for Python 3 compatibility
            steps = list(db.steps.find(
                {"run_id":run_id, "name": {'$in': steps_names}, "jobs": {"$elemMatch": {"outputs": {"$exists": True}}}},
                {"name":1, "jobs":1, "outputs.output_dir": 1, "step_config": 1}))

            outputs = {}
            for step in steps:
                if step.get('step_config', {}):
                    s = Step.load_step(step['step_config'])
                    output_files = {}
                    for job_id, job in enumerate(step['jobs']):
                        for key in job['outputs']:
                            if key in s.keys(key_groups='outputs', key_filter={'type':'file'}):
                                for i, filename in enumerate(job['outputs'][key]):
                                    filemeta = {'step': step['name'], 'job_id': job_id}
                                    ext = os.path.splitext(filename)[1][1:].upper()
                                    # 'meta_key' avoids shadowing the outer
                                    # 'key' loop over job['outputs']
                                    for meta_key in job.get('meta', {}):
                                        meta = job['meta'][meta_key]
                                        if meta_key == 'sample_id':
                                            okey = 'Operational sample accession'
                                        else:
                                            okey = meta_key

                                        if isinstance(meta, list):
                                            filemeta[okey] = meta[i]
                                        else:
                                            filemeta[okey] = meta

                                    filemeta['File type'] = 'Processed data file'
                                    filemeta['File format'] = ext

                                    output_files[filename] = filemeta

                    if output_files:
                        outputs[step['name']] = output_files


            input_files = []
            meta_data   = []
            for step_name, step_selection in selection.items():
                for filepath in step_selection:
                    input_files.append(filepath)

                    filemeta = outputs[step_name][filepath]
                    filemeta.update(runmeta)
                    meta_data.append(filemeta)

            cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
            cfg['config']['steps']['irods_mvtolz'] = {
                'input_files' : input_files,
                'meta_data'   : meta_data
            }
            cfg['config']['steps']['irods_monitorlz'] = {
                'prun_id' : run['run_id']
            }

            cfg['config']['pipeline']['project_name'] = run['meta']['project_name']
            cfg['config']['pipeline']['description'] = 'Archive data for run %s' %run['run_id']
            cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'

            # Get id from DB
            db_info = dbmodel.PipelineDb(cfg['name'], cfg, Pipeline.ordered_steps(cfg), user)
            cfg['run_id'] = db_info.run_id

            ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" % (cfg['label'], cfg['run_id'], user))
            return pm.add_pipeline(cfg, user)
Example #19
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 Pypers is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with Pypers.  If not, see <http://www.gnu.org/licenses/>.
 """

import getpass
import nespipe.utils.utils as ut

from nespipe.utils import security

if __name__ == '__main__':

    doc = """
    Create a new authentication token
    """

    user = getpass.getuser()
    token = security.read_token()
    if not security.validate_token(token):
        ut.pretty_print("get new toke....")
        token = security.get_new_token(user)
    else:
        ut.pretty_print('%s already has a valid token' % user)
Example #20
 (at your option) any later version.

 Pypers is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with Pypers.  If not, see <http://www.gnu.org/licenses/>.
 """

import getpass
import nespipe.utils.utils as ut

from nespipe.utils import security

if __name__ == '__main__':

    doc="""
    Create a new authentication token
    """

    user = getpass.getuser()
    token = security.read_token()
    if not security.validate_token(token):
        ut.pretty_print("get new toke....")
        token = security.get_new_token(user)
    else:
        ut.pretty_print('%s already has a valid token' % user)