def get_unbound_inputs(cls, cfg):
    """
    Collect, for every step, the required input keys left unbound.

    The configuration is passed through ``cls.load_cfg`` and a DAG is built
    from it with ``cls.create_dag``. For each node, the required input keys
    of its step class are fetched; every key named by a binding on an edge
    coming from a predecessor is discarded, and what remains is reported.

    Returns a ``defaultdict(dict)`` mapping a step name to the list of its
    unbound input keys. Steps with nothing left unbound are not inserted.
    """
    cfg = cls.load_cfg(cfg)
    dag = cls.create_dag(cfg)

    unbound = defaultdict(dict)
    for stepname, classname in cfg['dag']['nodes'].iteritems():
        pending = Step.create(classname).keys('inputs', req_only=True)
        if not pending:
            continue
        for pred in dag.predecessors(stepname):
            for binding in dag[pred][stepname].get('bindings', []):
                # The bound key is the second dot-separated field
                bound_key = binding.split('.')[1]
                # An earlier binding may already have dropped this key
                if bound_key in pending:
                    pending.remove(bound_key)
        if pending:
            unbound[stepname] = pending
    return unbound
def create_steps(cfg):
    """
    Instantiate one step object for every node of the pipeline DAG.

    When ``cfg`` has a ``'sys_path'`` entry, that path is placed at the front
    of ``sys.path`` while the steps are created and is taken out again
    afterwards, including when step creation raises.

    Args:
        cfg: configuration dictionary; ``cfg['dag']['nodes']`` maps each
            step name to the class name handed to ``Step.create``.

    Returns:
        dict mapping each step name to the object returned by
        ``Step.create``.
    """
    has_extra_path = 'sys_path' in cfg
    if has_extra_path:
        extra_path = cfg['sys_path']
        sys.path.insert(0, extra_path)
    try:
        stepobjs = {}
        # .items() keeps this loop working on both Python 2 and Python 3
        for stepname, classname in cfg['dag']['nodes'].items():
            stepobjs[stepname] = Step.create(classname)
        return stepobjs
    finally:
        if has_extra_path:
            # Remove by value, not by index: creating a step may itself have
            # prepended entries, so position 0 is not guaranteed to be ours.
            try:
                sys.path.remove(extra_path)
            except ValueError:
                # Already removed by someone else; nothing left to restore.
                pass
def get_metainfo(self, step_name):
    """
    Build a dictionary describing the pipeline, the user and one step.

    The result has three entries:
      - 'pipeline': name and version of this pipeline object
      - 'user': login and the gecos field of the password database entry
      - 'step': name, class name and version of the requested step
    """
    pipeline_info = {'name': self.name, 'version': self.__version__}
    user_info = {
        'login': self.user,
        'fullname': pwd.getpwnam(self.user).pw_gecos,
    }

    # The step version is read from a freshly created step object
    class_name = self.dag.node[step_name]['class_name']
    step_version = Step.create(class_name).__version__
    step_info = {
        'name': step_name,
        'class': class_name,
        'version': step_version,
    }

    return {'pipeline': pipeline_info, 'user': user_info, 'step': step_info}
def get_refgenomes(cls, cfg, unbound=None):
    """
    Return the reference genome paths grouped by label, then by step.

    A label is built from the species and version of a reference genome,
    followed by the variation in parentheses when there is one. The result
    is nested as label -> step name -> input key -> path:

    {
        "label1": { "stepname1" : { "input_key1" : "/path1" },
                    "stepname2" : { "input_key1" : "/path2" } },
        "label2": { "stepname1" : { "input_key1" : "/path3" },
                    "stepname2" : { "input_key1" : "/path4" } }
    }

    Args:
        cfg: configuration dictionary; ``cfg['dag']['nodes']`` maps step
            names to step class names.
        unbound: container of the steps that have unbound inputs. If set
            (not None), only those steps are considered.

    Returns:
        dict as shown above. A step that declares reference genome tools
        gets an entry under every label, which stays empty when the genome
        has no path for any of the step's tools.
    """
    # Collect all tools that require a ref. genome, remembering for each
    # step which input key is served by which tool
    tools_by_step = defaultdict(dict)
    tools = set()
    for stepname, classname in cfg['dag']['nodes'].items():
        if unbound is None or stepname in unbound:
            step = Step.create(classname)
            for ref in step.get_refgenome_tools():
                tools.add(ref['tool'])
                tools_by_step[stepname][ref['name']] = ref['tool']

    # Get corresponding ref genomes
    refs_by_label = {}
    for ref in mongo.get_refgenomes(tools):
        label = "%s %s" % (ref['_id']['species'], ref['_id']['version'])
        if 'variation' in ref['_id']:
            label += " (%s)" % ref['_id']['variation']
        for stepname, step_tools in tools_by_step.items():
            paths = {}
            for param_key, tool in step_tools.items():
                if tool in ref['paths']:
                    paths[param_key] = ref['paths'][tool]
            refs_by_label.setdefault(label, {})[stepname] = paths
    return refs_by_label
def get_refgenomes(cls, cfg, unbound=None):
    """
    Return the reference genome paths grouped by label, then by step.

    A label is built from the species and version of a reference genome,
    followed by the variation in parentheses when there is one. The result
    is nested as label -> step name -> input key -> path:

    {
        "label1": { "stepname1" : { "input_key1" : "/path1" },
                    "stepname2" : { "input_key1" : "/path2" } },
        "label2": { "stepname1" : { "input_key1" : "/path3" },
                    "stepname2" : { "input_key1" : "/path4" } }
    }

    Args:
        cfg: configuration dictionary; ``cfg['dag']['nodes']`` maps step
            names to step class names.
        unbound: container of the steps that have unbound inputs. If set
            (not None), only those steps are considered.

    Returns:
        dict as shown above. A step that declares reference genome tools
        gets an entry under every label, which stays empty when the genome
        has no path for any of the step's tools.
    """
    # Collect all tools that require a ref. genome, remembering for each
    # step which input key is served by which tool
    tools_by_step = defaultdict(dict)
    tools = set()
    for stepname, classname in cfg['dag']['nodes'].items():
        if unbound is None or stepname in unbound:
            step = Step.create(classname)
            for ref in step.get_refgenome_tools():
                tools.add(ref['tool'])
                tools_by_step[stepname][ref['name']] = ref['tool']

    # Get corresponding ref genomes
    refs_by_label = {}
    for ref in mongo.get_refgenomes(tools):
        label = "%s %s" % (ref['_id']['species'], ref['_id']['version'])
        if 'variation' in ref['_id']:
            label += " (%s)" % ref['_id']['variation']
        for stepname, step_tools in tools_by_step.items():
            paths = {}
            for param_key, tool in step_tools.items():
                if tool in ref['paths']:
                    paths[param_key] = ref['paths'][tool]
            refs_by_label.setdefault(label, {})[stepname] = paths
    return refs_by_label
def get_metainfo(self, step_name):
    """
    Build a dictionary describing the pipeline, the user and one step.

    The result has three entries:
      - 'pipeline': name and version of this pipeline object
      - 'user': login and the gecos field of the password database entry
      - 'step': name, class name and version of the requested step
    """
    pipeline_info = {'name': self.name, 'version': self.__version__}
    user_info = {
        'login': self.user,
        'fullname': pwd.getpwnam(self.user).pw_gecos,
    }

    # The step version is read from a freshly created step object
    class_name = self.dag.node[step_name]['class_name']
    step_version = Step.create(class_name).__version__
    step_info = {
        'name': step_name,
        'class': class_name,
        'version': step_version,
    }

    return {'pipeline': pipeline_info, 'user': user_info, 'step': step_info}
def validate_config(cls, cfg, user):
    """
    Check if all the config params are ok.

    Args:
        cfg: configuration, passed through ``cls.load_cfg`` before use.
        user: accepted for interface compatibility; not used by the checks
            performed here.

    Returns:
        A ``defaultdict(dict)`` that is empty when nothing is wrong.
        Otherwise it may hold:
          - 'steps': step name -> key -> error message, for required keys
            (unbound inputs and required params) that are missing from the
            step configuration or rejected by the step's ``validate_value``
          - 'pipeline': key -> error message for 'project_name',
            'description' and 'output_dir'
    """
    # basestring only exists on Python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    retval = defaultdict(dict)
    s_errors = defaultdict(dict)

    cfg = cls.load_cfg(cfg)
    params = cls.get_params(cfg)
    unb_inputs = cls.get_unbound_inputs(cfg)

    # validate step section
    steps_cfg = cfg['config']['steps']
    for stepname in params['steps']:
        # Compare by value: identity of equal strings is not guaranteed
        if stepname == 'inputs':
            continue
        classname = cfg['dag']['nodes'][stepname]
        stepobj = Step.create(classname)

        required_keys = []
        required_keys.extend(unb_inputs.get(stepname, []))
        required_keys.extend(stepobj.keys(['params'], req_only=True))

        # A step without a config section has every required key missing
        if stepname in steps_cfg:
            stepcfg = steps_cfg[stepname]
        else:
            stepcfg = {}

        for key in required_keys:
            if key not in stepcfg:
                s_errors[stepname][key] = 'missing value'
                continue
            param_spec = stepobj.key_spec(key)
            error_msg = stepobj.validate_value(stepcfg[key],
                                               param_spec['type'],
                                               param_spec['name'])
            if error_msg:
                s_errors[stepname][key] = error_msg

    # validate pipeline section
    pipeline_cfg = cfg['config']['pipeline']
    p_errors = {}
    for key in ('project_name', 'description'):
        if not pipeline_cfg[key]:
            p_errors[key] = 'missing value'

    output_dir = pipeline_cfg['output_dir']
    if not output_dir:
        p_errors['output_dir'] = 'missing value'
    elif not isinstance(output_dir, string_types):
        # Check the type first: calling startswith on a non-string raises
        p_errors['output_dir'] = '%s : invalid type, found %s, expected %s' % (
            output_dir, type(output_dir), 'str')
    elif not output_dir.startswith('/'):
        p_errors['output_dir'] = '%s : not an absolute path' % output_dir

    if s_errors:
        retval['steps'] = s_errors
    if p_errors:
        retval['pipeline'] = p_errors
    return retval
def validate_config(cls, cfg, user):
    """
    Check if all the config params are ok.

    Args:
        cfg: configuration, passed through ``cls.load_cfg`` before use.
        user: accepted for interface compatibility; not used by the checks
            performed here.

    Returns:
        A ``defaultdict(dict)`` that is empty when nothing is wrong.
        Otherwise it may hold:
          - 'steps': step name -> key -> error message, for required keys
            (unbound inputs and required params) that are missing from the
            step configuration or rejected by the step's ``validate_value``
          - 'pipeline': key -> error message for 'project_name',
            'description' and 'output_dir'
    """
    # basestring only exists on Python 2; fall back to str elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str

    retval = defaultdict(dict)
    s_errors = defaultdict(dict)

    cfg = cls.load_cfg(cfg)
    params = cls.get_params(cfg)
    unb_inputs = cls.get_unbound_inputs(cfg)

    # validate step section
    steps_cfg = cfg['config']['steps']
    for stepname in params['steps']:
        # Compare by value: identity of equal strings is not guaranteed
        if stepname == 'inputs':
            continue
        classname = cfg['dag']['nodes'][stepname]
        stepobj = Step.create(classname)

        required_keys = []
        required_keys.extend(unb_inputs.get(stepname, []))
        required_keys.extend(stepobj.keys(['params'], req_only=True))

        # A step without a config section has every required key missing
        if stepname in steps_cfg:
            stepcfg = steps_cfg[stepname]
        else:
            stepcfg = {}

        for key in required_keys:
            if key not in stepcfg:
                s_errors[stepname][key] = 'missing value'
                continue
            param_spec = stepobj.key_spec(key)
            error_msg = stepobj.validate_value(stepcfg[key],
                                               param_spec['type'],
                                               param_spec['name'])
            if error_msg:
                s_errors[stepname][key] = error_msg

    # validate pipeline section
    pipeline_cfg = cfg['config']['pipeline']
    p_errors = {}
    for key in ('project_name', 'description'):
        if not pipeline_cfg[key]:
            p_errors[key] = 'missing value'

    output_dir = pipeline_cfg['output_dir']
    if not output_dir:
        p_errors['output_dir'] = 'missing value'
    elif not isinstance(output_dir, string_types):
        # Check the type first: calling startswith on a non-string raises
        p_errors['output_dir'] = '%s : invalid type, found %s, expected %s' % (
            output_dir, type(output_dir), 'str')
    elif not output_dir.startswith('/'):
        p_errors['output_dir'] = '%s : not an absolute path' % output_dir

    if s_errors:
        retval['steps'] = s_errors
    if p_errors:
        retval['pipeline'] = p_errors
    return retval