def _tagToDictAndCache(self, aspect, tag):
    if tag.taskName:
        try:
            t = yield tasks_tx.loadTask(tag.taskName)
            state = t.state
        except Exception:
            # Treat a missing or unloadable task as having no state
            state = None
    else:
        state = None

    d = {}
    d.update({'files': tag.files,
              'metadata': tag.metadata})
    d.update({'tag_name': tag.tagName,
              'file_count': len(tag.files),
              'pipelines': [],
              'task_name': tag.taskName,
              'state': state,
              'phantom': config.configToDict(tag.phantom) if tag.phantom else None})
    yield self.cache.save(d)
    self.changed(aspect, d)
def pipelineToDict(p):
    return dict(name=p.name,
                taskName=p.taskName,
                pid=p.pid,
                ptype=p.ptypeStr(),
                config=[kv for kv in config.configToDict(p.config).iteritems()])
def tagToDict(t):
    return {'tag_name': t.tagName,
            'files': t.files,
            'metadata': dict(t.metadata),
            'phantom': config.configToDict(t.phantom) if t.phantom else None,
            'task_name': t.taskName}
def handleCredentialConfig(request):
    conf = config.configToDict(request.credential.credInstance.conf)
    conf = func.updateDict(conf,
                           {'general.ctype': request.credential.credential.getCType()})
    queue.returnQueueSuccess(request.mq,
                             request.body['return_queue'],
                             conf)
    return defer_pipe.ret(request)
def loadTagFile(fname):
    """
    Loads a tagfile, returns a config object of attributes.

    Also considers a .phantom type, which represents files that don't really
    exist.  I think this makes sense, as you should be able to transfer
    .phantom files around, while .metadata files should be generated when you
    make a tag.

    Will explain more about this in a wiki page somewhere...
    """
    ##
    # Phantom files are in a format that configFromStream can read.  This is because phantom
    # files are expected to be written and modified by humans.  .metadata files, on the other
    # hand, are just expected to be the product of a machine storing information, so they use JSON.
    if os.path.exists(fname + '.phantom'):
        ##
        # Put everything under 'phantom'.
        # We want to do it lazily too, since we will be adding
        # data it can access later.
        phantom = configFromMap({'phantom_tag': True,
                                 'phantom': configToDict(configFromStream(open(fname + '.phantom'), lazy=True))},
                                lazy=True)
    else:
        phantom = configFromMap({})

    ##
    # If fname actually exists, open its metadata + files.
    # If fname does not exist but the phantom does, return the phantom.
    # Otherwise, throw an exception about the missing tagfile.
    if os.path.exists(fname):
        if os.path.exists(fname + '.metadata'):
            metadata = configFromMap({'metadata': json.loads(open(fname + '.metadata').read())},
                                     phantom,
                                     lazy=True)
        else:
            metadata = configFromMap({}, phantom)

        return configFromMap({'files': [f.strip() for f in open(fname) if f.strip()]},
                             metadata,
                             lazy=True)
    elif os.path.exists(fname + '.phantom'):
        # fname does not exist, but a .phantom does
        if os.path.exists(fname + '.metadata'):
            return configFromMap({'metadata': json.loads(open(fname + '.metadata').read())},
                                 phantom,
                                 lazy=True)
        return phantom
    else:
        raise MissingTagFileError(fname)
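##
# Minimal usage sketch for loadTagFile (hypothetical path; assumes this
# module's loadTagFile and MissingTagFileError are in scope).  A tag named
# 'reads' may be backed by /tmp/tags/reads, /tmp/tags/reads.metadata and
# /tmp/tags/reads.phantom.
def _exampleLoadTag(tagPath='/tmp/tags/reads'):
    try:
        tag = loadTagFile(tagPath)
    except MissingTagFileError:
        # Neither the tag file nor its .phantom exists
        return None

    # 'files' is absent for a pure phantom tag, so default it
    print tag('files', default=[])
    print tag('metadata', default={})
    return tag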
def pipelineSSToDict(pss):
    return dict(name=pss.name,
                taskName=pss.taskName,
                pid=pss.pid,
                ptype=pss.ptypeStr(),
                config=config.configToDict(pss.config),
                complete=pss.complete,
                total=pss.total,
                state=pss.state)
def _extractInnerPipelineConfig(batchConfig):
    batchDict = config.configToDict(batchConfig)
    innerPipelineConfigDict = dict([(k.split('.', 1)[1], v)
                                    for k, v in batchDict.iteritems()
                                    if k.startswith('batch_pipeline.')])
    for k in ['pipeline.PIPELINE_WRAPPER_NAME', 'pipeline.PIPELINE_NAME']:
        innerPipelineConfigDict[k] = batchDict[k]

    return innerPipelineConfigDict
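##
# Worked example of the key re-rooting above (hypothetical keys; uses a plain
# dict standing in for a real batch config).  Keys under 'batch_pipeline.' are
# re-rooted by splitting on the first '.', everything else is dropped except
# the two pipeline.* names that _extractInnerPipelineConfig copies through.
def _exampleExtractInner():
    batchDict = {'batch_pipeline.input.TAGS': 'reads',
                 'batch_pipeline.cluster.CLUSTER_NAME': 'local',
                 'batch.RUN_ID': '4'}
    inner = dict([(k.split('.', 1)[1], v)
                  for k, v in batchDict.iteritems()
                  if k.startswith('batch_pipeline.')])
    assert inner == {'input.TAGS': 'reads',
                     'cluster.CLUSTER_NAME': 'local'}
    return inner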
def pipelineToDict(p):
    return {'pipeline_id': p.pipelineId,
            'pipeline_name': p.pipelineName,
            'user_name': p.userName,
            'protocol': p.protocol,
            'checksum': p.checksum,
            'task_name': p.taskName,
            'queue': p.queue,
            'children': p.children,
            'config': config_.configToDict(p.config)}
def saveCredential(credName, description, ctype, cert, pkey, metadata, conf):
    return credentials.saveCredential('localhost',
                                      'local',
                                      credName,
                                      description,
                                      ctype,
                                      cert,
                                      pkey,
                                      metadata,
                                      config.configToDict(conf))
def _documentFromPipeline(p):
    jsonConf = json.dumps(config_.configToDict(p.config))
    return {'pipeline_id': p.pipelineId,
            'pipeline_name': p.pipelineName,
            'user_name': p.userName,
            'protocol': p.protocol,
            'checksum': p.checksum,
            'task_name': p.taskName,
            'queue': p.queue,
            'children': p.children,
            'config': jsonConf}
def clusterToDocument(self, cluster):
    return dict(user_name=cluster.userName,
                cluster_name=cluster.clusterName,
                state=cluster.state,
                cred_name=cluster.credName,
                config=json.dumps(config.configToDict(cluster.config)),
                start_task=cluster.startTask,
                add_instance_tasks=cluster.addInstanceTasks,
                exec_nodes=cluster.execNodes,
                data_nodes=cluster.dataNodes,
                master=cluster.master)
def createExecDataFile(conf, master, masterMachineConf):
    """
    Creates an exec data file the same way the perl start_cluster does.

    This is very similar to createMasterDataFile and should be refactored a bit.
    """
    outName = os.path.join('/tmp', str(time.time()))

    ##
    # Load the master machine.conf and modify the node type
    masterConf = config.configFromStream(open(masterMachineConf), lazy=True)
    masterConf = config.configFromMap({'NODE_TYPE': EXEC_NODE}, masterConf, lazy=True)

    fout = open(outName, 'w')
    fout.write('\n'.join([k + '=' + str(v)
                          for k, v in config.configToDict(masterConf).iteritems()]))
    fout.close()

    template = open(conf('cluster.exec_user_data_tmpl')).read()
    clusterPrivateKey = open(conf('cluster.cluster_private_key')).read()

    ##
    # Derive the cluster public key from the private key
    outf = []
    runSingleProgramEx('ssh-keygen -y -f ' + conf('cluster.cluster_private_key'),
                       outf.append,
                       None,
                       log=True)

    if conf('general.ctype') == 'ec2':
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>', master['private_dns'])
    else:
        template = template.replace('<TMPL_VAR NAME=MASTER_DNS>', master['public_dns'])

    clusterPublicKey = ''.join(outf)

    template = template.replace('<TMPL_VAR NAME=CLUSTER_PRIVATE_KEY>', clusterPrivateKey)
    template = template.replace('<TMPL_VAR NAME=CLUSTER_PUBLIC_KEY>', clusterPublicKey)
    template = template.replace('<TMPL_VAR NAME=MACHINE_CONF>',
                                open(outName).read().replace('${', '\\${'))

    os.remove(outName)

    outf = os.path.join(conf('general.secure_tmp'), 'exec_user_data.sh')
    open(outf, 'w').write(template)
    return outf
def pipelineToDict(self, pipeline):
    protocolConf = protocol_format.load(self.machineConf,
                                        pipeline.config('pipeline.PIPELINE_TEMPLATE'))

    inputTagsList = [pipeline.config(k).split(',')
                     for k, v in protocolConf
                     if v.get('type').split()[0] in ['dataset',
                                                     'blastdb_dataset',
                                                     'paired_dataset',
                                                     'singleton_dataset'] and pipeline.config(k)]
    inputTags = []
    for i in inputTagsList:
        inputTags.extend(i)

    possibleOutputTags = set([pipeline.pipelineName + '_' + t.strip()
                              for t in pipeline.config('output.TAGS_TO_DOWNLOAD', default='').split(',')])

    query = [{'tag_name': t} for t in possibleOutputTags]
    tags = yield www_tags.loadTagsBy('localhost', 'local', pipeline.userName, {'$or': query}, False)
    tags = set([t['tag_name'] for t in tags])
    outputTags = list(tags & possibleOutputTags)

    pipelineTask = yield tasks_tx.loadTask(pipeline.taskName)

    pipelineWrapper = pipeline_misc.determineWrapper(self.machineConf,
                                                     pipeline.config('pipeline.PIPELINE_TEMPLATE'))

    pipelineDict = {'pipeline_id': pipeline.pipelineId,
                    'pipeline_name': pipeline.pipelineName,
                    'user_name': pipeline.userName,
                    'wrapper': pipeline.protocol == pipelineWrapper,
                    'protocol': pipeline.config('pipeline.PIPELINE_TEMPLATE'),
                    'checksum': pipeline.checksum,
                    'task_name': pipeline.taskName,
                    'queue': pipeline.queue,
                    'children': pipeline.children,
                    'state': pipelineTask.state,
                    'num_steps': pipelineTask.numTasks,
                    'num_complete': pipelineTask.completedTasks,
                    'input_tags': inputTags,
                    'output_tags': outputTags,
                    'pipeline_desc': pipeline.config('pipeline.PIPELINE_DESC', default=''),
                    'config': config.configToDict(pipeline.config, lazy=True)}

    defer.returnValue(pipelineDict)
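##
# Worked example of the output-tag intersection above (hypothetical tag
# names): only tags that both follow the '<pipelineName>_<tag>' naming
# convention and actually exist in the tag store are reported as outputs.
def _exampleOutputTags():
    possibleOutputTags = set(['p1_raw', 'p1_clean'])
    existingTags = set(['p1_clean', 'p2_raw'])
    outputTags = list(existingTags & possibleOutputTags)
    assert outputTags == ['p1_clean']
    return outputTags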
def credentialToDict(cred):
    """
    The main difference here is that the ctype is turned into a string
    representation of the class/module name.
    """
    return dict(name=cred.name,
                desc=cred.desc,
                ctype=cred.getCType(),
                cert=cred.cert,
                pkey=cred.pkey,
                active=cred.active,
                metadata=cred.metadata,
                conf=config.configToDict(cred.conf))
def runPipeline(host, cluster, parentName, bareRun, conf, queue=None, overwrite=False):
    return performQuery(host,
                        RUN_URL,
                        dict(cluster=cluster,
                             config=config.configToDict(conf),
                             parent_pipeline=parentName,
                             queue=queue,
                             bare_run=bareRun,
                             overwrite=overwrite))
def runPipelineConfig(taskName, name, pipeline, conf, queue=None):
    """
    Takes a config object representing pipeline options, validates those
    options against pipeline.OPTIONS, and passes the results on to
    runPipelineWithConfig
    """
    ##
    # Mocheezmo way to have it load a conf file.  This will be removed in the future
    tmpConfigName = os.path.join('/tmp', str(time.time()) + '.config')
    options = list(pipeline.OPTIONS)
    options.append(('conf',
                    '',
                    '--conf',
                    'Conf file (DO NOT SPECIFY, FOR INTERNAL USE)',
                    const('/tmp/machine.conf')))
    options.append(('CONFIG_FILE',
                    '-c',
                    '--CONFIG_FILE',
                    'Config file for the pipeline. Specify this if you do not want to specify options on the command line',
                    const(tmpConfigName)))

    ##
    # Load up machine.conf and apply it to our current config
    conf = config.configFromConfig(conf,
                                   config.configFromStream(open('/tmp/machine.conf'),
                                                           config.configFromEnv()),
                                   lazy=True)
    vals = {}
    for o in options:
        vals[o[0]] = cli.applyOption(conf(o[0], default=None), o, conf)

    conf = config.configFromMap(vals, conf)

    ##
    # For some ergatis trickery we then need to output this config to a temp
    # file so ergatis can pull variables from it
    confDict = config.configToDict(conf)
    confVals = {}
    cv = [('.'.join(k.split('.')[:-1]), k.split('.')[-1], v)
          for k, v in confDict.iteritems()]
    for s, k, v in cv:
        confVals.setdefault(s, {})[k] = v

    fout = open(tmpConfigName, 'w')
    for s, d in confVals.iteritems():
        if s not in ['', 'env']:
            fout.write('[' + s + ']\n')
            for k, v in d.iteritems():
                fout.write('%s=%s\n' % (k, str(v)))
    fout.close()

    return runPipelineWithConfig(taskName, name, pipeline, conf, queue)
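##
# Sketch of the dotted-key to INI-section transform used above (hypothetical
# keys).  'input.TAGS' lands under [input] as TAGS=...; keys in the '' and
# 'env' sections are skipped when the file is written.
def _exampleConfToIni():
    confDict = {'input.TAGS': 'reads',
                'cluster.MASTER_IP': '10.0.0.1',
                'env.PATH': '/bin'}
    confVals = {}
    for k, v in confDict.iteritems():
        section, key = '.'.join(k.split('.')[:-1]), k.split('.')[-1]
        confVals.setdefault(section, {})[key] = v

    lines = []
    for s, d in confVals.iteritems():
        if s not in ['', 'env']:
            lines.append('[' + s + ']')
            for k, v in d.iteritems():
                lines.append('%s=%s' % (k, str(v)))
    return '\n'.join(lines)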
def validatePipelineConfig(host, cluster, bareRun, conf):
    return performQuery(host,
                        VALIDATE_URL,
                        dict(cluster=cluster,
                             bare_run=bareRun,
                             config=config.configToDict(conf)))
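##
# Minimal client-side sketch (hypothetical host and cluster names; assumes the
# config module and the functions above live in one module).  Validates a
# pipeline config on the server, then submits it with no parent pipeline.
def _exampleValidateThenRun(conf):
    host, cluster = 'localhost', 'local'
    validatePipelineConfig(host, cluster, False, conf)
    return runPipeline(host, cluster, None, False, conf)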