def tagData(state, tagName, taskName, files, metadata, action, recursive, expand, compressDir, filterF=None, deleteOnExpand=False):
    """
    Create (or append to) the tag called tagName, persist it through
    state.tagPersist and hand back the tag as reloaded from persistence.

    files, recursive, expand and deleteOnExpand are passed to
    _generateFileList to produce the candidate file list.

    action - when it equals ACTION_APPEND and the tag already exists, the
    existing tag's metadata and files are merged with the new ones.

    compressDir - when truthy the tag is handed to _compressFiles and the
    resulting file is recorded in the metadata.

    filterF - optional predicate; a new file is kept only if filterF is
    not provided or filterF(f) is true.

    Note that metadata may be modified: 'tag_base_dir' is set to '/' on
    it when the key is absent.

    This is a generator that yields deferreds and finishes through
    defer.returnValue (presumably driven by defer.inlineCallbacks; the
    decorator is not visible here).
    """
    tagsDir = state.conf('tags.tags_directory')
    if not os.path.exists(tagsDir):
        yield commands.runProcess(['mkdir', '-p', tagsDir])

    files = yield _generateFileList(files, recursive, expand, deleteOnExpand)

    # Only an append cares about what the tag already contains
    oldFiles = set()
    if action == ACTION_APPEND:
        try:
            tag = yield state.tagPersist.loadTag(tagName)
            metadata = func.updateDict(tag.metadata, metadata)
            oldFiles = set(tag.files)
        except persist.TagNotFoundError:
            # Nothing to append to, treat it like a brand new tag
            pass

    if 'tag_base_dir' not in metadata:
        metadata['tag_base_dir'] = '/'

    def _isWanted(f):
        return f not in oldFiles and (not filterF or filterF(f))

    files = [f for f in files if _isWanted(f)]
    files.extend(oldFiles)

    # Remove any dups
    files = list(set(files))

    tag = persist.Tag(tagName=tagName,
                      files=files,
                      metadata=metadata,
                      phantom=None,
                      taskName=taskName)

    if compressDir:
        compressedFile = yield _compressFiles(tag, compressDir)
        compressInfo = {'compressed': True,
                        'compressed_file': compressedFile}
    else:
        compressInfo = {'compressed': False}
    tag.metadata = func.updateDict(tag.metadata, compressInfo)

    yield state.tagPersist.saveTag(tag)

    # The tag was saved with phantom set to None, but this could be a
    # phantom tag, so reload it in order to pick up any phantom
    # information held by the persistence layer
    tag = yield state.tagPersist.loadTag(tag.tagName)
    defer.returnValue(tag)
def _handleTaskTagData(request):
    """
    Mark the request's task as running, tag the requested data with
    tagData and then advance the task's progress.

    request.body must carry task_name, tag_name, metadata and action;
    files, recursive, expand, compress_dir and urls are optional.  A
    non-empty urls value is stored in the metadata under 'urls'.

    The request itself is handed back through defer.returnValue.
    """
    body = request.body

    def _markRunning(t):
        return t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1)

    yield tasks_tx.updateTask(body['task_name'], _markRunning)

    metadata = body['metadata']
    if 'urls' in body and body['urls']:
        metadata = func.updateDict(metadata, {'urls': body['urls']})

    yield tagData(request.state,
                  body['tag_name'],
                  body['task_name'],
                  body.get('files', []),
                  metadata,
                  body['action'],
                  body.get('recursive', False),
                  body.get('expand', False),
                  body.get('compress_dir', None),
                  filterF=_restrictDirs)

    yield tasks_tx.updateTask(body['task_name'], lambda t: t.progress())

    defer.returnValue(request)
def instantiateCredential(conf, cred):
    """
    Takes a credential and instantiates it.  The result (delivered via
    defer.returnValue) is a Record holding the information users of the
    instantiated credential need: name, conf, cert and pkey file paths,
    ec2URL and env.

    conf - config callable; if it has no 'config_loaded' value the config
    file named by 'conf_file' (DEFAULT_CONFIG_FILE by default) is layered
    in first.

    cred - credential exposing name, cert, pkey and metadata.

    The cert and key are written under conf('general.secure_tmp') only
    when the file is missing or its contents differ.
    """
    def _writeIfChanged(path, contents):
        # Use context managers so the handles are closed (and the data
        # flushed) before anything else uses these paths
        if os.path.exists(path):
            with open(path) as fin:
                if fin.read() == contents:
                    return
        with open(path, 'w') as fout:
            fout.write(contents)

    if not conf('config_loaded', default=False):
        # NOTE(review): the stream is left open on purpose, it is not
        # visible here whether configFromStream reads it eagerly
        conf = config.configFromConfig(
            conf,
            base=config.configFromStream(
                open(conf('conf_file', default=DEFAULT_CONFIG_FILE)),
                base=conf))

    certFile = os.path.join(conf('general.secure_tmp'), cred.name + '_cert.pem')
    keyFile = os.path.join(conf('general.secure_tmp'), cred.name + '_key.pem')

    # NOTE(review): the private key is created with default permissions,
    # confirm that general.secure_tmp is adequately protected
    _writeIfChanged(certFile, cred.cert)
    _writeIfChanged(keyFile, cred.pkey)

    newCred = functional.Record(name=cred.name,
                                conf=conf,
                                cert=certFile,
                                pkey=keyFile,
                                ec2URL=None,
                                env={})

    if 'ec2_url' in cred.metadata:
        newCred = newCred.update(
            env=functional.updateDict(newCred.env,
                                      dict(EC2_URL=cred.metadata['ec2_url'])))

    yield _createGroups(newCred)
    yield _createKeypair(newCred)
    defer.returnValue(newCred)
def loadAllPipelinesBy(self, criteria, userName):
    """
    Loads all pipelines matching the provided criteria for userName and
    returns whatever loadAllPipelinesByAdmin returns for them.

    'user_name' is merged into criteria before the lookup (presumably
    func.updateDict updates criteria itself - confirm before reusing the
    dict afterwards).
    """
    userCriteria = func.updateDict(criteria, {'user_name': userName})
    return self.loadAllPipelinesByAdmin(userCriteria)
def _handleTaskTagData(request):
    """
    Flag the task named in the request as running, tag the data the
    request describes via tagData and bump the task's progress.

    Required request.body keys: task_name, tag_name, metadata, action.
    Optional keys: files, recursive, expand, compress_dir, urls.  When
    urls is present and non-empty it is added to the metadata.

    The request is returned through defer.returnValue.
    """
    body = request.body
    taskName = body['task_name']

    yield tasks_tx.updateTask(
        taskName,
        lambda t: t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))

    if 'urls' in body and body['urls']:
        metadata = func.updateDict(body['metadata'], {'urls': body['urls']})
    else:
        metadata = body['metadata']

    yield tagData(request.state,
                  body['tag_name'],
                  body['task_name'],
                  body.get('files', []),
                  metadata,
                  body['action'],
                  body.get('recursive', False),
                  body.get('expand', False),
                  body.get('compress_dir', None),
                  filterF=_restrictDirs)

    yield tasks_tx.updateTask(body['task_name'], lambda t: t.progress())

    defer.returnValue(request)
def loadAllPipelinesBy(self, criteria, userName):
    """
    Loads all pipelines that match the provided criteria, restricted to
    userName, and returns the result of loadAllPipelinesByAdmin.

    The restriction is applied by merging 'user_name' into criteria.
    """
    restricted = func.updateDict(criteria, {'user_name': userName})
    return self.loadAllPipelinesByAdmin(restricted)
def _save():
    """
    Save the enclosing scope's pipeline into the clovr.pipelines MongoDB
    collection and return the pipeline.

    The document is keyed by '<userName>_<pipelineName>' and carries the
    fields produced by _documentFromPipeline.
    """
    conn = pymongo.Connection()
    docId = pipeline.userName + '_' + pipeline.pipelineName
    document = func.updateDict({'_id': docId},
                               _documentFromPipeline(pipeline))
    conn.clovr.pipelines.save(document, safe=True)
    return pipeline
def instantiateCredential(conf, cred):
    """
    Takes a credential and instantiates it.

    Returns a tuple (conf, record) where record is a Record with the
    cert and pkey file paths, ec2URL and env that users of the
    instantiated credential need.

    conf - config callable; if it has no 'config_loaded' value the file
    named by 'general.conf_file' (DEFAULT_CONFIG_FILE by default) is
    loaded underneath a map that sets config_loaded to True.

    cred - credential exposing name, cert, pkey and metadata.

    The cert and key are written under conf('general.secure_tmp') only
    when the file is missing or its contents differ.
    """
    def _writeIfChanged(path, contents):
        # Use context managers so the handles are closed (and the data
        # flushed) before anybody uses the returned paths
        if os.path.exists(path):
            with open(path) as fin:
                if fin.read() == contents:
                    return
        with open(path, 'w') as fout:
            fout.write(contents)

    if not conf('config_loaded', default=False):
        # NOTE(review): the stream is left open on purpose, it is not
        # visible here whether configFromStream reads it eagerly
        conf = config.configFromMap(
            {'config_loaded': True},
            base=config.configFromStream(
                open(conf('general.conf_file', default=DEFAULT_CONFIG_FILE)),
                base=conf))

    certFile = os.path.join(conf('general.secure_tmp'), cred.name + '_cert.pem')
    keyFile = os.path.join(conf('general.secure_tmp'), cred.name + '_key.pem')

    # NOTE(review): the private key is created with default permissions,
    # confirm that general.secure_tmp is adequately protected
    _writeIfChanged(certFile, cred.cert)
    _writeIfChanged(keyFile, cred.pkey)

    newCred = functional.Record(cert=certFile, pkey=keyFile, ec2URL=None, env={})

    if 'ec2_url' in cred.metadata:
        return (conf,
                newCred.update(
                    env=functional.updateDict(
                        newCred.env,
                        dict(EC2_URL=cred.metadata['ec2_url']))))
    else:
        return (conf, newCred)
def handleCredentialConfig(request):
    """
    Reply on the request's return queue with the credential's config as
    a dictionary, extended with 'general.ctype' taken from the
    credential's getCType().

    Returns defer_pipe.ret(request).
    """
    credential = request.credential
    confDict = config.configToDict(credential.credInstance.conf)
    confDict = func.updateDict(
        confDict,
        {'general.ctype': credential.credential.getCType()})

    queue.returnQueueSuccess(request.mq,
                             request.body['return_queue'],
                             confDict)
    return defer_pipe.ret(request)
def handleCredentialConfig(request):
    """
    Send the config of the request's credential instance, as a dict with
    'general.ctype' added from getCType(), to the queue named by
    request.body['return_queue'].

    Returns defer_pipe.ret(request).
    """
    ctypeEntry = {'general.ctype': request.credential.credential.getCType()}
    conf = func.updateDict(
        config.configToDict(request.credential.credInstance.conf),
        ctypeEntry)
    queue.returnQueueSuccess(request.mq, request.body['return_queue'], conf)
    return defer_pipe.ret(request)
def __init__(self, tagName, files, metadata, phantom, taskName):
    """
    Initialise the record with the given tag fields.  The task name is
    also stored inside the metadata under 'task_name'.
    """
    taggedMetadata = func.updateDict(metadata, {'task_name': taskName})
    func.Record.__init__(self,
                         tagName=tagName,
                         files=files,
                         metadata=taggedMetadata,
                         phantom=phantom,
                         taskName=taskName)
def initialize(self):
    """
    Create the 'tags_lite_cache' mongo cache (documents are keyed by
    their tag_name), register this object as a dependent of tagsCache
    and queue every tag already in tagsCache for caching here.
    """
    def _withId(d):
        return func.updateDict(d, {'_id': d['tag_name']})

    self.cache = yield mongo_cache.createCache('tags_lite_cache', _withId)
    self.tagsCache.addDependent(self)

    # Force any already-cached values to be cached
    cachedTags = yield self.tagsCache.cache.query({})
    for tagDict in cachedTags:
        self.workQueue.add(self._removeDetailAndCache, 'load', tagDict)
def dump(cluster):
    """
    Dumps a cluster to MongoDB.

    The cluster dict is saved into the clovr.clusters collection with
    its "name" value used as the document '_id'.
    """
    clusters = pymongo.Connection().clovr.clusters
    document = updateDict(cluster, dict(_id=cluster["name"]))
    clusters.save(document)
def dump(cluster):
    """
    Dumps a cluster to MongoDB.

    cluster is a dict; its 'name' entry becomes the '_id' of the
    document saved into the clovr.clusters collection.
    """
    collection = pymongo.Connection().clovr.clusters
    collection.save(updateDict(cluster, dict(_id=cluster['name'])))
def initialize(self):
    """
    Set up the "tags_lite_cache" mongo cache, whose documents take their
    '_id' from tag_name, subscribe to tagsCache as a dependent and
    schedule every tag tagsCache already holds for caching.
    """
    addId = lambda d: func.updateDict(d, {"_id": d["tag_name"]})
    self.cache = yield mongo_cache.createCache("tags_lite_cache", addId)

    self.tagsCache.addDependent(self)

    # Force any already-cached values to be cached
    existing = yield self.tagsCache.cache.query({})
    for tagDict in existing:
        self.workQueue.add(self._removeDetailAndCache, "load", tagDict)
def _downloadTag(request):
    """
    Copy the files of a tag from the source cluster into the destination
    cluster's upload directory using rsync.

    request.body supplies src_cluster, dst_cluster, user_name and
    tag_name.  Files under the tag's tag_base_dir are transferred
    relative to it, everything else relative to '/'.

    Delivers (via defer.returnValue) a persist.Tag describing the copied
    files, with tag_base_dir pointing at the destination directory and
    taskName set to None.
    """
    body = request.body

    remoteTag = yield www_tags.loadTag('localhost',
                                       body['src_cluster'],
                                       body['user_name'],
                                       body['tag_name'])

    srcClusters = yield www_clusters.listClusters(
        'localhost',
        {'cluster_name': body['src_cluster']},
        body['user_name'])
    srcCluster = srcClusters[0]

    dstClusters = yield www_clusters.listClusters(
        'localhost',
        {'cluster_name': body['dst_cluster']},
        body['user_name'])
    dstCluster = dstClusters[0]

    dstTagPath = os.path.join(dstCluster['config']['dirs.upload_dir'],
                              remoteTag['tag_name'])

    baseDirFiles, nonBaseDirFiles = _partitionFiles(
        remoteTag['files'],
        remoteTag['metadata']['tag_base_dir'])

    if baseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              remoteTag['metadata']['tag_base_dir'],
                              dstTagPath,
                              baseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    if nonBaseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              '/',
                              dstTagPath,
                              nonBaseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    # Where every transferred file now lives on the destination
    remoteFiles = [os.path.join(dstTagPath, f) for f in baseDirFiles]
    remoteFiles.extend([os.path.join(dstTagPath, _makePathRelative(f))
                        for f in nonBaseDirFiles])

    downloaded = persist.Tag(
        tagName=remoteTag['tag_name'],
        files=remoteFiles,
        metadata=func.updateDict(remoteTag['metadata'],
                                 {'tag_base_dir': dstTagPath}),
        phantom=remoteTag['phantom'],
        taskName=None)
    defer.returnValue(downloaded)
def abort(transactionid, headers=None):
    """STOMP abort transaction command.

    Rollback whatever actions are in this transaction.

    transactionid: the id shared by all actions in this transaction.

    headers: optional extra headers for the frame; 'transaction' is
    merged in on top of them.

    Returns the packed ABORT frame.
    """
    frameHeaders = func.updateDict(noneOrEmptyDict(headers),
                                   {'transaction': transactionid})
    return Frame(cmd='ABORT', headers=frameHeaders).pack()
def initialize(self):
    """
    Create the "pipelines_cache" mongo cache, register as a dependent of
    persistManager and tagNotify, then queue every persisted pipeline
    for caching.

    Cached documents get '_id' = user_name + "_" + pipeline_name.
    Loading the pipelines goes through defer_utils.tryUntil with 10
    tries and defer_utils.sleep(2) as the failure handler.
    """
    def _withId(d):
        return func.updateDict(d, {"_id": d["user_name"] + "_" + d["pipeline_name"]})

    self.cache = yield mongo_cache.createCache("pipelines_cache", _withId)

    self.persistManager.addDependent(self)
    self.tagNotify.addDependent(self)

    def _loadAll():
        return self.persistManager.loadAllPipelinesByAdmin({})

    pipelines = yield defer_utils.tryUntil(10,
                                           _loadAll,
                                           onFailure=defer_utils.sleep(2))

    for pipeline in pipelines:
        self.workQueue.add(self._pipelineToDictAndCache, "load", pipeline)
def unsubscribe(dest, headers=None):
    """STOMP unsubscribe command.

    Tell the server we no longer wish to receive any further messages
    for the given subscription.

    dest: the channel we wish to unsubscribe from.

    headers: optional extra headers for the frame; 'destination' is
    merged in on top of them.

    Returns the packed UNSUBSCRIBE frame.
    """
    return Frame(cmd='UNSUBSCRIBE',
                 headers=func.updateDict(noneOrEmptyDict(headers),
                                         {'destination': dest})).pack()
def ack(messageid, headers=None):
    """STOMP acknowledge command.

    Acknowledge receipt of a specific message from the server.

    messageid: the id of the message we are acknowledging.

    headers: optional extra headers for the frame; 'message-id' is
    merged in on top of them.

    Returns the packed ACK frame.
    """
    return Frame(cmd='ACK',
                 headers=func.updateDict(noneOrEmptyDict(headers),
                                         {'message-id': messageid})).pack()
def abort(transactionid, headers=None):
    """STOMP abort transaction command.

    Rollback whatever actions are in this transaction.

    transactionid: the id that all actions in this transaction share.

    headers: optional additional frame headers; the 'transaction' header
    is merged in on top of them.

    Returns the packed ABORT frame.
    """
    merged = func.updateDict(noneOrEmptyDict(headers),
                             {'transaction': transactionid})
    frame = Frame(cmd='ABORT', headers=merged)
    return frame.pack()
def _downloadTag(request):
    """
    Pull the files of a remote tag down into the destination cluster's
    upload directory with rsync.

    request.body provides src_cluster, dst_cluster, user_name and
    tag_name.  Files located under the tag's tag_base_dir are synced
    relative to that directory, the remaining ones relative to '/'.

    The result, delivered via defer.returnValue, is a persist.Tag with
    the destination file paths, tag_base_dir set to the destination
    directory and taskName None.
    """
    body = request.body

    remoteTag = yield www_tags.loadTag('localhost',
                                       body['src_cluster'],
                                       body['user_name'],
                                       body['tag_name'])

    srcClusters = yield www_clusters.listClusters(
        'localhost',
        {'cluster_name': body['src_cluster']},
        body['user_name'])
    srcCluster = srcClusters[0]

    dstClusters = yield www_clusters.listClusters(
        'localhost',
        {'cluster_name': body['dst_cluster']},
        body['user_name'])
    dstCluster = dstClusters[0]

    dstTagPath = os.path.join(dstCluster['config']['dirs.upload_dir'],
                              remoteTag['tag_name'])

    baseDirFiles, nonBaseDirFiles = _partitionFiles(
        remoteTag['files'],
        remoteTag['metadata']['tag_base_dir'])

    if baseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              remoteTag['metadata']['tag_base_dir'],
                              dstTagPath,
                              baseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    if nonBaseDirFiles:
        yield rsync.rsyncFrom(srcCluster['master']['public_dns'],
                              '/',
                              dstTagPath,
                              nonBaseDirFiles,
                              dstCluster['config']['rsync.options'],
                              dstCluster['config']['rsync.user'],
                              log=True)

    fromBaseDir = [os.path.join(dstTagPath, f) for f in baseDirFiles]
    fromElsewhere = [os.path.join(dstTagPath, _makePathRelative(f))
                     for f in nonBaseDirFiles]
    remoteFiles = fromBaseDir + fromElsewhere

    defer.returnValue(
        persist.Tag(tagName=remoteTag['tag_name'],
                    files=remoteFiles,
                    metadata=func.updateDict(remoteTag['metadata'],
                                             {'tag_base_dir': dstTagPath}),
                    phantom=remoteTag['phantom'],
                    taskName=None))
def __call__(self):
    """
    Start self.cmd through the shell and describe how to consume its
    output.

    This returns:
    (onComplete, [(stream1, func1), .. (streamn, funcn)])

    Where onComplete is any cleanup that needs to happen once all the
    streams are consumed, and each stream is paired with the function to
    call upon data coming in for that stream.  Here the streams are the
    child's stdout and stderr, paired with self.stdoutf and
    self.stderrf.

    The Popen object is also stored on self.pipe.
    """
    if self.log:
        logPrint(self.cmd)

    env = self.env
    if self.addEnv:
        # Work on a copy (of our own environment when no explicit env
        # was given) because we'll be modifying it
        if self.env:
            baseEnv = dict(self.env)
        else:
            baseEnv = dict(os.environ)
        env = functional.updateDict(baseEnv, self.addEnv)

    pipe = subprocess.Popen(self.cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True,
                            env=env)
    self.pipe = pipe

    streams = [(pipe.stdout, self.stdoutf), (pipe.stderr, self.stderrf)]
    return (self.onComplete, streams)
def initialize(self):
    """
    Build the 'pipelines_cache' mongo cache, hook this object up as a
    dependent of persistManager and tagNotify and queue all persisted
    pipelines for caching.

    Each cached document gets '_id' = user_name + '_' + pipeline_name.
    The pipelines are loaded through defer_utils.tryUntil with 10 tries
    and defer_utils.sleep(2) as the failure handler.
    """
    def _addId(d):
        return func.updateDict(d, {'_id': d['user_name'] + '_' + d['pipeline_name']})

    self.cache = yield mongo_cache.createCache('pipelines_cache', _addId)
    self.persistManager.addDependent(self)
    self.tagNotify.addDependent(self)

    pipelines = yield defer_utils.tryUntil(
        10,
        lambda: self.persistManager.loadAllPipelinesByAdmin({}),
        onFailure=defer_utils.sleep(2))

    for pipeline in pipelines:
        self.workQueue.add(self._pipelineToDictAndCache, 'load', pipeline)
def subscribe(dest, ack='auto', headers=None):
    """STOMP subscribe command.

    dest: the channel we wish to subscribe to.

    ack: 'auto' | 'client'
    If the ack is set to client, then messages received will have to
    have an acknowledge as a reply.  Otherwise the server will assume
    delivery failure.

    headers: optional extra headers for the frame; 'ack' and
    'destination' are merged in on top of them.

    Returns the packed SUBSCRIBE frame.
    """
    subscription = {'ack': ack, 'destination': dest}
    frameHeaders = func.updateDict(noneOrEmptyDict(headers), subscription)
    return Frame(cmd='SUBSCRIBE', headers=frameHeaders).pack()
def initialize(self):
    """
    Create the "tags_cache" mongo cache (documents keyed by tag_name),
    register as a dependent of persistManager and force every known tag
    to be loaded.

    workQueue.parallel is raised to 100 while the tags are loaded and
    set back to 1 once the work queue has drained.
    """
    def _withId(d):
        return func.updateDict(d, {"_id": d["tag_name"]})

    self.cache = yield mongo_cache.createCache("tags_cache", _withId)
    self.persistManager.addDependent(self)

    # If there are a lot of tags we want to parallelize caching them
    self.workQueue.parallel = 100

    # Force all tags to be cached
    tagNames = yield self.persistManager.listTags()
    for tagName in tagNames:
        yield self.persistManager.loadTag(tagName)

    yield defer_work_queue.waitForCompletion(self.workQueue)

    # Now that we are done, set it back to 1
    self.workQueue.parallel = 1
def _(request):
    """
    Forward the request to another cluster unless it targets 'local'.

    For the 'local' cluster the request is handed back unchanged via
    defer.returnValue.  Otherwise the first cluster matching
    request.body['cluster'] is looked up and the body, with 'cluster'
    rewritten to 'local', is sent to url (taken from the enclosing
    scope) on that cluster's master; the answer is emitted through
    defer_pipe.emit as the request's response.
    """
    body = request.body
    if body['cluster'] == 'local':
        defer.returnValue(request)
    else:
        clusters = yield clusters_client.listClusters(
            {'cluster_name': body['cluster']},
            body['user_name'])
        cluster = clusters[0]

        forwarded = func.updateDict(request.body, {'cluster': 'local'})
        ret = yield http.performQuery(cluster['master']['public_dns'],
                                      url,
                                      forwarded,
                                      timeout=10,
                                      tries=3)

        defer_pipe.emit(request.update(response=ret))
def performQuery(mq, queryQueue, request):
    """
    Send request as JSON to queryQueue and return a Deferred for the
    reply.

    A random 'credentials' return queue is generated, subscribed to and
    added to the request as 'return_queue'.  When the reply arrives the
    return queue is unsubscribed; a successful reply fires the Deferred
    with its 'data', any other reply fails it with a queue.RemoteError
    built from the name, msg and stacktrace found in 'data'.
    """
    retQueue = queue.randomQueueName('credentials')
    d = defer.Deferred()

    def _handleMsg(mq, m):
        mq.unsubscribe(retQueue)
        ret = json.loads(m.body)
        if not ret['success']:
            failure = ret['data']
            d.errback(queue.RemoteError(failure['name'],
                                        failure['msg'],
                                        failure['stacktrace']))
            return None
        return d.callback(ret['data'])

    mq.subscribe(_handleMsg, retQueue)

    payload = json.dumps(func.updateDict(request, {'return_queue': retQueue}))
    mq.send(queryQueue, payload)
    return d
def initialize(self):
    """
    Set up the 'tags_cache' mongo cache, whose documents take '_id' from
    tag_name, become a dependent of persistManager and load every tag so
    that it ends up cached.

    The work queue runs with parallel = 100 during the load and goes
    back to 1 after waiting for the queue to complete.
    """
    addId = lambda d: func.updateDict(d, {'_id': d['tag_name']})
    self.cache = yield mongo_cache.createCache('tags_cache', addId)

    self.persistManager.addDependent(self)

    # If there are a lot of tags we want to parallelize caching them
    self.workQueue.parallel = 100

    # Force all tags to be cached
    allTags = yield self.persistManager.listTags()
    for tagName in allTags:
        yield self.persistManager.loadTag(tagName)

    yield defer_work_queue.waitForCompletion(self.workQueue)

    # Now that we are done, set it back to 1
    self.workQueue.parallel = 1
def subscribe(dest, ack='auto', headers=None):
    """STOMP subscribe command.

    dest: the channel we wish to subscribe to.

    ack: 'auto' | 'client'
    If the ack is set to client, then messages received will have to
    have an acknowledge as a reply.  Otherwise the server will assume
    delivery failure.

    headers: optional additional frame headers; 'ack' and 'destination'
    are merged in on top of them.

    Returns the packed SUBSCRIBE frame.
    """
    merged = func.updateDict(noneOrEmptyDict(headers),
                             {'ack': ack,
                              'destination': dest})
    frame = Frame(cmd='SUBSCRIBE', headers=merged)
    return frame.pack()
def handleWWWPipelineList(request):
    """
    Input:
    { cluster: string
      user_name: string
      ?criteria: { key/value }
      ?detail: boolean
    }
    Output:
    pipeline

    Queries the pipelines cache for the user's pipelines that match the
    optional criteria.  Unless detail is true every result is passed
    through removeDetail.  The request, updated with the result as its
    response, is delivered via defer.returnValue.
    """
    body = request.body

    # NOTE(review): criteria is merged after user_name, so a 'user_name'
    # key inside criteria presumably takes precedence - confirm that this
    # is intended
    query = func.updateDict({'user_name': body['user_name']},
                            body.get('criteria', {}))
    pipelinesDict = yield request.state.pipelinesCache.cache.query(query)

    wantDetail = request.body.get('detail', False)
    if not wantDetail:
        pipelinesDict = map(removeDetail, pipelinesDict)

    defer.returnValue(request.update(response=pipelinesDict))
def performQuery(mq, queryQueue, request):
    """
    Publish request (JSON encoded) on queryQueue and return a Deferred
    that resolves with the reply.

    The reply is expected on a freshly generated 'credentials' queue
    whose name is added to the request as 'return_queue'.  On arrival
    the queue is unsubscribed; when the reply's 'success' is true the
    Deferred fires with 'data', otherwise it fails with a
    queue.RemoteError built from the name, msg and stacktrace in 'data'.
    """
    replyQueue = queue.randomQueueName('credentials')
    d = defer.Deferred()

    def _handleMsg(mq, m):
        mq.unsubscribe(replyQueue)
        ret = json.loads(m.body)
        if ret['success']:
            return d.callback(ret['data'])
        data = ret['data']
        d.errback(queue.RemoteError(data['name'],
                                    data['msg'],
                                    data['stacktrace']))

    mq.subscribe(_handleMsg, replyQueue)
    mq.send(queryQueue,
            json.dumps(func.updateDict(request,
                                       {'return_queue': replyQueue})))
    return d
def send(dest, body, headers=None):
    """STOMP send command.

    dest: the channel the message is sent to.

    body: the message body to be sent.

    headers: optional extra headers for the frame; 'content-length'
    (len(body)) and 'destination' are merged in on top of them.

    Returns the packed SEND frame.
    """
    required = {'content-length': len(body), 'destination': dest}
    frameHeaders = func.updateDict(noneOrEmptyDict(headers), required)
    return Frame(cmd='SEND', headers=frameHeaders, body=body).pack()
def _(request):
    """
    Pass the request on to a remote cluster when it is not meant for the
    'local' one.

    A request for 'local' is returned as is through defer.returnValue.
    For any other cluster the first match from listClusters is used: the
    body, with 'cluster' replaced by 'local', is posted to url (a name
    from the enclosing scope) on the cluster's master and the answer is
    emitted with defer_pipe.emit as the request's response.
    """
    if request.body['cluster'] == 'local':
        defer.returnValue(request)
    else:
        matches = yield clusters_client.listClusters(
            {'cluster_name': request.body['cluster']},
            request.body['user_name'])
        target = matches[0]

        ret = yield http.performQuery(
            target['master']['public_dns'],
            url,
            func.updateDict(request.body, {'cluster': 'local'}),
            timeout=10,
            tries=3)

        defer_pipe.emit(request.update(response=ret))
def handleWWWPipelineList(request):
    """
    Input:
    { cluster: string
      user_name: string
      ?criteria: { key/value }
      ?detail: boolean
    }
    Output:
    pipeline

    Looks up the user's pipelines in the pipelines cache, narrowed by
    the optional criteria, and strips them with removeDetail unless
    detail is true.  Delivers the request updated with that response
    via defer.returnValue.
    """
    # NOTE(review): a 'user_name' key inside criteria presumably wins
    # over the requesting user_name because criteria is merged second -
    # confirm that this is intended
    criteria = func.updateDict({'user_name': request.body['user_name']},
                               request.body.get('criteria', {}))

    pipelinesDict = yield request.state.pipelinesCache.cache.query(criteria)

    if not request.body.get('detail', False):
        pipelinesDict = map(removeDetail, pipelinesDict)

    response = request.update(response=pipelinesDict)
    defer.returnValue(response)
def main(options, _args):
    """
    Start a cluster using the parsed command line options and either
    print the name of the resulting task or follow its status.

    options - callable returning option values by name.  The entries of
    'general.config_options' are KEY=VALUE strings; they are combined
    with the instance type and bid price options to form the cluster
    config.

    _args - unused.
    """
    # Split on the first '=' only so that values may contain '=' too
    conf = func.updateDict(
        dict(c.split('=', 1) for c in options('general.config_options')),
        {'cluster.master_type': options('general.master_instance_type'),
         'cluster.exec_type': options('general.exec_instance_type'),
         'cluster.master_bid_price': options('general.master_bid_price'),
         'cluster.exec_bid_price': options('general.exec_bid_price')})

    taskName = startCluster(options('general.host'),
                            options('general.cluster'),
                            options('general.num_exec'),
                            0,
                            options('general.cred'),
                            conf)

    if options('general.print_task_name'):
        # Single argument in parentheses prints the same with the print
        # statement and the print function
        print(taskName)
    else:
        runTaskStatus(taskName)
def send(dest, body, headers=None):
    """STOMP send command.

    dest: the channel the message is sent to.

    body: the message body to be sent.

    headers: optional additional frame headers; 'content-length'
    (len(body)) and 'destination' are merged in on top of them.

    Returns the packed SEND frame.
    """
    merged = func.updateDict(noneOrEmptyDict(headers),
                             {'content-length': len(body),
                              'destination': dest})
    frame = Frame(cmd='SEND', headers=merged, body=body)
    return frame.pack()
def instantiateCredential(conf, cred):
    """
    Takes a credential and instantiates it.

    Returns a tuple (conf, record) where record is a Record with the
    cert and pkey file paths, ec2URL and env that users of the
    instantiated credential need.

    conf - config callable; if it has no "config_loaded" value the file
    named by "general.conf_file" (DEFAULT_CONFIG_FILE by default) is
    loaded underneath a map that sets config_loaded to True.

    cred - credential exposing name, cert, pkey and metadata.

    The cert and key are written under conf("general.secure_tmp") only
    when the file is missing or its contents differ.
    """
    def _writeIfChanged(path, contents):
        # Use context managers so the handles are closed (and the data
        # flushed) before anybody uses the returned paths
        if os.path.exists(path):
            with open(path) as fin:
                if fin.read() == contents:
                    return
        with open(path, "w") as fout:
            fout.write(contents)

    if not conf("config_loaded", default=False):
        # NOTE(review): the stream is left open on purpose, it is not
        # visible here whether configFromStream reads it eagerly
        conf = config.configFromMap(
            {"config_loaded": True},
            base=config.configFromStream(
                open(conf("general.conf_file", default=DEFAULT_CONFIG_FILE)),
                base=conf),
        )

    certFile = os.path.join(conf("general.secure_tmp"), cred.name + "_cert.pem")
    keyFile = os.path.join(conf("general.secure_tmp"), cred.name + "_key.pem")

    # NOTE(review): the private key is created with default permissions,
    # confirm that general.secure_tmp is adequately protected
    _writeIfChanged(certFile, cred.cert)
    _writeIfChanged(keyFile, cred.pkey)

    newCred = functional.Record(cert=certFile, pkey=keyFile, ec2URL=None, env={})

    if "ec2_url" in cred.metadata:
        return (conf,
                newCred.update(
                    env=functional.updateDict(
                        newCred.env,
                        dict(EC2_URL=cred.metadata["ec2_url"]))))
    else:
        return (conf, newCred)
def handleWWWPipelineDelete(request):
    """
    Input:
    { cluster: string
      user_name: string
      criteria: { key/value }
      ?dry_run: boolean
    }
    Output:
    List of pipeline dictionaries

    Raises EmptyCriteriaError when criteria is empty.  The matching
    pipelines are looked up in the pipelines cache and, unless dry_run
    is true, removed one by one through pipelinePersist.  The request
    updated with the matched pipelines as its response is delivered via
    defer.returnValue.
    """
    body = request.body
    if not body['criteria']:
        raise EmptyCriteriaError()

    # NOTE(review): criteria is merged after user_name, so a 'user_name'
    # key inside criteria presumably takes precedence - confirm that this
    # is intended for a delete
    query = func.updateDict({'user_name': body['user_name']},
                            body['criteria'])
    pipelinesDict = yield request.state.pipelinesCache.cache.query(query)

    dryRun = request.body.get('dry_run', False)
    if not dryRun:
        for pipeline in pipelinesDict:
            yield request.state.pipelinePersist.removePipeline(
                pipeline['user_name'],
                pipeline['pipeline_name'])

    defer.returnValue(request.update(response=pipelinesDict))
def _realizePhantom(ctype, baseDir, phantom):
    """
    Work out how the phantom tag is obtained for cluster type ctype and
    run it.

    The phantom map, extended with 'ctype' and 'base_dir', becomes a
    lazy config.  The download value is looked up as
    cluster.<ctype>.url, falling back to cluster.<ctype>.command and
    then to cluster.ALL.command.

    Values starting with 'http://' or 's3://' are not handled yet and
    give None; anything else is treated as a command and the result of
    _runCommand is returned.
    """
    phantomConfig = config.configFromMap(
        func.updateDict(phantom, {'ctype': ctype, 'base_dir': baseDir}),
        lazy=True)

    # Resolve the fallbacks innermost first, they are always evaluated
    allCommand = phantomConfig('cluster.ALL.command')
    typeCommand = phantomConfig('cluster.%s.command' % ctype,
                                default=allCommand)
    download = str(phantomConfig('cluster.%s.url' % ctype,
                                 default=typeCommand))

    if download.startswith('http://'):
        #_downloadHttp(ctype, baseDir, download, phantomConfig)
        return None

    if download.startswith('s3://'):
        ##
        # We might need to modify realizePhantom to take a conf that
        # will have our s3 credentials in it
        #_downloadS3(ctype, baseDir, download, phantomConfig)
        return None

    ##
    # It's a command:
    return _runCommand(ctype, baseDir, download, phantomConfig)
def performQueryNoParse(host, url, var, headers=None, timeout=30, tries=4, debug=False):
    """
    POST var, JSON encoded in the form field 'request', to
    http://<host><url> and return the deferred from
    defer_utils.tryUntil without parsing the answer.

    headers - optional extra HTTP headers; the form Content-Type is
    merged in on top of them.

    timeout - used both as connection timeout and as overall timeout of
    each getPage call.

    tries - number of attempts handed to defer_utils.tryUntil, with
    defer_utils.sleep(10) as the failure handler.  RetriesFailed is
    raised immediately when tries is 0.

    debug - not used in this function.

    Failures are logged with log.err and passed on unchanged.
    """
    if tries == 0:
        raise RetriesFailed()

    if headers is None:
        headers = {}

    def _post():
        target = ('http://' + host + url).encode('utf_8')
        return getPage(target,
                       method='POST',
                       postdata=urllib.urlencode({'request': json.dumps(var)}),
                       headers=func.updateDict(
                           headers,
                           {'Content-Type': 'application/x-www-form-urlencoded'}),
                       connectionTimeout=timeout,
                       timeout=timeout)

    d = defer_utils.tryUntil(tries, _post, onFailure=defer_utils.sleep(10))

    def _error(f):
        log.err(f)
        return f

    d.addErrback(_error)
    return d
def render_GET(self, request):
    """
    Handle a GET carrying a JSON encoded 'request' argument.

    A MissingRequestError instance is returned (not raised) when the
    argument is absent or empty.  Otherwise the first 'request' value is
    decoded and extended with a random 'www-data' return queue name and
    a user_name that defaults to 'guest'.

    NOTE(review): nothing visible here sends newReq, subscribes
    _handleMsg or returns a value - the body presumably continues beyond
    what is shown; confirm against the complete function.
    """
    if 'request' not in request.args or not request.args['request']:
        return MissingRequestError()

    req = json.loads(request.args['request'][0])
    retQueue = queue.randomQueueName('www-data')
    newReq = func.updateDict(req, dict(return_queue=retQueue, user_name=req.get('user_name', 'guest')))

    d = defer.Deferred()

    # Fail the deferred if no answer shows up within TIMEOUT
    def _timeout():
        d.errback(Exception('Waiting for request failed'))

    delayed = reactor.callLater(TIMEOUT, _timeout)

    # Fire the deferred with the body of the answer; an exception raised
    # while doing so is logged rather than propagated
    def _handleMsg(mq, m):
        try:
            d.callback(m.body)
        except Exception, err:
            log.err(err)
def render_GET(self, request):
    """
    Handle a GET whose 'request' argument holds a JSON document.

    When the argument is missing or empty a MissingRequestError instance
    is returned (not raised).  Otherwise the first 'request' value is
    parsed and given a random 'www-data' return queue name plus a
    user_name, 'guest' unless the document has one.

    NOTE(review): nothing visible here sends newReq, subscribes
    _handleMsg or returns a value - the body presumably continues beyond
    what is shown; confirm against the complete function.
    """
    if 'request' not in request.args or not request.args['request']:
        return MissingRequestError()

    req = json.loads(request.args['request'][0])
    retQueue = queue.randomQueueName('www-data')
    newReq = func.updateDict(
        req,
        dict(return_queue=retQueue, user_name=req.get('user_name', 'guest')))

    d = defer.Deferred()

    # Fail the deferred if no answer shows up within TIMEOUT
    def _timeout():
        d.errback(Exception('Waiting for request failed'))

    delayed = reactor.callLater(TIMEOUT, _timeout)

    # Fire the deferred with the body of the answer; an exception raised
    # while doing so is logged rather than propagated
    def _handleMsg(mq, m):
        try:
            d.callback(m.body)
        except Exception, err:
            log.err(err)
def handleRealizePhantom(request):
    """
    Realize a phantom tag into the local cluster's upload directory.

    The task named in the request is marked as running, the 'local'
    cluster is looked up to obtain its ctype and upload directory, the
    destination directory is created and _realizePhantom is run against
    it.  A RunCommandError is recorded on the task as an error message
    and raised again.

    NOTE(review): metadata is computed but not used in the visible code,
    the body presumably continues beyond what is shown here.
    """
    yield tasks_tx.updateTask(request.body['task_name'],
                              lambda t : t.setState(tasks_tx.task.TASK_RUNNING).update(numTasks=1))

    localClusters = yield www_clusters.listClusters('localhost',
                                                    {'cluster_name': 'local'},
                                                    request.body['user_name'])
    localCluster = localClusters[0]
    ctype = localCluster['config']['general.ctype']

    # The tag's files go into <upload_dir>/<tag_name>
    dstTagPath = os.path.join(localCluster['config']['dirs.upload_dir'],
                              request.body['tag_name'])

    metadata = func.updateDict(request.body['metadata'], {'tag_base_dir': dstTagPath})

    yield commands.runProcess(['mkdir', '-p', dstTagPath])

    try:
        yield _realizePhantom(ctype, dstTagPath, request.body['phantom'])
    except RunCommandError, err:
        yield tasks_tx.updateTask(request.body['task_name'],
                                  lambda t : t.addMessage(tasks_tx.task.MSG_ERROR, str(err)))
        # Raise the caught error explicitly, a yield happened since it
        # was caught
        raise err
def handleWWWPipelineDelete(request):
    """
    Input:
    { cluster: string
      user_name: string
      criteria: { key/value }
      ?dry_run: boolean
    }
    Output:
    List of pipeline dictionaries

    An empty criteria raises EmptyCriteriaError.  Otherwise the user's
    pipelines matching criteria are fetched from the pipelines cache
    and, when dry_run is not true, each one is removed through
    pipelinePersist.  Delivers the request updated with the matched
    pipelines as response via defer.returnValue.
    """
    if not request.body['criteria']:
        raise EmptyCriteriaError()

    # NOTE(review): a 'user_name' key inside criteria presumably wins
    # over the requesting user_name because criteria is merged second -
    # confirm that this is intended for a delete
    criteria = func.updateDict({'user_name': request.body['user_name']},
                               request.body['criteria'])
    pipelinesDict = yield request.state.pipelinesCache.cache.query(criteria)

    if not request.body.get('dry_run', False):
        persistence = request.state.pipelinePersist
        for pipeline in pipelinesDict:
            yield persistence.removePipeline(pipeline['user_name'],
                                             pipeline['pipeline_name'])

    response = request.update(response=pipelinesDict)
    defer.returnValue(response)
def tagData(state, tagName, taskName, files, metadata, action, recursive, expand, compressDir, filterF=None, deleteOnExpand=False):
    """
    Tag files under tagName, save the tag with state.tagPersist and
    return the tag as loaded back from persistence.

    files, recursive, expand and deleteOnExpand go to _generateFileList,
    which produces the candidate files.

    action - if equal to ACTION_APPEND and the tag exists, its metadata
    and files are merged with the new ones.

    compressDir - if truthy the tag is compressed with _compressFiles
    and the compressed file is noted in the metadata.

    filterF - optional predicate, a new file is only added when filterF
    is not given or filterF(f) is true.

    metadata may be modified: 'tag_base_dir' is set to '/' on it when
    missing.

    Generator yielding deferreds that ends with defer.returnValue
    (presumably run under defer.inlineCallbacks; the decorator is not
    visible here).
    """
    tagsDirectory = state.conf('tags.tags_directory')
    if not os.path.exists(tagsDirectory):
        yield commands.runProcess(['mkdir', '-p', tagsDirectory])

    files = yield _generateFileList(files, recursive, expand, deleteOnExpand)

    # Files the tag already has, only relevant when appending
    oldFiles = set()
    if action == ACTION_APPEND:
        try:
            tag = yield state.tagPersist.loadTag(tagName)
            metadata = func.updateDict(tag.metadata, metadata)
            oldFiles = set(tag.files)
        except persist.TagNotFoundError:
            # No existing tag, behave as if we were creating it
            pass

    if 'tag_base_dir' not in metadata:
        metadata['tag_base_dir'] = '/'

    newFiles = []
    for f in files:
        if f in oldFiles:
            continue
        if not filterF or filterF(f):
            newFiles.append(f)
    newFiles += oldFiles

    # Remove any dups
    files = list(set(newFiles))

    tag = persist.Tag(tagName=tagName,
                      files=files,
                      metadata=metadata,
                      phantom=None,
                      taskName=taskName)

    if compressDir:
        compressedFile = yield _compressFiles(tag, compressDir)
        extra = {'compressed': True, 'compressed_file': compressedFile}
    else:
        extra = {'compressed': False}
    tag.metadata = func.updateDict(tag.metadata, extra)

    yield state.tagPersist.saveTag(tag)

    # The tag was saved with phantom set to None, but this could be a
    # phantom tag, so reload it in order to pick up any phantom
    # information held by the persistence layer
    tag = yield state.tagPersist.loadTag(tag.tagName)
    defer.returnValue(tag)
def dump(pipeline):
    """
    Dumps pipeline info to mongodb.

    pipeline is a dict; it is saved into the clovr.pipelines collection
    in a document whose '_id' starts out as the pipeline's 'name' (the
    pipeline's own keys are merged in afterwards).
    """
    collection = pymongo.Connection().clovr.pipelines
    document = func.updateDict(dict(_id=pipeline['name']), pipeline)
    collection.save(document)
def runProcess(cmdArgs, stdoutf=None, stderrf=None, expected=None, initialText=None, addEnv=None, newEnv=None, workingDir=None, uid=None, gid=None, log=False):
    """
    Spawn a child process through the reactor.

    The only required argument is cmdArgs.

    cmdArgs is a list whose items are converted to strings; cmdArgs[0]
    must be the executable.

    stdoutf and stderrf are functions that will be called with the
    output data.  There is no guarantee the data will be line
    terminated.

    expected is a list of integers that are valid exit codes for the
    application, [0] by default.

    initialText is any text to be sent to the program before closing
    stdin on it.

    addEnv allows one to add keys to the environment of the child.

    newEnv specifies a totally new environment to run the child under.
    The current application's env is the default value.  The dict passed
    in is never modified.

    workingDir is what directory to run the child process in.

    uid and gid are numeric user id and group id to run the program as;
    None (the default) leaves them alone, 0 is honoured.

    log - when true the command line is logged before it is started.

    This returns a deferred which will be fired on program exit.  Any
    failure of that deferred is replaced by a ProgramRunError carrying
    cmdArgs.
    """
    cmdArgs = [str(c) for c in cmdArgs]

    # Always work on a private copy so that merging addEnv can touch
    # neither os.environ nor the caller's newEnv
    if newEnv is None:
        env = dict(os.environ)
    else:
        env = dict(newEnv)

    if addEnv:
        env = func.updateDict(env, addEnv)

    if expected is None:
        expected = [0]

    pp = NonInteractiveProcessProtocol(stdoutf=stdoutf,
                                       stderrf=stderrf,
                                       expected=expected,
                                       initialText=initialText)

    kwargs = {}
    if workingDir:
        kwargs['path'] = workingDir
    # 0 is a valid id (root), so compare against None rather than
    # relying on truthiness
    if uid is not None:
        kwargs['uid'] = uid
    if gid is not None:
        kwargs['gid'] = gid

    if log:
        logger.msg('Running command: ' + ' '.join(cmdArgs))

    reactor.spawnProcess(pp,
                         executable=cmdArgs[0],
                         args=cmdArgs,
                         env=env,
                         **kwargs)

    def _error(_):
        raise ProgramRunError(cmdArgs)

    pp.deferred.addErrback(_error)
    return pp.deferred
def _updateOrCreateDict(d, updates):
    """
    Merge updates into d with func.updateDict, starting from a fresh
    empty dict when d is None, and return the result.
    """
    base = {} if d is None else d
    return func.updateDict(base, updates)
def _uploadTag(request):
    """
    Copy the files of a local tag to the destination cluster with rsync.

    request.body supplies tag_name, src_cluster, dst_cluster, user_name
    and dst_type.  Files under the tag's tag_base_dir are transferred
    relative to it, everything else relative to '/'.  When the tag has
    urls that are not yet marked as realized they are realized through
    _realizeUrls and their files added to the result.

    Delivers (via defer.returnValue) a persist.Tag describing the remote
    files, with tag_base_dir pointing at the destination directory and
    taskName set to None.
    """
    localTag = yield request.state.tagPersist.loadTag(request.body['tag_name'])

    srcClusters = yield www_clusters.listClusters('localhost',
                                                  {'cluster_name': request.body['src_cluster']},
                                                  request.body['user_name'])
    srcCluster = srcClusters[0]

    dstClusters = yield www_clusters.listClusters('localhost',
                                                  {'cluster_name': request.body['dst_cluster']},
                                                  request.body['user_name'])
    dstCluster = dstClusters[0]

    # We want the trailing '/' so everyone knows it's a directory
    dstTagPath = os.path.join(dstCluster['config']['dirs.upload_dir'], localTag.tagName) + '/'

    # Change dstTagPath to use /mnt/nostaging instead of mnt/staging as the destination if the
    # tag has a nostaging metadata value
    #if localTag.metadata['nostaging']:
    #    dstTagPath = os.path.join(dstCluster['config']['dirs.nostaging_upload_dir'], localTag.tagName) + '/'

    # For a 'local' destination type the tag goes into the nostaging
    # upload directory (taken from the cluster config when it is set
    # there), which is created on the destination first
    if request.body['dst_type'] == 'local' :
        upload_dir = '/mnt/nostaging/'
        if 'dirs.nostaging_upload_dir' in dstCluster['config']:
            upload_dir = dstCluster['config']['dirs.nostaging_upload_dir']

        yield _makeDirsOnCluster(dstCluster, [upload_dir])

        dstTagPath = os.path.join(upload_dir, localTag.tagName) + '/'

    baseDirFiles, nonBaseDirFiles = _partitionFiles(localTag.files, localTag.metadata['tag_base_dir'])

    if baseDirFiles:
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            localTag.metadata['tag_base_dir'],
                            dstTagPath,
                            baseDirFiles,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)

    if nonBaseDirFiles:
        yield rsync.rsyncTo(dstCluster['master']['public_dns'],
                            '/',
                            dstTagPath,
                            nonBaseDirFiles,
                            srcCluster['config']['rsync.options'],
                            srcCluster['config']['rsync.user'],
                            log=True)

    # Where every transferred file now lives on the destination
    remoteFiles = ([os.path.join(dstTagPath, f) for f in baseDirFiles] +
                   [os.path.join(dstTagPath, _makePathRelative(f)) for f in nonBaseDirFiles])

    metadata = localTag.metadata
    # Realize the urls only once, the flag is recorded in the metadata
    if metadata.get('urls', []) and not metadata.get('urls_realized', False):
        tag = yield _realizeUrls(request)
        remoteFiles.extend(tag['files'])
        metadata = func.updateDict(metadata, {'urls_realized': True})

    # If we are dealing with an adhoc cluster here there is a chance that
    # we are attempting to transfer to a local VM's shared folder which
    # presumably does not support chown/chmod, so we skip this step when
    # the master has no instance_type
    if dstCluster['master']['instance_type'] is not None:
        yield ssh.runProcessSSH(dstCluster['master']['public_dns'],
                                'chown -R %s %s' % (dstCluster['config']['vappio.user'],
                                                    dstTagPath),
                                None,
                                log.err,
                                srcCluster['config']['ssh.user'],
                                srcCluster['config']['ssh.options'])

    defer.returnValue(persist.Tag(tagName=localTag.tagName,
                                  files=remoteFiles,
                                  metadata=func.updateDict(metadata,
                                                           {'tag_base_dir': dstTagPath}),
                                  phantom=localTag.phantom,
                                  taskName=None))
def removeClustersBy(self, criteria, userName):
    """
    Remove the clusters matching criteria whose user_name is either
    userName or None, by delegating to removeClustersByAdmin.

    Returns whatever removeClustersByAdmin returns.
    """
    ownerFilter = {'user_name': {'$in': [userName, None]}}
    criteria = func.updateDict(criteria, ownerFilter)
    return self.removeClustersByAdmin(criteria)
def tagData(tagsDir, tagName, tagBaseDir, files, recursive, expand, compress, append, overwrite, metadata=None, filterF=None):
    """
    Tag a list of files with the given name.

    files can contain directories, and if recursive is set the contents of
    the directories will become part of the tag rather than just the
    directory name.

    tagBaseDir is the name of the directory that is not part of the actual
    tag hierarchy.  When set it is stored in the metadata as 'tag_base_dir'.

    expand will cause any archives listed to be expanded and the contents of
    the archive to be added.

    compress will compress the files that have been put in the tag.  compress
    should be the path to the directory the compressed file should be put in.

    append will add to tagName if it already exists; only names not already
    in the tag are added.

    overwrite replaces the contents of an existing tag file.  If the tag
    exists and neither append nor overwrite is set an Exception is raised.

    metadata is an optional dict written (as JSON) to '<tag>.metadata'.  When
    appending, it is merged over any metadata already on disk.

    filterF - if you want to filter any of the files as they are added to the
    file list, provide a filter function that will be called on each
    individual file name.  The file is added if the filter returns True.

    This returns the tag that was created, as loaded by loadTagFile.
    """
    if metadata is None:
        metadata = {}

    if not os.path.exists(tagsDir):
        runSystemEx('mkdir -p ' + tagsDir)

    outName = os.path.join(tagsDir, tagName)
    if os.path.exists(outName) and not append and not overwrite:
        raise Exception('Tag already exists')

    ##
    # Keep a set of all old entries in the file; when we walk the generator
    # we check whether each file is already in here
    if append and os.path.exists(outName):
        # Context manager so the handle is closed as soon as it is read
        with open(outName) as tagFile:
            oldFiles = set([l.strip() for l in tagFile])
    else:
        oldFiles = set()

    files = [f
             for f in generateFileList(files, recursive, expand)
             if f not in oldFiles and (not filterF or filterF(f))]

    # NOTE: the file list above is built before the tag file is opened, so
    # with both append and overwrite set the old entries are filtered out of
    # the new list and then dropped when the file is truncated
    mode = 'w' if overwrite else 'a'
    with open(outName, mode) as outFile:
        outFile.write('\n'.join(files))
        outFile.write('\n')

    ##
    # If we are compressing the files then load the tag back up
    # so we have all of the files there
    if compress:
        outTar = str(os.path.join(compress, tagName + '.tar'))
        outGzip = outTar + '.gz'
        if os.path.exists(outGzip):
            os.remove(outGzip)
        runSystemEx('mkdir -p ' + compress)
        files = loadTagFile(outName)('files')
        baseDirFiles, nonBaseDirFiles = partitionFiles(files, tagBaseDir)

        # Files are appended to the tar in chunks of 20 per tar invocation.
        # NOTE(review): the command is assembled with ' '.join, so paths
        # containing whitespace are not quoted - confirm how runSystemEx
        # executes its argument before relying on such paths
        for tarRoot, group in ((tagBaseDir, baseDirFiles), ('/', nonBaseDirFiles)):
            if not group:
                continue
            for fs in func.chunk(20, group):
                cmd = ['tar', '-C', tarRoot, '-rf', outTar]
                cmd.extend([removeBase('/', f) for f in fs])
                runSystemEx(' '.join(cmd), log=True)

        ##
        # It's possible we have no values here; if so, the tar was not
        # created and should be ignored
        if os.path.exists(outTar):
            runSystemEx('gzip ' + outTar, log=True)
            metadata = func.updateDict(metadata, {'compressed': True,
                                                  'compressed_file': outGzip})

    ##
    # If tagBaseDir is set it means we have some metadata to write
    if tagBaseDir:
        metadata['tag_base_dir'] = tagBaseDir

    metadataName = outName + '.metadata'
    if append and os.path.exists(metadataName):
        with open(metadataName) as metadataFile:
            tmd = json.loads(metadataFile.read())
        # Values passed in take precedence over what was stored on disk
        metadata = func.updateDict(tmd, metadata)

    with open(metadataName, 'w') as outFile:
        outFile.write(json.dumps(metadata, indent=1) + '\n')

    return loadTagFile(outName)
def _realizeUrls(request):
    """
    Download the URLs stored in a tag's metadata onto the destination
    cluster and return the resulting tag as loaded from that cluster.

    This is a generator that yields deferreds and finishes through
    defer.returnValue (presumably driven by Twisted's inlineCallbacks; the
    decorator is not visible in this chunk).

    request.body keys read here: tag_name, dst_cluster, user_name and
    task_name.

    Raises RealizePhantomError (after marking the local task as failed) when
    a task that is waited on ends in TASK_FAILED.
    """
    body = request.body
    localTag = yield request.state.tagPersist.loadTag(body['tag_name'])

    # The URLs are fetched by realizing a fake phantom tag whose command
    # runs the downloader over every URL
    downloadCommand = ('reliableDownloader.py -m 300 -t 20 -b ${base_dir} ' +
                       ' '.join(localTag.metadata['urls']))
    fakePhantom = {'cluster.ALL.command': downloadCommand}

    taskName = yield www_tags.realizePhantom('localhost',
                                             body['dst_cluster'],
                                             body['user_name'],
                                             localTag.tagName,
                                             fakePhantom,
                                             func.updateDict(localTag.metadata,
                                                             {'urls_realized': True}))

    localTask = yield tasks_tx.loadTask(body['task_name'])
    endState, _ = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                       body['dst_cluster'],
                                                       taskName,
                                                       localTask)

    if endState == tasks_tx.task.TASK_FAILED:
        yield tasks_tx.updateTask(body['task_name'],
                                  lambda tsk: tsk.setState(tasks_tx.task.TASK_FAILED))
        raise RealizePhantomError(body['tag_name'])

    # Append the tag's original files to the tag on the destination
    if body['dst_cluster'] == 'local':
        yield tag_mq_data.tagData(request.state,
                                  body['tag_name'],
                                  body['task_name'],
                                  files=localTag.files,
                                  action=tag_mq_data.ACTION_APPEND,
                                  metadata={},
                                  recursive=False,
                                  expand=False,
                                  compressDir=None)
    else:
        # NOTE(review): the value produced by www_tags.tagData is not used;
        # the wait below is on taskName from realizePhantom above, not on
        # anything returned here - confirm that is the intended task
        yield www_tags.tagData('localhost',
                               body['dst_cluster'],
                               body['user_name'],
                               action=tag_mq_data.ACTION_APPEND,
                               tagName=localTag.tagName,
                               files=localTag.files,
                               metadata={},
                               recursive=False,
                               expand=False,
                               compressDir=None)

        localTask = yield tasks_tx.loadTask(body['task_name'])
        endState, _ = yield tasks_tx.blockOnTaskAndForward('localhost',
                                                           body['dst_cluster'],
                                                           taskName,
                                                           localTask)

        if endState == tasks_tx.task.TASK_FAILED:
            yield tasks_tx.updateTask(body['task_name'],
                                      lambda tsk: tsk.setState(tasks_tx.task.TASK_FAILED))
            raise RealizePhantomError(body['tag_name'])

    # Load the tag up and return it so the caller can see the files created
    realizedTag = yield www_tags.loadTag('localhost',
                                         body['dst_cluster'],
                                         body['user_name'],
                                         body['tag_name'])
    defer.returnValue(realizedTag)
def dump(credential):
    """
    Save a credential into the 'credentials' collection of the 'clovr'
    database, using the credential's 'name' as the document '_id'.

    Returns whatever the collection's save() call returns.
    """
    # Resolve the collection first, then build the document, matching the
    # order in which the one-line form evaluated its pieces
    credentials = pymongo.Connection().clovr.credentials
    document = updateDict({'_id': credential['name']}, credential)
    return credentials.save(document)