def submit_new_job(srv, gromacs_config, cerise_config):
    """
    Create a new job using the provided `srv` and `cerise_config`.
    The job's input is extracted from the `gromacs_config`.
    """
    print("Creating Cerise-client job")
    job = create_lie_job(srv, gromacs_config, cerise_config)

    # Associate a CWL workflow with the job
    job.set_workflow(cerise_config['cwl_workflow'])
    print("CWL workflow is: {}".format(cerise_config['cwl_workflow']))

    # Run the job on the remote machine
    print("Running the job in a remote machine using docker: {}".format(
        cerise_config['docker_image']))

    # Submit the job and register it
    job.run()

    # Collect data
    srv_data = collect_srv_data(
        cc.service_to_dict(srv), gromacs_config, cerise_config)

    return_value(srv_data)

def flatten(self, session=None):
    # type: (CommonSession) -> bool
    if self.cached:
        return_value(True)

    if session.component_config.static.vendor == self.vendor and \
            session.component_config.static.component == self.component:
        self._retrieve_local(
            os.path.join(session.component_schemas_path(), 'resources'),
            self.schema_path, self.versions)
    else:
        yield self._retrieve_wamp(session)

    success = True

    for version, schema in self.cached.items():
        flattened = yield self._recurse_subschemas(schema, session)
        self.cached[version] = flattened['schema']

        if not flattened['success']:
            success = False
            break

    if not success:
        self.cached = {}

    return_value(success)

def get(self, uri, clean_cache=True, **kwargs):
    """
    Retrieve the JSON Schema describing an MDStudio endpoint (request or
    response) or resource based on a WAMP or MDStudio schema URI.

    The method returns a Twisted deferred object for which the results
    can be obtained using `yield`.

    :param uri:    MDStudio endpoint or resource JSON Schema URI to retrieve
    :type uri:     :py:str
    :param kwargs: additional keyword arguments are passed to the
                   `schema_uri_to_dict` function
    :type kwargs:  :py:dict

    :return: Schema as Twisted deferred object
    """
    # Parse uri elements to dictionary
    uri_dict = schema_uri_to_dict(uri, **kwargs)
    uri = dict_to_schema_uri(uri_dict)

    # Clean the uri cache
    if clean_cache:
        self._schema_cache = {}

    # Recursively call the MDStudio schema endpoint to obtain schemas
    yield self._recursive_schema_call(uri_dict)

    return_value(self._build_schema(self._schema_cache.get(uri, {})))

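# A minimal usage sketch of the schema getter above, assuming a `schema_client`
# instance exposing get() and a hypothetical endpoint schema URI; the deferred
# it returns is consumed with `yield` inside a chainable coroutine.
@chainable
def print_request_schema(schema_client):
    schema = yield schema_client.get(
        'endpoint://mdgroup/mdstudio_gromacs/run_gromacs/request/v1')
    print(json.dumps(schema, indent=2))
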
def run_async_protein_protein_md(self, request, claims):
    """
    Run asynchronous Gromacs MD of a protein-ligand system in solvent
    """
    output = yield self.run_async_gromacs_gromacs(request, claims)
    return_value(output)

def flatten(self, session=None):
    # type: (CommonSession) -> bool
    if self.cached:
        return_value(True)

    ldir = self.search_dir(session)

    if self.schema_subdir:
        ldir = os.path.join(ldir, self.schema_subdir)

    try:
        self._retrieve_local(ldir, self.schema_path, self.versions)
    except FileNotFoundError as ex:
        raise RegisterException(
            'Tried to access schema "{}/{}" with versions {}, '
            'but the schema was not found:\n{}'.format(
                ldir, self.schema_path, self.versions, str(ex)))

    success = True

    for version, schema in self.cached.items():
        flattened = yield self._recurse_subschemas(schema, session)
        self.cached[version] = flattened['schema']

        if not flattened['success']:
            success = False
            break

    if not success:
        self.cached = {}

    return_value(success)

def find_many(self, filter, *args, **kwargs):
    # type: (DocumentType, Optional[ProjectionOperators], Optional[int], Optional[int], SortOperators, Optional[Fields]) -> Cursor
    @chainable
    def get_results(filter, paging, meta, self=self, args=args, **kwargs):
        if paging or isinstance(paging, dict):
            paging['total'] = yield self.model.count(filter)
            paging['page'] = meta['page']
            paging['lastPage'] = paging['total'] // (
                meta['page'] * meta['limit'] - 1)

        results = yield self.model.find_many(
            filter, *args, **kwargs['db']).to_list()

        return_value(results)

    results, prev_meta, next_meta = yield paginate_cursor(
        filter, get_results, **kwargs)

    return_value((results, prev_meta, next_meta))

def wait_till_running(srv, job_name):
    """wait until the job is running"""
    job = srv.get_job_by_name(job_name)
    while job.state.lower() == 'waiting':
        sleep(2)

    return_value('running')

def query_simulation_results(request, cerise_db):
    """
    Check the status of a given Cerise job and collect its results when done.

    :param request: Cerise managed remote job settings.
    :type request: :py:dict
    :param cerise_db: MongoDB db to store the information related to the
        Cerise services and jobs.
    """
    results = {}
    task_id = request['task_id']
    try:
        if 'name' in request:
            srv_data = request
        else:
            # Search for the service
            srv_data = yield cerise_db.find_one(
                'cerise', {'task_id': task_id})['result']

        # Start service if necessary
        srv = cc.service_from_dict(srv_data)
        job = srv.get_job_by_name(srv_data['task_id'])
        status = job.state

        # Job is still running
        if any(status.lower() == x for x in ["waiting", "running"]):
            status = 'running'

        # Job done
        elif status.lower() == 'success':
            output = wait_extract_clean(job, srv, srv_data['workdir'],
                                        srv_data['clean_remote'])
            results = serialize_files(output)
            status = 'completed'

            # Shutdown Service if there are no other jobs running
            yield try_to_close_service(srv_data)

        # Job fails
        else:
            print("Job {} has FAILED!\nCheck output at: {}".format(
                request['task_id'], srv_data['workdir']))
            output = wait_extract_clean(job, srv, srv_data['workdir'],
                                        srv_data['clean_remote'])
            status = 'failed'

        return_value({
            'status': status,
            'task_id': task_id,
            'results': results})

    except cc.errors.JobNotFound:
        msg = "Job with configuration:\n{}\nWas not found!".format(request)
        raise RuntimeError(msg)

def call(self, procedure, request, claims=None, context=None, **kwargs):
    if context is None:
        context = self.default_call_context

    claims = context.get_claims(claims)
    claims['uri'] = procedure
    claims['action'] = 'call'

    request = deepcopy(request)
    convert_obj_to_json(request)

    claims['requestHash'] = request_hash(request)

    signed_claims = yield super(CommonSession, self).call(
        u'mdstudio.auth.endpoint.sign', claims)

    if signed_claims is None:
        claims.pop('requestHash')
        raise CallException(
            'Claims were not signed. You are not authorized for signing: \n{}'
            .format(json.dumps(claims, indent=2)))

    def make_original_call():
        return Chainable(
            super(CommonSession, self).call(u'{}'.format(procedure),
                                            request,
                                            signed_claims=signed_claims,
                                            **kwargs))

    try:
        result = yield make_original_call()
    except ApplicationError:
        result = APIResult(error='Call to {uri} failed'.format(uri=procedure))

    if 'expired' in result:
        signed_claims = yield super(CommonSession, self).call(
            u'mdstudio.auth.endpoint.sign', claims)

        try:
            result = yield make_original_call()
        except ApplicationError:
            result = APIResult(error='Call to {uri} failed'.format(
                uri=procedure))

        if 'expired' in result:
            raise CallException(result['expired'])

    if 'error' in result:
        raise CallException(result['error'])

    if 'warning' in result:
        self.log.warn(result['warning'])

    return_value(result.get('data', None))

def _refresh(self):
    more = yield self.wrapper.more(cursor_id=self._id)
    self._id = more.get('cursorId', None)

    last_entry = self._data.popleft()
    if self._fields:
        self._fields.convert_call(last_entry)

    self._data = deque(more['results'])
    self._alive = self._id is not None and more['alive'] and len(self._data) > 0
    self._refreshing = False

    return_value(last_entry)

def _register_scopes(self):
    # NOTE: return_value() raises immediately, so the scope registration
    # below is currently short-circuited and never executed.
    return_value(True)

    if self.function_scopes:
        res = yield self.call(
            'mdstudio.auth.endpoint.oauth.registerscopes.{}'.format(
                self.component_info.get('namespace')),
            {'scopes': self.function_scopes})

        self.log.info('Registered {count} scopes for {package}',
                      count=len(self.function_scopes),
                      package=self.component_info['package_name'])

def run_async_gromacs_gromacs(self, request, claims):
    """
    Async version of the `run_gromacs_gromacs` function.
    """
    cerise_config, gromacs_config = self.setup_environment(request)
    cerise_config['clean_remote'] = request.get('clean_remote_workdir', True)

    output = yield call_async_cerise_gromit(
        gromacs_config, cerise_config, self.db)

    return_value(output)

def run_gromacs_ti(self, request, claims):
    """
    Start a lie_ti task and return a placeholder result.
    """
    task_id = self.component_config.session.session_id
    request.update({"task_id": task_id})
    self.log.info("starting lie_ti task_id:{}".format(task_id))

    output = {'answer': 42}
    status = 'failed' if output is None else 'completed'

    return_value({'status': status, 'output': output})

def test_kwargs(self):
    class Test:
        @make_deferred
        def add(self, a, b):
            return a + b

    test = Test()
    test_add = test.add(**{'a': 12, 'b': 3})
    self.assertEqual((yield test_add), 15)

    return_value({})

def run_ligand_solvent_md(self, request, claims):
    """
    Run Gromacs MD of ligand in solvent

    TODO: Still requires the protein topology and positional restraint
    (include) files. Makes no sense for ligand in solvent but required
    by gromit somehow.
    """
    # Protein structure not needed. Explicitly set to None
    request['protein_file'] = None

    output = yield self.run_gromacs_gromacs(request, claims)
    return_value(output)

def test_no_args(self):
    class Test:
        @make_deferred
        def test(self):
            return 3

    test = Test()
    test_test = test.test()
    self.assertIsInstance(test_test, Chainable)
    self.assertEqual(3, (yield test_test))

    return_value({})

def test_args(self):
    class Test:
        @make_deferred
        def add(self, a, b):
            return a + b

    test = Test()
    test_add = test.add(6, 9)
    self.assertIsInstance(test_add, Chainable)
    self.assertEqual((yield test_add), 15)

    return_value({})

def call_wrapped(self, request, claims):
    meta = None
    id = None
    if 'next' in request:
        id = request['next']
    elif 'previous' in request:
        id = request['previous']

    if id:
        meta = json.loads(
            self.instance.session.cache.extract('cursor#{}'.format(id)))

        if meta.get('uuid') != id:
            return_value(
                APIResult(
                    error='You tried to get a cursor that either doesn\'t '
                          'exist, or is expired. Please check your code.'))

    if not meta:
        meta = None

    paging = {'uri': self.uri}

    if 'paging' in request and 'limit' in request['paging']:
        paging['limit'] = request['paging']['limit']

    result, prev, nxt = yield self.wrapped(
        self.instance, request, claims['claims'],
        **{'paging': paging, 'meta': meta})

    if prev:
        prev_uuid = uuid.uuid4()
        prev['uuid'] = prev_uuid
        paging['previous'] = prev_uuid
        self.instance.session.cache.put('cursor#{}'.format(prev_uuid),
                                        timedelta(minutes=10),
                                        json.dumps(prev))

    # Cache the forward cursor returned by the wrapped call (nxt, not the
    # builtin next)
    if nxt:
        next_uuid = uuid.uuid4()
        nxt['uuid'] = next_uuid
        paging['next'] = next_uuid
        self.instance.session.cache.put('cursor#{}'.format(next_uuid),
                                        timedelta(minutes=10),
                                        json.dumps(nxt))

    # Only return the full paging details when the request explicitly asks
    # for them via 'addPageInfo'
    if not ('paging' in request and 'addPageInfo' in request['paging']
            and request['paging']['addPageInfo']):
        paging = {'uri': paging['uri']}

    return_value({'results': result, 'paging': paging})

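# A minimal usage sketch (assumed WAMP session object and hypothetical endpoint
# URI) of the cursor protocol implemented by call_wrapped above: page info is
# requested explicitly with 'addPageInfo', and follow-up pages are fetched by
# passing back the 'next' cursor id from the previous response.
@chainable
def fetch_all_pages(session):
    page_options = {'paging': {'limit': 50, 'addPageInfo': True}}

    response = yield session.call(
        'mdgroup.example.endpoint.list_items', dict(page_options))
    entries = list(response['results'])

    # Keep following the 'next' cursor until the endpoint stops returning one
    while 'next' in response['paging']:
        request = dict(page_options, next=response['paging']['next'])
        response = yield session.call(
            'mdgroup.example.endpoint.list_items', request)
        entries.extend(response['results'])

    return_value(entries)
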
def run_async_ligand_solvent_md(self, request, claims):
    """
    Run Gromacs MD of ligand in solvent.

    Invokes a ligand-solvent simulation and returns immediately, handing
    the caller the information needed to query for the results.

    TODO: Still requires the protein topology and positional restraint
    (include) files. Makes no sense for ligand in solvent but required
    by gromit somehow.
    """
    # Protein structure not needed. Explicitly set to None
    request['protein_file'] = None

    output = yield self.run_async_gromacs_gromacs(request, claims)
    return_value(output)

def query_gromacs_results(self, request, claims):
    """
    Check the status of the simulation and return the results if available.

    The request should at least contain a task_id stored in the cerise job
    DB. The response is a typical async_gromacs response, a 'Future' object.
    """
    output = yield query_simulation_results(request, self.db)

    for key, value in request.items():
        if key not in output:
            output[key] = value

    return_value(output)

def _handler(*args, **kwargs):
    signed_claims = kwargs.pop('signed_claims', None)
    assert signed_claims, "Subscribe was called without claims"

    claims = yield super(CommonSession, self).call(
        'mdstudio.auth.endpoint.verify', signed_claims)

    if not ('error' in claims or 'expired' in claims):
        claims = claims['claims']

        if not self.authorize_request(topic, claims):
            self.log.warn("Unauthorized publish to {topic}", topic=topic)
        else:
            return_value((yield handler(*args, claims=claims, **kwargs)))

def get_results(filter, paging, meta, self=self, args=args, **kwargs):
    if paging or isinstance(paging, dict):
        paging['total'] = yield self.model.count(filter)
        paging['page'] = meta['page']
        paging['lastPage'] = paging['total'] // (
            meta['page'] * meta['limit'] - 1)

    results = yield self.model.find_many(
        filter, *args, **kwargs['db']).to_list()

    return_value(results)

def publish(self, topic, claims=None, context=None, options=None):
    if context is None:
        context = self.default_call_context

    claims = context.get_claims(claims)

    signed_claims = yield super(CommonSession, self).call(
        u'mdstudio.auth.endpoint.sign', claims)

    options = options or PublishOptions(acknowledge=True, exclude_me=False)

    result = yield super(CommonSession, self).publish(
        topic, signed_claims=signed_claims,
        options=options)  # type: Publication

    return_value(result)

def find_one_and_delete(self, filter, projection=None, sort=None, fields=None):
    # type: (DocumentType, Optional[ProjectionOperators], SortOperators, Optional[Fields]) -> Union[Optional[dict], Chainable]
    fields = self.fields(fields)
    result = self.wrapper.find_one_and_delete(self.collection,
                                              filter=filter,
                                              projection=projection,
                                              sort=sort,
                                              fields=fields)
    result = yield self.wrapper.extract(result, 'result')

    if fields:
        fields.convert_call(result)

    return_value(result)

def call_cerise_gromit(gromacs_config, cerise_config, cerise_db):
    """
    Use cerise to run gromacs in a remote cluster, see:
    http://cerise-client.readthedocs.io/en/latest/

    :param gromacs_config: gromacs simulation parameters
    :type gromacs_config: :py:dict
    :param cerise_config: cerise-client process settings.
    :type cerise_config: :py:dict
    :param cerise_db: MongoDB db to store the information related to the
        Cerise services and jobs.

    :returns: MD output file paths
    :rtype: :py:dict
    """
    srv_data = None
    try:
        # Run Jobs
        srv = create_service(cerise_config)
        srv_data = yield submit_new_job(srv, gromacs_config, cerise_config)

        # Register Job
        srv_data['clean_remote'] = cerise_config['clean_remote']
        register_srv_job(srv_data, cerise_db)

        # Extract results
        output = yield query_simulation_results(srv_data, cerise_db)

        # Update job state in DB
        update_srv_info_at_db(srv_data, cerise_db)

    except Exception as e:
        print("simulation failed due to: {0}".format(e))
        output = {'status': 'failed', 'task_id': cerise_config['task_id']}

    finally:
        # Shutdown Service if there are no other jobs running
        if srv_data is not None:
            yield try_to_close_service(srv_data)

    # Poll until the job has either completed or failed
    while output.get('status', 'failed') not in ('completed', 'failed'):
        output = yield query_simulation_results(srv_data, cerise_db)
        sleep(30)

    return_value(output)

def _refresh(self, direction):
    if direction == self.Direction.Forward:
        more = yield self._session.call(self._uri, {'next': self._next})
        last_entry = self._data[self._current]
        self._data = more['results']
        self._current = 0
    else:
        more = yield self._session.call(self._uri,
                                        {'previous': self._previous})
        last_entry = self._data[self._current - 1]
        self._data = more['results'] + self._data[:self._current]
        self._current += len(more['results'])

    self._next = more['paging'].get('next', None)
    self._previous = more['paging'].get('previous', None)
    self._alive = (self._next is not None or self._previous is not None)
    self._refreshing = False

    return_value(last_entry)

def run_gromacs_gromacs(self, request, claims):
    """
    First it calls gromit to compute the ligand-solute energies, then calls
    gromit to calculate the protein-ligand energies.

    The Cerise-client infrastructure is used to perform the computations
    on a remote server, see:
    http://cerise-client.readthedocs.io/en/master/index.html

    This function expects the following keywords to call gromit:
        * cerise_file
        * protein_file (optional)
        * protein_top
        * ligand_file
        * topology_file
        * residues

    The cerise_file is the path to the file containing the configuration
    information required to start a Cerise service.

    Further include files (e.g. *.itp files) can be included as a list:
    include=[atom_types.itp, another_itp.itp]

    To perform the energy decomposition a list of the numerical residue
    identifiers is expected, for example: residues=[1, 5, 7, 8]

    Note: the protein_file argument is optional. If you do not provide it,
    the method will perform a SOLVENT LIGAND MD; if you provide the
    `protein_file` it will perform a PROTEIN-LIGAND MD.
    """
    cerise_config, gromacs_config = self.setup_environment(request)
    cerise_config['clean_remote'] = request.get('clean_remote_workdir', True)

    # Run the MD and retrieve the energies
    output = yield call_cerise_gromit(gromacs_config, cerise_config, self.db)

    return_value(output)

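# A minimal sketch of a request for run_gromacs_gromacs above; the file names
# are hypothetical, only the keywords match the docstring.
example_request = {
    'cerise_file': 'cerise_config.json',   # Cerise service configuration
    'protein_file': 'protein.pdb',         # omit or set to None for a solvent-ligand MD
    'protein_top': 'protein.top',
    'ligand_file': 'ligand.mol2',
    'topology_file': 'ligand.itp',
    'include': ['atom_types.itp'],         # optional extra include files
    'residues': [1, 5, 7, 8],              # residues for the energy decomposition
    'clean_remote_workdir': True,
}
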
def call_async_cerise_gromit(gromacs_config, cerise_config, cerise_db):
    """
    Use cerise to run gromacs in a remote cluster, see:
    http://cerise-client.readthedocs.io/en/latest/

    It returns immediately and provides the user with the information
    needed to query for the results.

    :param gromacs_config: gromacs simulation parameters
    :type gromacs_config: :py:dict
    :param cerise_config: cerise-client process settings.
    :type cerise_config: :py:dict
    :param cerise_db: MongoDB db to store the information related to the
        Cerise services and jobs.

    :returns: MD output file paths
    :rtype: :py:dict
    """
    srv_data = None
    try:
        # Run Jobs
        srv = create_service(cerise_config)
        srv_data = yield submit_new_job(srv, gromacs_config, cerise_config)
        srv_data['status'] = yield wait_till_running(srv, srv_data['task_id'])

        # Register Job
        srv_data['clean_remote'] = cerise_config['clean_remote']
        register_srv_job(srv_data, cerise_db)

    except Exception as e:
        print("simulation failed due to: {0}".format(e))
        return_value({'status': 'failed',
                      'task_id': cerise_config['task_id']})

    output = {
        'status': 'running',
        'task_id': srv_data['task_id'],
        'query_url': 'mdgroup.mdstudio_gromacs.endpoint.query_gromacs_results'}

    return_value(output)

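# A minimal follow-up sketch, assuming a WAMP session object `session`; the
# helper name is hypothetical. The async call above hands back a task_id plus
# the endpoint to poll, which the caller can query until the job finishes.
@chainable
def poll_md_results(session, async_output):
    response = dict(async_output)
    while response.get('status') == 'running':
        sleep(30)  # same blocking sleep helper used elsewhere in this module
        response = yield session.call(
            async_output['query_url'],
            {'task_id': async_output['task_id']})

    return_value(response)
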
def _recurse_subschemas(self, schema, session):
    success = True

    if isinstance(schema, dict):
        ref = schema.pop('$ref', None)

        if ref:
            ref_decomposition = re.match(r'(\w+)://(.+)', ref)

            if not ref_decomposition:
                raise RegisterException(
                    '$ref value in the schema must hold a valid resource uri. '
                    'This may be given as resource://<uri>, endpoint://<uri>, '
                    'or https://<url>, you specified "{}"'.format(ref))

            subschema = self._schema_factory(ref_decomposition.group(1),
                                             ref_decomposition.group(2))

            if (yield subschema.flatten(session)):
                schema.update(subschema.to_schema())
            else:
                success = False

        if success:
            for k, v in schema.items():
                recursed = yield self._recurse_subschemas(v, session)

                if not recursed['success']:
                    success = False
                    break

                schema[k] = recursed['schema']

    elif isinstance(schema, list):
        for v in schema:
            success = success and (yield self._recurse_subschemas(
                v, session))['success']

    return_value({'schema': schema, 'success': success})

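# A minimal illustration (hypothetical URIs) of the $ref forms accepted by
# _recurse_subschemas above: the scheme before '://' selects the schema
# factory, and the remainder identifies the resource or endpoint to inline.
example_schema = {
    'type': 'object',
    'properties': {
        'job': {'$ref': 'resource://mdgroup/common_resources/task-meta/v1'},
        'settings': {'$ref': 'endpoint://mdgroup/mdstudio_gromacs/run_gromacs/v1'},
    },
}
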
def find_one_and_replace(self, filter, replacement, upsert=False,
                         projection=None, sort=None, return_updated=False,
                         fields=None):
    # type: (DocumentType, DocumentType, bool, Optional[ProjectionOperators], SortOperators, bool, Optional[Fields]) -> Union[Optional[dict], Chainable]
    fields = self.fields(fields)
    result = self.wrapper.find_one_and_replace(self.collection,
                                               filter=filter,
                                               replacement=replacement,
                                               upsert=upsert,
                                               projection=projection,
                                               sort=sort,
                                               return_updated=return_updated,
                                               fields=fields)
    result = yield self.wrapper.extract(result, 'result')

    if fields:
        fields.convert_call(result)

    return_value(result)