def get_probanno(self, ctx, input):
    # ctx is the context object
    # return variables are: output
    #BEGIN get_probanno
    ''' Convert a probabilistic annotation object into a human-readable table.

    Retrieves the ProbAnno object named in the input from the workspace service
    and returns its per-gene roleset likelihoods.

    @param ctx Current context object
    @param input Dictionary with input parameters for function
    @return Dictionary keyed by gene to a list of tuples with roleset and likelihood
    @raise WrongVersionError when ProbAnno object version number is invalid
    '''

    # Validate required and optional input arguments.
    input = self._checkInputArguments(ctx, input,
                                      ['probanno', 'probanno_workspace'],
                                      { 'probanno_version': None }
                                      )

    # Fetch the ProbAnno object from the specified workspace.
    workspaceClient = Workspace(self.config['workspace_url'], token=ctx['token'])
    objectIdentity = make_object_identity(input['probanno_workspace'], input['probanno'], input['probanno_version'])
    probAnnoObject = workspaceClient.get_objects( [ objectIdentity ] )[0]

    # Reject any object whose stored type does not match the expected ProbAnno type.
    objectType = probAnnoObject['info'][2]
    if objectType != ProbAnnoType:
        message = 'ProbAnno object type %s is not %s for object %s' %(objectType, ProbAnnoType, probAnnoObject['info'][1])
        ctx.log_err(message)
        raise WrongVersionError(message)

    # The roleset probabilities dictionary is the human-readable payload.
    output = probAnnoObject['data']['roleset_probabilities']
    #END get_probanno

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method get_probanno return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def get_rxnprobs(self, ctx, input):
    # ctx is the context object
    # return variables are: output
    #BEGIN get_rxnprobs
    ''' Convert a reaction probability object into a human-readable table.

    Retrieves the RxnProbs object named in the input from the workspace service
    and returns its reaction probability tuples, optionally sorted.

    @param ctx Current context object
    @param input Dictionary with input parameters for function
    @return List of reaction_probability tuples
    @raise WrongVersionError when RxnProbs object version number is invalid
    '''

    # Sanity check on input arguments
    input = self._checkInputArguments(ctx, input,
                                      [ 'rxnprobs', 'rxnprobs_workspace' ],
                                      { 'rxnprobs_version': None, 'sort_field': 'rxnid' }
                                      )

    # Fetch the RxnProbs object from the specified workspace.
    workspaceClient = Workspace(self.config['workspace_url'], token=ctx['token'])
    objectIdentity = make_object_identity(input['rxnprobs_workspace'], input['rxnprobs'], input['rxnprobs_version'])
    rxnProbsObject = workspaceClient.get_objects( [ objectIdentity ] )[0]

    # Reject any object whose stored type does not match the expected RxnProbs type.
    objectType = rxnProbsObject['info'][2]
    if objectType != RxnProbsType:
        message = 'RxnProbs object type %s is not %s for object %s' %(objectType, RxnProbsType, rxnProbsObject['info'][1])
        ctx.log_err(message)
        raise WrongVersionError(message)

    output = rxnProbsObject['data']['reaction_probabilities']

    # Sort in place by the requested field: ascending by reaction ID, or
    # descending by likelihood.  Any other sort_field leaves the stored order.
    sortField = input['sort_field']
    if sortField == 'rxnid':
        output.sort(key=lambda entry: entry[0])
    elif sortField == 'probability':
        output.sort(key=lambda entry: entry[1], reverse=True)
    #END get_rxnprobs

    # At some point might do deeper type checking...
    if not isinstance(output, list):
        raise ValueError('Method get_rxnprobs return value ' +
                         'output is not type list as required.')
    # return the results
    return [output]
def annotate(self, ctx, input):
    # ctx is the context object
    # return variables are: jobid
    #BEGIN annotate
    ''' Compute probabilistic annotations from the specified genome object.

    The input dictionary must contain the following keys:
    genome: Name of genome object
    genome_workspace: Workspace from which to grab the Genome object
    probanno: Name of probanno object to output
    probanno_workspace: Workspace to which to save the ProbAnno object

    The following keys are optional:
    verbose: Print lots of messages on the progress of the algorithm

    @param ctx Current context object
    @param input Dictionary with input parameters for function
    @return Job ID of job started to compute annotation likelihoods
    '''

    input = self._checkInputArguments(ctx, input,
                                      [ "genome", "genome_workspace", "probanno", "probanno_workspace"],
                                      { "verbose" : False }
                                      )

    # Make sure the static database files are ready.
    self._checkDatabaseFiles(ctx)

    # Set log level to INFO when verbose parameter is enabled.
    if input['verbose']:
        ctx.set_log_level(log.DEBUG)

    # Make sure the Genome object is available (raises if it is not;
    # the worker fetches the full object later).
    wsClient = Workspace(self.config["workspace_url"], token=ctx['token'])
    genomeIdentity = make_object_identity(input['genome_workspace'], input['genome'])
    wsClient.get_object_info( [ genomeIdentity ], 0 )

    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=ctx['token'])

    # Create a job to track running probabilistic annotation.
    description = 'pa-annotate for genome %s to probanno %s for user %s' %(input['genome'], input['probanno'], ctx['user_id'])
    progress = { 'ptype': 'task', 'max': 5 }
    jobid = ujsClient.create_and_start_job(ctx['token'], 'initializing', description, progress, timestamp(3600))
    ctx.log_info('Job '+jobid+' started for genome '+input['genome']+' to probanno '+input['probanno'])

    # Run the job on the local machine.
    if self.config["job_queue"] == "local":
        # Create working directory for job and build file names.
        jobDirectory = make_job_directory(self.config['work_folder_path'], jobid)
        jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
        outputFilename = os.path.join(jobDirectory, 'stdout.log')
        errorFilename = os.path.join(jobDirectory, 'stderr.log')

        # Save data required for running the job.
        # Bug fix: the file handle returned by open() was never closed, so the
        # job data file could still be buffered/unflushed when the detached
        # worker process started reading it.  A with-block guarantees the file
        # is flushed and closed before the worker is launched.
        jobData = { 'id': jobid, 'input': input, 'context': ctx, 'config': self.config }
        with open(jobDataFilename, 'w') as jobDataFile:
            json.dump(jobData, jobDataFile, indent=4)

        # Start worker to run the job in the background, detached from this
        # server process; stdout and stderr are captured in the job directory.
        jobScript = os.path.join(os.environ['KB_TOP'], 'bin/pa-runjob')
        cmdline = "nohup %s %s >%s 2>%s &" %(jobScript, jobDirectory, outputFilename, errorFilename)
        status = os.system(cmdline)
        ctx.log_info('Job %s is running on local host, status %d' %(jobid, status))
    #END annotate

    # At some point might do deeper type checking...
    if not isinstance(jobid, basestring):
        raise ValueError('Method annotate return value ' +
                         'jobid is not type basestring as required.')
    # return the results
    return [jobid]
def calculate(self, ctx, input):
    # ctx is the context object
    # return variables are: output
    #BEGIN calculate
    ''' Compute reaction probabilities from a probabilistic annotation.

    The input dictionary must contain the following keys:
    probanno: Name of ProbAnno object to input
    probanno_workspace: Workspace from which to grab the ProbAnno object
    rxnprobs: Name of RxnProbs object
    rxnprobs_workspace: Workspace to which to save the RxnProbs object

    The following keys are optional:
    verbose: Print lots of messages on the progress of the algorithm
    template_model: Name of TemplateModel object
    template_workspace: Workspace from which to grab TemplateModel object

    @param ctx Current context object
    @param input Dictionary with input parameters for function
    @return Object info for RxnProbs object
    @raise WrongVersionError when ProbAnno object version number is invalid
    @raise ValueError when template_workspace input argument is not specified
    '''

    # Sanity check on input arguments
    input = self._checkInputArguments(ctx, input,
                                      ["probanno", "probanno_workspace", "rxnprobs", "rxnprobs_workspace"],
                                      { "verbose" : False,
                                        "template_model" : None,
                                        "template_workspace" : None
                                      }
                                      )

    # Make sure the static database files are ready.
    self._checkDatabaseFiles(ctx)

    # Set log level to INFO when verbose parameter is enabled.
    if input['verbose']:
        ctx.set_log_level(log.DEBUG)

    # Create a workspace client.
    wsClient = Workspace(self.config["workspace_url"], token=ctx['token'])

    # Get the ProbAnno object from the specified workspace.
    probannoObjectId = make_object_identity(input["probanno_workspace"], input["probanno"])
    objectList = wsClient.get_objects( [ probannoObjectId ] )
    probannoObject = objectList[0]
    if probannoObject['info'][2] != ProbAnnoType:
        message = "ProbAnno object type %s is not %s for object %s" %(probannoObject['info'][2], ProbAnnoType, probannoObject['info'][1])
        ctx.log_err(message)
        raise WrongVersionError(message)
    genome = probannoObject["data"]["genome"]

    # Create a temporary directory for storing intermediate files when debug is turned on.
    if ctx.get_log_level() >= log.DEBUG2:
        workFolder = tempfile.mkdtemp("", "calculate-%s-" %(genome), self.config["work_folder_path"])
        ctx.log_debug('Intermediate files saved in '+workFolder)
    else:
        workFolder = None

    # When a template model is specified, use it to build dictionaries for roles,
    # complexes, and reactions instead of retrieving from static database files.
    complexesToRoles = None
    reactionsToComplexes = None
    if input["template_model"] is not None or input["template_workspace"] is not None:
        # Both the template model name and its workspace must be given together.
        if not(input["template_model"] is not None and input["template_workspace"] is not None):
            message = "Template model workspace is required if template model ID is provided"
            ctx.log_err(message)
            raise ValueError(message)

        # Create a dictionary to map a complex to a list of roles and a dictionary
        # to map a reaction to a list of complexes.  The dictionaries are specific to
        # the specified template model instead of covering everything in the central
        # data model.
        complexesToRoles = dict()
        reactionsToComplexes = dict()

        # Get the list of RoleComplexReactions for the template model from the
        # fba modeling service.  The RoleComplexReactions structure has a list
        # of ComplexReactions structures for the given role.  And each ComplexReactions
        # structure has a list of reactions for the given complex.
        fbaClient = fbaModelServices(self.config['fbamodeling_url'], token=ctx['token'])
        roleComplexReactionsList = fbaClient.role_to_reactions( { 'templateModel': input['template_model'], 'workspace': input['template_workspace'] } )

        # Build the two dictionaries from the returned list.
        # (Renamed loop variable so it no longer shadows the builtin 'complex'.)
        for rcr in roleComplexReactionsList:
            for complexDict in rcr['complexes']:
                complexId = re.sub(r'cpx0*(\d+)', r'kb|cpx.\1', complexDict['name']) # Convert ModelSEED format to KBase format
                if complexId in complexesToRoles:
                    complexesToRoles[complexId].append(rcr['name'])
                else:
                    complexesToRoles[complexId] = [ rcr['name'] ]
                for reaction in complexDict['reactions']:
                    reactionId = reaction['reaction']
                    if reactionId in reactionsToComplexes:
                        reactionsToComplexes[reactionId].append(complexId)
                    else:
                        reactionsToComplexes[reactionId] = [ complexId ]

    # Calculate per-gene role probabilities.
    roleProbs = self._rolesetProbabilitiesToRoleProbabilities(ctx, input, genome, probannoObject["data"]["roleset_probabilities"], workFolder)

    # Calculate whole cell role probabilities.
    # Note - eventually workFolder will be replaced with a rolesToReactions call
    totalRoleProbs = self._totalRoleProbabilities(ctx, input, genome, roleProbs, workFolder)

    # Calculate complex probabilities.
    complexProbs = self._complexProbabilities(ctx, input, genome, totalRoleProbs, workFolder, complexesToRequiredRoles = complexesToRoles)

    # Calculate reaction probabilities.
    reactionProbs = self._reactionProbabilities(ctx, input, genome, complexProbs, workFolder, rxnsToComplexes = reactionsToComplexes)

    # If the reaction probabilities were not calculated using the data from the fba modeling service
    # via the template model, we need to convert from the KBase ID format to the ModelSEED format.
    if input["template_model"] is None:
        reactionList = list()
        for index in range(len(reactionProbs)):
            reactionList.append(reactionProbs[index][0])
        EntityAPI = CDMI_EntityAPI(self.config["cdmi_url"])

        # Retry the central data model lookup a few times since it can fail
        # transiently.  Bug fix: previously, if every attempt raised HTTPError
        # (or returned an incomplete result), reactionData was left undefined
        # or partial and the loop below crashed with an unhelpful NameError or
        # KeyError.  Now we track the result explicitly and raise a clear error
        # when all attempts are exhausted.
        reactionData = None
        numAttempts = 4
        while numAttempts > 0:
            try:
                numAttempts -= 1
                reactionData = EntityAPI.get_entity_Reaction( reactionList, [ "source_id" ] )
                if len(reactionList) == len(reactionData):
                    break
            except HTTPError:
                pass
        if reactionData is None or len(reactionData) != len(reactionList):
            message = 'Failed to get source IDs for %d reactions from central data model' %(len(reactionList))
            ctx.log_err(message)
            raise ValueError(message)
        for index in range(len(reactionProbs)):
            rxnId = reactionProbs[index][0]
            reactionProbs[index][0] = reactionData[rxnId]['source_id']

    # Create a reaction probability object
    objectData = dict()
    objectData["genome"] = probannoObject["data"]["genome"]
    objectData['genome_workspace'] = probannoObject['data']['genome_workspace']
    if input["template_model"] is None:
        objectData['template_model'] = 'None'
    else:
        objectData["template_model"] = input["template_model"]
    if input["template_workspace"] is None:
        objectData['template_workspace'] = 'None'
    else:
        objectData["template_workspace"] = input["template_workspace"]
    objectData["probanno"] = input['probanno']
    objectData['probanno_workspace'] = input['probanno_workspace']
    objectData["id"] = input["rxnprobs"]
    objectData["reaction_probabilities"] = reactionProbs

    objectMetaData = { "num_reaction_probs": len(objectData["reaction_probabilities"]) }

    # Build provenance so the saved object records how it was produced.
    objectProvData = dict()
    objectProvData['time'] = timestamp(0)
    objectProvData['service'] = os.environ['KB_SERVICE_NAME']
    objectProvData['service_ver'] = ServiceVersion
    objectProvData['method'] = 'calculate'
    objectProvData['method_params'] = input.items()
    objectProvData['input_ws_objects'] = [ '%s/%s/%d' %(probannoObject['info'][7], probannoObject['info'][1], probannoObject['info'][4]) ]

    # Save the RxnProbs object to the specified workspace.
    objectSaveData = dict()
    objectSaveData['type'] = RxnProbsType
    objectSaveData['name'] = input["rxnprobs"]
    objectSaveData['data'] = objectData
    objectSaveData['meta'] = objectMetaData
    objectSaveData['provenance'] = [ objectProvData ]
    objectInfo = wsClient.save_objects( { 'workspace': input["rxnprobs_workspace"], 'objects': [ objectSaveData ] } )
    output = objectInfo[0]
    #END calculate

    # At some point might do deeper type checking...
    if not isinstance(output, list):
        raise ValueError('Method calculate return value ' +
                         'output is not type list as required.')
    # return the results
    return [output]
def runAnnotate(self, job):
    ''' Run an annotate job to create a ProbAnno typed object.

    A ProbAnno typed object is created in four steps: (1) extract amino acid
    sequences from a Genome typed object to a fasta file, (2) run a BLAST search
    using the amino acid sequences against the subsystem BLAST database, (3)
    calculate annotation likelihood scores for each roleset implied by the
    functions of proteins in subsystems, and (4) save the likelihood scores
    to a ProbAnno typed object.

    The Job dictionary contains three main sections: (1) input parameters to
    the annotate() function, (2) context of server instance running the
    annotate() function, and (3) config variables of server.

    @param job Job dictionary created by server's annotate() function
    @return Nothing (although job is marked as complete)
    '''

    # The input parameters and user context for annotate() were stored in the job data for the job.
    input = job["input"]
    if input['verbose']:
        self.logger.set_log_level(log.DEBUG)
    self.ctx = job["context"]
    self.config = job['config']

    # Create a DataParser object for working with the static database files.
    self.dataParser = DataParser(self.config)

    # Tracks the final job outcome reported to the user and job state service.
    status = None

    try:
        # Make sure the database files are available.
        self.dataParser.checkIfDatabaseFilesExist()

        # Make sure the job directory exists.
        workFolder = make_job_directory(self.config['work_folder_path'], job['id'])

        # Create a user and job state client and authenticate as the user.
        ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=self.ctx['token'])

        # Get the Genome object from the specified workspace.
        # NOTE: each update_job_progress call below is best-effort -- a failure
        # to report progress must not fail the job itself, so exceptions from
        # the progress updates are deliberately swallowed.
        try:
            ujsClient.update_job_progress(job['id'], self.ctx['token'], 'getting genome object', 1, timestamp(3600))
        except:
            pass
        wsClient = Workspace(self.config["workspace_url"], token=self.ctx['token'])
        genomeObjectId = make_object_identity(input["genome_workspace"], input["genome"])
        objectList = wsClient.get_objects( [ genomeObjectId ] )
        genomeObject = objectList[0]

        # Convert Genome object to fasta file.
        try:
            ujsClient.update_job_progress(job['id'], self.ctx['token'], 'converting Genome object to fasta file', 1, timestamp(3600))
        except:
            pass
        fastaFile = self._genomeToFasta(input, genomeObject, workFolder)

        # Run blast using the fasta file.
        try:
            ujsClient.update_job_progress(job['id'], self.ctx['token'], 'running blast', 1, timestamp(3600))
        except:
            pass
        blastResultFile = self._runBlast(input, fastaFile, workFolder)

        # Calculate roleset probabilities.
        try:
            ujsClient.update_job_progress(job['id'], self.ctx['token'], 'calculating roleset probabilities', 1, timestamp(300))
        except:
            pass
        rolestringTuples = self._rolesetProbabilitiesMarble(input, blastResultFile, workFolder)

        # Build ProbAnno object and store in the specified workspace.
        try:
            ujsClient.update_job_progress(job['id'], self.ctx['token'], 'building ProbAnno object', 1, timestamp(120))
        except:
            pass
        output = self._buildProbAnnoObject(input, genomeObject, blastResultFile, rolestringTuples, workFolder, wsClient)

        # Mark the job as done.
        status = "done"
        tb = None
        self._log(log.INFO, 'Job '+job['id']+' finished for genome '+input['genome']+' to probanno '+input['probanno'])

    except:
        # Any failure in the pipeline marks the job as failed; the traceback is
        # captured so it can be attached to the job record for the user.
        tb = traceback.format_exc()
        sys.stderr.write('\n'+tb)
        status = "failed"
        self._log(log.ERR, 'Job '+job['id']+' failed for genome '+input['genome']+' to probanno '+input['probanno'])

    # Mark the job as complete with the given status.
    # NOTE(review): if the failure above happened before ujsClient was created,
    # this line raises NameError and the job is never marked complete -- confirm
    # whether that ordering can occur in practice.
    ujsClient.complete_job(job['id'], self.ctx['token'], status, tb, { })

    # Remove the temporary work directory.
    if self.logger.get_log_level() < log.DEBUG2 and status == 'done':
        try:
            shutil.rmtree(workFolder)
        except OSError:
            # For some reason deleting the directory was failing in production.  Rather than have all jobs look like they failed
            # I catch and log the exception here (since the user still gets the same result if the directory remains intact)
            msg = 'Unable to delete temporary directory %s\n' %(workFolder)
            sys.stderr.write('WARNING: '+msg)
            self._log(log.WARNING, msg)
    return