def get(self, id, id1):
    """Get workflows

    Return the execution steps, and the workflow each of them runs, that
    NGSOnto links to a pipeline.

    Parameters
    ----------
    id: str
        project identifier
    id1: str
        pipeline identifier

    Returns
    -------
    tuple: the rows parsed from the query (``execStep`` and
        ``workflowURI`` columns) followed by the status code 200
    """
    pipeline_uri = (localNSpace + "projects/" + str(id) +
                    "/pipelines/" + str(id1))
    subject = "<" + pipeline_uri + ">"

    # First alternative: execution steps of any pipeline (?pip2) that has
    # one of this pipeline's processes as a part, matched by index.
    shared_process_branch = (
        "{" + subject + " obo:BFO_0000051 ?proc3."
        " ?pip2 obo:BFO_0000051 ?proc3; obo:NGS_0000076 ?execStep."
        " ?proc3 obo:NGS_0000081 ?procIndex3."
        " ?execStep obo:NGS_0000079 ?workflowURI;"
        " obo:NGS_0000081 ?procIndex3.}"
    )

    # Second alternative: follow the RO_0002233 / RO_0002234 link from a
    # process of this pipeline to the process (?proc2) on the other side,
    # and take the steps of its pipeline up to that process' index.
    linked_process_branch = (
        "{" + subject + " obo:BFO_0000051 ?proc1."
        " ?proc1 obo:RO_0002233 ?inputs1."
        " ?proc2 obo:RO_0002234 ?inputs1; obo:NGS_0000081 ?procIndex2."
        " ?pip2 obo:BFO_0000051 ?proc2; obo:BFO_0000051 ?proc3."
        " ?proc3 obo:NGS_0000081 ?procIndex3."
        " ?pip2 obo:NGS_0000076 ?execStep."
        " ?execStep obo:NGS_0000079 ?workflowURI;"
        " obo:NGS_0000081 ?stepIndex."
        " FILTER (?procIndex3 <= ?procIndex2"
        " && ?stepIndex = ?procIndex3). }"
    )

    sparql = (
        "SELECT DISTINCT ?proc3 ?pip2 ?execStep ?workflowURI WHERE {"
        + shared_process_branch
        + " UNION "
        + linked_process_branch
        + "} ORDER BY ?pip2 ASC(?procIndex3)"
    )

    query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
    cursor = query.evaluate()
    rows = parseAgraphQueryRes(cursor, ["execStep", "workflowURI"])
    cursor.close()

    return rows, 200
def get(self):
    """Get protocols in workflow

    Return the protocols attached to the steps of a workflow, together
    with the index of each step. The workflow identifier is read from
    the request arguments (``workflow_id``).

    Returns
    -------
    tuple: the rows parsed from the query (``index`` and ``protocol``
        columns) followed by the status code 200
    """
    request_args = workflow_get_parser.parse_args()
    workflow_uri = localNSpace + "workflows/" + str(
        request_args.workflow_id)

    sparql = (
        "SELECT ?protocol ?index WHERE { <" + workflow_uri + ">"
        " obo:NGS_0000078 ?step."
        " ?step obo:NGS_0000077 ?protocol; obo:NGS_0000081 ?index.}"
    )

    query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
    cursor = query.evaluate()
    rows = parseAgraphQueryRes(cursor, ["index", "protocol"])
    cursor.close()

    return rows, 200
def get(self):
    """Get properties fields

    Return the labels of the properties declared on the resource given
    in the ``uri`` request argument, so the caller can know which fields
    are required to build a protocol.

    Returns
    -------
    tuple: the rows parsed from the query (``plabel`` column) followed
        by the status code 200; the bare value 404 when no ``uri`` was
        supplied
    """
    request_args = project_get_parser.parse_args()

    # Nothing to look up without a subject.
    if not request_args.uri:
        return 404

    # The uri is inserted verbatim as the subject of the first pattern.
    sparql = (
        "SELECT ?plabel WHERE {" + request_args.uri +
        " ?s ?allprop."
        " ?allprop owl:onProperty ?property."
        " ?property rdfs:label ?plabel.}"
    )

    query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
    cursor = query.evaluate()
    rows = parseAgraphQueryRes(cursor, ["plabel"])
    cursor.close()

    return rows, 200
def get(self):
    """Get protocol properties

    Return the label, and the range class when one is declared, of every
    property found on the class given in the ``uri`` request argument or
    on any of its ancestors (``rdfs:subClassOf*``).

    Returns
    -------
    tuple: the rows parsed from the query (``plabel`` and ``rangeClass``
        columns) followed by the status code 200; the bare value 404
        when no ``uri`` was supplied
    """
    request_args = project_get_parser.parse_args()

    if not request_args.uri:
        return 404

    # The uri is inserted verbatim as the subject of the first pattern.
    sparql = (
        "SELECT ?plabel ?rangeClass WHERE {" + request_args.uri +
        " rdfs:subClassOf* ?parents."
        " ?parents ?s ?allprop."
        " ?allprop owl:onProperty ?p ."
        " ?p rdfs:label ?plabel."
        " OPTIONAL {?p rdfs:range ?rangeClass.}}"
    )

    query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
    cursor = query.evaluate()
    rows = parseAgraphQueryRes(cursor, ["plabel", "rangeClass"])
    cursor.close()

    return rows, 200
def get(self):
    """Get protocol instances

    Return every resource whose type is ``obo:OBI_0000272`` or one of
    its (transitive) subclasses, together with that type.

    Returns
    -------
    tuple: the rows parsed from the query (``protocols`` and ``sons``
        columns) followed by the status code 200
    """
    prtocURI = "<http://purl.obolibrary.org/obo/OBI_0000272>"

    queryString = (
        "SELECT ?protocols ?sons WHERE {?sons rdfs:subClassOf* "
        + prtocURI +
        " . ?protocols a ?sons. }"
    )

    tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                           queryString)
    result = tupleQuery.evaluate()
    try:
        jsonResult = parseAgraphQueryRes(result, ["protocols", "sons"])
    finally:
        # The result was previously never closed; release it even when
        # parsing fails.
        result.close()

    return jsonResult, 200
def get(self, id, id2):
    """Get job identifier

    Look up the job identifier stored for each requested process of a
    pipeline. The process ids are read from the comma-separated
    ``processes_ids`` request argument.

    Parameters
    ----------
    id: str
        project identifier
    id2: str
        pipeline identifier

    Returns
    -------
    list: one entry per requested process, each being the parsed query
        rows with ``process_id`` added to the first row. The bare value
        404 is returned instead as soon as any lookup fails, which
        includes a process that has no job identifier.
    """
    args = parser_get_jobid.parse_args()
    processes = args.processes_ids.split(',')

    job_ids = []

    for x in processes:
        try:
            processURI = localNSpace + "projects/" + str(id) + \
                "/pipelines/" + str(id2) + "/processes/" + str(x)

            queryString = "SELECT ?jobid " \
                          "WHERE {<" + processURI + \
                          "> obo:NGS_0000089 ?jobid}"

            tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                                   queryString)
            result = tupleQuery.evaluate()
            try:
                jsonResult2 = parseAgraphQueryRes(result, ["jobid"])
            finally:
                # Previously left open on every iteration.
                result.close()

            # Raises IndexError when the process has no job id; the
            # handler below turns that into the 404 return.
            jsonResult2[0]["process_id"] = x
            job_ids.append(jsonResult2)
        except Exception:
            return 404

    return job_ids
def get(self):
    """Get protocol list

    Return the available protocol types: the classes under
    ``obo:OBI_0000272`` that have a label and no direct subclass of
    their own.

    Returns
    -------
    tuple: the rows parsed from the query (``protocTypeLabel`` and
        ``protocType`` columns) followed by the status code 200
    """
    sparql = (
        "SELECT DISTINCT ?protocTypeLabel ?protocType"
        " WHERE {?protocType rdfs:subClassOf* obo:OBI_0000272;"
        " ?s ?allprop."
        " ?protocType rdfs:label ?protocTypeLabel."
        " FILTER NOT EXISTS {?something rdfs:subClassOf ?protocType}}"
    )

    query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
    cursor = query.evaluate()
    rows = parseAgraphQueryRes(cursor, ["protocTypeLabel", "protocType"])
    cursor.close()

    return rows, 200
def get(self, id, id2, id3):
    """Get outputs of processes

    Return the file entries and the status stored on the output of a
    given process.

    Parameters
    ----------
    id: str
        project identifier
    id2: str
        pipeline identifier
    id3: str
        process identifier

    Returns
    -------
    tuple: the rows parsed from the query (``file_1`` .. ``file_4`` and
        ``statusStr`` columns) followed by the status code 200; the bare
        value 404 when anything fails
    """
    file_columns = ["file_%d" % number for number in range(1, 5)]

    try:
        process_uri = localNSpace + "projects/" + str(id) + \
            "/pipelines/" + str(id2) + "/processes/" + str(id3)
        subject = "<" + process_uri + ">"

        # "(str(?file1) as ?file_1) ... (str(?file4) as ?file_4)"
        file_projection = " ".join(
            "(str(?file%d) as ?file_%d)" % (number, number)
            for number in range(1, 5))

        sparql = (
            "SELECT " + file_projection +
            " (str(?status) as ?statusStr)"
            " WHERE {" + subject + " obo:RO_0002234 ?out. "
            + subject + " obo:RO_0002234 ?in."
            " ?in a ?type.?type rdfs:label ?typelabel."
            " OPTIONAL { ?in obo:NGS_0000092 ?file1;"
            " obo:NGS_0000093 ?file2;"
            " obo:NGS_0000094 ?file3;"
            " obo:NGS_0000096 ?file4.}"
            " OPTIONAL {?in obo:NGS_0000097 ?status.} }"
        )

        query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
        cursor = query.evaluate()
        rows = parseAgraphQueryRes(cursor, file_columns + ["statusStr"])
        cursor.close()

        return rows, 200
    except Exception as e:
        print(e)
        return 404
def get(self, id, id2):
    """Get processes of pipeline

    Collect the processes of a pipeline from NGSOnto. When the input of
    the pipeline's first process was produced by a process of another
    pipeline, the processes of that other pipeline (up to the producing
    one) are included too, and the search repeats from there.

    Parameters
    ----------
    id: str
        project identifier
    id2: str
        pipeline identifier

    Returns
    -------
    tuple: list of process references followed by the status code 200
    """

    def ordered_rows(subject):
        # Processes that are part of `subject`, sorted by their index.
        sparql = (
            "SELECT ?process ?index {" + str(subject) +
            " obo:BFO_0000051 ?process."
            " ?process obo:NGS_0000081 ?index.} ORDER BY ASC(?index)"
        )
        query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
        cursor = query.evaluate()
        rows = parseAgraphQueryRes(cursor, ["process"])
        cursor.close()
        return rows

    pipeline_str = localNSpace + "projects/" + str(id) + \
        "/pipelines/" + str(id2)
    current = dbconAg.createURI(pipeline_str)

    # (pipeline, producing process) pairs found while walking backwards.
    upstream = []

    # Walk back until the first process has no producing process; the
    # walk is capped at 11 look-ups as a safety net.
    for _ in range(11):
        sparql = (
            "SELECT ?process ?process2 ?pipeline2 {" + str(current) +
            " obo:BFO_0000051 ?process."
            " ?process obo:NGS_0000081"
            " '1'^^<http://www.w3.org/2001/XMLSchema#int> ;"
            " obo:RO_0002233 ?input."
            " ?process2 obo:RO_0002234 ?input."
            " ?pipeline2 obo:BFO_0000051 ?process2. } "
        )
        query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
        cursor = query.evaluate()
        rows = parseAgraphQueryRes(
            cursor, ["process2", "pipeline2", "process"])

        found = len(rows) > 0
        if found:
            first = rows[0]
            producer = first["process2"]
            upstream.append((first["pipeline2"], producer))
            current = dbconAg.createURI(first["pipeline2"])

        cursor.close()

        if not found:
            break

    collected = []

    # For every upstream pipeline keep its processes up to, and
    # including, the one that produced the input.
    for upstream_pipeline, last_process in upstream:
        for row in ordered_rows(upstream_pipeline):
            collected.append(row["process"])
            if last_process in row["process"]:
                break

    # Finally append the processes of the requested pipeline itself.
    own_uri = dbconAg.createURI(pipeline_str)
    for row in ordered_rows(own_uri):
        collected.append(row["process"])

    return collected, 200
def post(self, id, id2):
    """Add processes to pipeline

    Create the processes of a pipeline from the protocols of the
    workflows attached to it, and link every new process to its
    protocol, to a new output message and to an input.

    The request arguments used are ``strain_id``, ``real_pipeline_id``
    and, when ``strain_id`` is the string ``"null"``,
    ``parent_pipeline_id`` and ``parent_process_id``.

    Parameters
    ----------
    id: str
        project identifier
    id2: str
        pipeline identifier

    Returns
    -------
    list: identifiers of the processes that were created; the bare value
        404 when creating or linking them fails
    """
    args = process_post_parser.parse_args()

    pipelineStr = localNSpace + "projects/" + str(
        id) + "/pipelines/" + str(id2)

    # get number of processes already mapped on the pipeline
    # NOTE(review): this count is overwritten by the query below; the
    # call is kept as-is, hasPart and pipelineURI are reused later.
    hasPart = dbconAg.createURI(namespace=obo, localname="BFO_0000051")
    pipelineURI = dbconAg.createURI(pipelineStr)
    statements = dbconAg.getStatements(pipelineURI, hasPart, None)
    jsonResult = parseAgraphStatementsRes(statements)
    statements.close()
    numberOfProcesses = len(jsonResult)

    print("Request 1 " + str(id2))

    # get the processes of the pipeline with their index
    queryString = "SELECT (str(?proc) " \
                  "as ?StrProc) (str(?index) as ?StrIndex)" \
                  " WHERE{<" + pipelineStr + "> obo:BFO_0000051 ?proc." \
                  " ?proc obo:NGS_0000081 ?index.}"

    tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                           queryString)
    result = tupleQuery.evaluate()
    procJsonResult = parseAgraphQueryRes(result, ["StrProc", "StrIndex"])
    result.close()

    numberOfProcesses = len(procJsonResult)

    print("Request 2 " + str(id2))

    # get all ordered workflows from pipeline
    # The index used to be bound to ?stepIndex3 while the query selects
    # and orders by ?stepIndex, which left the ordering without effect.
    queryString = "SELECT ?execStep ?stepIndex" \
                  " ?workflowURI ?execStep " \
                  "WHERE {<" + pipelineStr + "> obo:NGS_0000076 ?execStep." \
                  " ?execStep obo:NGS_0000079" \
                  " ?workflowURI; obo:NGS_0000081" \
                  " ?stepIndex} ORDER BY" \
                  " ASC(?stepIndex)"

    tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                           queryString)
    result = tupleQuery.evaluate()
    jsonResult = parseAgraphQueryRes(
        result, ["stepIndex", "workflowURI", "execStep"])
    result.close()

    print("Request 3 " + str(id2))

    # get all protocols per workflow
    listOrderedProtocolsURI = []
    listOrderedProcessTypes = []
    listOrderedMessageTypes = []

    for workflow_row in jsonResult:
        workflowURI = workflow_row["workflowURI"]
        queryString = "SELECT ?protocStep ?stepIndex" \
                      " ?protocolURI ?type " \
                      "WHERE {" + workflowURI + \
                      " obo:NGS_0000078 ?protocStep. ?protocStep" \
                      " obo:NGS_0000077 ?protocolURI; obo:NGS_0000081" \
                      " ?stepIndex. ?protocolURI a ?type. ?type rdfs:label" \
                      " ?typelabel.} ORDER BY ASC(?stepIndex)"

        tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                               queryString)
        result3 = tupleQuery.evaluate()
        jsonResult2 = parseAgraphQueryRes(
            result3, ["stepIndex", "protocolURI", "type"])
        result3.close()

        # Map every protocol to its process and message type through the
        # module-level lookup tables, keyed by protocol type.
        for results in jsonResult2:
            for k, v in protocolsTypes.items():
                if v in results["type"]:
                    listOrderedProtocolsURI.append(results["protocolURI"])
                    listOrderedProcessTypes.append(processTypes[k])
                    listOrderedMessageTypes.append(processMessages[k])

    print("Request 4 all protocols " + str(id2))

    # Starts at 500 in case does not exists
    messageid = 500

    # Highest message index currently stored
    queryString = "SELECT ?index {?message" \
                  " rdf:type/rdfs:subClassOf* obo:NGS_0000061;" \
                  " obo:NGS_0000081 ?index}" \
                  " order by desc(?index) limit 1"

    tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                           queryString)
    result = tupleQuery.evaluate()

    for bindingSet in result:
        # The literal is rendered with its value between double quotes.
        messageid = int(str(bindingSet[0]).split('"')[1])

    print("Request 5 " + str(id2))

    result.close()

    if args.strain_id != "null":
        # Starting from a strain: everything on the real pipeline is
        # considered to come after the (non-existent) parent process 0.
        strainid = args.strain_id
        rpipid = args.real_pipeline_id
        ppipid = rpipid
        pprocid = 0
    else:
        ppipid = args.parent_pipeline_id
        pprocid = args.parent_process_id
        rpipid = args.real_pipeline_id

    if ppipid == rpipid:
        # Remove the processes placed after the parent process, together
        # with their output message, so they can be created again.
        for proc_json in procJsonResult:
            if int(proc_json["StrIndex"].replace('"', '')) > int(pprocid):
                todelUri = dbconAg.createURI(
                    "<" + proc_json["StrProc"].replace('"', "") + ">")

                hasOutputRel = dbconAg.createURI(namespace=obo,
                                                 localname="RO_0002234")
                statements = dbconAg.getStatements(todelUri, hasOutputRel,
                                                   None)
                jsonResult = parseAgraphStatementsRes(statements)
                statements.close()

                # NOTE(review): raises IndexError, outside the try
                # below, when the process has no output statement.
                todelUri2 = jsonResult[0]["obj"]
                todelUri2 = dbconAg.createURI(todelUri2)

                dbconAg.remove(todelUri2, None, None)
                dbconAg.remove(todelUri, None, None)
                dbconAg.remove(None, None, todelUri)

                statements = dbconAg.getStatements(todelUri, None, None)
                jsonResult = parseAgraphStatementsRes(statements)
                statements.close()

                numberOfProcesses -= 1

    print("Request 6 " + str(id2))

    try:
        addedProcesses = numberOfProcesses
        hasOutputRel = dbconAg.createURI(namespace=obo,
                                         localname="RO_0002234")
        hasInputRel = dbconAg.createURI(namespace=obo,
                                        localname="RO_0002233")
        index = dbconAg.createURI(namespace=obo, localname="NGS_0000081")
        isRunOfProtocl = dbconAg.createURI(namespace=obo,
                                           localname="NGS_0000091")

        # prev process to link (strain URI most of times)
        if args.strain_id != "null":
            prevMessageURI = dbconAg.createURI(
                namespace=localNSpace,
                localname="strains/strain_" + str(strainid))
            strainTypeURI = dbconAg.createURI(
                'http://rdf.ebi.ac.uk/terms/biosd/Sample')
            dbconAg.add(prevMessageURI, RDF.TYPE, strainTypeURI)

        processes_ids = []
        processid = addedProcesses

        # Case new run
        while addedProcesses < len(listOrderedProcessTypes):
            processid += 1
            messageid += 1

            processURI = dbconAg.createURI(
                namespace=localNSpace + "projects/",
                localname=str(id) + "/pipelines/" + str(id2) +
                "/processes/" + str(processid))

            messageURI = dbconAg.createURI(
                namespace=localNSpace + "projects/",
                localname=str(id) + "/pipelines/" + str(id2) +
                "/messages/" + str(messageid))

            processTypeURI = dbconAg.createURI(
                listOrderedProcessTypes[addedProcesses])
            messageTypeURI = dbconAg.createURI(
                listOrderedMessageTypes[addedProcesses])
            protocolTypeURI = dbconAg.createURI(
                listOrderedProtocolsURI[addedProcesses])

            indexProp = dbconAg.createURI(namespace=obo,
                                          localname="NGS_0000081")
            indexInt = dbconAg.createLiteral((addedProcesses + 1),
                                             datatype=XMLSchema.INT)
            messageindexInt = dbconAg.createLiteral((messageid),
                                                    datatype=XMLSchema.INT)

            # get specific process input type and uri
            queryString = "SELECT (STR(?out) as ?messageURI) WHERE {<" + \
                          localNSpace + "projects/" + str(id) + \
                          "/pipelines/" + str(rpipid) + \
                          "> obo:BFO_0000051 ?proc." \
                          " ?proc obo:NGS_0000081 ?index;" \
                          " obo:RO_0002234 ?out} order by desc(?out)"

            print(queryString)

            tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                                   queryString)
            result5 = tupleQuery.evaluate()
            jsonResult2 = parseAgraphQueryRes(result5, ["messageURI"])
            result5.close()

            # Only the first row (highest ?out) is used as the input;
            # with no rows the previous value is kept.
            for results in jsonResult2:
                prevMessageURI = dbconAg.createURI(
                    results["messageURI"].replace('"', ''))
                break

            # add process and link to pipeline
            dbconAg.add(processURI, RDF.TYPE, processTypeURI)
            dbconAg.add(pipelineURI, hasPart, processURI)
            stmt1 = dbconAg.createStatement(processURI, indexProp, indexInt)
            dbconAg.add(stmt1)

            # create output and input/output link messages to process
            dbconAg.add(messageURI, RDF.TYPE, messageTypeURI)
            dbconAg.add(messageURI, index, messageindexInt)
            dbconAg.add(processURI, hasOutputRel, messageURI)
            dbconAg.add(processURI, isRunOfProtocl, protocolTypeURI)
            dbconAg.add(processURI, hasInputRel, prevMessageURI)

            addedProcesses += 1
            processes_ids.append(processid)

        print("Request 7 " + str(id2))

        return processes_ids
    except Exception as e:
        print(e)
        return 404
def get(self):
    """Get job status

    Report the status of one or more processes by querying NGSOnto.
    The request arguments used are ``job_id`` and ``process_id``
    (comma-separated, matched by position), ``project_id``,
    ``pipeline_id`` and ``job_location``.

    Returns
    -------
    tuple: a dict with the per-job ``[job_id, status]`` pairs
        (``stdout``), the working directories (``all_wrkdirs``), the
        echoed ``job_id`` / ``process_ids`` lists and the placeholder
        ``store_in_db`` / ``results`` / ``paths`` lists, followed by the
        status code 200
    """
    request_args = job_get_parser.parse_args()
    job_ids = request_args.job_id.split(",")
    process_ids = request_args.process_id.split(",")

    # Checked in this order; the first marker contained in the stored
    # status decides the reported one.
    status_markers = (
        ("pass", "COMPLETED"),
        ("None", "PD"),
        ("running", "R"),
        ("pending", "PD"),
        ("warning", "WARNING"),
        ("fail", "FAILED"),
        ("error", "FAILED"),
    )

    stdout_pairs = []
    db_flags = []
    result_lists = []
    path_lists = []
    workdir_lists = []

    # Check if nextflow pipeline has error on nextflow submission
    log_path = os.path.join(
        os.path.join(request_args.job_location, "jobs",
                     "{}-{}".format(request_args.project_id,
                                    request_args.pipeline_id)),
        ".nextflow.log")

    submission_failed = False
    try:
        with open(log_path) as log_handle:
            for log_line in log_handle:
                if "[main] ERROR" in log_line:
                    submission_failed = True
    except Exception as e:
        print(e)

    for position, job_id in enumerate(job_ids):
        process_id = process_ids[position]
        status = ""
        workdirs = []

        try:
            process_uri = localNSpace + "projects/" + \
                str(request_args.project_id) + "/pipelines/" + \
                str(request_args.pipeline_id) + "/processes/" + \
                str(process_id)

            sparql = (
                "SELECT (str(?typelabel) as ?label)"
                " (str(?file1) as ?file_1)"
                " (str(?file2) as ?file_2)"
                " (str(?file3) as ?file_3)"
                " (str(?file4) as ?file_4)"
                " (str(?status) as ?statusStr) "
                "WHERE{<" + process_uri + "> obo:RO_0002234 ?in."
                " ?in a ?type.?type rdfs:label ?typelabel."
                " OPTIONAL { ?in obo:NGS_0000092 ?file1;"
                " obo:NGS_0000093 ?file2;"
                " obo:NGS_0000096 ?file4;"
                " obo:NGS_0000094 ?file3. }"
                " OPTIONAL {?in obo:NGS_0000097 ?status.} }"
            )

            query = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL, sparql)
            cursor = query.evaluate()
            rows = parseAgraphQueryRes(
                cursor, ["statusStr", "file_2", "file_4"])
            cursor.close()

            if submission_failed:
                status = "FAILED"
            else:
                stored_status = rows[0]["statusStr"]
                for marker, label in status_markers:
                    if marker in stored_status:
                        status = label
                        break

            # Keep the two path components that precede the file name.
            try:
                for row in rows:
                    pieces = row["file_4"].split("/")
                    workdirs.append('/'.join(pieces[-3:-1]))
            except Exception as p:
                print(p)
                workdirs = []
        except Exception as e:
            status = "NEUTRAL"

        stdout_pairs.append([job_id, status])
        db_flags.append(False)
        result_lists.append([])
        path_lists.append([])
        workdir_lists.append(workdirs)

    payload = {
        'stdout': stdout_pairs,
        'store_in_db': db_flags,
        'results': result_lists,
        'paths': path_lists,
        'job_id': job_ids,
        'all_wrkdirs': workdir_lists,
        'process_ids': process_ids
    }

    return payload, 200
def post(self, id, id1):
    """Add workflow

    Attach one or more workflows to a pipeline, each at a given step.
    The request arguments ``workflow_id`` and ``step`` are
    comma-separated lists matched by position. Execution steps already
    on the pipeline whose index is one of the requested steps, or is
    greater than the highest requested step, are removed first.

    Parameters
    ----------
    id: str
        project identifier
    id1: str
        pipeline identifier

    Returns
    -------
    code: 201 if successfully added.
    """
    args = pipeline_post_parser.parse_args()

    prtjctid = id
    pplid = id1
    wkflid = args.workflow_id.split(',')
    step = args.step.split(',')

    # check if workflow is on pipeline
    pipelineStr = localNSpace + "projects/" + str(prtjctid) + \
        "/pipelines/" + str(pplid)

    queryString = "SELECT ?execStep (STR(?intstep) as ?step) WHERE {<" + \
                  pipelineStr + "> obo:NGS_0000076 ?execStep." \
                  " ?execStep obo:NGS_0000081 ?intstep.}"

    tupleQuery = dbconAg.prepareTupleQuery(QueryLanguage.SPARQL,
                                           queryString)
    result = tupleQuery.evaluate()
    jsonResult = parseAgraphQueryRes(result, ["execStep", "step"])
    result.close()

    if jsonResult:
        # Converted once, as a real list: it used to be rebuilt with
        # map() for every row, which on Python 3 is a one-shot iterator
        # that the membership test would exhaust before max() ran.
        # Conversion still only happens when there is a row to compare.
        requested_steps = [int(value) for value in step]
        highest_step = max(requested_steps)

        for row in jsonResult:
            existing_step = int(row["step"].replace('"', ''))

            if existing_step in requested_steps \
                    or existing_step > highest_step:
                toremove = dbconAg.createURI(row["execStep"])
                dbconAg.remove(None, None, toremove)
                dbconAg.remove(toremove, None, None)

    # These do not depend on the workflow being added.
    exStepType = dbconAg.createURI(namespace=obo, localname="NGS_0000074")
    executeRel = dbconAg.createURI(namespace=obo, localname="NGS_0000076")
    indexProp = dbconAg.createURI(namespace=obo, localname="NGS_0000081")
    hasWorkflRel = dbconAg.createURI(namespace=obo,
                                     localname="NGS_0000079")
    workflowType = dbconAg.createURI(namespace=obo,
                                     localname="OBI_0500000")
    pipelineURI = dbconAg.createURI(
        namespace=localNSpace + "projects/",
        localname=str(prtjctid) + "/pipelines/" + str(pplid))

    for counter, workflow_id in enumerate(wkflid):
        # Raises IndexError when fewer steps than workflows were sent,
        # as before.
        step_value = step[counter]

        # add new workflow
        workflowURI = dbconAg.createURI(
            namespace=localNSpace,
            localname="workflows/" + str(workflow_id))

        exStepURI = dbconAg.createURI(
            namespace=localNSpace + "projects/",
            localname=str(prtjctid) + "/pipelines/" + str(pplid) +
            "/step/" + str(step_value))

        indexInt = dbconAg.createLiteral((step_value),
                                         datatype=XMLSchema.INT)

        dbconAg.add(exStepURI, RDF.TYPE, exStepType)
        stmt1 = dbconAg.createStatement(exStepURI, indexProp, indexInt)
        dbconAg.add(stmt1)

        # link pipeline to step
        dbconAg.add(pipelineURI, executeRel, exStepURI)

        # add workflow + link to step
        dbconAg.add(workflowURI, RDF.TYPE, workflowType)
        dbconAg.add(exStepURI, hasWorkflRel, workflowURI)

    return 201