def getReferencePath(self, referenceName):
    """Return the on-disk directory for the named reference.

    Joins the server's reference root with ``referenceName``; logs a
    warning and returns None when the directory is not accessible.
    """
    candidate = os.path.join(self.server.referencePath, referenceName)
    if not os.path.isdir(candidate):
        MU.logMsg(self,
                  'Unable to access path for reference [%s]. Attempted %s' % (referenceName, candidate),
                  'warning')
        return None
    return candidate
def submitSingleJob(self, job, projID='None'):
    """Create secondary-analysis inputs for ``job`` and submit it to the server.

    Returns the server response (dict) on success, None on failure.
    NOTE(review): ``urljoin`` is called with three positional arguments;
    urlparse.urljoin treats a third argument as allow_fragments, so this
    is presumably a project helper -- confirm before refactoring.
    """
    # Get or create the inputs
    inputs = self._makeInputs(job)
    if inputs:
        # Create the job
        jobData = {'name' : 'Milhouse%s_%s' % (projID, job.id),
                   'createdBy' : 'MilhouseUser',
                   'description' : 'Milhouse%s_%s %s' % (projID, job.id, job.protocol), #job.protocol.get('name')),
                   'protocolName' : job.protocol, #job.protocol.get('name'),
                   'referenceSequenceName' : job.reference, #job.reference.get('name'),
                   'groupNames' : ['all'],
                   'inputIds' : inputs
                   }
        # Submit the job
        params = {'data' : json.dumps(jobData)}
        conn = SecondaryServerConnector(self.server)
        url = urljoin(self.server.apiRootPath, 'jobs', 'create')
        resp = conn.makeRequest(url, 'POST', params)
        # NOTE(review): leftover debug print (Python 2 statement syntax)
        print 'RESPONSE IS', resp
        # Anything other than an explicit success == False counts as success.
        if not resp.get('success') == False:
            MU.logMsg(self, 'Successfully created secondary job', 'info')
            return resp
        else:
            MU.logMsg(self, 'Failed to create secondary job', 'error')
def limsCodeFromCellPath(cellPath):
    """Derive a LIMS code ('<run>-<cell>') from the last two path components.

    Both components must parse as integers; otherwise the failure is
    logged and None is returned.
    """
    tail = cellPath.split('/')[-2:]
    try:
        # Validation only -- the parsed ints are discarded.
        [int(piece) for piece in tail]
    except ValueError:
        MU.logMsg('LIMSMapper',
                  'Unable to find LIMS Code from cell path %s' % cellPath,
                  'info')
    else:
        return '-'.join(tail)
def getProtocolFile(self, protocolFile):
    """Return the absolute path of a protocol XML file, or None if missing.

    Logs a warning when the file cannot be found under the server's
    protocol directory.
    """
    candidate = os.path.join(self.server.protocolPath, protocolFile)
    if not os.path.isfile(candidate):
        MU.logMsg(self,
                  'Unable to access file for protocol [%s]. Attempted %s' % (protocolFile, candidate),
                  'warning')
        return None
    return candidate
def getJobPathFromID(self, jobID):
    """Resolve a job ID to its on-disk job directory, or None if absent.

    The ID is normalized first, then mapped to a disk path under the
    server's job-data root; a warning is logged when the directory does
    not exist.
    """
    normalized = self.normalizeJobID(jobID)
    path = self.getJobDiskPath(self.server.jobDataPath, normalized)
    if os.path.isdir(path):
        return path
    MU.logMsg(self,
              'Unable to access job path for job ID [%s]. Attempted [%s]' % (normalized, path),
              'warning')
def getJobFile(self, jobID, filename, dataFile=False):
    """Return the path of ``filename`` inside a job's directory, or None.

    When ``dataFile`` is true the file is looked up in the job's 'data'
    subdirectory. Missing jobs return None silently; missing files log
    a warning.
    """
    jobPath = self.getJobPathFromID(jobID)
    if not jobPath:
        return None
    if dataFile:
        target = os.path.join(jobPath, 'data', filename)
    else:
        target = os.path.join(jobPath, filename)
    if os.path.isfile(target):
        return target
    MU.logMsg(self,
              'Unable to access job file [%s] for job [%s]. Attempted %s' % (filename, jobID, target),
              'warning')
def getReferenceFile(self, referenceName):
    """Return the reference.info.xml path for a named reference, or None.

    Missing reference directories return None silently (the lookup
    already logged); a missing info file logs a warning.
    """
    refPath = self.getReferencePath(referenceName)
    if not refPath:
        return None
    infoXml = os.path.join(refPath, 'reference.info.xml')
    if os.path.isfile(infoXml):
        return infoXml
    MU.logMsg(self,
              'Unable to access file for reference [%s]. Attempted %s' % (referenceName, infoXml),
              'warning')
def _makeResponse(self, resp, asDict=True): if asDict and isinstance(resp, dict): return resp elif isinstance(resp, list): return resp if asDict: try: return json.dumps(resp) except ValueError as err: msg = 'Unable to return response as dictionary: %s' % err MU.logMsg(self, msg, 'warning') return resp return resp
def csvToRecArray(csv):
    """Coerce a CSV definition into a numpy recarray.

    Accepts an existing recarray (returned untouched) or a path to a CSV
    file (parsed case-sensitively). Parse failures and missing files are
    logged and yield None; any other input type raises ValidationError.
    """
    if isinstance(csv, n.recarray):
        # Already parsed -- nothing to do.
        return csv
    if isinstance(csv, str):
        if not os.path.isfile(csv):
            MU.logMsg('CsvToRecArray', 'CSV file provided does not exist! %s' % csv, 'error')
            return None
        try:
            return MU.getRecArrayFromCSV(csv, caseSensitive=True)
        except ValueError as err:
            MU.logMsg('CsvToRecArray', 'Incorrectly formatted CSV file:\n %s' % err, 'error')
            return None
    raise ValidationError('Unable to convert CSV definition input [%s] into RecArray' % csv)
def create(server, disk=None):
    """Factory: return a secondary job service (API- or disk-backed).

    ``server`` may be a config-file path, a dict of model fields, or an
    existing SecondaryAnalysisServer. ``disk`` forces the disk (True) or
    API (False) implementation; when None the web API is probed and disk
    is used as the fallback. Martin servers are delegated to
    MartinJobServiceFactory.

    Fixes over the previous revision:
      * ``os.path.isfile(str)`` passed the builtin ``str`` type instead
        of the ``server`` argument (always raised TypeError).
      * the disk-fallback branch built a 1-tuple via a trailing comma
        and then dropped the message argument from MU.logMsg().
      * 'SecondaryJobServiceFactry' typo in the API-path log call.
    """
    # Create the server db model if it doesn't exist
    if isinstance(server, str):
        if os.path.isfile(server):
            server = MU.serverConfToDict(server)
        else:
            msg = 'Unable to create server with definition: %s' % (server)
            MU.logMsg('SecondaryJobServiceFactory', msg, 'error')
            raise SecondaryJobServiceError(msg)
    if isinstance(server, dict):
        try:
            server, created = SecondaryAnalysisServer.objects.get_or_create(**server)
            if created:
                MU.logMsg('SecondaryJobServiceFactory',
                          'Created new SecondaryAnalysisServer %s' % server, 'info')
        except Exception as err:
            msg = 'Unable to create server with definition: %s. ErrorMsg: %s' % (server, err)
            MU.logMsg('SecondaryJobServiceFactory', msg, 'error')
            raise SecondaryJobServiceError(msg)
    if re.findall('^martin', server.serverName.lower()):
        # This is specific to PacBio only, do not set this to true for non-PacBio installs!!
        from pbmilhouse.MartinJobHandler import MartinJobServiceFactory
        return MartinJobServiceFactory.create(server, disk)
    if disk is None:
        if server.serverHost and server.serverPort:
            sjsApi = SecondaryJobServiceAPI(server)
            ping, msg = sjsApi.testConnection()
            if ping:
                logmsg = 'Using Server API Instance, Web Service Ping: %s' % msg
                MU.logMsg('SecondaryJobServiceFactory', logmsg, 'info')
                disk = False
            else:
                logmsg = 'Using Server Disk Instance, Web Service Ping: %s' % msg
                MU.logMsg('SecondaryJobServiceFactory', logmsg, 'info')
                disk = True
        else:
            disk = True
    # All other Milhouse instances should go through this path
    if disk:
        return SecondaryJobServiceDisk(server)
    else:
        return SecondaryJobServiceAPI(server)
def create(server, disk=None):
    """Factory for Martin job services: probe the web API, fall back to disk.

    When ``disk`` is None, the API is pinged; a successful ping selects
    the API implementation, anything else selects the disk one.
    """
    if disk is None:
        disk = True
        if server.serverHost and server.serverPort:
            api = MartinJobServiceAPI(server)
            ping, pingMsg = api.testConnection()
            if ping:
                MU.logMsg('MartinJobServiceFactory',
                          'Using Server API Instance, Web Service Ping: %s' % pingMsg,
                          'info')
                disk = False
            else:
                MU.logMsg('MartinJobServiceFactory',
                          'Using Server Disk Instance, Web Service Ping: %s' % pingMsg,
                          'info')
    return MartinJobServiceDisk(server) if disk else MartinJobServiceAPI(server)
def makeRequest(self, url, method='GET', params=None, responseAsDict=True): ''' Make requests to remote servers, used for API calls''' # Set basic request variables params = urllib.urlencode(params) if params else urllib.urlencode({}) postHeaders = {'Content-type': 'application/x-www-form-urlencoded', 'Accept': 'text/plain'} # Attempt request to specified url try: conn = self._getConnection() if method=='POST': print 'URL: %s; PARAMS: %s' % (url, params) conn.request(method, url, params, postHeaders) time.sleep(2) else: conn.request(method, url, params) #time.sleep(2) except socket.error as msg: MU.logMsg(self, msg, 'warning') conn.close() return self._makeResponse({'success': False, 'errorMsg': msg}, responseAsDict) resp = conn.getresponse() if resp.status != httplib.OK: msg = 'Server Error Code: %d, Server error reason: %s' % (resp.status, resp.reason) conn.close() MU.logMsg(self, msg, 'warning') return self._makeResponse({'success': False, 'errorMsg': msg}, responseAsDict) else: MU.logMsg(self, 'Successfully completed server request', 'info') respString = resp.read() try: useResp = json.loads(respString) except ValueError, err: MU.logMsg(self, 'Unable to load response string to dictionary: %s' % err, 'warning') useResp = respString conn.close() MU.logMsg(self, 'Request returned: %s' % useResp, 'debug') return self._makeResponse(useResp, responseAsDict)
def _makeInputs(self, job):
    """Register the job's SMRT cells as inputs on the secondary server.

    If every cell path already has a *.metadata.xml, all paths are bulk
    imported in one request; otherwise each cell is created individually.
    Returns the bulk-import response, or the list of per-cell responses,
    or None on failure.
    NOTE(review): ``all([])`` is True, so a job with no cells takes the
    bulk-import branch -- confirm that is intended.
    NOTE(review): ``urljoin`` is called with three arguments; presumably
    a project helper, not urlparse.urljoin -- confirm.
    """
    inputPaths = [c.path for c in job.cells.all()]
    allMetadata = all([glob.glob('%s/*.metadata.xml' % i) for i in inputPaths])
    if allMetadata:
        params = {'data' : inputPaths}
        conn = SecondaryServerConnector(self.server)
        url = urljoin(self.server.apiRootPath, 'inputs', 'import')
        resp = conn.makeRequest(url, 'POST', params)
        # NOTE(review): leftover debug print (Python 2 statement syntax)
        print 'RESPONSE IS', resp
        # Anything other than an explicit success == False counts as success.
        if not resp.get('success') == False:
            msg = 'Successfully created all secondary job inputs'
            MU.logMsg(self, msg, 'info')
            return resp
        else:
            MU.logMsg(self, 'Failed to create secondary job inputs', 'error')
    else:
        inputs = []
        for cell in job.cells.all():
            cellParams = {'collectionPathUri' : cell.path,
                          'runName' : 'unknown',
                          'groupNames' : ['all']
                          }
            params = {'data' : json.dumps(cellParams)}
            conn = SecondaryServerConnector(self.server)
            url = urljoin(self.server.apiRootPath, 'inputs', 'create')
            resp = conn.makeRequest(url, 'POST', params)
            print 'RESPONSE IS', resp
            if not resp.get('success') == False:
                inputs.append(resp)
        # Only succeed when every cell produced a successful response.
        if len(inputs) == len(job.cells.all()):
            MU.logMsg(self, 'Successfully created secondary job inputs', 'info')
            return inputs
        else:
            MU.logMsg(self, 'Failed to create secondary job inputs', 'error')
def resubmitSingleJob(self, job):
    """Ask the Martin server to re-run an existing job.

    Returns the job's ID on success, None (after logging) on failure.
    """
    MU.logMsg(self, 'Attempting to re-submit Martin job %s' % job.jobID, 'info')
    payload = {'job_data': json.dumps({'tSelectedJob': str(job.jobID)})}
    # Submit the job
    resp = SecondaryServerConnector(self.server).makeRequest('/resubmitJob', 'POST', payload)
    if resp.get('success') == False:
        MU.logMsg(self, 'Job Re-Submission Failed: %s' % resp.get('errorMsg'), 'error')
        return None
    MU.logMsg(self, 'Resubmitting Martin Job %s' % job.jobID, mode='info')
    return job.jobID
def submitSingleJob(self, job, projID='None'):
    """Submit a milhouse secondary job to the Martin server.

    On success, stores the returned Martin job ID on ``job`` (saving the
    model) and returns that ID; on failure logs and returns None.
    """
    MU.logMsg(self, 'Attempting to submit milhouse secondary job %s to Martin server' % job.id, 'info')
    jobData = self._modelToSubmissionDict(job, projID)
    ref = job.reference #job.reference.get('name')
    # LIMS-template references flip the useLIMST flag on the submission.
    jobData['useLIMST'] = True if 'limstemplate' in ref.lower() else False
    # Submit the job
    params = {'job_data': json.dumps(jobData)}
    conn = SecondaryServerConnector(self.server)
    resp = conn.makeRequest('/JobService/submit', 'POST', params)
    # NOTE(review): leftover debug print (Python 2 statement syntax)
    print 'RESPONSE IS', resp
    # Anything other than an explicit success == False counts as success.
    if not resp.get('success') == False:
        jobID = resp['job_id']
        MU.logMsg(self, 'Saving Martin Job ID: %s' % jobID, 'info')
        job.jobID = jobID
        job.save()
        return jobID
    else:
        MU.logMsg(self, 'Job Submission Failed. ErrorMsg: %s' % resp.get('errorMsg'), 'error')
def createSecondaryJobs():
    """Create or look up SecondaryJob rows for every condition in the csv.

    Returns a dict mapping condition name -> list of SecondaryJob objects.
    NOTE(review): reads ``csv``, ``csvType`` and ``classString`` from an
    enclosing scope -- this appears to be a stray copy of the helper
    nested inside the project factory's create(); confirm which one is
    actually used before touching either.
    """
    secondaryJobObjects = {}
    # Do it by condition
    conditions = n.unique(csv["Name"])
    newJobDefs = {}
    for cond in conditions:
        condRows = csv[csv["Name"] == cond]
        # If this is a new secondary job, populate the necessary database tables
        if csvType == "newJob":
            # One job per unique (server, protocol, reference) triple.
            uniqueJobs = n.unique(
                zip(
                    condRows["SecondaryServerName"],
                    condRows["SecondaryProtocol"],
                    condRows["SecondaryReference"],
                )
            )
            for job in uniqueJobs:
                msg = "Creating SecondaryJob for job info: %s" % str(job)
                MU.logMsg(classString, msg, "info")
                # First make the job, but don't save it
                serverName = job[0]
                protocol = job[1]
                reference = job[2]
                secondaryServer = SecondaryAnalysisServer.objects.get(serverName=serverName)
                sjs = SecondaryJobServiceFactory.create(secondaryServer)
                # protocolEntry = sjs.getModelProtocolInfo(job[1])
                # referenceEntry = sjs.getModelReferenceInfo(job[2])
                #
                # jobDef = {'protocol' : simplejson.dumps(protocolEntry),
                #           'reference' : simplejson.dumps(referenceEntry),
                #           'server' : secondaryServer}
                jobDef = {"protocol": protocol, "reference": reference, "server": secondaryServer}
                # Now add the cells
                jobRows = condRows[
                    (condRows["SecondaryServerName"] == serverName)
                    & (condRows["SecondaryProtocol"] == protocol)
                    & (condRows["SecondaryReference"] == reference)
                ]
                jobCells = n.unique(zip(jobRows["SMRTCellPath"], jobRows["PrimaryFolder"]))
                smrtCells = []
                for c in jobCells:
                    path, primaryFolder = tuple(c)
                    msg = "Creating or accessing SMRTCell for data path: %s" % os.path.join(path, primaryFolder)
                    MU.logMsg(classString, msg, "info")
                    if os.path.exists(path):
                        # This is a data path
                        limsCode = LIMSMapper.limsCodeFromCellPath(path)
                        cell = SMRTCell.objects.get_or_create(
                            path=path, primaryFolder=primaryFolder, limsCode=limsCode
                        )
                        smrtCells.append(cell[0])
                    else:
                        # this is a LIMS Code
                        dataPath = LIMSMapper.cellPathFromLimsCode(path)
                        cell = SMRTCell.objects.get_or_create(
                            path=dataPath, primaryFolder=primaryFolder, limsCode=path
                        )
                        smrtCells.append(cell[0])
                # Add the SMRT Cells
                jobDef["cells"] = smrtCells
                # De-duplicate: reuse a previously created job with an
                # identical definition instead of saving a new row.
                hasJob = False
                for pk, jd in newJobDefs.iteritems():
                    if jobDef == jd:
                        hasJob = True
                        jobObj = SecondaryJob.objects.get(id=pk)
                if not hasJob:
                    cells = jobDef.pop("cells")
                    jobObj = SecondaryJob(**jobDef)
                    jobObj.save()
                    jobObj.cells.add(*cells)
                    jobDef["cells"] = cells
                    newJobDefs[jobObj.id] = jobDef
                    msg = "Successfully created and saved SecondaryJob: %s" % str(model_to_dict(jobObj))
                    MU.logMsg(classString, msg, "info")
                # Link secondary job to condition
                if not secondaryJobObjects.has_key(cond):
                    secondaryJobObjects[cond] = [jobObj]
                else:
                    secondaryJobObjects[cond].append(jobObj)
        else:
            # Job already exists
            for job, serverName in zip(condRows["SecondaryJobID"], condRows["SecondaryServerName"]):
                server = SecondaryAnalysisServer.objects.get(serverName=serverName)
                newJob, created = SecondaryJob.objects.get_or_create(jobID=job, server=server)
                # Add other job info in here if job was newly created...
                if True:  # created:
                    sjs = SecondaryJobServiceFactory.create(server)
                    jobID = newJob.jobID
                    jobInfo = sjs.getModelJobInfo(jobID)
                    # Add protocol and reference info
                    # protocol = SecondaryJobService.getSingleItem(jobInfo.get('protocol', {'name' : 'unknown'}))
                    # reference = SecondaryJobService.getSingleItem(jobInfo.get('reference', {'name' : 'unknown'}))
                    # newJob.protocol = simplejson.dumps(protocol)
                    # newJob.reference = simplejson.dumps(reference)
                    newJob.protocol = jobInfo.get("protocol", "unknown")
                    newJob.reference = jobInfo.get("reference", "unknown")
                    newJob.save()
                    # Get the SMRT Cells
                    smrtCells = jobInfo.get("inputs", [])
                    smrtCellObjs = []
                    for c in smrtCells:
                        cell = SMRTCell.objects.get_or_create(
                            path=c["SMRTCellPath"], primaryFolder=c["PrimaryFolder"], limsCode=c["LIMSCode"]
                        )
                        smrtCellObjs.append(cell[0])
                    # Now add the SMRT Cells to the new job
                    [newJob.cells.add(x) for x in smrtCellObjs]
                # Link secondary job to condition
                if not secondaryJobObjects.has_key(cond):
                    secondaryJobObjects[cond] = [newJob]
                else:
                    secondaryJobObjects[cond].append(newJob)
    return secondaryJobObjects
def create(definition, projectDict=None):
    """Validate an experiment definition and build the full project graph.

    Validates ``definition``, then creates (in order) the Project row,
    the SecondaryJob rows per condition, and the Condition rows linking
    them. Returns an MProject wrapper around the saved Project.

    Raises ProjectError when the definition fails validation.
    """
    classString = "ProjectFactoryCreate"
    MU.logMsg(classString, "Submitting project definition for validation and object creation", "info")
    validator = ExperimentDefinitionValidator(definition)
    definition = validator.getExperimentDefinition()
    isValid, validMsg = validator.validateDefinition()
    MU.logMsg(classString, "Project definition validation result: %s" % validMsg, "info")
    if not isValid:
        msg = "Invalid definition, cannot create project! %s" % validMsg
        MU.logMsg(classString, msg, "error")
        raise ProjectError(msg)
    # If valid csv, start to populate database objects
    csv = definition
    csvType = validator.getDefinitionType()

    def createProject(pDict):
        # Create the Project row; projectID is max(existing)+1, or 1 for
        # the first project ever created.
        if pDict is None:
            pDict = {}
        projects = Project.objects.count()
        if projects:
            lastProject = Project.objects.all().order_by("-projectID")[0]
            projectID = lastProject.projectID + 1
        else:
            projectID = 1
        project = Project(
            projectID=projectID,
            name=pDict.get("name", "dummy"),
            description=pDict.get("description", "dummy"),
            tags=pDict.get("tags", None),
            status=pDict.get("status", enums.getChoice(enums.PROJECT_STATUS, "INSTANTIATED")),
        )
        project.save()
        return project

    def createSecondaryJobs():
        # Create or look up SecondaryJob rows per condition; returns
        # {condition name: [SecondaryJob, ...]}.
        secondaryJobObjects = {}
        # Do it by condition
        conditions = n.unique(csv["Name"])
        newJobDefs = {}
        for cond in conditions:
            condRows = csv[csv["Name"] == cond]
            # If this is a new secondary job, populate the necessary database tables
            if csvType == "newJob":
                # One job per unique (server, protocol, reference) triple.
                uniqueJobs = n.unique(
                    zip(
                        condRows["SecondaryServerName"],
                        condRows["SecondaryProtocol"],
                        condRows["SecondaryReference"],
                    )
                )
                for job in uniqueJobs:
                    msg = "Creating SecondaryJob for job info: %s" % str(job)
                    MU.logMsg(classString, msg, "info")
                    # First make the job, but don't save it
                    serverName = job[0]
                    protocol = job[1]
                    reference = job[2]
                    secondaryServer = SecondaryAnalysisServer.objects.get(serverName=serverName)
                    sjs = SecondaryJobServiceFactory.create(secondaryServer)
                    # protocolEntry = sjs.getModelProtocolInfo(job[1])
                    # referenceEntry = sjs.getModelReferenceInfo(job[2])
                    #
                    # jobDef = {'protocol' : simplejson.dumps(protocolEntry),
                    #           'reference' : simplejson.dumps(referenceEntry),
                    #           'server' : secondaryServer}
                    jobDef = {"protocol": protocol, "reference": reference, "server": secondaryServer}
                    # Now add the cells
                    jobRows = condRows[
                        (condRows["SecondaryServerName"] == serverName)
                        & (condRows["SecondaryProtocol"] == protocol)
                        & (condRows["SecondaryReference"] == reference)
                    ]
                    jobCells = n.unique(zip(jobRows["SMRTCellPath"], jobRows["PrimaryFolder"]))
                    smrtCells = []
                    for c in jobCells:
                        path, primaryFolder = tuple(c)
                        msg = "Creating or accessing SMRTCell for data path: %s" % os.path.join(path, primaryFolder)
                        MU.logMsg(classString, msg, "info")
                        if os.path.exists(path):
                            # This is a data path
                            limsCode = LIMSMapper.limsCodeFromCellPath(path)
                            cell = SMRTCell.objects.get_or_create(
                                path=path, primaryFolder=primaryFolder, limsCode=limsCode
                            )
                            smrtCells.append(cell[0])
                        else:
                            # this is a LIMS Code
                            dataPath = LIMSMapper.cellPathFromLimsCode(path)
                            cell = SMRTCell.objects.get_or_create(
                                path=dataPath, primaryFolder=primaryFolder, limsCode=path
                            )
                            smrtCells.append(cell[0])
                    # Add the SMRT Cells
                    jobDef["cells"] = smrtCells
                    # De-duplicate: reuse a previously created job with an
                    # identical definition instead of saving a new row.
                    hasJob = False
                    for pk, jd in newJobDefs.iteritems():
                        if jobDef == jd:
                            hasJob = True
                            jobObj = SecondaryJob.objects.get(id=pk)
                    if not hasJob:
                        cells = jobDef.pop("cells")
                        jobObj = SecondaryJob(**jobDef)
                        jobObj.save()
                        jobObj.cells.add(*cells)
                        jobDef["cells"] = cells
                        newJobDefs[jobObj.id] = jobDef
                        msg = "Successfully created and saved SecondaryJob: %s" % str(model_to_dict(jobObj))
                        MU.logMsg(classString, msg, "info")
                    # Link secondary job to condition
                    if not secondaryJobObjects.has_key(cond):
                        secondaryJobObjects[cond] = [jobObj]
                    else:
                        secondaryJobObjects[cond].append(jobObj)
            else:
                # Job already exists
                for job, serverName in zip(condRows["SecondaryJobID"], condRows["SecondaryServerName"]):
                    server = SecondaryAnalysisServer.objects.get(serverName=serverName)
                    newJob, created = SecondaryJob.objects.get_or_create(jobID=job, server=server)
                    # Add other job info in here if job was newly created...
                    if True:  # created:
                        sjs = SecondaryJobServiceFactory.create(server)
                        jobID = newJob.jobID
                        jobInfo = sjs.getModelJobInfo(jobID)
                        # Add protocol and reference info
                        # protocol = SecondaryJobService.getSingleItem(jobInfo.get('protocol', {'name' : 'unknown'}))
                        # reference = SecondaryJobService.getSingleItem(jobInfo.get('reference', {'name' : 'unknown'}))
                        # newJob.protocol = simplejson.dumps(protocol)
                        # newJob.reference = simplejson.dumps(reference)
                        newJob.protocol = jobInfo.get("protocol", "unknown")
                        newJob.reference = jobInfo.get("reference", "unknown")
                        newJob.save()
                        # Get the SMRT Cells
                        smrtCells = jobInfo.get("inputs", [])
                        smrtCellObjs = []
                        for c in smrtCells:
                            cell = SMRTCell.objects.get_or_create(
                                path=c["SMRTCellPath"], primaryFolder=c["PrimaryFolder"], limsCode=c["LIMSCode"]
                            )
                            smrtCellObjs.append(cell[0])
                        # Now add the SMRT Cells to the new job
                        [newJob.cells.add(x) for x in smrtCellObjs]
                    # Link secondary job to condition
                    if not secondaryJobObjects.has_key(cond):
                        secondaryJobObjects[cond] = [newJob]
                    else:
                        secondaryJobObjects[cond].append(newJob)
        return secondaryJobObjects

    def createConditions(secondaryJobs, project):
        # Build one Condition row per condition name, carrying the
        # 'p_*' extras columns as JSON and OR-ing the ExtractBy clauses.
        conditionObjects = []
        for cond, jobs in secondaryJobs.iteritems():
            condRows = csv[csv["Name"] == cond]
            extraCols = filter(lambda x: x.startswith("p_"), condRows.dtype.names)
            extrasDict = {}
            for col in extraCols:
                extrasDict[col] = list(n.unique(condRows[col]))
            extrasJSON = simplejson.dumps(extrasDict)
            extractBy = ["( %s )" % e for e in n.unique(condRows["ExtractBy"])]
            extractByString = " | ".join(extractBy)
            condObj = Condition(
                name=cond,
                extractBy=extractByString,
                extrasDict=extrasJSON,
                project=project,
                status=enums.getChoice(enums.CONDITION_STATUS, "INSTANTIATED"),
            )
            condObj.save()
            # Add secondary jobs to condition object
            condObj.secondaryJob.add(*jobs)
            conditionObjects.append(condObj)
        return conditionObjects

    project = createProject(projectDict)
    jobs = createSecondaryJobs()
    createConditions(jobs, project)
    return MProject(project)