def executeJob(self, job):
    # Mock job execution: simulate work with sleeps and serve canned results
    # read from disk instead of running a real Spark job.
    time.sleep(5)
    job['status'] = 'RUNNING'
    (responseCode, job) = self.service.saveJob(job)
    if responseCode != 200:
        raise Exception("could not save job status.")

    filePath = conf.MOCK_DATA_PATH[job['queryType']]
    with open(filePath, 'r') as handle:
        data = json.loads(handle.read())

    result_set = GeqeAPI.rawResultToResultDocument(job, data)
    (response, result_set) = self.service.saveResultset(result_set)
    if response != 200:
        job['status'] = 'FAILED'
        self.service.saveJob(job)
        print str(result_set)
        raise Exception("Could not save result set. error: " + str(response))

    if 'modelSavePath' in job and job['modelSavePath'] is not None:
        # save the model meta data
        modelData = {
            "name": job['name'],
            "username": job["username"],
            "queryType": job["queryType"],
            "modelSavePath": job['modelSavePath'],
            "siteListId": job["siteListId"],
            "datasetId": job["datasetId"]
        }
        (response, modelData) = self.service.saveModelData(modelData)
        if response != 200:
            job['status'] = 'FAILED'
            self.service.saveJob(job)
            raise Exception("Could not save model metadata: " + str(response) + " \n" + str(modelData))

    # save the results set into elastic search
    if conf.ES_HOST is not None:
        elaticsearchConnetor = GeqeAPI.ElasticSearchHelper(conf.ES_HOST, port=conf.ES_PORT)
        (response, es_result) = elaticsearchConnetor.addResultSet(result_set)
        if response != 201:
            job['status'] = 'FAILED'
            self.service.saveJob(job)
            raise Exception("Could not save result set to es. error: " + str(response) + " \n" + str(result_set))

    time.sleep(5)
    job['status'] = 'SUCCESS'
    job['resultsetId'] = result_set['id']
    (response, job) = self.service.saveJob(job)
    if response != 200:
        raise Exception("could not save job status.")
    return True
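
# Illustrative job document for the mock runner (field values are examples,
# not taken from a real deployment; '...' stands for server-assigned ids):
#   {'name': 'demo', 'username': 'jsmith', 'queryType': 'location',
#    'status': 'WAITING', 'siteListId': '...', 'datasetId': '...',
#    'modelSavePath': None}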
} ] } """ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("serviceUrl", help="loop back data service url ie http://localhost:5500") parser.add_argument("inputFile", help="polygon file to upload") parser.add_argument("--name", help="name of the dataset") parser.add_argument("--username", help="name of the user account.") args = parser.parse_args() dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl) if args.name is None or args.username is None: print 'name and username are required' parser.print_help() sys.exit(1) with open(args.inputFile) as handle: data = json.loads(handle.read()) data['name'] = args.name data['username'] = args.username for site in data['sites']: if 'dates' in site and len(site['dates']) > 0: for daterange in site['dates']:
def executeJob(dataConnector, elaticsearchConnetor, job, dataset, sitelist):
    # Declare Spark Context
    conf = SparkConf().setAppName("GeqeRunner")
    conf.set('spark.driver.maxResultSize', '0')
    sc = SparkContext(conf=conf)
    sqlContext = SQLContext(sc)

    # Create polygon list and broadcast variable based on it
    lPolygon = shapeReader.readInShapeDocument(sitelist) if sitelist is not None else None

    queryType = job['queryType']
    jobname = job['username'] + '_' + job['name']
    modelSavePath = job['modelSavePath'] if 'modelSavePath' in job else None

    datasetPath = None
    inputPartitions = None
    if job['queryType'] == 'location':
        datasetPath = dataset['location_path']
        inputPartitions = dataset['location_partitions']
    elif job['queryType'] == 'event':
        datasetPath = dataset['event_path']
        inputPartitions = dataset['event_partitions']
    else:
        raise ValueError("invalid query type: " + str(job['queryType']))
    dictionaryPath = dataset['dictionaryPath']
    nDataType = int(dataset['type'])

    modelData = None
    if 'geqeModelId' in job and job['geqeModelId'] != "":
        (response, modelData) = dataConnector.getModelData(job['geqeModelId'])
        if response != 200:
            raise Exception("Could not load geqeModel")
        # if modelData['dictionaryPath'] != dictionaryPath:
        #     raise ValueError("dataset dictionary and model dictionary do not match")

    sNum = job['limit'] if 'limit' in job else 25

    result = None
    if modelData:
        modelPath = modelData['modelSavePath'] + '/' + modelData['name']
        result = run_refindSimilarPlaces(sc, sqlContext, jobname, datasetPath, dictionaryPath,
                                         inputPartitions, sNum, modelPath,
                                         bByDate=queryType == 'event')
    elif 'location' == queryType:
        result = run_findSimilarPlaces(sc, sqlContext, jobname, datasetPath, dictionaryPath,
                                       nDataType, inputPartitions, sNum, lPolygon, modelSavePath)
    elif 'event' == queryType:
        result = run_findSimilarEvent(sc, sqlContext, jobname, datasetPath, dictionaryPath,
                                      nDataType, inputPartitions, sNum, lPolygon, modelSavePath)
    else:
        raise Exception("Invalid query type: " + queryType)

    if modelSavePath is not None:
        # save the model meta data
        modelData = {
            "name": jobname,
            "username": job["username"],
            "queryType": job["queryType"],
            "modelSavePath": modelSavePath,
            "siteListId": job["siteListId"],
            "datasetId": job["datasetId"]
        }
        (response, modelData) = dataConnector.saveModelData(modelData)
        if response != 200:
            job['status'] = 'FAILED'
            dataConnector.saveJob(job)
            raise Exception("Could not save model metadata: " + str(response) + " \n" + str(modelData))

    result = GeqeAPI.rawResultToResultDocument(job, result)

    # save the result
    (response, result_set) = dataConnector.saveResultset(result)
    if response != 200:
        job['status'] = 'FAILED'
        dataConnector.saveJob(job)
        raise Exception("Could not save result set. error: " + str(response) + " \n" + str(result_set))

    # save the results set into elastic search
    if elaticsearchConnetor is not None:
        (response, es_result) = elaticsearchConnetor.addResultSet(result_set)
        if response != 201:
            job['status'] = 'FAILED'
            dataConnector.saveJob(job)
            raise Exception("Could not save result set to es. error: " + str(response) + " \n" + str(result_set))

    # update the job info
    job['status'] = 'SUCCESS'
    job['resultsetId'] = result_set['id']
    (response, job) = dataConnector.saveJob(job)
    if response != 200:
        raise Exception("could not save job status.")
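
# Illustrative shapes of the documents executeJob consumes (all values are
# examples, not taken from a real deployment):
#   job = {'name': 'downtown', 'username': 'jsmith', 'queryType': 'location',
#          'limit': 25, 'siteListId': '...', 'datasetId': '...',
#          'modelSavePath': '/models', 'geqeModelId': ''}
#   dataset = {'location_path': 'hdfs:///data/loc', 'location_partitions': 200,
#              'event_path': 'hdfs:///data/evt', 'event_partitions': 200,
#              'dictionaryPath': 'hdfs:///data/dict', 'type': 1}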
raise Exception("could not save job status.") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("serviceUrl", help="loop back data service url") parser.add_argument("--elasticsearchHost", help="Host name or ip address for elastic search.") parser.add_argument("--elasticsearchPort", type=int, help="Port for elastic search defaults to 9200", default=9200) parser.add_argument("jobId", help="The job to execute") args = parser.parse_args() dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl) elaticsearchConnetor = GeqeAPI.ElasticSearchHelper( args.elasticsearchHost, port=args.elasticsearchPort) if args.elasticsearchHost else None # Job (responseCode, job) = dataConnector.getJob(args.jobId) if 200 != responseCode: raise Exception("Could not read job: " + args.jobId + ' response: ' + str(responseCode)) job['status'] = 'RUNNING' (responseCode, job) = dataConnector.saveJob(job) if 200 != responseCode: raise Exception("Could not save job: " + args.jobId + ' response: ' + str(responseCode)) print 'JOB: ', job
job["resultsetId"] = result_set["id"] (response, job) = self.service.saveJob(job) if response != 200: raise Exception("could not save job status.") return True if __name__ == "__main__": global CLUSTER_STATUS service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE) # insert mock dataset for front end testing, ensures a usable UI for shape in [1, 2]: dataset = GeqeAPI.getMockDataSet(shape=shape) (response, dataset) = service.saveDataset(dataset) if response != 200: print "response: ", response print dataset raise Exception("Could not save MOCK dataset.") (response, clusterStatus) = service.putStatus({"host": platform.node(), "status": "RUNNING"}) if response != 200: print "response: ", response print clusterStatus raise Exception("Could not save cluster status.") thread = JobRunner(conf.LOOPBACK_SERVICE) thread.setDaemon(True) try:
def __init__(self, serviceUrl):
    super(JobRunner, self).__init__()
    self.url = serviceUrl
    self.service = GeqeAPI.GeqeRestHelper(self.url)
        ])
        command.extend([self.service.serviceURL, job['id']])
        command = map(str, command)
        with open('lastcommand.sh', 'w') as handle:
            handle.write(' '.join(command))
        result = subprocess.call(command, stdout=stdoutFile, stderr=stderrFile)
        print 'result: ', str(result)
        stderrFile.close()
        stdoutFile.close()
        return int(result) == 0


if __name__ == '__main__':
    global CLUSTER_STATUS
    service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE)

    (response, clusterStatus) = service.putStatus({'host': platform.node(), 'status': 'RUNNING'})
    if response != 200:
        print 'response: ', response
        print clusterStatus
        raise Exception("Could not save cluster status.")

    thread = JobRunner(conf.LOOPBACK_SERVICE)
    thread.setDaemon(True)
    try:
        thread.start()
        while thread.isAlive():
""" if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( "serviceUrl", help="loop back data service url ie http://localhost:5500") parser.add_argument("username", help="geqe username") parser.add_argument("name", help="name of the dataset / job") parser.add_argument("resultSetPath", help="path to the result file.") parser.set_defaults(delete=False) args = parser.parse_args() dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl) # create a job object job = { "name": args.name, "status": "WAITING", "queryType": "unkown", "limit": -1, "username": args.username } (response, job) = dataConnector.saveJob(job) if response != 200: print "ERROR: Could not save job. = ", response, job sys.exit(1) # save the result set
"name": args.name, "status": "WAITING", "queryType": "unkown", "limit" : -1, "username": args.username } (response,job) = dataConnector.saveJob(job) if response != 200: print "ERROR: Could not save job. = ",response,job sys.exit(1) # save the result set with open(args.resultSetPath,'r') as handle: data = json.loads(handle.read()) result_set = GeqeAPI.rawResultToResultDocument(job,data) (response,result_set) = dataConnector.saveResultset(result_set) if response != 200: print "ERROR: Could not save result. = ",response,result_set sys.exit(1) job['status'] = 'SUCCESS' job['queryType'] = result_set['type'] job['resultsetId'] = result_set['id'] (response,job) = dataConnector.saveJob(job) if response != 200: print "ERROR: Could not save job. = ",response,job sys.exit(1) print 'success'
str(response) + " \n" + str(result_set)) time.sleep(5) job['status'] = 'SUCCESS' job['resultsetId'] = result_set['id'] (response, job) = self.service.saveJob(job) if response != 200: raise Exception("could not save job status.") return True if __name__ == '__main__': global CLUSTER_STATUS service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE) # insert mock dataset for front end testing, ensures a usable UI for shape in [1, 2]: dataset = GeqeAPI.getMockDataSet(shape=shape) (response, dataset) = service.saveDataset(dataset) if response != 200: print 'response: ', response print dataset raise Exception("Could not save MOCK dataset.") (response, clusterStatus) = service.putStatus({ 'host': platform.node(), 'status': 'RUNNING' }) if response != 200: