Example #1
    def executeJob(self, job):

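        # simulated work delay for the mock runner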
        time.sleep(5)
        job['status'] = 'RUNNING'
        (responseCode, job) = self.service.saveJob(job)
        if responseCode != 200: raise Exception("could not save job status.")

        filePath = conf.MOCK_DATA_PATH[job['queryType']]
        with open(filePath, 'r') as handle:
            data = json.loads(handle.read())
        result_set = GeqeAPI.rawResultToResultDocument(job, data)
        (response, result_set) = self.service.saveResultset(result_set)
        if response != 200:
            job['status'] = 'FAILED'
            self.service.saveJob(job)
            print str(result_set)
            raise Exception("Could not save result set. error: " +
                            str(response))

        if 'modelSavePath' in job and job['modelSavePath'] is not None:
            # save the model meta data
            modelData = {
                "name": job['name'],
                "username": job["username"],
                "queryType": job["queryType"],
                "modelSavePath": job['modelSavePath'],
                "siteListId": job["siteListId"],
                "datasetId": job["datasetId"]
            }

            (response, modelData) = self.service.saveModelData(modelData)
            if response != 200:
                job['status'] = 'FAILED'
                self.service.saveJob(job)
                raise Exception("Could not save model metadata: " +
                                str(response) + " \n" + str(modelData))

        # save the result set into Elasticsearch
        if conf.ES_HOST is not None:
            elaticsearchConnetor = GeqeAPI.ElasticSearchHelper(
                conf.ES_HOST, port=conf.ES_PORT)
            (response,
             es_result) = elaticsearchConnetor.addResultSet(result_set)
            if response != 201:
                job['status'] = 'FAILED'
                self.service.saveJob(job)
                raise Exception("Could not save result set to es. error: " +
                                str(response) + " \n" + str(result_set))

        time.sleep(5)
        job['status'] = 'SUCCESS'
        job['resultsetId'] = result_set['id']
        (response, job) = self.service.saveJob(job)
        if response != 200:
            raise Exception("could not save job status.")

        return True
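
Every service call in these snippets follows one convention: GeqeAPI.GeqeRestHelper methods return a (statusCode, document) tuple, where 200 (201 for Elasticsearch inserts) signals success and the returned document carries any server-assigned fields such as 'id'. A minimal in-memory stand-in honoring that contract, useful for exercising executeJob offline, might look like the following sketch (FakeGeqeService and everything in it is hypothetical, not part of GeqeAPI):

    # Hypothetical stand-in service; only the (status, document) return
    # shape is taken from the snippets above.
    import uuid

    class FakeGeqeService(object):

        def __init__(self):
            self.store = {}  # document id -> document

        def _save(self, doc):
            # assign an id on first save, as the real service appears to do
            doc.setdefault('id', str(uuid.uuid4()))
            self.store[doc['id']] = doc
            return (200, doc)

        def saveJob(self, job):
            return self._save(job)

        def saveResultset(self, result_set):
            return self._save(result_set)

        def saveModelData(self, modelData):
            return self._save(modelData)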
Example #2
      }
    ]
  }

"""


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("serviceUrl", help="loop back data service url ie http://localhost:5500")
    parser.add_argument("inputFile", help="polygon file to upload")
    parser.add_argument("--name", help="name of the dataset")
    parser.add_argument("--username", help="name of the user account.")
    args = parser.parse_args()

    dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl)

    if args.name is None or args.username is None:
        print 'name and username are required'
        parser.print_help()
        sys.exit(1)



    with open(args.inputFile) as handle:
        data = json.loads(handle.read())
        data['name'] = args.name
        data['username'] = args.username
        for site in data['sites']:
            if 'dates' in site and len(site['dates']) > 0:
                for daterange in site['dates']:
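
The snippet cuts off inside the date loop, but the fields it touches suggest an input document shaped roughly like the sketch below (only name, username, sites, and dates appear in the code above; the daterange keys are illustrative):

    {
      "name": "my-sitelist",
      "username": "analyst1",
      "sites": [
        {
          "dates": [
            {"min": "2015-06-01", "max": "2015-06-07"}
          ]
        }
      ]
    }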
Example #3
def executeJob(dataConnector, elaticsearchConnetor, job, dataset, sitelist):

    # Declare the Spark context
    conf = SparkConf().setAppName("GeqeRunner")
    conf.set('spark.driver.maxResultSize', '0')
    sc = SparkContext(conf=conf)
    sqlContext = SQLContext(sc)

    # Create polygon list and broadcast variable based on it
    lPolygon = shapeReader.readInShapeDocument(
        sitelist) if sitelist is not None else None

    queryType = job['queryType']

    jobname = job['username'] + '_' + job['name']
    modelSavePath = job['modelSavePath'] if 'modelSavePath' in job else None

    datasetPath = None
    inputPartitions = None
    if job['queryType'] == 'location':
        datasetPath = dataset['location_path']
        inputPartitions = dataset['location_partitions']
    elif job['queryType'] == 'event':
        datasetPath = dataset['event_path']
        inputPartitions = dataset['event_partitions']
    else:
        raise ValueError("invalid query type: " + str(job['queryType']))

    dictionaryPath = dataset['dictionaryPath']
    nDataType = int(dataset['type'])

    modelData = None
    if 'geqeModelId' in job and job['geqeModelId'] != "":
        (response, modelData) = dataConnector.getModelData(job['geqeModelId'])
        if response != 200: raise Exception("Could not load geqeModel")
        # if modelData['dictionaryPath'] != dictionaryPath:
        #     raise ValueError("dataset dictionary and model dictionary do not match")

    sNum = job['limit'] if 'limit' in job else 25

    result = None
    if modelData:
        modelPath = modelData['modelSavePath'] + '/' + modelData['name']
        result = run_refindSimilarPlaces(sc,
                                         sqlContext,
                                         jobname,
                                         datasetPath,
                                         dictionaryPath,
                                         inputPartitions,
                                         sNum,
                                         modelPath,
                                         bByDate=queryType == 'event')
    elif 'location' == queryType:
        result = run_findSimilarPlaces(sc, sqlContext, jobname, datasetPath,
                                       dictionaryPath, nDataType,
                                       inputPartitions, sNum, lPolygon,
                                       modelSavePath)
    elif 'event' == queryType:
        result = run_findSimilarEvent(sc, sqlContext, jobname, datasetPath,
                                      dictionaryPath, nDataType,
                                      inputPartitions, sNum, lPolygon,
                                      modelSavePath)
    else:
        raise Exception("Invalid query type: " + queryType)

    if modelSavePath is not None:
        # save the model meta data
        modelData = {
            "name": jobname,
            "username": job["username"],
            "queryType": job["queryType"],
            "modelSavePath": modelSavePath,
            "siteListId": job["siteListId"],
            "datasetId": job["datasetId"]
        }
        (response, modelData) = dataConnector.saveModelData(modelData)
        if response != 200:
            job['status'] = 'FAILED'
            dataConnector.saveJob(job)
            raise Exception("Could not save model metadata: " + str(response) +
                            " \n" + str(modelData))

    result = GeqeAPI.rawResultToResultDocument(job, result)
    # save the result
    (response, result_set) = dataConnector.saveResultset(result)
    if response != 200:
        job['status'] = 'FAILED'
        dataConnector.saveJob(job)
        raise Exception("Could not save result set. error: " + str(response) +
                        " \n" + str(result_set))

    # save the result set into Elasticsearch
    if elaticsearchConnetor is not None:
        (response, es_result) = elaticsearchConnetor.addResultSet(result_set)
        if response != 201:
            job['status'] = 'FAILED'
            dataConnector.saveJob(job)
            raise Exception("Could not save result set to es. error: " +
                            str(response) + " \n" + str(result_set))

    # update the job info
    job['status'] = 'SUCCESS'
    job['resultsetId'] = result_set['id']
    (response, job) = dataConnector.saveJob(job)
    if response != 200:
        raise Exception("could not save job status.")
Example #4
        raise Exception("could not save job status.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("serviceUrl", help="loop back data service url")
    parser.add_argument("--elasticsearchHost",
                        help="Host name or ip address for elastic search.")
    parser.add_argument("--elasticsearchPort",
                        type=int,
                        help="Port for elastic search defaults to 9200",
                        default=9200)
    parser.add_argument("jobId", help="The job to execute")
    args = parser.parse_args()

    dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl)
    elaticsearchConnetor = GeqeAPI.ElasticSearchHelper(
        args.elasticsearchHost,
        port=args.elasticsearchPort) if args.elasticsearchHost else None

    # Job
    (responseCode, job) = dataConnector.getJob(args.jobId)
    if 200 != responseCode:
        raise Exception("Could not read job: " + args.jobId + ' response: ' +
                        str(responseCode))
    job['status'] = 'RUNNING'
    (responseCode, job) = dataConnector.saveJob(job)
    if 200 != responseCode:
        raise Exception("Could not save job: " + args.jobId + ' response: ' +
                        str(responseCode))
    print 'JOB: ', job
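
Given the argparse setup above, a typical invocation (the script name here is assumed) would be:

    python geqeJobRunner.py http://localhost:5500 <jobId> --elasticsearchHost localhost --elasticsearchPort 9200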
Example #5
        job["resultsetId"] = result_set["id"]
        (response, job) = self.service.saveJob(job)
        if response != 200:
            raise Exception("could not save job status.")

        return True


if __name__ == "__main__":
    global CLUSTER_STATUS

    service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE)

    # insert mock datasets for front-end testing to ensure a usable UI
    for shape in [1, 2]:
        dataset = GeqeAPI.getMockDataSet(shape=shape)
        (response, dataset) = service.saveDataset(dataset)
        if response != 200:
            print "response: ", response
            print dataset
            raise Exception("Could not save MOCK dataset.")

    (response, clusterStatus) = service.putStatus({"host": platform.node(), "status": "RUNNING"})
    if response != 200:
        print "response: ", response
        print clusterStatus
        raise Exception("Could not save cluster status.")

    thread = JobRunner(conf.LOOPBACK_SERVICE)
    thread.setDaemon(True)
    try:
Example #6
    def __init__(self, serviceUrl):
        super(JobRunner, self).__init__()
        self.url = serviceUrl
        self.service = GeqeAPI.GeqeRestHelper(self.url)
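
The __main__ blocks start JobRunner as a daemon thread, so the class presumably subclasses threading.Thread with a polling run loop. A plausible skeleton (the run method and getNextWaitingJob are assumptions, not the project's actual code) is:

    # Hypothetical JobRunner skeleton; only __init__ is taken from the snippet above.
    import threading
    import time

    class JobRunner(threading.Thread):

        def __init__(self, serviceUrl):
            super(JobRunner, self).__init__()
            self.url = serviceUrl
            self.service = GeqeAPI.GeqeRestHelper(self.url)

        def run(self):
            # assumed polling loop; getNextWaitingJob is illustrative only
            while True:
                (response, job) = self.service.getNextWaitingJob()
                if response == 200 and job is not None:
                    self.executeJob(job)
                time.sleep(5)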
Example #7
            ])
        command.extend([self.service.serviceURL, job['id']])
        command = map(str, command)
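        # keep a copy of the exact command on disk for manual replay and debugging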
        with open('lastcommand.sh', 'w') as handle:
            handle.write(' '.join(command))
        result = subprocess.call(command, stdout=stdoutFile, stderr=stderrFile)
        print 'result: ', str(result)
        stderrFile.close()
        stdoutFile.close()
        return int(result) == 0


if __name__ == '__main__':
    global CLUSTER_STATUS

    service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE)

    (response, clusterStatus) = service.putStatus({
        'host': platform.node(),
        'status': 'RUNNING'
    })
    if response != 200:
        print 'response: ', response
        print clusterStatus
        raise Exception("Could not save cluster status.")

    thread = JobRunner(conf.LOOPBACK_SERVICE)
    thread.setDaemon(True)
    try:
        thread.start()
        while thread.isAlive():
Example #8
"""

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "serviceUrl",
        help="loopback data service url, e.g. http://localhost:5500")
    parser.add_argument("username", help="geqe username")
    parser.add_argument("name", help="name of the dataset / job")
    parser.add_argument("resultSetPath", help="path to the result file.")

    parser.set_defaults(delete=False)
    args = parser.parse_args()

    dataConnector = GeqeAPI.GeqeRestHelper(args.serviceUrl)

    # create a job object
    job = {
        "name": args.name,
        "status": "WAITING",
        "queryType": "unkown",
        "limit": -1,
        "username": args.username
    }
    (response, job) = dataConnector.saveJob(job)
    if response != 200:
        print "ERROR: Could not save job. = ", response, job
        sys.exit(1)

    # save the result set
Example #9
        "name": args.name,
        "status": "WAITING",
        "queryType": "unkown",
        "limit" : -1,
        "username": args.username
    }
    (response,job) = dataConnector.saveJob(job)
    if response != 200:
        print "ERROR: Could not save job. = ",response,job
        sys.exit(1)


    # save the result set
    with open(args.resultSetPath,'r') as handle:
        data = json.loads(handle.read())
    result_set = GeqeAPI.rawResultToResultDocument(job,data)
    (response,result_set) = dataConnector.saveResultset(result_set)
    if response != 200:
        print "ERROR: Could not save result. = ",response,result_set
        sys.exit(1)

    job['status'] = 'SUCCESS'
    job['queryType'] = result_set['type']
    job['resultsetId'] = result_set['id']
    (response,job) = dataConnector.saveJob(job)
    if response != 200:
        print "ERROR: Could not save job. = ",response,job
        sys.exit(1)

    print 'success'
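
Matching the argparse definition in Example #8, a run of this loader (script name assumed) looks like:

    python loadResultSet.py http://localhost:5500 analyst1 my-dataset results.json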
Example #10
                                str(response) + " \n" + str(result_set))

        time.sleep(5)
        job['status'] = 'SUCCESS'
        job['resultsetId'] = result_set['id']
        (response, job) = self.service.saveJob(job)
        if response != 200:
            raise Exception("could not save job status.")

        return True


if __name__ == '__main__':
    global CLUSTER_STATUS

    service = GeqeAPI.GeqeRestHelper(conf.LOOPBACK_SERVICE)

    # insert mock datasets for front-end testing to ensure a usable UI
    for shape in [1, 2]:
        dataset = GeqeAPI.getMockDataSet(shape=shape)
        (response, dataset) = service.saveDataset(dataset)
        if response != 200:
            print 'response: ', response
            print dataset
            raise Exception("Could not save MOCK dataset.")

    (response, clusterStatus) = service.putStatus({
        'host': platform.node(),
        'status': 'RUNNING'
    })
    if response != 200: