Example 1
def createJsonRowJob(item, tmpdir):
    jsonFilename = item['name'] + '.json'
    jsonFilepath = os.path.join(tmpdir, item['name'], jsonFilename)
    # take the only entry of the array
    jsonRow = jsonArrayHead(jsonFilepath, limit=1)[0]
    item['meta']['minerva']['json_row'] = jsonRow
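This snippet and the jobs below all lean on a `jsonArrayHead` helper to pull the leading entries out of a JSON array stored on disk. Its implementation is not shown in these examples; a minimal sketch of the behavior the calls assume (here `limit=1` followed by `[0]` to take the single entry) might look like the following, with whatever streaming the real helper does omitted:

import json


def jsonArrayHead(filepath, limit=1):
    # Illustrative sketch only: load the JSON array stored at `filepath`
    # and return at most `limit` leading entries. The real helper may
    # stream the file rather than reading it all at once.
    with open(filepath) as reader:
        data = json.load(reader)
    if not isinstance(data, list):
        raise ValueError('%s does not contain a JSON array' % filepath)
    return data[:limit]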
Example 2
def run(job):
    job_model = ModelImporter.model('job', 'jobs')
    job_model.updateJob(job, status=JobStatus.RUNNING)

    try:
        configFile = os.path.join(os.path.dirname(__file__), "bsve.json")
        if os.path.exists(configFile):
            with open(configFile) as reader:
                bsveConfig = json.load(reader)['bsve']
        else:
            bsveConfig = {}

        kwargs = job['kwargs']
        bsveSearchParams = kwargs['params']['bsveSearchParams']
        datasetId = str(kwargs['dataset']['_id'])
        # TODO better to create a job token rather than a user token?
        token = kwargs['token']

        bsveUtility = BsveUtility(
            user=bsveConfig.get(
                'USER_NAME', os.environ.get('BSVE_USERNAME')),
            apikey=bsveConfig.get(
                'API_KEY', os.environ.get('BSVE_APIKEY')),
            secret=bsveConfig.get(
                'SECRET_KEY', os.environ.get('BSVE_SECRETKEY')),
            base=bsveConfig.get('BASE_URL')
        )

        # TODO sleeping in async thread, probably starving other tasks
        # would be better to split this into two or more parts, creating
        # additional jobs as needed
        searchResult = bsveUtility.search(bsveSearchParams)

        # write the output to a json file
        tmpdir = tempfile.mkdtemp()
        fd, outFilepath = tempfile.mkstemp(suffix='.json', dir=tmpdir)
        os.close(fd)
        with open(outFilepath, 'w') as writer:
            writer.write(json.dumps(searchResult))

        # rename the file so it will have the right name when uploaded
        # could probably be done post upload
        outFilename = 'search.json'
        humanFilepath = os.path.join(tmpdir, outFilename)
        shutil.move(outFilepath, humanFilepath)

        # connect to girder and upload the file
        # TODO will probably have to change this from local to girder worker
        # so that can work on worker machine
        # at least need host connection info
        girderPort = config.getConfig()['server.socket_port']
        client = girder_client.GirderClient(port=girderPort)
        client.token = token['_id']

        client.uploadFileToItem(datasetId, humanFilepath)

        # TODO some stuff here using models will only work on a local job
        # will have to be rewritten using girder client to work in girder worker
        # non-locally

        user_model = ModelImporter.model('user')
        user = user_model.load(job['userId'], force=True)
        item_model = ModelImporter.model('item')

        dataset = item_model.load(datasetId, level=AccessType.WRITE, user=user)
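        # mM presumably returns the dataset's existing 'minerva' metadata here
        # and, when called with a second argument (as at the end of this job),
        # writes the updated metadata back to the dataset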
        minerva_metadata = mM(dataset)

        file_model = ModelImporter.model('file')
        existing = file_model.findOne({
            'itemId': dataset['_id'],
            'name': outFilename
        })
        if existing:
            minerva_metadata['original_files'] = [{
                '_id': existing['_id'],
                'name': outFilename
            }]
        else:
            raise Exception('Cannot find file %s in dataset %s' %
                            (outFilename, datasetId))

        jsonRow = jsonArrayHead(humanFilepath, limit=1)[0]
        minerva_metadata['json_row'] = jsonRow

        # Generate the geojson for this dataset and set
        # dataset_type = geojson

        geojsonFilename = 'search.geojson'
        geojsonFilepath = os.path.join(tmpdir, geojsonFilename)

        mapping = {
            "dateKeypath": "",
            "latitudeKeypath": "data.Latitude",
            "longitudeKeypath": "data.Longitude"
        }

        geojsonMapper = GeoJsonMapper(objConverter=None, mapping=mapping)
        objects = jsonObjectReader(humanFilepath)
        geojsonMapper.mapToJsonFile(tmpdir, objects, geojsonFilepath)

        client.uploadFileToItem(datasetId, geojsonFilepath)
        shutil.rmtree(tmpdir)

        minerva_metadata['mapper'] = mapping
        minerva_metadata['dataset_type'] = 'geojson'

        existing = file_model.findOne({
            'itemId': dataset['_id'],
            'name': geojsonFilename
        })
        if existing:
            minerva_metadata['geojson_file'] = {
                '_id': existing['_id'],
                'name': geojsonFilename
            }
        else:
            raise Exception('Cannot find file %s in dataset %s' %
                            (geojsonFilename, datasetId))

        mM(dataset, minerva_metadata)
        job_model.updateJob(job, status=JobStatus.SUCCESS)
    except Exception:
        t, val, tb = sys.exc_info()
        log = '%s: %s\n%s' % (t.__name__, repr(val), traceback.extract_tb(tb))
        # TODO only works locally
        job_model.updateJob(job, status=JobStatus.ERROR, log=log)
        raise
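The GeoJSON step above hands `GeoJsonMapper` a keypath mapping pointing at `data.Latitude` and `data.Longitude` in each search record. The mapper itself is part of minerva and is not reproduced here; as a rough, hypothetical illustration of what that mapping implies for a single record, a helper (call it `recordToGeojsonFeature`, not part of the real API) could build a Point feature like this:

import json


def recordToGeojsonFeature(record, mapping):
    # Hypothetical sketch, not the real GeoJsonMapper: walk the dotted
    # keypaths from the mapping and emit a GeoJSON Point feature.
    def lookup(obj, keypath):
        for key in keypath.split('.'):
            obj = obj[key]
        return obj

    return {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [
                float(lookup(record, mapping['longitudeKeypath'])),
                float(lookup(record, mapping['latitudeKeypath']))
            ]
        },
        'properties': record
    }


mapping = {
    'dateKeypath': '',
    'latitudeKeypath': 'data.Latitude',
    'longitudeKeypath': 'data.Longitude'
}
print(json.dumps(recordToGeojsonFeature(
    {'data': {'Latitude': '42.3', 'Longitude': '-71.1'}}, mapping)))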
Example 3
def run(job):
    job_model = ModelImporter.model('job', 'jobs')
    job_model.updateJob(job, status=JobStatus.RUNNING)

    try:
        kwargs = job['kwargs']
        # TODO better to create a job token rather than a user token?
        token = kwargs['token']
        datasetId = str(kwargs['dataset']['_id'])

        # connect to girder and upload the file
        # TODO will probably have to change this from local to girder worker
        # so that can work on worker machine
        # at least need host connection info
        girderPort = config.getConfig()['server.socket_port']
        client = girder_client.GirderClient(port=girderPort)
        client.token = token['_id']

        # Get datasource
        source = client.getItem(kwargs['params']['sourceId'])
        esUrl = 'https://%s@%s' % (decryptCredentials(
            source['meta']['minerva']['elasticsearch_params']['credentials']),
            source['meta']['minerva']['elasticsearch_params']['host_name'])
        es = Elasticsearch([esUrl])

        # TODO sleeping in async thread, probably starving other tasks
        # would be better to split this into two or more parts, creating
        # additional jobs as needed
        searchResult = es.search(
            index=source['meta']['minerva']['elasticsearch_params']['index'],
            body=json.loads(kwargs['params']['searchParams']))

        # write the output to a json file
        tmpdir = tempfile.mkdtemp()
        fd, outFilepath = tempfile.mkstemp(suffix='.json', dir=tmpdir)
        os.close(fd)
        with open(outFilepath, 'w') as writer:
            writer.write(json.dumps(searchResult))

        # rename the file so it will have the right name when uploaded
        # could probably be done post upload
        outFilename = 'search.json'
        humanFilepath = os.path.join(tmpdir, outFilename)
        shutil.move(outFilepath, humanFilepath)

        client.uploadFileToItem(datasetId, humanFilepath)

        # TODO some stuff here using models will only work on a local job
        # will have to be rewritten using girder client to work in girder worker
        # non-locally

        user_model = ModelImporter.model('user')
        user = user_model.load(job['userId'], force=True)
        item_model = ModelImporter.model('item')
        # TODO only works locally
        dataset = item_model.load(datasetId, level=AccessType.WRITE, user=user)
        metadata = dataset['meta']
        minerva_metadata = metadata['minerva']

        # TODO only works locally
        file_model = ModelImporter.model('file')
        existing = file_model.findOne({
            'itemId': dataset['_id'],
            'name': outFilename
        })
        if existing:
            minerva_metadata['original_files'] = [{
                '_id': existing['_id'],
                'name': outFilename
            }]
        else:
            raise Exception('Cannot find file %s in dataset %s' %
                            (outFilename, datasetId))

        jsonRow = jsonArrayHead(humanFilepath, limit=1)[0]
        minerva_metadata['json_row'] = jsonRow

        shutil.rmtree(tmpdir)

        metadata['minerva'] = minerva_metadata
        # TODO only works locally
        item_model.setMetadata(dataset, metadata)
        # TODO only works locally
        job_model.updateJob(job, status=JobStatus.SUCCESS)
    except Exception:
        t, val, tb = sys.exc_info()
        log = '%s: %s\n%s' % (t.__name__, repr(val), traceback.extract_tb(tb))
        # TODO only works locally
        job_model.updateJob(job, status=JobStatus.ERROR, log=log)
        raise