Python ExtractObject Examples, extract_utility.ExtractObject Python Examples

Example #1

0

Show file

File: extract_check.py Project: aiddata/geo-hpc

    def __init__(self, config, boundary, dataset, data, extract_type,
                 temporal_type, version):
        """Record the parameters of one extract and validate its type.

        The mongo client is taken from `config.client`, and handles to the
        `asdf.extracts` and `asdf.msr` collections are kept on the instance.
        The output base directory is built from `config.branch_dir`,
        "outputs/extracts" and `version` with dots turned into underscores.

        Raises:
            Exception: if `extract_type` is not among the extract options
                exposed by `extract_utility.ExtractObject`.
        """
        self.config = config

        # mongo client and the collections used by this object
        mongo_client = self.config.client
        self.client = mongo_client
        self.c_extracts = mongo_client.asdf.extracts
        self.c_msr = mongo_client.asdf.msr

        # versioned output directory, e.g. version "a.b" -> ".../a_b"
        version_dirname = '_'.join(version.split('.'))
        self.base = os.path.join(
            config.branch_dir, "outputs/extracts", version_dirname)

        # request parameters, stored as given
        self.boundary = boundary
        self.dataset = dataset
        self.data = data
        self.extract_type = extract_type
        self.temporal_type = temporal_type
        self.version = version

        # the set of valid extract types is owned by ExtractObject
        self.extract_options = extract_utility.ExtractObject()._extract_options

        type_is_valid = extract_type in self.extract_options
        if not type_is_valid:
            raise Exception('invalid extract type')

        self.extract_path = None

Example #2

0

Show file

    def __init__(self, client=None):
        """Define the accepted field values and bind the mongo collection.

        Args:
            client: mongo client exposing an `asdf` database, or None.
                When None (the default), `self.client` and `self.c_asdf`
                are both left as None so the object can still be built and
                `self.types` used without a database connection.
        """

        # self.interface = False
        # self.user_update = True

        # base path
        # self.dir_base = os.path.dirname(os.path.abspath(__file__))

        # # current datapackage fields
        # self.fields = json.load(open(self.dir_base + "/fields.json", 'r'))

        # acceptable inputs for various fields (dataset types,
        # vector formats, raster formats, etc.)
        self.types = {
            "data": {
                'raster': 'raster',
                'boundary': 'vector',
                # 'polydata': 'vector',
                # 'point': 'vector',
                # 'multipoint': 'vector',
                'release': 'vector'
                # 'document': 'other'
            },
            "file_extensions": {
                "vector": ['geojson', 'shp'],
                "raster": ['tif', 'asc']
            },
            "extracts": None,
            "group_class": ['actual', 'sub']
        }

        # valid extract types are owned by ExtractObject
        exo = extract_utility.ExtractObject()
        self.types['extracts'] = exo._extract_options

        # init mongo
        # The signature defaults `client` to None, so do not dereference it
        # unconditionally: that made the default argument unusable
        # (AttributeError on `None.asdf`).
        self.client = client
        self.c_asdf = None if client is None else client.asdf.data

Example #3

0

Show file

File: builder.py Project: MaxwellMkondiwa/geo-hpc

    # Abort when any option listed in `missing_defaults` is also absent from
    # this dataset's own options, i.e. it cannot be resolved from either place.
    # NOTE(review): the message below reads "from both dataset default
    # options" -- it looks like the word "and" is missing; confirm the
    # intended wording before changing the runtime string.
    if any([i not in dataset_options for i in missing_defaults]):
        sys.exit("builder.py has terminated : required option(s) missing " +
                 "from both dataset default options.")

    # gather all relevant options
    # (a dataset-level value takes precedence; otherwise fall back to the
    # value under job_json['defaults'])
    for k in required_options:
        if k in dataset_options:
            tmp_config[k] = dataset_options[k]
        else:
            tmp_config[k] = job_json['defaults'][k]

    # ==================================================

    # init / setup extract and generate qlist

    exo = extract_utility.ExtractObject(builder=True)

    exo.set_vector_path(tmp_config['bnd_absolute'])

    # When the file mask is the literal string "None" and data_base is not an
    # existing file, replace data_base with the first file that os.walk
    # reports for the top-level directory. Both `break`s stop after that
    # first directory / first file; if it holds no files, data_base is left
    # unchanged.
    if tmp_config['file_mask'] == "None" and not os.path.isfile(
            tmp_config['data_base']):
        for root, dirs, files in os.walk(tmp_config['data_base']):
            for file in files:
                tmp_config['data_base'] = os.path.join(root, file)
                break
            break

    exo.set_base_path(tmp_config['data_base'])

    exo.set_years(tmp_config['years'])

Example #4

0

Show file

File: runscript.py Project: MirandaLv/extract-scripts

def tmp_worker_job(self, task_index, task_data):
    """Run one extract task on this worker and return its status code.

    `task_data[0]` selects a dataset in `input_json['job']['datasets']` and
    `task_data[1]` selects an entry of that dataset's `qlist`. The extract is
    configured through an `ExtractObject`, written to a csv under
    `<output_base>/<bnd_name>/cache/<data_name>/`, and progress is printed.

    Returns:
        1 when the extract results were consumed without error,
        -2 when a MemoryError was raised, -1 for any other exception.
    """
    worker_tagline = 'Worker %s | Task %s - ' % (self.rank, task_index)

    dataset_index, qlist_index = task_data[0], task_data[1]

    # dataset entry for this task: name, settings and the queued item
    job_dataset = input_json['job']['datasets'][dataset_index]
    data_name = job_dataset['name']
    settings = job_dataset['settings']
    item = job_dataset['qlist'][qlist_index]

    # ==================================================

    # inputs (see jobscript_template comments for detailed descriptions
    #   of inputs); all but bnd_name and output_base are handed to the
    #   ExtractObject below
    (bnd_name,        # boundary name
     bnd_absolute,    # absolute path of boundary file
     data_base,       # folder which contains data (or data file)
     year_string,     # string containing year information
     file_mask,       # file mask for dataset files
     extract_type,    # extract type
     output_base      # output folder
     ) = [settings[key] for key in (
         'bnd_name', 'bnd_absolute', 'data_base', 'years',
         'file_mask', 'extract_type', 'output_base')]

    # temporal label comes from the first element of the qlist item;
    # an empty label means the raster is named after the dataset alone
    temporal = ''.join(str(part) for part in item[0])
    if temporal:
        raster_name = data_name + "_" + temporal
    else:
        raster_name = data_name
        temporal = 'na'

    # ==================================================

    exo = extract_utility.ExtractObject()
    exo.set_vector_path(bnd_absolute)
    exo.set_base_path(data_base)
    exo.set_years(year_string)
    exo.set_file_mask(file_mask)

    # categorical extracts additionally need the category definitions
    if extract_type == "categorical":
        type_args = (extract_type, settings['categories'])
    else:
        type_args = (extract_type,)
    exo.set_extract_type(*type_args)

    # ==================================================

    output_dir = os.path.join(output_base, bnd_name, "cache", data_name)

    # creates directories (an already existing directory is fine)
    try:
        os.makedirs(output_dir)
    except OSError as mkdir_error:
        if mkdir_error.errno != errno.EEXIST:
            raise

    # ==================================================

    # generate raster path
    raster = (item[1] if exo._run_option == "1"
              else exo._base_path + "/" + item[1])

    # shared tail of every status message
    detail_fmt = '\n\tvector: (%s) %s\n\traster: (%s) %s\n\tmethod: %s '
    detail_args = (bnd_name, bnd_absolute, raster_name, raster, extract_type)

    # run extract
    print((worker_tagline + 'running extract: ' + detail_fmt) % detail_args)

    extract_results = exo.run_extract(raster)

    # generate output path
    file_name = '.'.join([data_name, temporal, exo._extract_type]) + ".csv"
    output = os.path.join(output_dir, file_name)

    extract_results = exo.export_to_csv(extract_results, output)
    # extract_results = exo.export_to_db(extract_results)

    try:
        started = int(time.time())
        # exhaust the results iterable; the values themselves are not used
        for _ in extract_results:
            pass
        elapsed = int(time.time() - started)

        extract_status = 1
        print((worker_tagline + 'completed extract in %s seconds' +
               detail_fmt) % ((elapsed,) + detail_args))

    except MemoryError:
        extract_status = -2
        print((worker_tagline + 'memory error (%s)' + detail_fmt) %
              ((extract_status,) + detail_args))

    except Exception:
        extract_status = -1
        print((worker_tagline + 'unknown error (%s)' + detail_fmt) %
              ((extract_status,) + detail_args))

        # short traceback of the exception being handled, on stdout
        traceback.print_exc(limit=2, file=sys.stdout)

    return extract_status

Example #5

0

Show file

def tmp_worker_job(self, task_index, task_data):
    """Run one extract task and record its status in the extract queue.

    `task_data` is a mapping read for the keys 'bnd_absolute', 'data_path',
    'extract_type', 'bnd_name', 'dataset_name', 'data_name', 'output_base',
    '_id' and, for "categorical" / "encoded" extracts, 'category_map'.

    The extract is written to a csv under
    `<output_base>/<bnd_name>/cache/<dataset_name>/`, progress is printed,
    and the queue document whose `_id` matches `task_data['_id']` is updated
    through the module-level `c_extracts` collection.

    Returns:
        1 when the extract results were consumed without error,
        -2 when a MemoryError was raised, -1 for any other exception.
    """
    worker_tagline = "Worker {0} | Task {1} - ".format(self.rank, task_index)

    # =================================

    # inputs (see jobscript_template comments for detailed
    # descriptions of inputs)
    # * = managed by ExtractObject

    # absolute path of boundary file *
    bnd_absolute = task_data['bnd_absolute']

    # raster file or dataset directory *
    data_path = task_data['data_path']

    # extract type *
    extract_type = task_data['extract_type']

    # boundary, dataset and raster names
    bnd_name = task_data['bnd_name']
    dataset_name = task_data['dataset_name']
    data_name = task_data['data_name']

    # output directory
    output_base = task_data['output_base']

    # =================================

    exo = extract_utility.ExtractObject()

    exo.set_vector_path(bnd_absolute)

    exo.set_base_path(data_path)

    # only categorical / encoded extracts carry a category map
    category_map = None
    if extract_type in ["categorical", "encoded"]:
        category_map = task_data['category_map']

    exo.set_extract_type(extract_type, category_map=category_map)

    # =================================

    output_dir = os.path.join(output_base, bnd_name, "cache", dataset_name)

    # creates directories (an already existing directory is fine)
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    # =================================

    # generate raster path
    raster = data_path

    # run extract

    # NOTE: every message is formatted *inside* the print parentheses.
    # `print (...).format(...)` only works as a Python 2 print statement;
    # with print as a function it prints the raw template and then fails
    # on `None.format`. This form prints the same text under both.
    print(("{0} running extract: "
           "\n\tvector: ({2}) {3}"
           "\n\traster: ({4}) {5}"
           "\n\tmethod: {6}").format(
               worker_tagline, None, bnd_name, bnd_absolute,
               data_name, raster, extract_type))

    run_data = exo.run_feature_extract(raster, pixel_limit=job.pixel_limit)

    # generate output path
    # temporal label = text after the last underscore of data_name
    # NOTE(review): rindex raises ValueError when data_name contains no
    # underscore -- confirm callers always supply one.
    temporal = data_name[data_name.rindex('_')+1:]
    temporal = temporal if temporal != '' else 'na'
    file_name = '.'.join([dataset_name, temporal, exo._extract_type]) + ".csv"
    output = os.path.join(output_dir, file_name)

    run_data = exo.export_to_csv(run_data, output)

    # run_data = exo.export_to_db(
    #     stats = run_data,
    #     client = client,
    #     bnd_name = bnd_name,
    #     data_name = data_name,
    #     ex_method = extract_type,
    #     classification = task_data['classification'],
    #     ex_version = version
    # )

    try:
        Te_start = int(time.time())
        # exhaust the results iterable; the values themselves are not used
        for _ in run_data:
            pass
        Te_run = int(time.time() - Te_start)

        extract_status = 1
        print(("{0} completed extract in {1} seconds"
               "\n\tvector: ({2}) {3}"
               "\n\traster: ({4}) {5}"
               "\n\tmethod: {6}").format(
                   worker_tagline, Te_run, bnd_name, bnd_absolute,
                   data_name, raster, extract_type))

    except MemoryError:
        extract_status = -2

        print(("{0} memory error ({1})"
               "\n\tvector: ({2}) {3}"
               "\n\traster: ({4}) {5}"
               "\n\tmethod: {6}").format(
                   worker_tagline, extract_status, bnd_name, bnd_absolute,
                   data_name, raster, extract_type))

    except Exception:
        extract_status = -1

        print(("{0} unknown error ({1})"
               "\n\tvector: ({2}) {3}"
               "\n\traster: ({4}) {5}"
               "\n\tmethod: {6}").format(
                   worker_tagline, extract_status, bnd_name, bnd_absolute,
                   data_name, raster, extract_type))

        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=2, file=sys.stdout)

    # update status of item in extract queue
    # (both timestamps are written for every outcome, success or failure)
    c_extracts.update_one({
        '_id': task_data['_id']
    }, {
        '$set': {
            'status': extract_status,
            'update_time': int(time.time()),
            'complete_time': int(time.time())
        }
    }, upsert=False)

    return extract_status