Example 1
def migrate(test_db, prod_db, products, expressions):
    """
    Migrate datasets from source database to target database.
    Supports query expressions like the ``datacube`` command.
    Additionally, multiple products may be specified with
    multiple ``--product`` options.
    """
    # connect to the source database
    prod_index = index_connect(prod_db, application_name='test-env')

    uris = collect_uris(prod_index, products, expressions)
    rules = load_rules_from_types(prod_index)

    # connect to the target database
    test_index = index_connect(test_db, application_name='test-env')
    index_uris(test_index, list(uris), rules)
Example 2
def _get_index(config: LocalConfig, variant: str) -> Index:
    # Avoid long names as they will print warnings all the time.
    short_name = variant.replace("_", "")[:20]
    index: Index = index_connect(config,
                                 application_name=f"gen.{short_name}",
                                 validate_connection=False)
    return index
Example 3
def check(config_file):
    """
    Verify & view current configuration
    """
    echo('Version:\t' + style(str(datacube.__version__), bold=True))
    echo('Config files:\t' +
         style(','.join(config_file.files_loaded), bold=True))
    echo('Host:\t\t' + style('{}:{}'.format(
        config_file.db_hostname or 'localhost', config_file.db_port or '5432'),
                             bold=True))
    echo('Database:\t' +
         style('{}'.format(config_file.db_database), bold=True))
    echo('User:\t\t' + style('{}'.format(config_file.db_username), bold=True))

    echo()
    echo('Valid connection:\t', nl=False)
    try:
        index = index_connect(local_config=config_file)
        echo(style('YES', bold=True))
        for role, user, description in index.users.list_users():
            if user == config_file.db_username:
                echo('You have %s privileges.' %
                     style(role.upper(), bold=True))
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
Example 4
def convert_descriptor_query_to_search_query(descriptor=None, index=None):
    descriptor = descriptor or {}
    index = index or index_connect()

    known_fields = index.datasets.get_fields().keys()

    search_query = {
        key: descriptor[key]
        for key in descriptor.keys() if key in known_fields
    }
    unknown_fields = [
        key for key in descriptor.keys()
        if key not in known_fields and key not in
        ['variables', 'variable', 'dimensions', 'dimension', 'storage_type']
    ]
    if unknown_fields:
        _LOG.warning(
            "Some of the fields in the query are unknown and will be ignored: %s",
            ', '.join(unknown_fields))

    if 'storage_type' in descriptor:
        storage_type_name = descriptor.get('storage_type', '')
        storage_type = index.storage.types.get_by_name(storage_type_name)
        if storage_type:
            search_query['type'] = storage_type.id

    descriptor_dimensions = descriptor.get('dimensions', {})
    descriptor_dimensions.update(descriptor.get('dimension', {}))
    search_query.update(
        convert_descriptor_dims_to_search_dims(descriptor_dimensions))
    return search_query
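
Example 4 only copies descriptor keys that match known dataset fields, resolves a storage_type name to its id, and folds dimension constraints into the search query. A minimal sketch of a hypothetical input, purely for illustration (the field names are assumptions, not taken from a real index):

# Hypothetical descriptor for convert_descriptor_query_to_search_query above.
descriptor = {
    'platform': 'LANDSAT_8',                                 # assumed known field: copied into the search query
    'favourite_colour': 'blue',                              # unknown field: warned about, then ignored
    'dimensions': {'longitude': {'range': (148.0, 149.0)}},  # handled by convert_descriptor_dims_to_search_dims
}
# The resulting search query would contain 'platform' plus whatever spatial
# terms the dimension conversion produces.
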
Example 5
def check(local_config: LocalConfig):
    """
    Verify & view current configuration
    """
    def echo_field(name, value):
        echo('{:<15}'.format(name + ':') + style(str(value), bold=True))

    echo_field('Version', datacube.__version__)
    echo_field('Config files', ','.join(local_config.files_loaded))
    echo_field(
        'Host', '{}:{}'.format(local_config['db_hostname'] or 'localhost',
                               local_config.get('db_port', None) or '5432'))

    echo_field('Database', local_config['db_database'])
    echo_field('User', local_config['db_username'])
    echo_field('Environment', local_config['env'])
    echo_field('Index Driver', local_config['index_driver'])

    echo()
    echo('Valid connection:\t', nl=False)
    try:
        index = index_connect(local_config=local_config)
        echo(style('YES', bold=True))
        for role, user, description in index.users.list_users():
            if user == local_config['db_username']:
                echo('You have %s privileges.' %
                     style(role.upper(), bold=True))
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
Example 6
def convert_request_args_to_descriptor_query(request=None, index=None):
    request = request or {}
    request_remaining = request.copy()
    index = index or index_connect()

    descriptor_request = dict()

    if 'variables' in request:
        descriptor_request['variables'] = request_remaining.pop('variables')

    if 'storage_type' in request_remaining:
        descriptor_request['storage_type'] = request_remaining.pop(
            'storage_type')

    known_fields = [
        field_name
        for field_name, field in index.datasets.get_fields().items()
        if not isinstance(field, RangeDocField)
    ]  # Assume range fields will be also dimensions...

    for field in request:
        if field in known_fields:
            descriptor_request[field] = request_remaining.pop(field)

    dimensions = request_remaining.pop('dimensions', {})

    for k, v in request_remaining.items():
        if isinstance(v, slice):
            dimensions[k] = {'array_range': (v.start, v.stop)}
        else:
            dimensions[k] = {'range': v}  # assume range or single value
    descriptor_request['dimensions'] = dimensions
    return descriptor_request
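
In Example 6, whatever remains in the request after the known fields are popped is treated as a dimension: slices become array index ranges, anything else becomes a value range. A small illustrative request (key names are assumptions):

# Hypothetical request for convert_request_args_to_descriptor_query above.
request = {
    'variables': ['red', 'nir'],   # passed through unchanged
    'platform': 'LANDSAT_5',       # assumed known (non-range) dataset field
    'longitude': (148.0, 149.0),   # leftover value -> {'range': (148.0, 149.0)}
    'x': slice(0, 400),            # leftover slice -> {'array_range': (0, 400)}
}
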
Example 7
def init_db(ingestion_request_id=None):
    """Creates a new database and initializes it with the standard agdc schema

    Creates a new database named after the user using a psql call and uses the agdc API
    to initialize the schema.

    """
    ingestion_request = IngestionRequest.objects.get(pk=ingestion_request_id)

    cmd = "createdb -U dc_user -h {} {}".format(
        settings.MASTER_NODE, ingestion_request.get_database_name())
    os.system(cmd)

    config = get_config(ingestion_request.get_database_name())
    index = index_connect(local_config=config, validate_connection=False)
    try:
        index.init_db(with_default_types=True, with_permissions=True)
        index.metadata_types.check_field_indexes(allow_table_lock=True,
                                                 rebuild_indexes=True,
                                                 rebuild_views=True)
    except:
        index.close()
        raise

    index.close()
Example 8
def index(local_config, uninitialised_postgres_db):
    """
    :type uninitialised_postgres_db: datacube.drivers.postgres._connections.PostgresDb
    """
    index = index_connect(local_config, validate_connection=False)
    index.init_db()
    return index
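
Example 8 reads like the body of a pytest fixture with its decorator cropped out of the excerpt. A minimal sketch of how such a fixture might be declared and consumed; the @pytest.fixture decorator, the yield-based teardown and the test are assumptions added for illustration:

import pytest
from datacube.index import index_connect

@pytest.fixture
def index(local_config, uninitialised_postgres_db):
    # connect without validating, then create the schema in the fresh database
    idx = index_connect(local_config, validate_connection=False)
    idx.init_db()
    yield idx      # hand the initialised index to the test
    idx.close()    # teardown after the test completes

def test_index_connects(index):
    # hypothetical test consuming the fixture
    assert index is not None
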
Example 9
def convert_request_args_to_descriptor_query(request=None, index=None):
    request = request or {}
    request_remaining = request.copy()
    index = index or index_connect()

    descriptor_request = dict()

    if 'variables' in request:
        descriptor_request['variables'] = request_remaining.pop('variables')

    if 'storage_type' in request_remaining:
        descriptor_request['storage_type'] = request_remaining.pop('storage_type')

    known_fields = [field_name for field_name, field in index.datasets.get_fields().items()
                    if not isinstance(field, RangeDocField)]  # Assume range fields will be also dimensions...

    for field in request:
        if field in known_fields:
            descriptor_request[field] = request_remaining.pop(field)

    dimensions = request_remaining.pop('dimensions', {})

    for k, v in request_remaining.items():
        if isinstance(v, slice):
            dimensions[k] = {'array_range': (v.start, v.stop)}
        else:
            dimensions[k] = {'range': v} # assume range or single value
    descriptor_request['dimensions'] = dimensions
    return descriptor_request
Example 10
 def with_index(*args, **kwargs):
     ctx = click.get_current_context()
     try:
         index = index_connect(ctx.obj['config_file'],
                               application_name=app_name or ctx.command_path,
                               validate_connection=expect_initialised)
         return f(index, *args, **kwargs)
     except (OperationalError, ProgrammingError) as e:
         handle_exception('Error Connecting to database: %s', e)
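
Examples 10, 13 and 16 show only the inner closure of a decorator: f, app_name, expect_initialised and handle_exception are all bound in the enclosing scope. A rough sketch of that surrounding structure, with the names and defaults being assumptions:

import functools

def pass_index(app_name=None, expect_initialised=True):
    """Hypothetical decorator factory enclosing the with_index closure above."""
    def decorator(f):
        @functools.wraps(f)
        def with_index(*args, **kwargs):
            ...  # body as shown in the example above
        return with_index
    return decorator
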
Example 11
def check(config_file):
    """
    Verify & view current configuration
    """
    echo('Read configuration files from: %s' % config_file.files_loaded)
    echo('Host: {}:{}'.format(config_file.db_hostname or 'localhost', config_file.db_port or '5432'))
    echo('Database: {}'.format(config_file.db_database))
    echo('User: {}'.format(config_file.db_username))

    echo('\n')
    echo('Attempting connect')
    try:
        index_connect(local_config=config_file)
        echo('Success.')
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
Example 12
def check(config_file):
    """
    Verify & view current configuration
    """
    echo('Host: {}:{}'.format(config_file.db_hostname or 'localhost', config_file.db_port or '5432'))
    echo('Database: {}'.format(config_file.db_database))
    echo('User: {}'.format(config_file.db_username))

    echo('\n')
    echo('Attempting connect')
    try:
        index_connect(local_config=config_file)
        echo('Success.')
    # pylint: disable=broad-except
    except Exception:
        _LOG.exception("Connection error")
        echo('Connection error', file=sys.stderr)
        click.get_current_context().exit(1)
Example 13
 def with_index(*args, **kwargs):
     ctx = click.get_current_context()
     try:
         index = index_connect(ctx.obj['config_file'],
                               application_name=app_name or ctx.command_path,
                               validate_connection=expect_initialised)
         _LOG.debug("Connected to datacube index: %s", index)
         return f(index, *args, **kwargs)
     except (OperationalError, ProgrammingError) as e:
         handle_exception('Error Connecting to database: %s', e)
Example 14
def check(config_file):
    """
    Verify & view current configuration
    """
    echo('Read configuration files from: %s' % config_file.files_loaded)
    echo('Host: {}:{}'.format(config_file.db_hostname or 'localhost',
                              config_file.db_port or '5432'))
    echo('Database: {}'.format(config_file.db_database))
    echo('User: {}'.format(config_file.db_username))

    echo('\n')
    echo('Attempting connect')
    try:
        index_connect(local_config=config_file)
        echo('Success.')
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
Example 15
def main(folders: Iterable[str],
         dry_run: bool,
         queue: str,
         project: str,
         work_folder: str,
         cache_folder: str,
         max_jobs: int,
         concurrent_jobs: int,
         submit_limit: int):
    """
    Submit PBS jobs to run dea-sync

    Note that this is currently specific to tiled products, as it expects their folder naming conventions
    when splitting up jobs. TODO generalise function task_split()

    Example usage: dea-submit-sync 5fc /g/data/fk4/datacube/002/LS5_TM_FC

    5fc is just the name for the job: subsequent resubmissions will not rerun jobs with the same name
    if output files exist.

    A run folder is used (defaulting to `runs` in current dir) for storing output status.
    """
    input_paths = [Path(folder).absolute() for folder in folders]

    with index_connect(application_name='sync-submit') as index:
        collections.init_nci_collections(index)
        submitter = SyncSubmission(cache_folder, project, queue, dry_run, verbose=True, workers=4)
        click.echo(
            "{} input path(s)".format(len(input_paths))
        )
        tasks = _paths_to_tasks(input_paths)
        click.echo(
            "Found {} tasks across collection(s): {}".format(
                len(tasks),
                ', '.join(set(t.collection.name for t in tasks))
            )
        )

        if len(tasks) > max_jobs:
            click.echo(
                "Grouping (max_jobs={})".format(max_jobs)
            )
        tasks = group_tasks(tasks, maximum=max_jobs)

        total_datasets = sum(t.dataset_count for t in tasks)
        click.secho(
            "Submitting {} total jobs with {} datasets (avg {:.2f} each)...".format(
                len(tasks),
                total_datasets,
                total_datasets / len(tasks)
            ),
            bold=True
        )

        _find_and_submit(tasks, work_folder, concurrent_jobs, submit_limit, submitter)
Example 16
 def with_index(*args, **kwargs):
     ctx = click.get_current_context()
     application_name = app_name or re.sub('[^0-9a-zA-Z]+', '-',
                                           ctx.command_path)
     try:
         index = index_connect(ctx.obj['config_file'],
                               application_name=application_name,
                               validate_connection=expect_initialised)
         return f(index, *args, **kwargs)
     except (OperationalError, ProgrammingError) as e:
         handle_exception('Error Connecting to database: %s', e)
Example 17
def setup(config, init_users):
    """ Setup a test database environment. """
    # should these go into a log?
    command = CREATE_DATABASE_TEMPLATE.format(**as_dict(config))
    click.echo(psql_command(command, config))

    # do not validate database (nothing there yet)
    index = index_connect(config, application_name='test-env',
                          validate_connection=False)

    return init_dea(index, init_users)
Example 18
    def __init__(self, config):
        self.remote_host = config['remote_host']
        self.remote_user = config['remote_user']
        self.db_password = config['db_password']
        self.remote_dir = config['remote_dir']
        self.local_dir = config['local_dir']
        self.replication_defns = config['replicated_data']

        self.client = None
        self.sftp = None
        self.tunnel = None
        self.remote_dc_config = None
        self.remote_dc = None
        self.local_index = index_connect()
Example 19
def add_source_datasets(ingestion_request_id=None):
    """Populate the newly created database with source datasets that match the criteria

    Searches for datasets using the search criteria found on the IngestionRequest model and populates
    the newly created database with the new data. The dataset type's id is reset to 0 to prevent collisions in
    the agdc script.

    A dataset type, datasets, dataset_locations, and dataset_sources are added to the new database.
    """

    ingestion_request = IngestionRequest.objects.get(pk=ingestion_request_id)
    ingestion_request.update_status(
        "WAIT", "Populating database with source datasets...")

    config = get_config(ingestion_request.user)
    index = index_connect(local_config=config, validate_connection=False)

    dataset_type = DatasetType.objects.using('agdc').get(
        id=ingestion_request.dataset_type_ref)
    filtering_options = {
        key: getattr(ingestion_request, key)
        for key in [
            'dataset_type_ref', 'start_date', 'end_date', 'latitude_min',
            'latitude_max', 'longitude_min', 'longitude_max'
        ]
    }
    datasets = list(Dataset.filter_datasets(filtering_options))

    dataset_locations = DatasetLocation.objects.using('agdc').filter(
        dataset_ref__in=datasets)
    dataset_sources = DatasetSource.objects.using('agdc').filter(
        dataset_ref__in=datasets)

    create_db(ingestion_request.user)

    dataset_type.id = 0
    dataset_type.save(using=ingestion_request.user)

    for dataset in datasets:
        dataset.dataset_type_ref_id = 0

    Dataset.objects.using(ingestion_request.user).bulk_create(datasets)
    DatasetLocation.objects.using(
        ingestion_request.user).bulk_create(dataset_locations)
    DatasetSource.objects.using(
        ingestion_request.user).bulk_create(dataset_sources)

    close_db(ingestion_request.user)
    index.close()
Example 20
    def post(self, request):
        """Add a dataset type to the database

        Using forms, create a json representation of a dataset type and attempt to index it in the database.

        POST Data:
            Measurement forms, metadata forms
        """
        if not request.user.is_superuser:
            return JsonResponse({'status': "ERROR", 'message': "Only superusers can add or update datasets."})

        form_data = request.POST
        measurements = json.loads(form_data.get('measurements'))
        metadata = json.loads(form_data.get('metadata_form'))
        # each measurement_form contains a dict of other forms
        measurement_forms = [utils.create_measurement_form(measurements[measurement]) for measurement in measurements]
        # just a single form
        metadata_form = forms.DatasetTypeMetadataForm(metadata)

        valid, error = utils.validate_dataset_type_forms(metadata_form, measurement_forms)
        if not valid:
            return JsonResponse({'status': "ERROR", 'message': error})

        if models.DatasetType.objects.using('agdc').filter(name=metadata_form.cleaned_data['name']).exists():
            return JsonResponse({
                'status':
                "ERROR",
                'message':
                'A dataset type already exists with the entered name. Please enter a new name for your dataset and ensure that the definition is different.'
            })

        # since everything is valid, now create yaml from defs
        product_def = utils.dataset_type_definition_from_forms(metadata_form, measurement_forms)

        conf_path = '/home/' + settings.LOCAL_USER + '/Datacube/data_cube_ui/config/.datacube.conf'
        index = index_connect(local_config=conf_path)
        try:
            type_ = index.products.from_doc(product_def)
            index.products.add(type_)
        except Exception:
            return JsonResponse({
                'status':
                "ERROR",
                'message':
                'Invalid product definition. Please contact a system administrator if this problem persists.'
            })

        return JsonResponse({'status': 'OK', 'message': 'Your dataset type has been added to the database.'})
Example 21
def get_storage_units(descriptor_request=None, index=None, is_diskless=False):
    '''
    Given a descriptor query, get the storage units covered
    :param descriptor_request: dict of requests
    :param is_diskless: (default False) If True, use a light-weight class that only reads the files for
                        data not stored in the db, such as irregular variables
    :return: StorageUnitCollection
    '''
    index = index or index_connect()
    query = convert_descriptor_query_to_search_query(descriptor_request, index)
    _LOG.debug("Database storage search %s", query)
    sus = index.storage.search(**query)
    storage_units_by_type = defaultdict(StorageUnitCollection)
    for su in sus:
        unit = make_storage_unit(su, is_diskless=is_diskless)
        storage_units_by_type[su.storage_type.name].append(unit)
    return storage_units_by_type
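
get_storage_units returns a defaultdict keyed by storage type name, each value a StorageUnitCollection. A hypothetical call, with the descriptor key being an assumption:

# Hypothetical usage of get_storage_units above (descriptor key assumed).
storage_units_by_type = get_storage_units({'platform': 'LANDSAT_5'})
for type_name, collection in storage_units_by_type.items():
    print(type_name, collection)
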
Example 22
def run_ingestion(ingestion_definition):
    """Kick off the standard system database ingestion process using a user defined configuration

    Args:
        ingestion_definition: dict representing a Data Cube ingestion def produced using the utils func.

    Returns:
        The primary key of the new dataset type.
    """
    conf_path = os.environ.get('DATACUBE_CONFIG_PATH')
    index = index_connect(local_config=LocalConfig.find([conf_path]))

    source_type, output_type = ingest.make_output_type(index, ingestion_definition)
    ingestion_work.delay(output_type, source_type, ingestion_definition)

    index.close()
    return output_type.id
Example 23
def run_ingestion(ingestion_definition):
    """Kick off the standard system database ingestion process using a user defined configuration

    Args:
        ingestion_definition: dict representing a Data Cube ingestion def produced using the utils func.

    Returns:
        The primary key of the new dataset type.
    """
    conf_path = '/home/' + settings.LOCAL_USER + '/Datacube/NE-GeoCloud/config/.datacube.conf'
    index = index_connect(local_config=LocalConfig.find([conf_path]))

    source_type, output_type = ingest.make_output_type(index, ingestion_definition)
    ingestion_work.delay(output_type, source_type, ingestion_definition)

    index.close()
    return output_type.id
Example 24
        def with_index(local_config: config.LocalConfig, *args, **kwargs):
            command_path = click.get_current_context().command_path
            try:
                index = index_connect(local_config,
                                      application_name=app_name
                                      or command_path,
                                      validate_connection=expect_initialised)
                _LOG.debug("Connected to datacube index: %s", index)
            except (OperationalError, ProgrammingError) as e:
                handle_exception('Error Connecting to database: %s', e)
                return

            try:
                return f(index, *args, **kwargs)
            finally:
                index.close()
                del index
Example 25
def index_datasets(path, index=None):
    """
    Discover datasets in path and add them to the index

    :type path: pathlib.Path
    :type index: datacube.index._api.Index
    :rtype: list[datacube.model.Dataset]
    """
    index = index or index_connect()

    metadata_path = ui.get_metadata_path(path)
    if not metadata_path or not metadata_path.exists():
        raise ValueError('No supported metadata docs found for dataset {}'.format(path))

    datasets = [index.datasets.add(metadata_doc, metadata_path)
                for metadata_path, metadata_doc
                in ui.read_documents(metadata_path)]
    _LOG.info('Indexed datasets %s', path)
    return datasets
Example 26
def ingest_subset(ingestion_request_id=None):
    """Run the ingestion process on the new database

    Open a connection to the new database and run ingestion based on the
    ingestion configuration found on the IngestionRequest model.

    """

    ingestion_request = IngestionRequest.objects.get(pk=ingestion_request_id)

    config = get_config(ingestion_request.get_database_name())
    index = index_connect(local_config=config, validate_connection=True)

    # This is done because of something that the agdc guys do in ingest: https://github.com/opendatacube/datacube-core/blob/develop/datacube/scripts/ingest.py#L168
    ingestion_request.ingestion_definition[
        'filename'] = "ceos_data_cube_sample.yaml"
    try:
        # source_type, output_type = ingest.make_output_type(index, ingestion_request.ingestion_definition)

        source_type = index.products.get_by_name(
            ingestion_request.ingestion_definition['source_type'])
        output_type = index.products.add(ingest.morph_dataset_type(
            source_type, ingestion_request.ingestion_definition),
                                         allow_table_lock=True)

        tasks = list(
            ingest.create_task_list(index, output_type, None, source_type,
                                    ingestion_request.ingestion_definition))

        ingestion_request.total_storage_units = len(tasks)
        ingestion_request.update_status("WAIT",
                                        "Starting the ingestion process...")

        executor = SerialExecutor()
        successful, failed = ingest.process_tasks(
            index, ingestion_request.ingestion_definition, source_type,
            output_type, tasks, 3200, executor)
    except:
        index.close()
        raise

    index.close()
Example 27
def store_datasets(datasets, index=None, executor=SerialExecutor()):
    """
    Create any necessary storage units for the given datasets.

    Find matching storage_types for datasets
    Create storage units for datasets according to the storage_type
    Add storage units to the index

    :type datasets: list[datacube.model.Dataset]
    :type index: datacube.index._api.Index
    """
    index = index or index_connect()

    storage_types = find_storage_types_for_datasets(datasets, index)

    storage_units = []
    for storage_type, datasets in storage_types.items():
        _LOG.info('Storing %s dataset(s) using %s', len(datasets), storage_type)
        storage_units += create_storage_units(datasets, storage_type, executor=executor)

    index.storage.add_many(executor.result(value) for value in storage_units)
Example 28
def find_storage_types_for_datasets(datasets, index=None):
    """
    Find matching storage_types for datasets

    Return a dictionary, keys are storage_types, values are lists of datasets

    :type datasets: list[datacube.model.Dataset]
    :type index: datacube.index._api.Index
    :rtype dict[datacube.model.StorageType, list[datacube.model.Dataset]]
    """
    # TODO: Move to storage-types
    index = index or index_connect()

    storage_types = defaultdict(list)
    for dataset in datasets:
        dataset_storage_types = index.storage.types.get_for_dataset(dataset)
        if not dataset_storage_types:
            raise ValueError('No storage types found for %s' % dataset.local_path)
        for storage_type in dataset_storage_types:
            storage_types[storage_type.id].append(dataset)
    return {index.storage.types.get(id): datasets for id, datasets in storage_types.items()}
Example 29
def check(config_file):
    """
    Verify & view current configuration
    """
    echo('Version: %s' % datacube.__version__)
    echo('Read configuration files from: %s' % config_file.files_loaded)
    echo('Host: {}:{}'.format(config_file.db_hostname or 'localhost', config_file.db_port or '5432'))
    echo('Database: {}'.format(config_file.db_database))
    echo('User: {}'.format(config_file.db_username))

    echo('\n')
    echo('Attempting connect')
    try:
        index = index_connect(local_config=config_file)
        echo('Success.')
        for role, user, description in index.users.list_users():
            if user == config_file.db_username:
                echo('You have %s privileges.' % role.upper())
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
Example 30
def ingestion_work(output_type, source_type, ingestion_definition):
    """Run the ingestion process for a user defined configuration

    Args:
        output_type, source_type: types produced by ingest.make_output_type
        ingestion_definition: dict representing a Data Cube ingestion def produced using the utils func.
    """
    conf_path = os.environ.get('DATACUBE_CONFIG_PATH')
    index = index_connect(local_config=LocalConfig.find([conf_path]))

    tasks = ingest.create_task_list(index, output_type, None, source_type, ingestion_definition)

    # this is a dry run
    # paths = [ingest.get_filename(ingestion_definition, task['tile_index'], task['tile'].sources) for task in tasks]
    # ingest.check_existing_files(paths)

    # this actually ingests stuff
    successful, failed = ingest.process_tasks(index, ingestion_definition, source_type, output_type, tasks, 3200,
                                              get_executor(None, None))

    index.close()
    return 0
Example 31
def prepare_output(ingestion_request_id=None):
    """Dump the database and perform cleanup functions

    Dumps the database, drops it, and creates the bulk download script.

    """

    ingestion_request = IngestionRequest.objects.get(pk=ingestion_request_id)
    ingestion_request.update_status("WAIT", "Creating output products...")

    config = get_config(ingestion_request.user)
    index = index_connect(local_config=config, validate_connection=False)

    cmd = "pg_dump -U dc_user -n agdc {} > {}".format(
        ingestion_request.user, ingestion_request.get_database_dump_path())
    os.system(cmd)
    index.close()
    cmd = "dropdb -U dc_user {}".format(ingestion_request.user)
    os.system(cmd)

    ingestion_request.download_script_path = ingestion_request.get_base_data_path(
    ) + "/bulk_downloader.py"

    with open(ingestion_request.download_script_path, "w+") as downloader:
        file_list = ",".join(
            '"{}"'.format(path)
            for path in glob(ingestion_request.get_base_data_path() + '/*.nc'))
        download_script = base_downloader_script.format(
            file_list=file_list,
            database_dump_file=ingestion_request.get_database_dump_path(),
            base_host=settings.BASE_HOST,
            base_data_path=ingestion_request.get_base_data_path(
            )) + static_script
        downloader.write(download_script)

    ingestion_request.update_status(
        "OK",
        "Please follow the directions on the right side panel to download your cube."
    )
Example 32
def convert_descriptor_query_to_search_query(descriptor=None, index=None):
    descriptor = descriptor or {}
    index = index or index_connect()

    known_fields = index.datasets.get_fields().keys()

    search_query = {key: descriptor[key] for key in descriptor.keys() if key in known_fields}
    unknown_fields = [key for key in descriptor.keys()
                      if key not in known_fields
                      and key not in ['variables', 'variable', 'dimensions', 'dimension', 'storage_type']]
    if unknown_fields:
        _LOG.warning("Some of the fields in the query are unknown and will be ignored: %s",
                     ', '.join(unknown_fields))

    if 'storage_type' in descriptor:
        storage_type_name = descriptor.get('storage_type', '')
        storage_type = index.storage.types.get_by_name(storage_type_name)
        if storage_type:
            search_query['type'] = storage_type.id

    descriptor_dimensions = descriptor.get('dimensions', {})
    descriptor_dimensions.update(descriptor.get('dimension', {}))
    search_query.update(convert_descriptor_dims_to_search_dims(descriptor_dimensions))
    return search_query
Example 33
def add_source_datasets(ingestion_request_id=None):
    """Populate the newly created database with source datasets that match the criteria

    Searches for datasets using the search criteria found on the IngestionRequest model and populates
    the newly created database with the new data. The dataset type's id is reset to 0 to prevent collisions in
    the agdc script.

    A dataset type, datasets, dataset_locations, and dataset_sources are added to the new database.
    """

    ingestion_request = IngestionRequest.objects.get(pk=ingestion_request_id)
    ingestion_request.update_status(
        "WAIT", "Populating database with source datasets...")

    config = get_config(ingestion_request.get_database_name())
    index = index_connect(local_config=config, validate_connection=True)

    dataset_type = DatasetType.objects.using('agdc').get(
        id=ingestion_request.dataset_type_ref)
    filtering_options = {
        key: getattr(ingestion_request, key)
        for key in [
            'dataset_type_ref', 'start_date', 'end_date', 'latitude_min',
            'latitude_max', 'longitude_min', 'longitude_max'
        ]
    }
    datasets = list(Dataset.filter_datasets(filtering_options))
    dataset_locations = DatasetLocation.objects.using('agdc').filter(
        dataset_ref__in=datasets)
    dataset_sources = DatasetSource.objects.using('agdc').filter(
        dataset_ref__in=datasets)

    def create_source_dataset_models(dataset_sources, dataset_type_index=0):
        source_datasets = Dataset.objects.using('agdc').filter(
            pk__in=dataset_sources.values_list('source_dataset_ref',
                                               flat=True))
        source_dataset_type = DatasetType.objects.using('agdc').get(
            id=source_datasets[0].dataset_type_ref.id)
        source_dataset_locations = DatasetLocation.objects.using(
            'agdc').filter(dataset_ref__in=source_datasets)
        source_dataset_sources = DatasetSource.objects.using('agdc').filter(
            dataset_ref__in=source_datasets)

        if source_dataset_sources.exists():
            dataset_type_index = create_source_dataset_models(
                source_dataset_sources, dataset_type_index=dataset_type_index)

        source_dataset_type.id = dataset_type_index
        source_dataset_type.save(using=ingestion_request.get_database_name())

        for dataset in source_datasets:
            dataset.dataset_type_ref_id = source_dataset_type.id

        Dataset.objects.using(
            ingestion_request.get_database_name()).bulk_create(source_datasets)
        DatasetLocation.objects.using(
            ingestion_request.get_database_name()).bulk_create(
                source_dataset_locations)
        DatasetSource.objects.using(
            ingestion_request.get_database_name()).bulk_create(
                source_dataset_sources)

        return dataset_type_index + 1

    create_db(ingestion_request.get_database_name())

    dataset_type_index = create_source_dataset_models(
        dataset_sources) if dataset_sources else 0

    dataset_type.id = dataset_type_index
    dataset_type.save(using=ingestion_request.get_database_name())

    for dataset in datasets:
        dataset.dataset_type_ref_id = dataset_type.id

    Dataset.objects.using(
        ingestion_request.get_database_name()).bulk_create(datasets)
    DatasetLocation.objects.using(
        ingestion_request.get_database_name()).bulk_create(dataset_locations)
    DatasetSource.objects.using(
        ingestion_request.get_database_name()).bulk_create(dataset_sources)

    cmd = "psql -U dc_user -h {} {} -c \"ALTER SEQUENCE agdc.dataset_type_id_seq RESTART WITH {};\"".format(
        settings.MASTER_NODE, ingestion_request.get_database_name(),
        dataset_type_index + 1)
    os.system(cmd)

    close_db(ingestion_request.get_database_name())
    index.close()
Example 34
def _get_store(config: LocalConfig, variant: str, log=_LOG) -> SummaryStore:
    index: Index = index_connect(config,
                                 application_name=f"cubedash.show.{variant}",
                                 validate_connection=False)
    return SummaryStore.create(index, log=log)
Example 35
import flask
from flask import jsonify

import shapely.geometry
import shapely.ops
from cachetools import cached
from dateutil import parser
from dateutil import tz
from dateutil.relativedelta import relativedelta

from datacube.index import index_connect
from datacube.model import Range
from datacube.utils import jsonify_document

_PRODUCT_PREFIX = '/<product>'
# There's probably a proper flask way to do this.
API_PREFIX = '/api'

index = index_connect()
app = flask.Flask('cubedash')

ACCEPTABLE_SEARCH_FIELDS = ['platform', 'instrument', 'product']


def as_json(o):
    return jsonify(jsonify_document(o), indent=4)


@app.template_filter('strftime')
def _format_datetime(date, fmt=None):
    return date.strftime("%Y-%m-%d %H:%M:%S")


@app.template_filter('query_value')