Example #1
    def test_datastore_config(self):

        self.assertEqual({
            'some': 'config',
            'name': 'DatastoreA',
        }, get_datastore_config('DatastoreA'))

        with self.assertRaises(GOBConfigException):
            get_datastore_config('NonExistent')
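
This test pins down the contract of get_datastore_config: it returns the configured dict with the datastore's name merged in, and raises GOBConfigException for unknown names. A minimal sketch that satisfies exactly this contract, assuming a module-level DATASTORE_CONFIGS dict (the real implementation presumably reads its configuration from the environment):

class GOBConfigException(Exception):
    """Raised when no configuration exists for a datastore name."""


DATASTORE_CONFIGS = {
    'DatastoreA': {'some': 'config'},  # hypothetical storage; the real source differs
}


def get_datastore_config(name: str) -> dict:
    try:
        config = DATASTORE_CONFIGS[name]
    except KeyError:
        raise GOBConfigException(f"No configuration found for datastore '{name}'")
    # Merge the name into the returned config, as the test expects
    return {**config, 'name': name}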
Example #2
    def _init_config(self):
        """
        Initialize dumper configuration

        If localhost is used then use the public GOB url for all other case use the GOB secure url.
        Read the destination database properties from the environment

        :return:
        """
        api_host = get_host()
        api_url = PUBLIC_URL if any(host in api_host for host in ["localhost", "gobapi"]) else SECURE_URL
        self.dump_api = f"{api_host}{api_url}"
        self.db_config = get_datastore_config(ANALYSE_DB_DATASTORE_ID)
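
Note that the host check is a substring match, so any host containing "localhost" or "gobapi" selects the public URL. A quick illustration with hypothetical URL constants (the real PUBLIC_URL and SECURE_URL values are not shown in this listing):

PUBLIC_URL = "/gob/public"  # hypothetical values, for illustration only
SECURE_URL = "/gob/secure"

for api_host in ("http://localhost:8141", "http://gobapi:8080", "https://api.example.com"):
    api_url = PUBLIC_URL if any(h in api_host for h in ["localhost", "gobapi"]) else SECURE_URL
    print(f"{api_host} -> {api_host}{api_url}")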
Example #3
def _get_datastore(destination_name: str):
    """Returns Datastore and base_directory for Datastore.
    Returned Datastore has an initialised connection for destination_name

    :param destination_name:
    :return:
    """
    datastore_config = get_datastore_config(destination_name)
    datastore = DatastoreFactory.get_datastore(datastore_config)
    datastore.connect()

    # Prepend main directory to file, except for ObjectDatastore, as this will use a container by default
    base_directory = f"{CONTAINER_BASE}/" if not isinstance(
        datastore, ObjectDatastore) else ""
    return datastore, base_directory
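
A hypothetical caller prepends the returned base_directory to any file path before handing it to the datastore; for an ObjectDatastore the prefix is empty because the container already acts as the root:

# Hypothetical usage; the datastore name and file path are illustrative only
datastore, base_directory = _get_datastore("DatastoreA")
full_path = f"{base_directory}some/dir/export.csv"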
Example #4
    def connect(self):  # noqa: C901
        """The first step of every import is a technical step. A connection need to be setup to
        connect to a database, filesystem, API, ...

        :return:
        """

        # Get manually added config, or config based on application name
        datastore_config = self.source.get(
            'application_config') or get_datastore_config(
                self.source['application'])

        read_config = {**self.source.get('read_config', {}), 'mode': self.mode}
        self.datastore = DatastoreFactory.get_datastore(
            datastore_config, read_config)
        self.datastore.connect()

        logger.info(
            f"Connection to {self.app} {self.datastore.user} has been made.")
Example #5
    def __init__(self, config, row_formatter=None):
        """Constructor

        Lazy loading, Just register objectstore connection and reader and wait for the iterator to be called
        to load the data

        :param config:
        """
        self.config = config
        self.objectstore_config = get_datastore_config(
            self.config['objectstore'])

        self.datastore = DatastoreFactory.get_datastore(
            self.objectstore_config, self.config)

        assert isinstance(self.datastore, ObjectDatastore)

        self.datastore.connect()
        self.row_formatter = row_formatter
Example #6
def _get_cbs_features(path: str) -> dict[str, dict[str, str]]:
    """
    Gets the CBS codes from the Objectstore and returns a list of dicts with the naam,
    code (wijk or buurt).

    :param path: the path to source file
    :return: a list of dicts with CBS Code and CBS naam, mapped on the local code.
    """
    datastore = ObjectDatastore(
        connection_config=get_datastore_config("Basisinformatie"),
        read_config={
            "file_filter": path,
            "file_type": "XLS"
        })

    datastore.connect()
    result = list(datastore.query(''))
    datastore.disconnect()

    if not result:
        raise GOBException(f"No CBS features found for path '{path}'")

    return {row[0]: {"code": row[1], "naam": row[2]} for row in result}
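
The query rows are read positionally as (local code, CBS code, CBS naam) triples. A small illustration of the resulting mapping, using made-up sample rows:

# Made-up rows in the shape returned by datastore.query(''); real data differs
rows = [
    ("A00", "WK036300", "Burgwallen-Oude Zijde"),
    ("A01", "WK036301", "Burgwallen-Nieuwe Zijde"),
]
mapped = {row[0]: {"code": row[1], "naam": row[2]} for row in rows}
assert mapped["A00"] == {"code": "WK036300", "naam": "Burgwallen-Oude Zijde"}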
Example #7
def test(catalogue):
    """
    Test export files for a given catalogue

    :param catalogue: catalogue to test
    :return: None
    """
    logger.info(f"Test export for catalogue {catalogue}")

    logger.info("Connect to Objectstore")

    config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(config)
    datastore.connect()
    container_name = CONTAINER_BASE

    logger.info(f"Load files from {container_name}")
    conn_info = {
        "connection": datastore.connection,
        "container": container_name
    }

    # Get test definitions for the given catalogue
    checks = _get_checks(conn_info, catalogue)

    # Make proposals for any missing test definitions
    proposals = {}
    # Renamed from 'config' to avoid shadowing the objectstore config above
    for export_config in _export_config[catalogue]:
        resolve_config_filenames(export_config)

        for name, product in export_config.products.items():
            filenames = [product['filename']] + [
                extra_file['filename']
                for extra_file in product.get('extra_files', [])
            ]

            for filename in filenames:
                # Check the previously exported file at its temporary location
                obj_info, obj = _get_file(
                    conn_info, f"{EXPORT_DIR}/{catalogue}/{filename}")

                # Clone check so that changes to the check file don't affect other runs
                check = copy.deepcopy(_get_check(checks, filename))

                # Report results with the name of the matched file
                matched_filename = obj_info['name'] if obj_info else filename

                if obj_info is None:
                    logger.error(f"File {filename} MISSING")
                elif check:
                    stats = _get_analysis(obj_info, obj, check)
                    if _check_file(check, matched_filename, stats):
                        logger.info(f"Check {matched_filename} OK")
                        # Copy the file to its final location
                        distribute_file(conn_info, matched_filename)
                    else:
                        logger.info(f"Check {matched_filename} FAILED")
                    _propose_check_file(proposals, filename, obj_info, obj)
                else:
                    logger.warning(f"File {filename} UNCHECKED")
                    # Do not copy unchecked files
                    _propose_check_file(proposals, filename, obj_info, obj)

    # Write out any missing test definitions
    _write_proposals(conn_info, catalogue, checks, proposals)
Example #8
    def __init__(self):
        config = get_datastore_config(GOB_OBJECTSTORE)
        datastore = DatastoreFactory.get_datastore(config)
        datastore.connect()
        self.connection = datastore.connection
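
Examples #4, #7 and #8 above (and #9 below) all repeat the same boilerplate: look up the config, build the datastore via the factory, and connect. A sketch of a context manager that would centralise this pattern; it is not part of the source and assumes every datastore exposes the disconnect() method seen in Example #6:

from contextlib import contextmanager


@contextmanager
def connected_datastore(name):
    """Yield a connected datastore, disconnecting on exit (sketch)."""
    datastore = DatastoreFactory.get_datastore(get_datastore_config(name))
    datastore.connect()
    try:
        yield datastore
    finally:
        datastore.disconnect()

Usage would then reduce to a single with-statement:

with connected_datastore(GOB_OBJECTSTORE) as datastore:
    result = list(datastore.query(''))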
Example #9
def _export_collection(host, catalogue, collection, product_name,
                       destination):  # noqa: C901
    """Export a collection from a catalog

    :param host: The API host to retrieve the catalog and collection from
    :param catalog: The name of the catalog
    :param collection: The name of the collection
    :param product_name: The name of the product to export
    :param destination: The destination of the resulting output file(s)
    :return:
    """
    logger.info(f"Export {catalogue}:{collection} to {destination} started.")

    # Get the configuration for this collection
    config = CONFIG_MAPPING[catalogue][collection]
    resolve_config_filenames(config)

    files = []

    # If a product has been supplied, export only that product
    try:
        products = {
            product_name: config.products[product_name]
        } if product_name else config.products
    except KeyError:
        logger.error(f"Product '{product_name}' not found")
        return

    # Start exporting each product
    for name, product in products.items():
        logger.info(
            f"Export to file '{name}' started, API type: {product.get('api_type', 'REST')}"
        )

        # Get name of local file to write results to
        results_file = _get_filename(
            product['filename']
        ) if destination == "Objectstore" else product['filename']

        if product.get('append', False):
            # Add .to_append to avoid writing to the previously created file
            results_file = _get_filename(f"{product['filename']}.to_append")
            product['append_to_filename'] = _get_filename(product['filename']) \
                if destination == "Objectstore" \
                else product['filename']

        # Buffer items if they are used multiple times. This prevents calling API multiple times for same data
        source = product_source(product)
        buffer_items = sum(
            1 for p in config.products.values()
            if product_source(p) == source) > 1

        logger.info(
            f"Buffering API output {'enabled' if buffer_items else 'disabled'}"
        )
        try:
            row_count = _with_retries(
                lambda: export_to_file(host,
                                       product,
                                       results_file,
                                       catalogue,
                                       product.get('collection', collection),
                                       buffer_items=buffer_items))
        except Exception as e:
            logger.error(f"Export to local file {name} failed: {str(e)}.")
        else:
            logger.info(f"{row_count} records exported to local file {name}.")

            if product.get('append', False):
                # Append temporary file to existing file and cleanup temp file
                _append_to_file(results_file, product['append_to_filename'])
                os.remove(results_file)
            else:
                # Do not add file to files again when appending
                files.append({
                    'temp_location': results_file,
                    'distribution': product['filename'],
                    'mime_type': product['mime_type']
                })

            # Add extra result files (e.g. .prj file)
            extra_files = product.get('extra_files', [])
            files.extend([{
                'temp_location': _get_filename(file['filename']),
                'distribution': file['filename'],
                'mime_type': file['mime_type']
            } for file in extra_files])

    if destination == "Objectstore":
        # Get objectstore connection
        config = get_datastore_config(GOB_OBJECTSTORE)
        datastore = DatastoreFactory.get_datastore(config)
        datastore.connect()

        assert isinstance(datastore, ObjectDatastore)

        connection = datastore.connection
        logger.info(
            f"Connection to {destination} {datastore.user} has been made.")

    # Start distribution of all resulting files
    for file in files:
        logger.info(f"Write file '{file['distribution']}'.")
        if destination == "Objectstore":
            # Distribute to pre-final location
            container = f'{CONTAINER_BASE}/{EXPORT_DIR}/{catalogue}/'
            with open(file['temp_location'], 'rb') as fp:
                try:
                    distribute_to_objectstore(connection, container,
                                              file['distribution'], fp,
                                              file['mime_type'])
                except GOBException as e:
                    logger.error(
                        f"Failed to copy to {destination} on location: "
                        f"{container}{file['distribution']}. Error: {e}")
                    return False

            logger.info(
                f"File copied to {destination} on location: {container}{file['distribution']}."
            )

            cleanup_datefiles(
                connection, CONTAINER_BASE,
                f"{EXPORT_DIR}/{catalogue}/{file['distribution']}")

            # Delete temp file
            os.remove(file['temp_location'])

        elif destination == "File":
            logger.info(f"Export is written to {file['distribution']}.")

    logger.info("Export completed")