Beispiel #1
0
def _with_retries(method,
                  max_tries=_MAX_TRIES,
                  retry_timeout=_RETRY_TIMEOUT,
                  exc=Exception):
    """
    Run method, retrying up to max_tries times if the given exception is raised

    :param method: any zero-argument callable to execute
    :param max_tries: number of tries; if <= 0 the method is not executed and None is returned
    :param retry_timeout: seconds to sleep between consecutive tries
    :param exc: Exception class to catch (eg KeyError)
    :raises: exc if method fails max_tries times
    :return: result of method()
    """
    while max_tries > 0:
        max_tries -= 1
        try:
            return method()
        except exc as e:
            if max_tries == 0:
                logger.warning("Operation failed, no retries left")
                # Bare raise re-raises the active exception with its original traceback
                raise

            # Dump the full traceback of the caught exception to stdout
            print("Caught exception:")
            traceback.print_exc(file=sys.stdout)

            logger.warning(
                f"Operation failed: {str(e)}, retry in {retry_timeout} seconds. Retries left: {max_tries}"
            )
            time.sleep(retry_timeout)
Beispiel #2
0
def _process_events(storage, events, stats):
    """Store and apply events

    :param storage: GOB (events + entities)
    :param events: the events to process
    :param stats: update statistics for this action
    :return:
    """
    # Compare the max eventid of the entities with the last eventid of the events
    entity_max_eventid, last_eventid = get_event_ids(storage)
    logger.info(
        f"Events are at {last_eventid or 0:,}, model is at {entity_max_eventid or 0:,}"
    )

    # Collect all source_id - last_event combinations to check for validity and existence
    with storage.get_session():
        # { source_id: last_event, ... }
        last_events = storage.get_last_events()

    if is_corrupted(entity_max_eventid, last_eventid):
        logger.error("Model is inconsistent! data is more recent than events")
        return

    if entity_max_eventid != last_eventid:
        logger.warning("Model is out of date, Further processing has stopped")
        return

    logger.info("Model is up to date")
    # Add new events
    return _store_events(storage, last_events, events, stats)
Beispiel #3
0
    def query(self, query, **kwargs):
        """Reads from the GeoJSON response.

        Iterates the features of the JSON body; the values of the
        `properties` member (should be always present) are lifted to
        the top level of each feature.

        :return: a generator of dicts
        """
        features = self.response.json()['features']
        for feature in features:
            if 'properties' not in feature:
                logger.warning("WFS feature does not contain 'properties' key")
            properties = feature.pop('properties', {})
            feature.update(properties)
            yield feature
Beispiel #4
0
    def try_dump_collection(self, schema, catalog_name, collection_name, force_full=False):
        """
        Try to dump the given catalog collection in the given schema

        The dump is performed by issuing an API POST request to the GOB API.
        The response is streamed and every received line is logged together
        with its per-line and cumulative elapsed times.

        :param schema: database schema to dump into
        :param catalog_name: catalog of the collection to dump
        :param collection_name: collection to dump
        :param force_full: if True, request a full dump (passed through in the request body)
        :return: True if the last response line matches 'Export completed', False otherwise
        """
        url = f"{self.dump_api}/dump/{catalog_name}/{collection_name}/"
        data = {
            "db": self.db_config,
            "schema": schema,
            "include_relations": False,
            "force_full": force_full,
        }
        headers = {
            "Content-Type": "application/json"
        }

        logger.info(f"Dump {catalog_name} - {collection_name} (schema: {schema})")
        start_request = time.time()
        success = False
        try:
            # stream=True so progress lines can be logged as they arrive
            result = requests.post(
                url=url,
                json=data,
                headers=self.update_headers(url, headers),
                stream=True
            )

            last_line = ""
            start_line = time.time()
            for line in result.iter_lines(chunk_size=1):
                last_line = line.decode()
                end_line = time.time()
                # Log each progress line with (line time / total time) in seconds
                logger.info(f"{last_line} ({(end_line - start_line):.2f} / {(end_line - start_request):.2f} secs)")
                start_line = time.time()
        except Exception as e:
            logger.warning(f'Export {catalog_name}-{collection_name} failed: {str(e)}')
        else:
            # The API signals success with a final line starting with 'Export completed'
            success = re.match(r'Export completed', last_line) is not None
            if not success:
                logger.warning(f'Export {catalog_name}-{collection_name} completed with errors')
        finally:
            end_request = time.time()
            logger.info(f"Elapsed time: {(end_request - start_request):.2f} secs")
        return success
Beispiel #5
0
    def _log_warning(self, key, value):
        """
        Issue a warning if any duplicate values are found. Stop issuing warnings after MAX_WARNINGS

        :param key: key for which a duplicate value was found
        :param value: the duplicate value
        :return:
        """
        self.warnings += 1
        count, limit = self.warnings, self.MAX_WARNINGS
        if count <= limit:
            logger.warning(f"Non unique value found for {key}: {value}")
        # Emit a final notice exactly once, when the limit is reached
        if count == limit:
            logger.warning(
                f"More than {limit} duplicate values found")
Beispiel #6
0
def _check_file(check, filename, stats):
    """
    Test if all checks that have been defined for the given file are OK

    :param check: checks to apply onto the statistics
    :param filename: Name of the file to check
    :param stats: Statistics of the file
    :return: True if all checks succeed
    """
    total_result = True
    _check_uniqueness(check)

    for key, margin in check.items():
        # Get corresponding value for check
        if key not in stats:
            # Include the filename so the message identifies which file misses the value
            logger.warning(f"Value missing for {key} check in {filename}")
            continue
        value = stats[key]
        # margin is one of: [exact], [None, max], [min, None], [min, max]
        if len(margin) == 1:
            result = value == margin[0]
            formatted_margin = f"= {_fmt(margin[0])}"
        elif margin[0] is None:
            result = value <= margin[1]
            formatted_margin = f"<= {_fmt(margin[1])}"
        elif margin[1] is None:
            result = value >= margin[0]
            formatted_margin = f">= {_fmt(margin[0])}"
        else:
            result = margin[0] <= value <= margin[1]
            formatted_margin = f"{_fmt(margin[0])} - {_fmt(margin[1])}"
        total_result = total_result and result

        # Report any errors for the given filename as a group
        # Numeric values are shown with thousands separators; a trailing ".00" is stripped
        str_value = f"{value:,.2f}".replace(
            ".00", "") if isinstance(value, (float, int)) else value
        extra_data = {'id': filename, 'data': {key: str_value}}
        if result:
            extra_data['id'] += " OK"
            logger.info("OK", extra_data)
        else:
            extra_data['data']['margin'] = formatted_margin
            logger.error("Check FAIL", extra_data)
    return total_result
Beispiel #7
0
def has_no_errors(msg):
    """
    Checks the message

    Interprets the message info and either return True to signal that the message was OK
    or return False and logs an error message explaining why the result was rejected
    :param msg: The message to check
    :return: True if the message is OK to proceed to the next step
    """
    summary = msg.get('summary')
    if not summary:
        return True

    num_errors = len(summary.get('errors', []))
    if num_errors == 0:
        return True

    # Errors present: log why the workflow is stopped and reject the message
    logger.configure(msg, "WORKFLOW")
    plural = 's' if num_errors > 1 else ''
    logger.warning(
        f"Workflow stopped because of {num_errors} error{plural}"
    )
    return False
Beispiel #8
0
def test(catalogue):
    """
    Test export files for a given catalogue

    :param catalogue: catalogue to test
    :return: None
    """
    logger.info(f"Test export for catalogue {catalogue}")

    logger.info("Connect to Objectstore")

    # Renamed from 'config' so the export-config loop below cannot shadow it
    datastore_config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(datastore_config)
    datastore.connect()
    container_name = CONTAINER_BASE

    logger.info(f"Load files from {container_name}")
    conn_info = {
        "connection": datastore.connection,
        "container": container_name
    }

    # Get test definitions for the given catalogue
    checks = _get_checks(conn_info, catalogue)

    # Make proposals for any missing test definitions
    proposals = {}
    for export_config in _export_config[catalogue]:
        resolve_config_filenames(export_config)

        for name, product in export_config.products.items():
            # The product's main filename plus the filenames of its extra files
            # (loop variable renamed so it no longer shadows 'product')
            filenames = [product['filename']] + [
                extra_file['filename']
                for extra_file in product.get('extra_files', [])
            ]

            for filename in filenames:
                # Check the previously exported file at its temporary location
                obj_info, obj = _get_file(
                    conn_info, f"{EXPORT_DIR}/{catalogue}/{filename}")

                # Clone check so that changes to the check file don't affect other runs
                check = copy.deepcopy(_get_check(checks, filename))

                # Report results with the name of the matched file
                matched_filename = obj_info['name'] if obj_info else filename

                if obj_info is None:
                    logger.error(f"File {filename} MISSING")
                elif check:
                    stats = _get_analysis(obj_info, obj, check)
                    if _check_file(check, matched_filename, stats):
                        logger.info(f"Check {matched_filename} OK")
                        # Copy the file to its final location
                        distribute_file(conn_info, matched_filename)
                    else:
                        logger.info(f"Check {matched_filename} FAILED")
                    _propose_check_file(proposals, filename, obj_info, obj)
                else:
                    logger.warning(f"File {filename} UNCHECKED")
                    # Do not copy unchecked files
                    _propose_check_file(proposals, filename, obj_info, obj)

    # Write out any missing test definitions
    _write_proposals(conn_info, catalogue, checks, proposals)
Beispiel #9
0
 def _log_warning(self, message):
     """Forward *message* to the module-level logger as a warning."""
     logger.warning(message)