def _with_retries(method, max_tries=_MAX_TRIES, retry_timeout=_RETRY_TIMEOUT, exc=Exception):
    """
    Run method, retrying up to max_tries times if the given exception is raised

    :param method: any method to execute
    :param max_tries: number of tries; if <= 0 the method is not executed and None is returned
    :param retry_timeout: number of seconds to wait between tries
    :param exc: Exception class to catch (eg KeyError)
    :raises: exc if method fails max_tries times
    :return: result of method()
    """
    while max_tries > 0:
        max_tries -= 1
        try:
            return method()
        except exc as e:
            if max_tries == 0:
                logger.warning("Operation failed, no retries left")
                raise e
            print("Caught exception:")
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback, file=sys.stdout)
            logger.warning(
                f"Operation failed: {str(e)}, retry in {retry_timeout} seconds. Retries left: {max_tries}"
            )
            time.sleep(retry_timeout)

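# Hedged usage sketch (not part of the source): shows how _with_retries might wrap a
# flaky call. The _example_retry_usage/flaky_fetch names are hypothetical and only
# illustrate the retry-on-exception behaviour.
def _example_retry_usage():
    attempts = {"n": 0}

    def flaky_fetch():
        # Fail twice, then succeed, to exercise the retry loop
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise ConnectionError("transient failure")
        return "payload"

    # Retries on ConnectionError only, sleeping retry_timeout seconds between tries
    return _with_retries(flaky_fetch, max_tries=5, retry_timeout=1, exc=ConnectionError)
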
def _process_events(storage, events, stats):
    """Store and apply events

    :param storage: GOB (events + entities)
    :param events: the events to process
    :param stats: update statistics for this action
    :return:
    """
    # Get the max eventid of the entities and the last eventid of the events
    entity_max_eventid, last_eventid = get_event_ids(storage)
    logger.info(f"Events are at {last_eventid or 0:,}, model is at {entity_max_eventid or 0:,}")

    # Get all source_id - last_event combinations to check for validity and existence
    with storage.get_session():
        last_events = storage.get_last_events()  # { source_id: last_event, ... }

    if is_corrupted(entity_max_eventid, last_eventid):
        logger.error("Model is inconsistent! data is more recent than events")
    elif entity_max_eventid == last_eventid:
        logger.info("Model is up to date")
        # Add new events
        return _store_events(storage, last_events, events, stats)
    else:
        logger.warning("Model is out of date, further processing has stopped")

def query(self, query, **kwargs):
    """Reads from the GeoJSON response.

    The requests library is used to iterate through the items.

    The `properties` values (which should always be present) are placed at the top level
    of the feature.

    :return: a generator of dicts, one per feature
    """
    for feature in self.response.json()['features']:
        if 'properties' not in feature:
            logger.warning("WFS feature does not contain 'properties' key")
        feature |= feature.pop('properties', {})
        yield feature

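# Hedged illustration (the GeoJSON feature below is made up, not from the source):
# query() lifts the 'properties' values to the top level of each feature using the
# in-place dict union operator (|=, Python 3.9+).
def _example_flatten_feature():
    feature = {
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": [4.9, 52.4]},
        "properties": {"identificatie": "0363", "status": "actueel"},
    }
    feature |= feature.pop('properties', {})
    # feature is now:
    # {"type": "Feature", "geometry": {...}, "identificatie": "0363", "status": "actueel"}
    return feature
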
def try_dump_collection(self, schema, catalog_name, collection_name, force_full=False):
    """
    Try to dump the given catalog collection in the given schema

    The dump is performed by issuing an API POST request to the GOB API.

    :param schema: database schema to dump into
    :param catalog_name: name of the catalog
    :param collection_name: name of the collection
    :param force_full: if True, request a full dump
    :return: True if the dump completed successfully
    """
    url = f"{self.dump_api}/dump/{catalog_name}/{collection_name}/"
    data = {
        "db": self.db_config,
        "schema": schema,
        "include_relations": False,
        "force_full": force_full,
    }
    headers = {
        "Content-Type": "application/json"
    }

    logger.info(f"Dump {catalog_name} - {collection_name} (schema: {schema})")

    start_request = time.time()
    success = False
    try:
        result = requests.post(
            url=url,
            json=data,
            headers=self.update_headers(url, headers),
            stream=True
        )

        last_line = ""
        start_line = time.time()
        for line in result.iter_lines(chunk_size=1):
            last_line = line.decode()
            end_line = time.time()
            logger.info(f"{last_line} ({(end_line - start_line):.2f} / {(end_line - start_request):.2f} secs)")
            start_line = time.time()
    except Exception as e:
        logger.warning(f'Export {catalog_name}-{collection_name} failed: {str(e)}')
    else:
        success = re.match(r'Export completed', last_line) is not None
        if not success:
            logger.warning(f'Export {catalog_name}-{collection_name} completed with errors')
    finally:
        end_request = time.time()
        logger.info(f"Elapsed time: {(end_request - start_request):.2f} secs")
    return success

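# Hedged usage sketch (names and the fallback policy are assumptions, not from the
# source): a caller could try a regular dump first and fall back to a forced full dump
# when it fails. "dumper" stands for whatever object exposes try_dump_collection.
def _example_dump_with_fallback(dumper, schema, catalog_name, collection_name):
    if not dumper.try_dump_collection(schema, catalog_name, collection_name):
        # Retry with force_full=True, which presumably requests a full rather than
        # incremental dump from the GOB API
        return dumper.try_dump_collection(schema, catalog_name, collection_name, force_full=True)
    return True
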
def _log_warning(self, key, value):
    """
    Issue a warning if any duplicate values are found.
    Stop issuing warnings after MAX_WARNINGS

    :param key: name of the checked attribute
    :param value: the duplicate value that was found
    :return:
    """
    self.warnings += 1
    if self.warnings <= self.MAX_WARNINGS:
        logger.warning(f"Non unique value found for {key}: {value}")
    if self.warnings == self.MAX_WARNINGS:
        logger.warning(f"More than {self.MAX_WARNINGS} duplicate values found")

def _check_file(check, filename, stats):
    """
    Test if all checks that have been defined for the given file are OK

    :param check: Checks to apply onto the statistics
    :param filename: Name of the file to check
    :param stats: Statistics of the file
    :return: True if all checks succeed
    """
    total_result = True
    _check_uniqueness(check)

    for key, margin in check.items():
        # Get corresponding value for check
        if key not in stats:
            logger.warning(f"Value missing for {key} check in {filename}")
            continue
        value = stats[key]
        if len(margin) == 1:
            result = value == margin[0]
            formatted_margin = f"= {_fmt(margin[0])}"
        elif margin[0] is None:
            result = value <= margin[1]
            formatted_margin = f"<= {_fmt(margin[1])}"
        elif margin[1] is None:
            result = value >= margin[0]
            formatted_margin = f">= {_fmt(margin[0])}"
        else:
            result = margin[0] <= value <= margin[1]
            formatted_margin = f"{_fmt(margin[0])} - {_fmt(margin[1])}"
        total_result = total_result and result

        # Report any errors for the given filename as a group
        str_value = f"{value:,.2f}".replace(".00", "") if type(value) in [float, int] else value
        extra_data = {'id': filename, 'data': {key: str_value}}
        if result:
            extra_data['id'] += " OK"
            logger.info("OK", extra_data)
        else:
            extra_data['data']['margin'] = formatted_margin
            logger.error("Check FAIL", extra_data)

    return total_result

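# Hedged illustration (the statistic names and margins below are made up): a check maps
# a statistic name to a margin list. A single value means "must equal", a leading or
# trailing None makes the bound one-sided, and two values define an inclusive range.
# Assumes the module's _check_uniqueness, _fmt and logger are available as above.
def _example_check_file():
    check = {
        "rows": [1000],            # value must equal 1000
        "empty_fields": [None, 5],  # value must be <= 5
        "filesize": [1024, None],   # value must be >= 1024
        "digits": [2, 4],           # 2 <= value <= 4
    }
    stats = {"rows": 1000, "empty_fields": 3, "filesize": 2048, "digits": 3}
    return _check_file(check, "example.csv", stats)  # True: all margins are satisfied
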
def has_no_errors(msg):
    """
    Checks the message

    Interprets the message info and either returns True to signal that the message was OK,
    or returns False and logs a warning explaining why the result was rejected

    :param msg: The message to check
    :return: True if the message is OK to proceed to the next step
    """
    summary = msg.get('summary')
    is_ok = True
    if summary:
        num_errors = len(summary.get('errors', []))
        is_ok = num_errors == 0
        if not is_ok:
            logger.configure(msg, "WORKFLOW")
            logger.warning(
                f"Workflow stopped because of {num_errors} error{'s' if num_errors > 1 else ''}"
            )
    return is_ok

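# Hedged illustration (field values are made up): the message is expected to carry an
# optional 'summary' with an 'errors' list; any error stops the workflow.
def _example_has_no_errors():
    ok_msg = {"summary": {"errors": []}}
    failed_msg = {"summary": {"errors": ["missing attribute"]}}
    return has_no_errors(ok_msg), has_no_errors(failed_msg)  # (True, False)
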
def test(catalogue):
    """
    Test export files for a given catalogue

    :param catalogue: catalogue to test
    :return: None
    """
    logger.info(f"Test export for catalogue {catalogue}")

    logger.info("Connect to Objectstore")
    config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(config)
    datastore.connect()
    container_name = CONTAINER_BASE

    logger.info(f"Load files from {container_name}")
    conn_info = {
        "connection": datastore.connection,
        "container": container_name
    }

    # Get test definitions for the given catalogue
    checks = _get_checks(conn_info, catalogue)

    # Make proposals for any missing test definitions
    proposals = {}
    for config in _export_config[catalogue]:
        resolve_config_filenames(config)

        for name, product in config.products.items():
            filenames = [product['filename']] + [
                product['filename'] for product in product.get('extra_files', [])
            ]

            for filename in filenames:
                # Check the previously exported file at its temporary location
                obj_info, obj = _get_file(conn_info, f"{EXPORT_DIR}/{catalogue}/{filename}")

                # Clone check so that changes to the check file don't affect other runs
                check = copy.deepcopy(_get_check(checks, filename))

                # Report results with the name of the matched file
                matched_filename = obj_info['name'] if obj_info else filename

                if obj_info is None:
                    logger.error(f"File {filename} MISSING")
                elif check:
                    stats = _get_analysis(obj_info, obj, check)
                    if _check_file(check, matched_filename, stats):
                        logger.info(f"Check {matched_filename} OK")
                        # Copy the file to its final location
                        distribute_file(conn_info, matched_filename)
                    else:
                        logger.info(f"Check {matched_filename} FAILED")
                        _propose_check_file(proposals, filename, obj_info, obj)
                else:
                    logger.warning(f"File {filename} UNCHECKED")
                    # Do not copy unchecked files
                    _propose_check_file(proposals, filename, obj_info, obj)

    # Write out any missing test definitions
    _write_proposals(conn_info, catalogue, checks, proposals)

def _log_warning(self, message):
    """Log the given message as a warning."""
    logger.warning(message)