def _log_helper(self,
                message: str,
                level: str,
                attempt_id: str = None,
                run_id: str = None,
                time_logged: str = None) -> Dict:
    """Posts a progress log to the import progress dashboard.

    Args:
        message: Log message as a string.
        level: Log level as a string.
        attempt_id: ID of the import attempt to link to as a string.
        run_id: ID of the system run to link to as a string.
        time_logged: Time logged in ISO 8601 format with UTC timezone
            as a string. Defaults to the time of the call.

    Returns:
        The posted progress log as a dict.

    Raises:
        ValueError: Neither run_id nor attempt_id is specified.
        requests.HTTPError: The dashboard returns a status code that is
            larger than or equal to 400 (raised by raise_for_status).
    """
    if not (run_id or attempt_id):
        raise ValueError('Neither run_id nor attempt_id is specified')
    log = {
        'message': message,
        'level': level,
        'time_logged': time_logged if time_logged else utils.utctime()
    }
    # Only attach the link fields that were actually supplied.
    for field, value in (('run_id', run_id), ('attempt_id', attempt_id)):
        if value:
            log[field] = value
    response = self.iap.post(_DASHBOARD_LOG_LIST, json=log)
    response.raise_for_status()
    return response.json()
def post(self, attempt_id):
    """Adds a new log to an existing attempt.

    time_logged must be in ISO 8601 format with timezone UTC+0, e.g.
    "2020-06-30T04:28:53.717569+00:00". If time_logged is not specified,
    the time at which the request is processed will be used. level must
    be one of the allowed levels defined by LogLevel.

    Args:
        attempt_id: ID string of the attempt

    Returns:
        A list of logs of the attempt if successful. Otherwise,
        (error message, error code).
    """
    parsed = ImportLog.parser.parse_args()
    # Reject levels outside the allowed set before touching the database.
    if parsed['level'] not in LOG_LEVELS:
        return ('Log level {} is not allowed'.format(parsed['level']),
                http.HTTPStatus.FORBIDDEN)
    attempt = self.database.get_by_id(attempt_id)
    if not attempt:
        return import_attempt.NOT_FOUND_ERROR, http.HTTPStatus.NOT_FOUND
    # A body that names a different attempt_id than the URL is a conflict.
    if parsed.get('attempt_id', attempt_id) != attempt_id:
        return import_attempt.ID_NOT_MATCH_ERROR, http.HTTPStatus.CONFLICT
    parsed.setdefault('time_logged', utils.utctime())
    entry = datastore.Entity(exclude_from_indexes=('message', ))
    entry.update(parsed)
    attempt_logs = attempt.setdefault('logs', [])
    attempt_logs.append(entry)
    self.database.save(attempt)
    return attempt_logs
def test_to_datetime(self):
    """Tests that the string returned by utctime can be converted to
    a datetime object and the timezone component is correct."""
    iso_string = utils.utctime()
    parsed = datetime.fromisoformat(iso_string)
    # A UTC timestamp must have a zero offset and the 'UTC' tz name.
    self.assertEqual(0, parsed.utcoffset().total_seconds())
    self.assertEqual('UTC', parsed.tzname())
def post(self):
    """Creates a new progress log with the fields provided in the
    request body.

    Log level can only be one of the levels defined by LogLevel.
    A progress log must be at least linked to a system run and can
    optionally be linked to an import attempt.

    Returns:
        The created progress log as a datastore Entity object with
        log_id set. Otherwise, (error message, error code), where the
        error message is a string and the error code is an int.
    """
    args = progress_log.ProgressLog.parser.parse_args()
    # log_id is assigned by the database, never taken from the client.
    args.pop(_LOG.log_id, None)
    args.setdefault('time_logged', utils.utctime())
    valid, err, code = validation.is_progress_log_valid(args)
    if not valid:
        return err, code
    valid, err, code = validation.required_fields_present(
        (_LOG.run_id, _LOG.level, _LOG.message), args)
    if not valid:
        return err, code
    run_id = args.get('run_id')
    attempt_id = args.get('attempt_id')
    run = None
    attempt = None
    # Transaction keeps the existence checks and the saves below atomic.
    with self.client.transaction():
        if run_id:
            run = self.run_database.get(run_id)
            if not run:
                return validation.get_not_found_error(_RUN.run_id, run_id)
        if attempt_id:
            attempt = self.attempt_database.get(attempt_id)
            if not attempt:
                return validation.get_not_found_error(
                    _ATTEMPT.attempt_id, attempt_id)
        if run and attempt:
            # When both links are given, the attempt must actually be
            # one of the run's import attempts.
            if attempt_id not in run[_RUN.import_attempts]:
                return ('The import attempt specified by the attempt_id '
                        f'{attempt_id} in the request body is not '
                        'executed by the system run specified by the '
                        f'run_id {run_id} in the request body',
                        http.HTTPStatus.CONFLICT)
        log = self.log_database.get(make_new=True)
        log.update(args)
        log = self.log_database.save(log, save_content=True)
        # Link the new log back into the run and/or attempt entities.
        if run:
            self.run_database.save(add_log_to_entity(log.key.name, run))
        if attempt:
            self.attempt_database.save(
                add_log_to_entity(log.key.name, attempt))
        return log
def set_import_attempt_default_values(import_attempt):
    """Sets default values for some fields of the import attempt and
    returns it.

    status is set to 'created'. time_created is set to the current time.
    logs is set to an empty list. Fields already present are untouched.
    """
    defaults = ((_MODEL.status, ImportAttemptStatus.CREATED.value),
                (_MODEL.time_created, utils.utctime()),
                (_MODEL.logs, []))
    for field, value in defaults:
        import_attempt.setdefault(field, value)
    return import_attempt
def set_system_run_default_values(system_run):
    """Sets default values for some fields of a system run.

    import_attempts is set to an empty list. status is set to 'created'.
    time_created is set to the current time. logs is set to an empty
    list. Fields already present are untouched.

    Args:
        system_run: System run as a dict.

    Returns:
        The same system run with the fields set, as a dict.
    """
    defaults = ((_MODEL.import_attempts, []),
                (_MODEL.status, SystemRunStatus.CREATED.value),
                (_MODEL.time_created, utils.utctime()),
                (_MODEL.logs, []))
    for field, value in defaults:
        system_run.setdefault(field, value)
    return system_run
def _mark_import_attempt_failed(attempt_id: str, message: str,
                                dashboard: dashboard_api.DashboardAPI) -> Dict:
    """Communicates with the import progress dashboard that an import
    attempt has failed.

    Args:
        attempt_id: ID of the import attempt.
        message: An additional message to log to the dashboard with
            level critical.
        dashboard: DashboardAPI object for the communication.

    Returns:
        Updated import attempt returned from the dashboard.
    """
    dashboard.critical(message, attempt_id=attempt_id)
    failure = {'status': 'failed', 'time_completed': utils.utctime()}
    return dashboard.update_attempt(failure, attempt_id)
def _mark_system_run_failed(run_id: str, message: str,
                            dashboard: dashboard_api.DashboardAPI) -> Dict:
    """Communicates with the import progress dashboard that a system run
    has failed.

    Args:
        run_id: ID of the system run.
        message: An additional message to log to the dashboard with
            level critical.
        dashboard: DashboardAPI object for the communication.

    Returns:
        Updated system run returned from the dashboard.
    """
    dashboard.critical(message, run_id=run_id)
    failure = {'status': 'failed', 'time_completed': utils.utctime()}
    return dashboard.update_run(failure, run_id=run_id)
def test_post_then_get(self, parse_args):
    """Tests that POST adds a log to an attempt."""
    first_log = {'level': 'info', 'message': 'first'}
    second_log = {
        'level': 'info',
        'message': 'second',
        'time_logged': utils.utctime()
    }
    # The first parsed body creates the attempt; the next two add logs.
    parse_args.side_effect = [{'import_name': 'name'}, first_log, second_log]
    attempt_id = '0'
    attempt_api = import_attempt.ImportAttemptByID()
    attempt_api.put(attempt_id)
    log_api = import_log.ImportLog()
    log_api.post(attempt_id)
    log_api.post(attempt_id)
    returned_logs = log_api.get(attempt_id)
    self.assertEqual(2, len(returned_logs))
    self.assertIn(first_log, returned_logs)
    self.assertIn(second_log, returned_logs)
def _log_helper(self,
                message: str,
                level: str,
                attempt_id: str = None,
                run_id: str = None,
                time_logged: str = None) -> Dict:
    """Posts a progress log to the import progress dashboard.

    Args:
        message: Log message as a string.
        level: Log level as a string.
        attempt_id: ID of the import attempt to link to as a string.
        run_id: ID of the system run to link to as a string.
        time_logged: Time logged in ISO 8601 format with UTC timezone
            as a string. Defaults to the time of the call.

    Returns:
        The posted progress log as a dict.

    Raises:
        ValueError: Neither run_id nor attempt_id is specified.
        requests.HTTPError: The importer returns a status code that is
            larger than or equal to 400.
    """
    if not (run_id or attempt_id):
        raise ValueError('Neither run_id nor attempt_id is specified')
    log = {
        'message': message,
        'level': level,
        'time_logged': time_logged if time_logged else utils.utctime()
    }
    if run_id:
        log['run_id'] = run_id
    if attempt_id:
        log['attempt_id'] = attempt_id
    # Log the request and the response for debugging failed posts.
    logging.info('DashboardAPI._log_helper: Logging %s to %s', log,
                 _DASHBOARD_LOG_LIST)
    response = self.iap.post(_DASHBOARD_LOG_LIST, json=log)
    response.raise_for_status()
    logging.info('DashboardAPI._log_helper: Received %s from %s',
                 response.text, _DASHBOARD_LOG_LIST)
    return response.json()
def _import_one_helper(self,
                       repo_dir: str,
                       relative_import_dir: str,
                       absolute_import_dir: str,
                       import_spec: dict,
                       run_id: str = None,
                       attempt_id: str = None) -> None:
    """Helper for _import_one.

    Downloads the data files listed in the manifest, creates a virtual
    environment, runs the user scripts in it, uploads the generated
    import inputs, and replaces the previous import through the importer.

    Args:
        See _import_one.
        run_id: ID of the system run as a string. This is only used to
            communicate with the import progress dashboard.
        attempt_id: ID of the import attempt executed by the system run
            with the run_id, as a string. This is only used to
            communicate with the import progress dashboard.
    """
    urls = import_spec.get('data_download_url')
    if urls:
        for url in urls:
            utils.download_file(url, absolute_import_dir,
                                self.config.file_download_timeout)
            if self.dashboard:
                self.dashboard.info(f'Downloaded: {url}',
                                    attempt_id=attempt_id,
                                    run_id=run_id)
    with tempfile.TemporaryDirectory() as tmpdir:
        requirements_path = os.path.join(absolute_import_dir,
                                         self.config.requirements_filename)
        central_requirements_path = os.path.join(
            repo_dir, self.config.requirements_filename)
        interpreter_path, process = _create_venv(
            (central_requirements_path, requirements_path),
            tmpdir,
            timeout=self.config.venv_create_timeout)
        _log_process(process=process,
                     dashboard=self.dashboard,
                     attempt_id=attempt_id,
                     run_id=run_id)
        process.check_returncode()
        # Bug fix: iterating import_spec.get('scripts') raised a
        # TypeError for manifests without a 'scripts' key, unlike the
        # guarded data_download_url handling above. Default to no-op.
        script_paths = import_spec.get('scripts', [])
        for path in script_paths:
            process = _run_user_script(
                interpreter_path=interpreter_path,
                script_path=os.path.join(absolute_import_dir, path),
                timeout=self.config.user_script_timeout,
                cwd=absolute_import_dir)
            _log_process(process=process,
                         dashboard=self.dashboard,
                         attempt_id=attempt_id,
                         run_id=run_id)
            process.check_returncode()
    inputs = self._upload_import_inputs(
        import_dir=absolute_import_dir,
        output_dir=f'{relative_import_dir}/{import_spec["import_name"]}',
        import_inputs=import_spec.get('import_inputs', []),
        attempt_id=attempt_id)
    if self.importer:
        self.importer.delete_previous_output(relative_import_dir,
                                             import_spec)
        if self.dashboard:
            self.dashboard.info(
                f'Submitting job to delete the previous import',
                attempt_id=attempt_id,
                run_id=run_id)
        self.importer.delete_import(
            relative_import_dir,
            import_spec,
            block=True,
            timeout=self.config.importer_delete_timeout)
        if self.dashboard:
            self.dashboard.info(f'Deleted previous import',
                                attempt_id=attempt_id,
                                run_id=run_id)
            self.dashboard.info(f'Submitting job to perform the import',
                                attempt_id=attempt_id,
                                run_id=run_id)
        self.importer.smart_import(
            relative_import_dir,
            inputs,
            import_spec,
            block=True,
            timeout=self.config.importer_import_timeout)
        if self.dashboard:
            self.dashboard.info(f'Import succeeded',
                                attempt_id=attempt_id,
                                run_id=run_id)
    if self.dashboard:
        self.dashboard.update_attempt(
            {
                'status': 'succeeded',
                'time_completed': utils.utctime()
            }, attempt_id)
def _execute_imports_on_commit_helper(self, commit_sha: str, run_id: str = None ) -> ExecutionResult: """Helper for execute_imports_on_commit. Args: See execute_imports_on_commit. run_id: ID of the system run as a string. This is only used to communicate with the import progress dashboard. Returns: ExecutionResult object describing the results of the executions. Raises: ExecutionError: The execution of an import failed for any reason. """ # Import targets specified in the commit message, # e.g., 'scripts/us_fed/treasury:constant_maturity', 'constant_maturity' targets = import_target.find_targets_in_commit(commit_sha, 'IMPORTS', self.github) if not targets: return ExecutionResult( 'pass', [], 'No import target specified in commit message') # Relative paths to directories having files changed by the commit # containing manifests, e.g. 'scripts/us_fed/treasury'. manifest_dirs = self.github.find_dirs_in_commit_containing_file( commit_sha, self.config.manifest_filename) with tempfile.TemporaryDirectory() as tmpdir: repo_dir = self.github.download_repo( tmpdir, commit_sha, self.config.repo_download_timeout) if self.dashboard: self.dashboard.info(f'Downloaded repo: {repo_dir}', run_id=run_id) imports_to_execute = import_target.find_imports_to_execute( targets=targets, manifest_dirs=manifest_dirs, manifest_filename=self.config.manifest_filename, repo_dir=repo_dir) executed_imports = [] for relative_dir, spec in imports_to_execute: try: self._import_one(repo_dir=repo_dir, relative_import_dir=relative_dir, absolute_import_dir=os.path.join( repo_dir, relative_dir), import_spec=spec, run_id=run_id) except Exception: raise ExecutionError( ExecutionResult('failed', executed_imports, traceback.format_exc())) absolute_name = import_target.get_absolute_import_name( relative_dir, spec['import_name']) executed_imports.append(absolute_name) if self.dashboard: self.dashboard.update_run( { 'status': 'succeeded', 'time_completed': utils.utctime() }, run_id) return 
ExecutionResult('succeeded', executed_imports, 'No issues')
def _schedule_on_commit_helper(
        self, commit_sha: str,
        run_id: str) -> import_executor.ExecutionResult:
    """Helper for schedule_on_commit.

    Args:
        See schedule_on_commit.
        run_id: ID of the system run as a string. This is only used to
            communicate with the import progress dashboard. If not
            provided, no such communication will be made.

    Returns:
        ExecutionResult object describing the results of scheduling the
        updates.

    Raises:
        ExecutionError: Scheduling an update failed for any reason.
    """
    targets = import_target.find_targets_in_commit(commit_sha, 'SCHEDULES',
                                                   self.github)
    if not targets:
        return import_executor.ExecutionResult(
            'pass', [], 'No import target specified in commit message')
    manifest_dirs = self.github.find_dirs_in_commit_containing_file(
        commit_sha, self.config.manifest_filename)
    with tempfile.TemporaryDirectory() as tmpdir:
        repo_dir = self.github.download_repo(
            tmpdir, commit_sha, self.config.repo_download_timeout)
        imports_to_execute = import_target.find_imports_to_execute(
            targets=targets,
            manifest_dirs=manifest_dirs,
            manifest_filename=self.config.manifest_filename,
            repo_dir=repo_dir)
        scheduled = []
        for relative_dir, spec in imports_to_execute:
            schedule = spec.get('cron_schedule')
            if not schedule:
                # A manifest targeted for scheduling must declare its
                # cron schedule. NOTE(review): this KeyError is not
                # wrapped in ExecutionError like the failures below —
                # presumably intentional; confirm with the caller.
                manifest_path = os.path.join(relative_dir,
                                             self.config.manifest_filename)
                raise KeyError(
                    f'cron_schedule not found in {manifest_path}')
            try:
                absolute_name = import_target.get_absolute_import_name(
                    relative_dir, spec['import_name'])
                scheduled.append(
                    self.create_schedule(absolute_name, schedule))
            except Exception:
                # Report partial progress along with the traceback.
                raise import_executor.ExecutionError(
                    import_executor.ExecutionResult(
                        'failed', scheduled, traceback.format_exc()))
        if self.dashboard:
            self.dashboard.update_run(
                {
                    'status': 'succeeded',
                    'time_completed': utils.utctime()
                }, run_id)
        return import_executor.ExecutionResult('succeeded', scheduled,
                                               'No issues')
def test_to_datetime_then_back(self):
    """Tests that the string returned by utctime can be converted to a
    datetime object and remains the same when converted back."""
    original_iso = utils.utctime()
    # Round-tripping through datetime must not change the string.
    round_tripped = datetime.fromisoformat(original_iso).isoformat()
    self.assertEqual(original_iso, round_tripped)