Ejemplo n.º 1
0
    def _log_helper(self,
                    message: str,
                    level: str,
                    attempt_id: str = None,
                    run_id: str = None,
                    time_logged: str = None) -> Dict:
        """Posts a progress log to the import progress dashboard.

        Args:
            message: Log message as a string.
            level: Log level as a string.
            run_id: ID of the system run to link to as a string.
            attempt_id: ID of the import attempt to link to as a string.
            time_logged: Time logged in ISO 8601 format with UTC timezone
                as a string.

        Returns:
            The posted progress log as a dict.

        Raises:
            ValueError: Neither run_id nor attempt_id is specified.
        """
        if not run_id and not attempt_id:
            raise ValueError('Neither run_id nor attempt_id is specified')
        if not time_logged:
            time_logged = utils.utctime()
        log = {'message': message, 'level': level, 'time_logged': time_logged}
        if run_id:
            log['run_id'] = run_id
        if attempt_id:
            log['attempt_id'] = attempt_id
        response = self.iap.post(_DASHBOARD_LOG_LIST, json=log)
        response.raise_for_status()
        return response.json()
Ejemplo n.º 2
0
    def post(self, attempt_id):
        """Appends a new log to an existing import attempt.

        time_logged must be in ISO 8601 format with timezone UTC+0, e.g.
        "2020-06-30T04:28:53.717569+00:00". If the parsed arguments have no
        time_logged key, utils.utctime() at request processing time is used.

        level must be one of the levels in LOG_LEVELS.

        Args:
            attempt_id: ID string of the attempt

        Returns:
            The attempt's list of logs (including the new one) if successful.
            Otherwise, (error message, error code):
            FORBIDDEN for a disallowed level, NOT_FOUND when the attempt
            does not exist, CONFLICT when the attempt_id in the request
            body differs from the one in the path.
        """
        args = ImportLog.parser.parse_args()
        level = args['level']
        if level not in LOG_LEVELS:
            message = 'Log level {} is not allowed'.format(level)
            return (message, http.HTTPStatus.FORBIDDEN)

        attempt = self.database.get_by_id(attempt_id)
        if not attempt:
            return import_attempt.NOT_FOUND_ERROR, http.HTTPStatus.NOT_FOUND
        # A body without attempt_id is treated as matching the path.
        body_attempt_id = args.get('attempt_id', attempt_id)
        if attempt_id != body_attempt_id:
            return import_attempt.ID_NOT_MATCH_ERROR, http.HTTPStatus.CONFLICT

        args.setdefault('time_logged', utils.utctime())
        # The message field is excluded from the datastore indexes.
        entry = datastore.Entity(exclude_from_indexes=('message', ))
        entry.update(args)
        logs = attempt.setdefault('logs', [])
        logs.append(entry)
        self.database.save(attempt)

        return logs
Ejemplo n.º 3
0
 def test_to_datetime(self):
     """Checks that the output of utctime parses with
     datetime.fromisoformat and carries a zero-offset 'UTC' timezone."""
     parsed = datetime.fromisoformat(utils.utctime())
     offset_seconds = parsed.utcoffset().total_seconds()
     self.assertEqual(0, offset_seconds)
     self.assertEqual('UTC', parsed.tzname())
Ejemplo n.º 4
0
    def post(self):
        """Creates a progress log from the fields in the request body.

        The parsed arguments must pass validation.is_progress_log_valid and
        must contain run_id, level and message. A log is therefore always
        linked to a system run and may additionally be linked to an import
        attempt. Any log_id in the request body is discarded, and
        time_logged defaults to utils.utctime().

        Returns:
            The created progress log as returned by the log database's
            save. Otherwise, (error message, error code) as produced by
            the validation helpers, or a CONFLICT error when the attempt
            is not among the run's import attempts.
        """
        args = progress_log.ProgressLog.parser.parse_args()
        args.pop(_LOG.log_id, None)
        args.setdefault('time_logged', utils.utctime())

        # Evaluated lazily and in order: the second check only runs when
        # the first one passes.
        checks = (
            lambda: validation.is_progress_log_valid(args),
            lambda: validation.required_fields_present(
                (_LOG.run_id, _LOG.level, _LOG.message), args),
        )
        for check in checks:
            is_valid, error, status = check()
            if not is_valid:
                return error, status

        run_id = args.get('run_id')
        attempt_id = args.get('attempt_id')

        with self.client.transaction():
            run = self.run_database.get(run_id) if run_id else None
            if run_id and not run:
                return validation.get_not_found_error(_RUN.run_id, run_id)

            attempt = (self.attempt_database.get(attempt_id)
                       if attempt_id else None)
            if attempt_id and not attempt:
                return validation.get_not_found_error(_ATTEMPT.attempt_id,
                                                      attempt_id)

            # When both are given, the attempt must belong to the run.
            if (run and attempt and
                    attempt_id not in run[_RUN.import_attempts]):
                return ('The import attempt specified by the attempt_id '
                        f'{attempt_id} in the request body is not '
                        'executed by the system run specified by the '
                        f'run_id {run_id} in the request body',
                        http.HTTPStatus.CONFLICT)

            new_log = self.log_database.get(make_new=True)
            new_log.update(args)
            new_log = self.log_database.save(new_log, save_content=True)
            log_name = new_log.key.name
            if run:
                self.run_database.save(add_log_to_entity(log_name, run))
            if attempt:
                self.attempt_database.save(
                    add_log_to_entity(log_name, attempt))
            return new_log
Ejemplo n.º 5
0
def set_import_attempt_default_values(import_attempt):
    """Fills in missing fields of an import attempt and returns it.

    Only fields that are absent are set; existing values are kept.

    status defaults to ImportAttemptStatus.CREATED.value.
    time_created defaults to the current time from utils.utctime().
    logs defaults to an empty list.
    """
    defaults = (
        (_MODEL.status, ImportAttemptStatus.CREATED.value),
        (_MODEL.time_created, utils.utctime()),
        (_MODEL.logs, []),
    )
    for field, value in defaults:
        import_attempt.setdefault(field, value)
    return import_attempt
Ejemplo n.º 6
0
def set_system_run_default_values(system_run):
    """Fills in missing fields of a system run.

    Only fields that are absent are set; existing values are kept.

    import_attempts defaults to an empty list.
    status defaults to SystemRunStatus.CREATED.value.
    time_created defaults to the current time from utils.utctime().
    logs defaults to an empty list.

    Args:
        system_run: System run as a dict.

    Returns:
        The same system run object with the fields set, as a dict.
    """
    defaults = (
        (_MODEL.import_attempts, []),
        (_MODEL.status, SystemRunStatus.CREATED.value),
        (_MODEL.time_created, utils.utctime()),
        (_MODEL.logs, []),
    )
    for field, value in defaults:
        system_run.setdefault(field, value)
    return system_run
Ejemplo n.º 7
0
def _mark_import_attempt_failed(attempt_id: str, message: str,
                                dashboard: dashboard_api.DashboardAPI) -> Dict:
    """Reports a failed import attempt to the import progress dashboard.

    A critical-level log is posted first, then the attempt is updated with
    status 'failed' and the completion time from utils.utctime().

    Args:
        attempt_id: ID of the import attempt.
        message: An additional message to log to the dashboard
            with level critical.
        dashboard: DashboardAPI object for the communication.

    Returns:
        Updated import attempt returned from the dashboard.
    """
    dashboard.critical(message, attempt_id=attempt_id)
    failure_fields = {'status': 'failed', 'time_completed': utils.utctime()}
    return dashboard.update_attempt(failure_fields, attempt_id)
Ejemplo n.º 8
0
def _mark_system_run_failed(run_id: str, message: str,
                            dashboard: dashboard_api.DashboardAPI) -> Dict:
    """Reports a failed system run to the import progress dashboard.

    A critical-level log is posted first, then the run is updated with
    status 'failed' and the completion time from utils.utctime().

    Args:
        run_id: ID of the system run.
        message: An additional message to log to the dashboard
            with level critical.
        dashboard: DashboardAPI object for the communication.

    Returns:
        Updated system run returned from the dashboard.
    """
    dashboard.critical(message, run_id=run_id)
    failure_fields = {'status': 'failed', 'time_completed': utils.utctime()}
    return dashboard.update_run(failure_fields, run_id=run_id)
Ejemplo n.º 9
0
    def test_post_then_get(self, parse_args):
        """Tests that each POST adds a log to an attempt and that GET
        returns all of them."""
        attempt_id = '0'
        first_log = {'level': 'info', 'message': 'first'}
        second_log = {
            'level': 'info',
            'message': 'second',
            'time_logged': utils.utctime()
        }
        # parse_args is consumed once by the attempt PUT and once by each
        # of the two log POSTs, in that order.
        parse_args.side_effect = [
            {'import_name': 'name'},
            first_log,
            second_log,
        ]
        import_attempt.ImportAttemptByID().put(attempt_id)

        log_api = import_log.ImportLog()
        for _ in range(2):
            log_api.post(attempt_id)

        logs = log_api.get(attempt_id)
        self.assertEqual(2, len(logs))
        for expected in (first_log, second_log):
            self.assertIn(expected, logs)
Ejemplo n.º 10
0
    def _log_helper(self,
                    message: str,
                    level: str,
                    attempt_id: str = None,
                    run_id: str = None,
                    time_logged: str = None) -> Dict:
        """Posts a progress log to the import progress dashboard.

        Args:
            message: Log message as a string.
            level: Log level as a string.
            run_id: ID of the system run to link to as a string.
            attempt_id: ID of the import attempt to link to as a string.
            time_logged: Time logged in ISO 8601 format with UTC timezone
                as a string.

        Returns:
            The posted progress log as a dict.

        Raises:
            ValueError: Neither run_id nor attempt_id is specified.
            requests.HTTPError: The importer returns a status code that is
                larger than or equal to 400.
        """
        if not run_id and not attempt_id:
            raise ValueError('Neither run_id nor attempt_id is specified')
        if not time_logged:
            time_logged = utils.utctime()
        log = {'message': message, 'level': level, 'time_logged': time_logged}
        if run_id:
            log['run_id'] = run_id
        if attempt_id:
            log['attempt_id'] = attempt_id
        logging.info('DashboardAPI._log_helper: Logging %s to %s', log,
                     _DASHBOARD_LOG_LIST)
        response = self.iap.post(_DASHBOARD_LOG_LIST, json=log)
        response.raise_for_status()
        logging.info('DashboardAPI._log_helper: Received %s from %s',
                     response.text, _DASHBOARD_LOG_LIST)
        return response.json()
Ejemplo n.º 11
0
    def _import_one_helper(self,
                           repo_dir: str,
                           relative_import_dir: str,
                           absolute_import_dir: str,
                           import_spec: dict,
                           run_id: str = None,
                           attempt_id: str = None) -> None:
        """Helper for _import_one.

        Downloads the data files listed in the import specification,
        creates a virtual environment, runs the user scripts, uploads the
        import inputs and, if an importer is configured, replaces the
        previous import with the new one. Progress is reported to the
        dashboard when self.dashboard is set.

        Args:
            repo_dir: Directory of the downloaded repository. The central
                requirements file is looked up directly under it.
            relative_import_dir: Import directory relative to the repository
                root. Used to build the upload output directory and passed
                to the importer.
            absolute_import_dir: Absolute path of the import directory.
                Files are downloaded into it and user scripts run with it
                as the working directory.
            import_spec: Import specification as a dict. The keys read here
                are 'data_download_url', 'scripts', 'import_inputs' and
                'import_name'. 'scripts' is iterated directly, so it must
                be present.
            run_id: ID of the system run as a string. This is only used to
                communicate with the import progress dashboard.
            attempt_id: ID of the import attempt executed by the system run
                with the run_id, as a string. This is only used to communicate
                with the import progress dashboard.

        Raises:
            Exception: check_returncode() is called on the process returned
                by _create_venv and by each _run_user_script call, so a
                failing process raises (presumably
                subprocess.CalledProcessError). Errors from the download,
                upload, importer and dashboard calls also propagate.
        """

        def log_info(message: str) -> None:
            """Logs an info message to the dashboard, if one is configured."""
            if self.dashboard:
                self.dashboard.info(message,
                                    attempt_id=attempt_id,
                                    run_id=run_id)

        urls = import_spec.get('data_download_url')
        if urls:
            for url in urls:
                utils.download_file(url, absolute_import_dir,
                                    self.config.file_download_timeout)
                log_info(f'Downloaded: {url}')

        # The virtual environment lives in a temporary directory that is
        # removed once all user scripts have run.
        with tempfile.TemporaryDirectory() as tmpdir:
            requirements_path = os.path.join(absolute_import_dir,
                                             self.config.requirements_filename)
            central_requirements_path = os.path.join(
                repo_dir, self.config.requirements_filename)
            interpreter_path, process = _create_venv(
                (central_requirements_path, requirements_path),
                tmpdir,
                timeout=self.config.venv_create_timeout)

            # Log the process output before checking the return code so the
            # output is recorded even when the process failed.
            _log_process(process=process,
                         dashboard=self.dashboard,
                         attempt_id=attempt_id,
                         run_id=run_id)
            process.check_returncode()

            script_paths = import_spec.get('scripts')
            for path in script_paths:
                process = _run_user_script(
                    interpreter_path=interpreter_path,
                    script_path=os.path.join(absolute_import_dir, path),
                    timeout=self.config.user_script_timeout,
                    cwd=absolute_import_dir)
                _log_process(process=process,
                             dashboard=self.dashboard,
                             attempt_id=attempt_id,
                             run_id=run_id)
                process.check_returncode()

        inputs = self._upload_import_inputs(
            import_dir=absolute_import_dir,
            output_dir=f'{relative_import_dir}/{import_spec["import_name"]}',
            import_inputs=import_spec.get('import_inputs', []),
            attempt_id=attempt_id)

        if self.importer:
            self.importer.delete_previous_output(relative_import_dir,
                                                 import_spec)

            log_info('Submitting job to delete the previous import')
            self.importer.delete_import(
                relative_import_dir,
                import_spec,
                block=True,
                timeout=self.config.importer_delete_timeout)
            log_info('Deleted previous import')

            log_info('Submitting job to perform the import')
            self.importer.smart_import(
                relative_import_dir,
                inputs,
                import_spec,
                block=True,
                timeout=self.config.importer_import_timeout)
            log_info('Import succeeded')

        if self.dashboard:
            self.dashboard.update_attempt(
                {
                    'status': 'succeeded',
                    'time_completed': utils.utctime()
                }, attempt_id)
Ejemplo n.º 12
0
    def _execute_imports_on_commit_helper(self,
                                          commit_sha: str,
                                          run_id: str = None
                                         ) -> ExecutionResult:
        """Helper for execute_imports_on_commit.

        Finds the import targets named in the commit message, downloads the
        repository at that commit into a temporary directory and executes
        each matching import in turn.

        Args:
            commit_sha: Identifier of the commit, as a string. It is used to
                look up the import targets and changed manifest directories
                and to download the repository.
            run_id: ID of the system run as a string. This is only used to
                communicate with the import progress dashboard.

        Returns:
            ExecutionResult object describing the results of the executions.
            When the commit message specifies no import target, a 'pass'
            result is returned before anything is downloaded and without
            updating the system run on the dashboard.

        Raises:
            ExecutionError: The execution of an import failed for any reason.
                The error wraps an ExecutionResult listing the imports
                executed so far and the formatted traceback, and is chained
                to the original exception.
        """

        # Import targets specified in the commit message,
        # e.g., 'scripts/us_fed/treasury:constant_maturity', 'constant_maturity'
        targets = import_target.find_targets_in_commit(commit_sha, 'IMPORTS',
                                                       self.github)
        if not targets:
            return ExecutionResult(
                'pass', [], 'No import target specified in commit message')
        # Relative paths to directories having files changed by the commit
        # containing manifests, e.g. 'scripts/us_fed/treasury'.
        manifest_dirs = self.github.find_dirs_in_commit_containing_file(
            commit_sha, self.config.manifest_filename)

        with tempfile.TemporaryDirectory() as tmpdir:
            repo_dir = self.github.download_repo(
                tmpdir, commit_sha, self.config.repo_download_timeout)
            if self.dashboard:
                self.dashboard.info(f'Downloaded repo: {repo_dir}',
                                    run_id=run_id)

            imports_to_execute = import_target.find_imports_to_execute(
                targets=targets,
                manifest_dirs=manifest_dirs,
                manifest_filename=self.config.manifest_filename,
                repo_dir=repo_dir)

            executed_imports = []
            for relative_dir, spec in imports_to_execute:
                try:
                    self._import_one(repo_dir=repo_dir,
                                     relative_import_dir=relative_dir,
                                     absolute_import_dir=os.path.join(
                                         repo_dir, relative_dir),
                                     import_spec=spec,
                                     run_id=run_id)

                except Exception as exc:
                    # Stop at the first failure; chain the cause explicitly
                    # so the original error stays attached to the wrapper.
                    raise ExecutionError(
                        ExecutionResult('failed', executed_imports,
                                        traceback.format_exc())) from exc
                absolute_name = import_target.get_absolute_import_name(
                    relative_dir, spec['import_name'])
                executed_imports.append(absolute_name)

            if self.dashboard:
                self.dashboard.update_run(
                    {
                        'status': 'succeeded',
                        'time_completed': utils.utctime()
                    }, run_id)

            return ExecutionResult('succeeded', executed_imports, 'No issues')
Ejemplo n.º 13
0
    def _schedule_on_commit_helper(
            self, commit_sha: str,
            run_id: str) -> import_executor.ExecutionResult:
        """Helper for schedule_on_commit.

        Finds the import targets named in the commit message, downloads the
        repository at that commit into a temporary directory and creates a
        schedule for each matching import from its 'cron_schedule'.

        Args:
            commit_sha: Identifier of the commit, as a string. It is used to
                look up the schedule targets and changed manifest
                directories and to download the repository.
            run_id: ID of the system run as a string. This is only used to
                update the system run on the import progress dashboard,
                which happens only when self.dashboard is set.

        Returns:
            ExecutionResult object describing the results of scheduling
                the updates. When the commit message specifies no import
                target, a 'pass' result is returned before anything is
                downloaded and without updating the dashboard.

        Raises:
            KeyError: An import to schedule has no 'cron_schedule' in its
                specification. This is raised directly, not wrapped in
                ExecutionError.
            ExecutionError: Resolving the import name or creating the
                schedule failed for any reason. The error wraps an
                ExecutionResult listing what was scheduled so far and the
                formatted traceback, and is chained to the original
                exception.
        """

        targets = import_target.find_targets_in_commit(commit_sha, 'SCHEDULES',
                                                       self.github)
        if not targets:
            return import_executor.ExecutionResult(
                'pass', [], 'No import target specified in commit message')
        manifest_dirs = self.github.find_dirs_in_commit_containing_file(
            commit_sha, self.config.manifest_filename)

        with tempfile.TemporaryDirectory() as tmpdir:
            repo_dir = self.github.download_repo(
                tmpdir, commit_sha, self.config.repo_download_timeout)

            imports_to_execute = import_target.find_imports_to_execute(
                targets=targets,
                manifest_dirs=manifest_dirs,
                manifest_filename=self.config.manifest_filename,
                repo_dir=repo_dir)

            scheduled = []
            for relative_dir, spec in imports_to_execute:
                schedule = spec.get('cron_schedule')
                if not schedule:
                    manifest_path = os.path.join(relative_dir,
                                                 self.config.manifest_filename)
                    raise KeyError(
                        f'cron_schedule not found in {manifest_path}')
                try:
                    absolute_name = import_target.get_absolute_import_name(
                        relative_dir, spec['import_name'])
                    scheduled.append(
                        self.create_schedule(absolute_name, schedule))
                except Exception as exc:
                    # Stop at the first failure; chain the cause explicitly
                    # so the original error stays attached to the wrapper.
                    raise import_executor.ExecutionError(
                        import_executor.ExecutionResult(
                            'failed', scheduled,
                            traceback.format_exc())) from exc

            if self.dashboard:
                self.dashboard.update_run(
                    {'status': 'succeeded', 'time_completed': utils.utctime()},
                    run_id)
            return import_executor.ExecutionResult('succeeded', scheduled,
                                                   'No issues')
Ejemplo n.º 14
0
 def test_to_datetime_then_back(self):
     """Checks that the output of utctime survives a round trip through
     datetime.fromisoformat and isoformat unchanged."""
     original = utils.utctime()
     round_tripped = datetime.fromisoformat(original).isoformat()
     self.assertEqual(original, round_tripped)