Example 1
    def put_suite_params(self, schd):
        """Put various suite parameters from schd in runtime database.

        This method queues the relevant insert statements.

        Arguments:
            schd (cylc.flow.scheduler.Scheduler): scheduler object.
        """
        if schd.final_point is None:
            # Store None as proper null value in database. No need to do this
            # for initial cycle point, which should never be None.
            final_point_str = None
        else:
            final_point_str = str(schd.final_point)
        self.db_inserts_map[self.TABLE_SUITE_PARAMS].extend([
            {"key": "uuid_str", "value": str(schd.uuid_str)},
            {"key": "run_mode", "value": schd.run_mode},
            {"key": "cylc_version", "value": CYLC_VERSION},
            {"key": "UTC_mode", "value": get_utc_mode()},
            {"key": "initial_point", "value": str(schd.initial_point)},
            {"key": "final_point", "value": final_point_str},
        ])
        if schd.config.cfg['cylc']['cycle point format']:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": "cycle_point_format",
                "value": schd.config.cfg['cylc']['cycle point format']})
        if schd.pool.is_held:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": "is_held", "value": 1})
        if schd.cli_start_point_string:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": "start_point", "value": schd.cli_start_point_string})
Example 2
    def put_workflow_params(self, schd):
        """Put various workflow parameters from schd in runtime database.

        This method queues the relevant insert statements.

        Arguments:
            schd (cylc.flow.scheduler.Scheduler): scheduler object.
        """
        self.db_deletes_map[self.TABLE_WORKFLOW_PARAMS].append({})
        self.db_inserts_map[self.TABLE_WORKFLOW_PARAMS].extend([
            {
                "key": self.KEY_UUID_STR,
                "value": schd.uuid_str
            },
            {
                "key": self.KEY_CYLC_VERSION,
                "value": CYLC_VERSION
            },
            {
                "key": self.KEY_UTC_MODE,
                "value": get_utc_mode()
            },
        ])
        if schd.config.cycle_point_dump_format is not None:
            self.db_inserts_map[self.TABLE_WORKFLOW_PARAMS].append({
                "key": self.KEY_CYCLE_POINT_FORMAT,
                "value": schd.config.cycle_point_dump_format})
        if schd.is_paused:
            self.db_inserts_map[self.TABLE_WORKFLOW_PARAMS].append({
                "key": self.KEY_PAUSED, "value": 1})
        for key in (self.KEY_INITIAL_CYCLE_POINT, self.KEY_FINAL_CYCLE_POINT,
                    self.KEY_START_CYCLE_POINT, self.KEY_STOP_CYCLE_POINT,
                    self.KEY_RUN_MODE, self.KEY_CYCLE_POINT_TIME_ZONE):
            value = getattr(schd.options, key, None)
            if value is not None and value != 'ignore':
                self.db_inserts_map[self.TABLE_WORKFLOW_PARAMS].append({
                    "key": key, "value": value})
        for key in (self.KEY_STOP_CLOCK_TIME, self.KEY_STOP_TASK):
            value = getattr(schd, key, None)
            if value is not None:
                self.db_inserts_map[self.TABLE_WORKFLOW_PARAMS].append({
                    "key": key, "value": value})
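The two loops above harvest optional scheduler settings with getattr(..., None) and skip anything unset (or set to 'ignore') before queuing it. A small self-contained sketch of that filter, using illustrative option names rather than the real Cylc keys:

from types import SimpleNamespace

def collect_params(options, keys, skip=('ignore',)):
    # Keep only options that are set and not in the skip list.
    rows = []
    for key in keys:
        value = getattr(options, key, None)
        if value is not None and value not in skip:
            rows.append({"key": key, "value": value})
    return rows

opts = SimpleNamespace(icp='2020', fcp=None, stopcp='ignore')
assert collect_params(opts, ('icp', 'fcp', 'stopcp')) == [
    {"key": "icp", "value": "2020"}]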
Example 3
    def _test(utc_mode, expected, expected_warnings=0):
        mock_glbl_cfg(
            'cylc.flow.config.glbl_cfg', f'''
            [cylc]
                UTC mode = {utc_mode['glbl']}
            ''')
        mock_config = Mock()
        mock_config.cfg = {'cylc': {'UTC mode': utc_mode['suite']}}
        mock_config.options.utc_mode = utc_mode['stored']
        SuiteConfig.process_utc_mode(mock_config)
        assert mock_config.cfg['cylc']['UTC mode'] is expected
        assert get_utc_mode() is expected
        assert len(caplog.record_tuples) == expected_warnings
        caplog.clear()
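This helper is a closure over the enclosing test's fixtures (mock_glbl_cfg, caplog) and checks that SuiteConfig.process_utc_mode resolves UTC mode from three sources (a previously stored value, the suite configuration, and the global configuration). A standalone sketch of that kind of precedence, purely illustrative and not the Cylc implementation:

def resolve_utc_mode(stored, suite, glbl):
    # First non-None source wins: stored value, then suite setting,
    # then the global default (illustrative precedence only).
    for candidate in (stored, suite, glbl):
        if candidate is not None:
            return bool(candidate)
    return False

assert resolve_utc_mode(None, None, True) is True
assert resolve_utc_mode(False, True, True) is False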
Example 4
    def put_suite_params(self, schd):
        """Put various suite parameters from schd in runtime database.

        This method queues the relevant insert statements.

        Arguments:
            schd (cylc.flow.scheduler.Scheduler): scheduler object.
        """
        self.db_deletes_map[self.TABLE_SUITE_PARAMS].append({})
        if schd.config.final_point is None:
            # Store None as proper null value in database. No need to do this
            # for initial cycle point, which should never be None.
            final_point_str = None
        else:
            final_point_str = str(schd.config.final_point)
        self.db_inserts_map[self.TABLE_SUITE_PARAMS].extend([
            {"key": self.KEY_UUID_STR, "value": str(schd.uuid_str)},
            {"key": "cylc_version", "value": CYLC_VERSION},
            {"key": "UTC_mode", "value": get_utc_mode()},
        ])
        if schd.config.cfg['cylc']['cycle point format']:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": "cycle_point_format",
                "value": schd.config.cfg['cylc']['cycle point format']})
        if schd.pool.is_held:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": self.KEY_HOLD, "value": 1})
        for key in (
            self.KEY_INITIAL_CYCLE_POINT,
            self.KEY_FINAL_CYCLE_POINT,
            self.KEY_START_CYCLE_POINT,
            self.KEY_STOP_CYCLE_POINT,
            self.KEY_RUN_MODE,
        ):
            value = getattr(schd.options, key, None)
            if value is not None:
                self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                    "key": key, "value": value})
        if schd.options.no_auto_shutdown is not None:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": self.KEY_NO_AUTO_SHUTDOWN,
                "value": int(schd.options.no_auto_shutdown)})
        for key in (self.KEY_STOP_CLOCK_TIME, self.KEY_STOP_TASK):
            value = getattr(schd, key, None)
            if value is not None:
                self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                    "key": key, "value": value})
Example 5
    def put_suite_params(self, schd):
        """Put various suite parameters from schd in runtime database.

        This method queues the relevant insert statements.

        Arguments:
            schd (cylc.flow.scheduler.Scheduler): scheduler object.
        """
        self.db_deletes_map[self.TABLE_SUITE_PARAMS].append({})
        self.db_inserts_map[self.TABLE_SUITE_PARAMS].extend([
            {"key": self.KEY_UUID_STR, "value": str(schd.uuid_str)},
            {"key": self.KEY_CYLC_VERSION, "value": CYLC_VERSION},
            {"key": self.KEY_UTC_MODE, "value": get_utc_mode()},
        ])
        if schd.config.cycle_point_dump_format is not None:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": self.KEY_CYCLE_POINT_FORMAT,
                "value": schd.config.cycle_point_dump_format})
        if schd.pool.is_held:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": self.KEY_HOLD, "value": 1})
        for key in (
            self.KEY_INITIAL_CYCLE_POINT,
            self.KEY_FINAL_CYCLE_POINT,
            self.KEY_START_CYCLE_POINT,
            self.KEY_STOP_CYCLE_POINT,
            self.KEY_RUN_MODE,
            self.KEY_CYCLE_POINT_TIME_ZONE
        ):
            value = getattr(schd.options, key, None)
            if value is not None:
                self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                    "key": key, "value": value})
        if schd.options.no_auto_shutdown is not None:
            self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                "key": self.KEY_NO_AUTO_SHUTDOWN,
                "value": int(schd.options.no_auto_shutdown)})
        for key in (self.KEY_STOP_CLOCK_TIME, self.KEY_STOP_TASK):
            value = getattr(schd, key, None)
            if value is not None:
                self.db_inserts_map[self.TABLE_SUITE_PARAMS].append({
                    "key": key, "value": value})
Example 6
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for a host
        select command to complete, or for remote initialisation. A bad host
        select command, an error while writing the job file, or a failed
        remote initialisation marks the task as bad, leading to submission
        failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                    self.job_pool.add_job_msg(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num),
                        self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name'])
                    and not is_remote_host(host)):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('[%s] -submit-num=%02d, owner@host=%s', itask,
                         itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'user_at_host': owner_at_host,
                        'batch_sys_name': itask.summary['batch_sys_name'],
                    })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(self.JOBS_SUBMIT,
                                       '(init %s)' % owner_at_host,
                                       err=REMOTE_INIT_FAILED,
                                       ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [('host', host, is_remote_host),
                                          ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(get_remote_suite_run_job_dir(host, owner, suite))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug('%s ... # will invoke in batches, sizes=%s', cmd,
                      [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(suite, itask.point,
                                                 itask.tdef.name,
                                                 itask.submit_num))
                    job_log_dirs.append(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(self.JOBS_SUBMIT,
                                   cmd + job_log_dirs,
                                   stdin_files=stdin_files,
                                   job_log_dirs=job_log_dirs,
                                   **kwargs), self._submit_task_jobs_callback,
                    [suite, itasks_batch])
        return done_tasks
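The chunk_size arithmetic near the end of this example splits the sorted task list into roughly equal batches of at most about 100 entries, so a single jobs-submit invocation never floods the stdout/stderr pipes. The same arithmetic, isolated and checked with a worked example:

def batch(items, target=100):
    # Nearly equal chunks, each no larger than roughly `target` entries.
    chunk_size = len(items) // ((len(items) // target) + 1) + 1
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]

assert [len(b) for b in batch(list(range(250)))] == [84, 84, 82]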
Example 7
    def submit_task_jobs(self, suite, itasks, is_simulation=False):
        """Prepare and submit task jobs.

        Submit tasks where possible. Ignore tasks that are waiting for a host
        select command to complete, or for remote initialisation. A bad host
        select command, an error while writing the job file, or a failed
        remote initialisation marks the task as bad, leading to submission
        failure.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.remote_host_select_reset()

        if not prepared_tasks:
            return bad_tasks

        # Group task jobs by (host, owner)
        auth_itasks = {}  # {(host, owner): [itask, ...], ...}
        for itask in prepared_tasks:
            auth_itasks.setdefault((itask.task_host, itask.task_owner), [])
            auth_itasks[(itask.task_host, itask.task_owner)].append(itask)
        # Submit task jobs for each (host, owner) group
        done_tasks = bad_tasks
        for (host, owner), itasks in sorted(auth_itasks.items()):
            is_init = self.task_remote_mgr.remote_init(host, owner)
            if is_init is None:
                # Remote is waiting to be initialised
                for itask in itasks:
                    itask.set_summary_message(self.REMOTE_INIT_MSG)
                continue
            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            if (
                self.batch_sys_mgr.is_job_local_to_host(
                    itask.summary['batch_sys_name']) and
                not is_remote_host(host)
            ):
                owner_at_host = get_host()
            else:
                owner_at_host = host
            # Persist
            if owner:
                owner_at_host = owner + '@' + owner_at_host
            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info(
                    '[%s] -submit-num=%02d, owner@host=%s',
                    itask, itask.submit_num, owner_at_host)
                self.suite_db_mgr.put_insert_task_jobs(itask, {
                    'is_manual_submit': itask.is_manual_submit,
                    'try_num': itask.get_try_num(),
                    'time_submit': now_str,
                    'user_at_host': owner_at_host,
                    'batch_sys_name': itask.summary['batch_sys_name'],
                })
                itask.is_manual_submit = False
            if is_init == REMOTE_INIT_FAILED:
                # Remote has failed to initialise
                # Set submit-failed for all affected tasks
                for itask in itasks:
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(
                            self.JOBS_SUBMIT,
                            '(init %s)' % owner_at_host,
                            err=REMOTE_INIT_FAILED,
                            ret_code=1),
                        suite, itask.point, itask.tdef.name)
                    self.task_events_mgr.process_message(
                        itask, CRITICAL,
                        self.task_events_mgr.EVENT_SUBMIT_FAILED)
                continue
            # Build the "cylc jobs-submit" command
            cmd = ['cylc', self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            remote_mode = False
            kwargs = {}
            for key, value, test_func in [
                    ('host', host, is_remote_host),
                    ('user', owner, is_remote_user)]:
                if test_func(value):
                    cmd.append('--%s=%s' % (key, value))
                    remote_mode = True
                    kwargs[key] = value
            if remote_mode:
                cmd.append('--remote-mode')
            cmd.append('--')
            cmd.append(glbl_cfg().get_derived_host_item(
                suite, 'suite job log directory', host, owner))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = len(itasks) // ((len(itasks) // 100) + 1) + 1
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug(
                '%s ... # will invoke in batches, sizes=%s',
                cmd, [len(b) for b in itasks_batches])
            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            get_task_job_job_log(
                                suite, itask.point, itask.tdef.name,
                                itask.submit_num))
                    job_log_dirs.append(get_task_job_id(
                        itask.point, itask.tdef.name, itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    itask.state.reset_state(TASK_STATUS_READY)
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                        **kwargs
                    ),
                    self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
Example 8
    def generate_definition_elements(self):
        """Generate static definition data elements.

        Populates the tasks, families, and workflow elements
        with data from and/or derived from the workflow definition.

        """
        config = self.schd.config
        update_time = time()
        tasks = self.added[TASKS]
        families = self.added[FAMILIES]
        workflow = self.added[WORKFLOW]
        workflow.id = self.workflow_id
        workflow.last_updated = update_time
        workflow.stamp = f'{workflow.id}@{workflow.last_updated}'

        graph = workflow.edges
        graph.leaves[:] = config.leaves
        graph.feet[:] = config.feet
        for key, info in config.suite_polling_tasks.items():
            graph.workflow_polling_tasks.add(
                local_proxy=key,
                workflow=info[0],
                remote_proxy=info[1],
                req_state=info[2],
                graph_string=info[3],
            )

        ancestors = config.get_first_parent_ancestors()
        descendants = config.get_first_parent_descendants()
        parents = config.get_parent_lists()

        # Create definition elements for graphed tasks.
        for name, tdef in config.taskdefs.items():
            t_id = f'{self.workflow_id}{ID_DELIM}{name}'
            t_stamp = f'{t_id}@{update_time}'
            task = PbTask(
                stamp=t_stamp,
                id=t_id,
                name=name,
                depth=len(ancestors[name]) - 1,
            )
            task.namespace[:] = tdef.namespace_hierarchy
            task.first_parent = (
                f'{self.workflow_id}{ID_DELIM}{ancestors[name][1]}')
            user_defined_meta = {}
            for key, val in dict(tdef.describe()).items():
                if key in ['title', 'description', 'URL']:
                    setattr(task.meta, key, val)
                else:
                    user_defined_meta[key] = val
            task.meta.user_defined = json.dumps(user_defined_meta)
            elapsed_time = task_mean_elapsed_time(tdef)
            if elapsed_time:
                task.mean_elapsed_time = elapsed_time
            task.parents.extend([
                f'{self.workflow_id}{ID_DELIM}{p_name}'
                for p_name in parents[name]
            ])
            tasks[t_id] = task

        # Create family definition elements for first-parent
        # ancestors of graphed tasks.
        for key, names in ancestors.items():
            for name in names:
                if (key == name or name in families):
                    continue
                f_id = f'{self.workflow_id}{ID_DELIM}{name}'
                f_stamp = f'{f_id}@{update_time}'
                family = PbFamily(
                    stamp=f_stamp,
                    id=f_id,
                    name=name,
                    depth=len(ancestors[name]) - 1,
                )
                famcfg = config.cfg['runtime'][name]
                user_defined_meta = {}
                for key, val in famcfg.get('meta', {}).items():
                    if key in ['title', 'description', 'URL']:
                        setattr(family.meta, key, val)
                    else:
                        user_defined_meta[key] = val
                family.meta.user_defined = json.dumps(user_defined_meta)
                family.parents.extend([
                    f'{self.workflow_id}{ID_DELIM}{p_name}'
                    for p_name in parents[name]
                ])
                try:
                    family.first_parent = (
                        f'{self.workflow_id}{ID_DELIM}{ancestors[name][1]}')
                except IndexError:
                    pass
                families[f_id] = family

        for name, parent_list in parents.items():
            if not parent_list:
                continue
            fam = parent_list[0]
            f_id = f'{self.workflow_id}{ID_DELIM}{fam}'
            if f_id in families:
                ch_id = f'{self.workflow_id}{ID_DELIM}{name}'
                if name in config.taskdefs:
                    families[f_id].child_tasks.append(ch_id)
                else:
                    families[f_id].child_families.append(ch_id)

        # Populate static fields of workflow
        workflow.api_version = API
        workflow.cylc_version = CYLC_VERSION
        workflow.name = self.schd.suite
        workflow.owner = self.schd.owner
        workflow.host = self.schd.host
        workflow.port = self.schd.port or -1
        workflow.pub_port = self.schd.pub_port or -1
        user_defined_meta = {}
        for key, val in config.cfg['meta'].items():
            if key in ['title', 'description', 'URL']:
                setattr(workflow.meta, key, val)
            else:
                user_defined_meta[key] = val
        workflow.meta.user_defined = json.dumps(user_defined_meta)
        workflow.tree_depth = max([
            len(val)
            for val in config.get_first_parent_ancestors(pruned=True).values()
        ]) - 1

        if get_utc_mode():
            time_zone_info = TIME_ZONE_UTC_INFO
        else:
            time_zone_info = TIME_ZONE_LOCAL_INFO
        for key, val in time_zone_info.items():
            setattr(workflow.time_zone_info, key, val)

        workflow.run_mode = config.run_mode()
        workflow.cycling_mode = config.cfg['scheduling']['cycling mode']
        workflow.workflow_log_dir = self.schd.suite_log_dir
        workflow.job_log_names.extend(list(JOB_LOG_OPTS.values()))
        workflow.ns_def_order.extend(config.ns_defn_order)

        workflow.broadcasts = json.dumps(self.schd.broadcast_mgr.broadcasts)

        workflow.tasks.extend(list(tasks))
        workflow.families.extend(list(families))

        self.ancestors = ancestors
        self.descendants = descendants
        self.parents = parents
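The metadata handling in this example copies the recognised keys (title, description, URL) onto a protobuf message and JSON-encodes everything else into a single user_defined field. A minimal sketch of that split, with a plain dict standing in for the protobuf message:

import json

KNOWN_KEYS = ('title', 'description', 'URL')

def split_meta(meta):
    # Recognised keys become real fields; the rest is one JSON blob.
    known = {k: v for k, v in meta.items() if k in KNOWN_KEYS}
    extra = {k: v for k, v in meta.items() if k not in KNOWN_KEYS}
    return known, json.dumps(extra)

known, user_defined = split_meta({'title': 'model run', 'owner team': 'ops'})
assert known == {'title': 'model run'}
assert user_defined == '{"owner team": "ops"}'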
Example 9
    def update(self, schd):
        """Update."""
        self.update_time = time()
        global_summary = {}
        family_summary = {}

        task_summary, task_states = self._get_tasks_info(schd)

        all_states = []
        ancestors_dict = schd.config.get_first_parent_ancestors()

        # Compute state_counts (total, and per cycle).
        state_count_totals = {}
        state_count_cycles = {}

        for point_string, c_task_states in task_states.items():
            # For each cycle point, construct a family state tree
            # based on the first-parent single-inheritance tree

            c_fam_task_states = {}

            count = {}

            for key in c_task_states:
                state = c_task_states[key]
                if state is None:
                    continue
                try:
                    count[state] += 1
                except KeyError:
                    count[state] = 1

                all_states.append(state)
                for parent in ancestors_dict.get(key, []):
                    if parent == key:
                        continue
                    c_fam_task_states.setdefault(parent, set([]))
                    c_fam_task_states[parent].add(state)

            state_count_cycles[point_string] = count

            for fam, child_states in c_fam_task_states.items():
                f_id = TaskID.get(fam, point_string)
                state = extract_group_state(child_states)
                if state is None:
                    continue
                try:
                    famcfg = schd.config.cfg['runtime'][fam]['meta']
                except KeyError:
                    famcfg = {}
                description = famcfg.get('description')
                title = famcfg.get('title')
                family_summary[f_id] = {'name': fam,
                                        'description': description,
                                        'title': title,
                                        'label': point_string,
                                        'state': state}

        state_count_totals = {}
        for point_string, count in list(state_count_cycles.items()):
            for state, state_count in count.items():
                state_count_totals.setdefault(state, 0)
                state_count_totals[state] += state_count

        all_states.sort()

        for key, value in (
                ('oldest cycle point string', schd.pool.get_min_point()),
                ('newest cycle point string', schd.pool.get_max_point()),
                ('newest runahead cycle point string',
                 schd.pool.get_max_point_runahead())):
            if value:
                global_summary[key] = str(value)
            else:
                global_summary[key] = None
        if get_utc_mode():
            global_summary['time zone info'] = TIME_ZONE_UTC_INFO
        else:
            global_summary['time zone info'] = TIME_ZONE_LOCAL_INFO
        global_summary['last_updated'] = self.update_time
        global_summary['run_mode'] = schd.config.run_mode()
        global_summary['states'] = all_states
        global_summary['namespace definition order'] = (
            schd.config.ns_defn_order)
        global_summary['reloading'] = schd.pool.do_reload
        global_summary['state totals'] = state_count_totals
        # Extract suite and task URLs from config.
        global_summary['suite_urls'] = dict(
            (i, j['meta']['URL'])
            for (i, j) in schd.config.cfg['runtime'].items())
        global_summary['suite_urls']['suite'] = schd.config.cfg['meta']['URL']

        # Construct a suite status string for use by monitoring clients.
        status, status_msg = get_suite_status(schd)
        global_summary['status'] = str(status)
        global_summary['status_string'] = status_msg

        # Replace the originals (atomic update, for access from other threads).
        self.task_summary = task_summary
        self.global_summary = global_summary
        self.family_summary = family_summary
        self.state_count_totals = state_count_totals
        self.state_count_cycles = state_count_cycles
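The per-cycle tallying above (incrementing count[state] inside try/except KeyError) is equivalent to counting with collections.Counter while skipping unset states. A standalone equivalent, not taken from Cylc:

from collections import Counter

def count_states(task_states):
    # One Counter per cycle point, ignoring tasks whose state is None.
    return {
        point: dict(Counter(s for s in states.values() if s is not None))
        for point, states in task_states.items()
    }

assert count_states(
    {'2020': {'a': 'running', 'b': 'running', 'c': None}}
) == {'2020': {'running': 2}}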
Example 10
    def generate_definition_elements(self):
        """Generate static definition data elements"""
        config = self.schd.config
        update_time = time()
        tasks = {}
        families = {}
        workflow = PbWorkflow(
            checksum=f"{self.workflow_id}@{update_time}",
            id=self.workflow_id,
        )

        ancestors = config.get_first_parent_ancestors()
        descendants = config.get_first_parent_descendants()
        parents = config.get_parent_lists()

        # create task definition data objects
        for name, tdef in config.taskdefs.items():
            t_id = f"{self.workflow_id}/{name}"
            t_check = f"{name}@{update_time}"
            task = PbTask(
                checksum=t_check,
                id=t_id,
                name=name,
                depth=len(ancestors[name]) - 1,
            )
            task.namespace[:] = tdef.namespace_hierarchy
            for key, val in dict(tdef.describe()).items():
                if key in ['title', 'description', 'url']:
                    setattr(task.meta, key, val)
                else:
                    task.meta.user_defined.append(f"{key}={val}")
            ntimes = len(tdef.elapsed_times)
            if ntimes:
                task.mean_elapsed_time = sum(tdef.elapsed_times) / ntimes
            elif tdef.rtconfig['job']['execution time limit']:
                task.mean_elapsed_time = \
                    tdef.rtconfig['job']['execution time limit']
            tasks[name] = task

        # create family definition data objects
        for name in ancestors.keys():
            if name in config.taskdefs.keys():
                continue
            f_id = f"{self.workflow_id}/{name}"
            f_check = f"{name}@{update_time}"
            family = PbFamily(
                checksum=f_check,
                id=f_id,
                name=name,
                depth=len(ancestors[name]) - 1,
            )
            famcfg = config.cfg['runtime'][name]
            for key, val in famcfg.get('meta', {}).items():
                if key in ['title', 'description', 'url']:
                    setattr(family.meta, key, val)
                else:
                    family.meta.user_defined.append(f"{key}={val}")
            family.parents.extend(
                [f"{self.workflow_id}/{p_name}"
                    for p_name in parents[name]])
            families[name] = family

        for name, parent_list in parents.items():
            if parent_list and parent_list[0] in families:
                ch_id = f"{self.workflow_id}/{name}"
                if name in config.taskdefs:
                    families[parent_list[0]].child_tasks.append(ch_id)
                else:
                    families[parent_list[0]].child_families.append(ch_id)

        workflow.api_version = self.schd.server.API
        workflow.cylc_version = CYLC_VERSION
        workflow.name = self.schd.suite
        workflow.owner = self.schd.owner
        workflow.host = self.schd.host
        workflow.port = self.schd.port
        for key, val in config.cfg['meta'].items():
            if key in ['title', 'description', 'URL']:
                setattr(workflow.meta, key, val)
            else:
                workflow.meta.user_defined.append(f"{key}={val}")
        workflow.tree_depth = max(
            [len(val) for key, val in ancestors.items()]) - 1

        if get_utc_mode():
            time_zone_info = TIME_ZONE_UTC_INFO
        else:
            time_zone_info = TIME_ZONE_LOCAL_INFO
        for key, val in time_zone_info.items():
            setattr(workflow.time_zone_info, key, val)

        workflow.last_updated = update_time
        workflow.run_mode = config.run_mode()
        workflow.cycling_mode = config.cfg['scheduling']['cycling mode']
        workflow.workflow_log_dir = self.schd.suite_log_dir
        workflow.job_log_names.extend(list(JOB_LOG_OPTS.values()))
        workflow.ns_defn_order.extend(config.ns_defn_order)

        workflow.tasks.extend([t.id for t in tasks.values()])
        workflow.families.extend([f.id for f in families.values()])

        # replace the originals (atomic update, for access from other threads).
        self.ancestors = ancestors
        self.descendants = descendants
        self.parents = parents
        self.tasks = tasks
        self.families = families
        self.workflow = workflow
Example 11
    def update(self, schd):
        """Update."""
        self.update_time = time()
        global_summary = {}
        family_summary = {}

        task_summary, task_states = self._get_tasks_info(schd)

        all_states = []
        ancestors_dict = schd.config.get_first_parent_ancestors()

        # Compute state_counts (total, and per cycle).
        state_count_totals = {}
        state_count_cycles = {}

        for point_string, c_task_states in task_states.items():
            # For each cycle point, construct a family state tree
            # based on the first-parent single-inheritance tree

            c_fam_task_states = {}

            count = {}

            for key in c_task_states:
                state = c_task_states[key]
                if state is None:
                    continue
                try:
                    count[state] += 1
                except KeyError:
                    count[state] = 1

                all_states.append(state)
                for parent in ancestors_dict.get(key, []):
                    if parent == key:
                        continue
                    c_fam_task_states.setdefault(parent, set([]))
                    c_fam_task_states[parent].add(state)

            state_count_cycles[point_string] = count

            for fam, child_states in c_fam_task_states.items():
                f_id = TaskID.get(fam, point_string)
                state = extract_group_state(child_states)
                if state is None:
                    continue
                try:
                    famcfg = schd.config.cfg['runtime'][fam]['meta']
                except KeyError:
                    famcfg = {}
                description = famcfg.get('description')
                title = famcfg.get('title')
                family_summary[f_id] = {'name': fam,
                                        'description': description,
                                        'title': title,
                                        'label': point_string,
                                        'state': state}

        state_count_totals = {}
        for point_string, count in list(state_count_cycles.items()):
            for state, state_count in count.items():
                state_count_totals.setdefault(state, 0)
                state_count_totals[state] += state_count

        all_states.sort()

        for key, value in (
                ('oldest cycle point string', schd.pool.get_min_point()),
                ('newest cycle point string', schd.pool.get_max_point()),
                ('newest runahead cycle point string',
                 schd.pool.get_max_point_runahead())):
            if value:
                global_summary[key] = str(value)
            else:
                global_summary[key] = None
        if get_utc_mode():
            global_summary['time zone info'] = TIME_ZONE_UTC_INFO
        else:
            global_summary['time zone info'] = TIME_ZONE_LOCAL_INFO
        global_summary['last_updated'] = self.update_time
        global_summary['run_mode'] = schd.run_mode
        global_summary['states'] = all_states
        global_summary['namespace definition order'] = (
            schd.config.ns_defn_order)
        global_summary['reloading'] = schd.pool.do_reload
        global_summary['state totals'] = state_count_totals
        # Extract suite and task URLs from config.
        global_summary['suite_urls'] = dict(
            (i, j['meta']['URL'])
            for (i, j) in schd.config.cfg['runtime'].items())
        global_summary['suite_urls']['suite'] = schd.config.cfg['meta']['URL']

        # Construct a suite status string for use by monitoring clients.
        if schd.pool.is_held:
            global_summary['status_string'] = SUITE_STATUS_HELD
        elif schd.stop_mode is not None:
            global_summary['status_string'] = SUITE_STATUS_STOPPING
        elif schd.pool.hold_point:
            global_summary['status_string'] = (
                SUITE_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point)
        elif schd.stop_point:
            global_summary['status_string'] = (
                SUITE_STATUS_RUNNING_TO_STOP % schd.stop_point)
        elif schd.stop_clock_time is not None:
            global_summary['status_string'] = (
                SUITE_STATUS_RUNNING_TO_STOP % schd.stop_clock_time_string)
        elif schd.stop_task:
            global_summary['status_string'] = (
                SUITE_STATUS_RUNNING_TO_STOP % schd.stop_task)
        elif schd.final_point:
            global_summary['status_string'] = (
                SUITE_STATUS_RUNNING_TO_STOP % schd.final_point)
        else:
            global_summary['status_string'] = SUITE_STATUS_RUNNING

        # Replace the originals (atomic update, for access from other threads).
        self.task_summary = task_summary
        self.global_summary = global_summary
        self.family_summary = family_summary
        self.state_count_totals = state_count_totals
        self.state_count_cycles = state_count_cycles
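The status string at the end of this example is chosen by an ordered series of checks where the first match wins (held, stopping, running-to-hold, running-to-stop, otherwise running). A compact sketch of that fallback pattern with placeholder status texts; the real strings live in Cylc's suite status constants:

def suite_status(is_held, stop_mode, hold_point, stop_point):
    # First matching condition wins; placeholder texts only.
    checks = (
        (is_held, 'held'),
        (stop_mode is not None, 'stopping'),
        (hold_point is not None, f'running to hold at {hold_point}'),
        (stop_point is not None, f'running to stop at {stop_point}'),
    )
    for condition, status in checks:
        if condition:
            return status
    return 'running'

assert suite_status(False, None, None, '2030') == 'running to stop at 2030'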
Example 12
    def generate_definition_elements(self):
        """Generate static definition data elements.

        Populates the tasks, families, and workflow elements
        with data from and/or derived from the workflow definition.

        """
        config = self.schd.config
        update_time = time()
        tasks = {}
        families = {}
        workflow = PbWorkflow(
            stamp=f'{self.workflow_id}@{update_time}',
            id=self.workflow_id,
        )

        graph = self.data[self.workflow_id][GRAPH]
        graph.leaves[:] = config.leaves
        graph.feet[:] = config.feet
        for key, info in config.suite_polling_tasks.items():
            graph.workflow_polling_tasks.add(
                local_proxy=key,
                workflow=info[0],
                remote_proxy=info[1],
                req_state=info[2],
                graph_string=info[3],
            )

        ancestors = config.get_first_parent_ancestors()
        descendants = config.get_first_parent_descendants()
        parents = config.get_parent_lists()

        # Create task definition elements.
        for name, tdef in config.taskdefs.items():
            t_id = f'{self.workflow_id}{ID_DELIM}{name}'
            t_check = f'{t_id}@{update_time}'
            task = PbTask(
                stamp=t_check,
                id=t_id,
                name=name,
                depth=len(ancestors[name]) - 1,
            )
            task.namespace[:] = tdef.namespace_hierarchy
            for key, val in dict(tdef.describe()).items():
                if key in ['title', 'description', 'url']:
                    setattr(task.meta, key, val)
                else:
                    task.meta.user_defined.append(f'{key}={val}')
            elapsed_time = task_mean_elapsed_time(tdef)
            if elapsed_time:
                task.mean_elapsed_time = elapsed_time
            tasks[t_id] = task

        # Create family definition elements.
        for name in ancestors.keys():
            if name in config.taskdefs.keys():
                continue
            f_id = f'{self.workflow_id}{ID_DELIM}{name}'
            f_check = f'{f_id}@{update_time}'
            family = PbFamily(
                stamp=f_check,
                id=f_id,
                name=name,
                depth=len(ancestors[name]) - 1,
            )
            famcfg = config.cfg['runtime'][name]
            for key, val in famcfg.get('meta', {}).items():
                if key in ['title', 'description', 'url']:
                    setattr(family.meta, key, val)
                else:
                    family.meta.user_defined.append(f'{key}={val}')
            family.parents.extend([
                f'{self.workflow_id}{ID_DELIM}{p_name}'
                for p_name in parents[name]
            ])
            families[f_id] = family

        for name, parent_list in parents.items():
            if not parent_list:
                continue
            fam = parent_list[0]
            f_id = f'{self.workflow_id}{ID_DELIM}{fam}'
            if f_id in families:
                ch_id = f'{self.workflow_id}{ID_DELIM}{name}'
                if name in config.taskdefs:
                    families[f_id].child_tasks.append(ch_id)
                else:
                    families[f_id].child_families.append(ch_id)

        # Populate static fields of workflow
        workflow.api_version = self.schd.server.API
        workflow.cylc_version = CYLC_VERSION
        workflow.name = self.schd.suite
        workflow.owner = self.schd.owner
        workflow.host = self.schd.host
        workflow.port = self.schd.port
        for key, val in config.cfg['meta'].items():
            if key in ['title', 'description', 'URL']:
                setattr(workflow.meta, key, val)
            else:
                workflow.meta.user_defined.append(f'{key}={val}')
        workflow.tree_depth = max([len(val)
                                   for key, val in ancestors.items()]) - 1

        if get_utc_mode():
            time_zone_info = TIME_ZONE_UTC_INFO
        else:
            time_zone_info = TIME_ZONE_LOCAL_INFO
        for key, val in time_zone_info.items():
            setattr(workflow.time_zone_info, key, val)

        workflow.last_updated = update_time
        workflow.run_mode = config.run_mode()
        workflow.cycling_mode = config.cfg['scheduling']['cycling mode']
        workflow.workflow_log_dir = self.schd.suite_log_dir
        workflow.job_log_names.extend(list(JOB_LOG_OPTS.values()))
        workflow.ns_defn_order.extend(config.ns_defn_order)

        workflow.tasks.extend(list(tasks))
        workflow.families.extend(list(families))

        # replace the originals (atomic update, for access from other threads).
        self.ancestors = ancestors
        self.descendants = descendants
        self.parents = parents
        self.data[self.workflow_id][TASKS] = tasks
        self.data[self.workflow_id][FAMILIES] = families
        self.data[self.workflow_id][WORKFLOW] = workflow
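Examples 8 and 12 build every data-store element id by joining the workflow id and the element name with ID_DELIM, then derive a stamp by appending the update time. A tiny illustration; the delimiter value used here is an assumption for the sketch, not necessarily Cylc's:

ID_DELIM = '|'  # assumed delimiter for this sketch

def element_id_and_stamp(workflow_id, name, update_time):
    # Mirror the t_id / t_stamp construction used above.
    e_id = f'{workflow_id}{ID_DELIM}{name}'
    return e_id, f'{e_id}@{update_time}'

e_id, stamp = element_id_and_stamp('user/flow', 'post_proc', 12345.0)
assert e_id == 'user/flow|post_proc'
assert stamp == 'user/flow|post_proc@12345.0'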
Example 13
    def submit_task_jobs(self,
                         suite,
                         itasks,
                         curve_auth,
                         client_pub_key_dir,
                         is_simulation=False):
        """Prepare for job submission and submit task jobs.

        Preparation (host selection, remote host init, and remote install)
        is done asynchronously. Newly released tasks may be sent here several
        times until these init subprocesses have returned. Failure during
        preparation is considered to be job submission failure.

        Once preparation has completed or failed, reset .waiting_on_job_prep in
        task instances so the scheduler knows to stop sending them back here.

        This method uses prep_submit_task_job() as helper.

        Return (list): list of tasks that attempted submission.
        """
        if is_simulation:
            return self._simulation_submit_task_jobs(itasks)

        # Prepare tasks for job submission
        prepared_tasks, bad_tasks = self.prep_submit_task_jobs(suite, itasks)

        # Reset consumed host selection results
        self.task_remote_mgr.subshell_eval_reset()

        if not prepared_tasks:
            return bad_tasks
        auth_itasks = {}  # {platform: [itask, ...], ...}
        for itask in prepared_tasks:
            platform_name = itask.platform['name']
            auth_itasks.setdefault(platform_name, [])
            auth_itasks[platform_name].append(itask)
        # Submit task jobs for each platform
        done_tasks = bad_tasks

        for platform_name, itasks in sorted(auth_itasks.items()):
            platform = itasks[0].platform
            install_target = get_install_target_from_platform(platform)
            ri_map = self.task_remote_mgr.remote_init_map

            if (ri_map.get(install_target) != REMOTE_FILE_INSTALL_DONE):
                if install_target == get_localhost_install_target():
                    # Skip init and file install for localhost.
                    LOG.debug(f"REMOTE INIT NOT REQUIRED for {install_target}")
                    ri_map[install_target] = (REMOTE_FILE_INSTALL_DONE)

                elif install_target not in ri_map:
                    # Remote init not in progress for target, so start it.
                    self.task_remote_mgr.remote_init(platform, curve_auth,
                                                     client_pub_key_dir)
                    for itask in itasks:
                        itask.set_summary_message(self.REMOTE_INIT_MSG)
                        self.data_store_mgr.delta_job_msg(
                            get_task_job_id(itask.point, itask.tdef.name,
                                            itask.submit_num),
                            self.REMOTE_INIT_MSG)
                    continue

                elif (ri_map[install_target] == REMOTE_INIT_DONE):
                    # Already done remote init so move on to file install
                    self.task_remote_mgr.file_install(platform)
                    continue

                elif (ri_map[install_target] in self.IN_PROGRESS.keys()):
                    # Remote init or file install in progress.
                    for itask in itasks:
                        msg = self.IN_PROGRESS[ri_map[install_target]]
                        itask.set_summary_message(msg)
                        self.data_store_mgr.delta_job_msg(
                            get_task_job_id(itask.point, itask.tdef.name,
                                            itask.submit_num), msg)
                    continue

            # Ensure that localhost background/at jobs are recorded as running
            # on the host name of the current suite host, rather than just
            # "localhost". On suite restart on a different suite host, this
            # allows the restart logic to correctly poll the status of the
            # background/at jobs that may still be running on the previous
            # suite host.
            host = get_host_from_platform(platform)
            if (self.job_runner_mgr.is_job_local_to_host(
                    itask.summary['job_runner_name'])
                    and not is_remote_platform(platform)):
                host = get_host()

            now_str = get_current_time_string()
            done_tasks.extend(itasks)
            for itask in itasks:
                # Log and persist
                LOG.info('[%s] -submit-num=%02d, host=%s', itask,
                         itask.submit_num, host)
                self.suite_db_mgr.put_insert_task_jobs(
                    itask, {
                        'is_manual_submit': itask.is_manual_submit,
                        'try_num': itask.get_try_num(),
                        'time_submit': now_str,
                        'platform_name': itask.platform['name'],
                        'job_runner_name': itask.summary['job_runner_name'],
                    })
                itask.is_manual_submit = False

            if (ri_map[install_target]
                    in [REMOTE_INIT_FAILED, REMOTE_FILE_INSTALL_FAILED]):
                # Remote init or install failed. Set submit-failed for all
                # affected tasks and remove target from remote init map
                # - this enables new tasks to re-initialise that target
                init_error = (ri_map[install_target])
                del ri_map[install_target]
                for itask in itasks:
                    itask.waiting_on_job_prep = False
                    itask.local_job_file_path = None  # reset for retry
                    log_task_job_activity(
                        SubProcContext(self.JOBS_SUBMIT,
                                       '(init %s)' % host,
                                       err=init_error,
                                       ret_code=1), suite, itask.point,
                        itask.tdef.name)
                    self._prep_submit_task_job_error(suite, itask,
                                                     '(remote init)', '')

                continue
            # Build the "cylc jobs-submit" command
            cmd = [self.JOBS_SUBMIT]
            if LOG.isEnabledFor(DEBUG):
                cmd.append('--debug')
            if get_utc_mode():
                cmd.append('--utc-mode')
            if is_remote_platform(itask.platform):
                remote_mode = True
                cmd.append('--remote-mode')
            else:
                remote_mode = False
            if itask.platform['clean job submission environment']:
                cmd.append('--clean-env')
            for var in itask.platform[
                    'job submission environment pass-through']:
                cmd.append(f"--env={var}")
            for path in itask.platform[
                    'job submission executable paths'] + SYSPATH:
                cmd.append(f"--path={path}")
            cmd.append('--')
            cmd.append(get_remote_suite_run_job_dir(platform, suite))
            # Chop itasks into a series of shorter lists if it's very big
            # to prevent overloading of stdout and stderr pipes.
            itasks = sorted(itasks, key=lambda itask: itask.identity)
            chunk_size = (len(itasks) // (
                (len(itasks) // platform['max batch submit size']) + 1) + 1)
            itasks_batches = [
                itasks[i:i + chunk_size]
                for i in range(0, len(itasks), chunk_size)
            ]
            LOG.debug('%s ... # will invoke in batches, sizes=%s', cmd,
                      [len(b) for b in itasks_batches])

            if remote_mode:
                cmd = construct_ssh_cmd(cmd, platform)
            else:
                cmd = ['cylc'] + cmd

            for i, itasks_batch in enumerate(itasks_batches):
                stdin_files = []
                job_log_dirs = []
                for itask in itasks_batch:
                    if remote_mode:
                        stdin_files.append(
                            os.path.expandvars(
                                get_task_job_job_log(suite, itask.point,
                                                     itask.tdef.name,
                                                     itask.submit_num)))
                    job_log_dirs.append(
                        get_task_job_id(itask.point, itask.tdef.name,
                                        itask.submit_num))
                    # The job file is now (about to be) used: reset the file
                    # write flag so that subsequent manual retrigger will
                    # generate a new job file.
                    itask.local_job_file_path = None
                    if itask.state.outputs.has_custom_triggers():
                        self.suite_db_mgr.put_update_task_outputs(itask)

                    itask.waiting_on_job_prep = False
                self.proc_pool.put_command(
                    SubProcContext(
                        self.JOBS_SUBMIT,
                        cmd + job_log_dirs,
                        stdin_files=stdin_files,
                        job_log_dirs=job_log_dirs,
                    ), self._submit_task_jobs_callback, [suite, itasks_batch])
        return done_tasks
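Across all of these examples, get_utc_mode() is consulted when assembling behaviour that depends on the scheduler's time-zone handling, for instance when deciding whether to pass --utc-mode to the jobs-submit command. A minimal sketch of that conditional command construction; only the flag names mirror the code above, the helper itself is illustrative:

import logging

def build_submit_cmd(utc_mode, log_level=logging.INFO):
    # Add optional flags only when the corresponding condition holds.
    cmd = ['cylc', 'jobs-submit']
    if log_level <= logging.DEBUG:
        cmd.append('--debug')
    if utc_mode:
        cmd.append('--utc-mode')
    return cmd

assert build_submit_cmd(True) == ['cylc', 'jobs-submit', '--utc-mode']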