Ejemplo n.º 1
0
    def apply_action(self, action, fw_id):
        """
        Apply a FWAction on a Firework in the Workflow.

        Args:
            action (FWAction): action to apply
            fw_id (int): id of Firework on which to apply the action

        Returns:
            [int]: list of Firework ids that were updated or new
        """
        updated_ids = []

        # update the spec of the children FireWorks
        if action.update_spec:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].spec.update(action.update_spec)
                updated_ids.append(cfid)

        # update the spec of the children FireWorks using DictMod language
        if action.mod_spec:
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec:
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        # defuse children
        if action.defuse_children:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                self.fw_states[cfid] = 'DEFUSED'
                updated_ids.append(cfid)

        # defuse workflow
        if action.defuse_workflow:
            for fw_id in self.links.nodes:
                if self.id_fw[fw_id].state not in ['FIZZLED', 'COMPLETED']:
                    self.id_fw[fw_id].state = 'DEFUSED'
                    self.fw_states[fw_id] = 'DEFUSED'
                    updated_ids.append(fw_id)

        # add detour FireWorks. This should be done *before* additions
        if action.detours:
            for wf in action.detours:
                new_updates = self.append_wf(wf, [fw_id], detour=True, pull_spec_mods=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically detouring workflows!")
                updated_ids.extend(new_updates)

        # add additional FireWorks
        if action.additions:
            for wf in action.additions:
                new_updates = self.append_wf(wf, [fw_id], detour=False, pull_spec_mods=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically adding workflows!")
                updated_ids.extend(new_updates)

        return list(set(updated_ids))
Ejemplo n.º 2
0
    def apply_action(self, action, fw_id):
        updated_ids = []

        if action.command in ['CONTINUE', 'BREAK']:
            # Do nothing
            pass

        if action.command == 'DEFUSE':
            # mark all children as defused
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                updated_ids.append(cfid)

        if action.command == 'MODIFY' or 'CREATE':
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec.get('dict_mods', []):
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        if action.command == 'CREATE':
            create_fw = action.mod_spec['create_fw']
            self.links[fw_id].append(create_fw.fw_id)
            self.links[create_fw.fw_id] = [
            ]  # TODO: allow this to be children of original FW
            self.id_fw[create_fw.fw_id] = create_fw
            updated_ids.append(create_fw.fw_id)

        # TODO: implement the remaining actions
        return updated_ids
Ejemplo n.º 3
0
    def apply_action(self, action, fw_id):
        updated_ids = []

        if action.command in ['CONTINUE', 'BREAK']:
            # Do nothing
            pass

        if action.command == 'DEFUSE':
            # mark all children as defused
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                updated_ids.append(cfid)

        if action.command == 'MODIFY' or 'CREATE':
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec.get('dict_mods', []):
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        if action.command == 'CREATE':
            create_fw = action.mod_spec['create_fw']
            self.links[fw_id].append(create_fw.fw_id)
            self.links[create_fw.fw_id] = []  # TODO: allow this to be children of original FW
            self.id_fw[create_fw.fw_id] = create_fw
            updated_ids.append(create_fw.fw_id)

        # TODO: implement the remaining actions
        return updated_ids
Ejemplo n.º 4
0
    def apply_action(self, action, fw_id):
        """
        Apply a FWAction on a Firework in the Workflow.

        Args:
            action (FWAction): action to apply
            fw_id (int): id of Firework on which to apply the action

        Returns:
            [int]: list of Firework ids that were updated or new
        """
        updated_ids = []

        # update the spec of the children FireWorks
        if action.update_spec:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].spec.update(action.update_spec)
                updated_ids.append(cfid)

        # update the spec of the children FireWorks using DictMod language
        if action.mod_spec:
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec:
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        # defuse children
        if action.defuse_children:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                self.fw_states[cfid] = 'DEFUSED'
                updated_ids.append(cfid)

        # defuse workflow
        if action.defuse_workflow:
            for fw_id in self.links.nodes:
                if self.id_fw[fw_id].state not in ['FIZZLED', 'COMPLETED']:
                    self.id_fw[fw_id].state = 'DEFUSED'
                    self.fw_states[fw_id] = 'DEFUSED'
                    updated_ids.append(fw_id)

        # add detour FireWorks. This should be done *before* additions
        if action.detours:
            for wf in action.detours:
                new_updates = self.append_wf(wf, [fw_id], detour=True, pull_spec_mods=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically detouring workflows!")
                updated_ids.extend(new_updates)

        # add additional FireWorks
        if action.additions:
            for wf in action.additions:
                new_updates = self.append_wf(wf, [fw_id], detour=False, pull_spec_mods=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically adding workflows!")
                updated_ids.extend(new_updates)

        return list(set(updated_ids))
Ejemplo n.º 5
0
    def add_wf_to_fws(self, new_wf, fw_ids, pull_spec_mods=True, detour=False):
        """
        Internal method to add a workflow as a child to a Firework
        Note: detours must have children that have STATE_RANK that is WAITING or below

        :param new_wf: (Workflow) New Workflow to add
        :param fw_ids: ([int]) ids of the parent Fireworks on which to add the Workflow
        :param pull_spec_mods: (bool) pull spec mods of COMPLETED parents
        :param detour: (bool) add children of the current Firework to the Workflow's leaves
        :return: ([int]) list of Firework ids that were updated or new
        """
        updated_ids = []

        root_ids = new_wf.root_fw_ids
        leaf_ids = new_wf.leaf_fw_ids

        for new_fw in new_wf.fws:
            if new_fw.fw_id >= 0:  # note - this is also used later in the 'detour' code
                raise ValueError(
                    'FireWorks to add must use a negative fw_id! Got fw_id: '
                    '{}'.format(
                        new_fw.fw_id))

            self.id_fw[new_fw.fw_id] = new_fw  # add new_fw to id_fw

            for fw_id in fw_ids:
                if new_fw.fw_id in leaf_ids:
                    if detour:
                        # make sure all of these links are WAITING, else the DETOUR is not well defined
                        ready_run = [(f >= 0 and Firework.STATE_RANKS[self.fw_states[f]] > 1) for f in self.links[fw_id]]
                        if any(ready_run):
                            raise ValueError("Detour option only works if all children of detours are not READY to run and have not already run")
                        self.links[new_fw.fw_id] = [f for f in self.links[fw_id] if f >= 0]  # add children of current FW to new FW
                    else:
                        self.links[new_fw.fw_id] = []
                else:
                    self.links[new_fw.fw_id] = new_wf.links[new_fw.fw_id]
                updated_ids.append(new_fw.fw_id)

        for fw_id in fw_ids:
            for root_id in root_ids:
                self.links[fw_id].append(root_id)  # add the root id as my child
                if pull_spec_mods:  # re-apply some actions of the parent
                    m_fw = self.id_fw[fw_id]  # get the parent FW
                    m_launch = self._get_representative_launch(m_fw)  # get Launch of parent
                    if m_launch:
                        # pull spec update
                        if m_launch.state == 'COMPLETED' and m_launch.action.update_spec:
                            new_wf.id_fw[root_id].spec.update(m_launch.action.update_spec)
                        # pull spec mods
                        if m_launch.state == 'COMPLETED' and m_launch.action.mod_spec:
                            for mod in m_launch.action.mod_spec:
                                apply_mod(mod, new_wf.id_fw[root_id].spec)

        for new_fw in new_wf.fws:
            updated_ids = self.refresh(new_fw.fw_id, set(updated_ids))

        return updated_ids
Ejemplo n.º 6
0
    def apply_action(self, action, fw_id):
        """
        Apply a FWAction on a Firework in the Workflow

        :param action: (FWAction) action to apply
        :param fw_id: (int) id of Firework on which to apply the action
        :return: ([int]) list of Firework ids that were updated or new
        """

        updated_ids = []

        # update the spec of the children FireWorks
        if action.update_spec:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].spec.update(action.update_spec)
                updated_ids.append(cfid)

        # update the spec of the children FireWorks using DictMod language
        if action.mod_spec:
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec:
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        # defuse children
        if action.defuse_children:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                updated_ids.append(cfid)

        # add detour FireWorks
        # this should be done *before* additions
        if action.detours:
            for wf in action.detours:
                new_updates = self.add_wf_to_fws(wf, [fw_id],
                                                 pull_spec_mods=False,
                                                 detour=True)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically detouring workflows!"
                    )
                updated_ids.extend(new_updates)

        # add additional FireWorks
        if action.additions:
            for wf in action.additions:
                new_updates = self.add_wf_to_fws(wf, [fw_id],
                                                 pull_spec_mods=False,
                                                 detour=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically adding workflows!"
                    )
                updated_ids.extend(new_updates)

        return list(set(updated_ids))
Ejemplo n.º 7
0
    def apply_action(self, action, fw_id):
        """
        Apply a FWAction on a Firework in the Workflow

        :param action: (FWAction) action to apply
        :param fw_id: (int) id of Firework on which to apply the action
        :return: ([int]) list of Firework ids that were updated or new
        """

        updated_ids = []

        # update the spec of the children FireWorks
        if action.update_spec:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].spec.update(action.update_spec)
                updated_ids.append(cfid)

        # update the spec of the children FireWorks using DictMod language
        if action.mod_spec:
            for cfid in self.links[fw_id]:
                for mod in action.mod_spec:
                    apply_mod(mod, self.id_fw[cfid].spec)
                    updated_ids.append(cfid)

        # defuse children
        if action.defuse_children:
            for cfid in self.links[fw_id]:
                self.id_fw[cfid].state = 'DEFUSED'
                updated_ids.append(cfid)

        # add detour FireWorks
        # this should be done *before* additions
        if action.detours:
            for wf in action.detours:
                new_updates = self.add_wf_to_fws(wf, [fw_id], pull_spec_mods=False, detour=True)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically detouring workflows!")
                updated_ids.extend(new_updates)

        # add additional FireWorks
        if action.additions:
            for wf in action.additions:
                new_updates = self.add_wf_to_fws(wf, [fw_id], pull_spec_mods=False, detour=False)
                if len(set(updated_ids).intersection(new_updates)) > 0:
                    raise ValueError(
                        "Cannot use duplicated fw_ids when dynamically adding workflows!")
                updated_ids.extend(new_updates)

        return list(set(updated_ids))
Ejemplo n.º 8
0
    def run_task(self, fw_spec):

        incar_name = self.get("input_filename", "INCAR")
        incar = Incar.from_file(incar_name)

        incar_update = env_chk(self.get('incar_update'), fw_spec)
        incar_multiply = env_chk(self.get('incar_multiply'), fw_spec)
        incar_dictmod = env_chk(self.get('incar_dictmod'), fw_spec)

        if incar_update:
            incar.update(incar_update)

        if incar_multiply:
            for k in incar_multiply:
                incar[k] = incar[k] * incar_multiply[k]

        if incar_dictmod:
            apply_mod(incar_dictmod, incar)

        incar.write_file(self.get("output_filename", "INCAR"))
Ejemplo n.º 9
0
    def run_task(self, fw_spec):

        # load INCAR
        incar_name = self.get("input_filename", "INCAR")
        incar = Incar.from_file(incar_name)

        # process FireWork env values via env_chk
        incar_update = env_chk(self.get('incar_update'), fw_spec)
        incar_multiply = env_chk(self.get('incar_multiply'), fw_spec)
        incar_dictmod = env_chk(self.get('incar_dictmod'), fw_spec)

        if incar_update:
            incar.update(incar_update)

        if incar_multiply:
            for k in incar_multiply:
                incar[k] = incar[k] * incar_multiply[k]

        if incar_dictmod:
            apply_mod(incar_dictmod, incar)

        # write INCAR
        incar.write_file(self.get("output_filename", "INCAR"))
Ejemplo n.º 10
0
    def run_task(self, fw_spec):

        incar_name = self.get("input_filename", "INCAR")
        incar = Incar.from_file(incar_name)

        incar_update = env_chk(self.get('incar_update'), fw_spec)
        incar_multiply = env_chk(self.get('incar_multiply'), fw_spec)
        incar_dictmod = env_chk(self.get('incar_dictmod'), fw_spec)

        if incar_update:
            incar.update(incar_update)

        if incar_multiply:
            for k in incar_multiply:
                if hasattr(incar[k], '__iter__'):  # is list-like
                    incar[k] = list(np.multiply(incar[k], incar_multiply[k]))
                else:
                    incar[k] = incar[k] * incar_multiply[k]

        if incar_dictmod:
            apply_mod(incar_dictmod, incar)

        incar.write_file(self.get("output_filename", "INCAR"))
Ejemplo n.º 11
0
    def run_task(self, fw_spec):

        incar_name = self.get("input_filename", "INCAR")
        incar = Incar.from_file(incar_name)

        incar_update = env_chk(self.get("incar_update"), fw_spec)
        incar_multiply = env_chk(self.get("incar_multiply"), fw_spec)
        incar_dictmod = env_chk(self.get("incar_dictmod"), fw_spec)

        if incar_update:
            incar.update(incar_update)

        if incar_multiply:
            for k in incar_multiply:
                if hasattr(incar[k], "__iter__"):  # is list-like
                    incar[k] = list(np.multiply(incar[k], incar_multiply[k]))
                else:
                    incar[k] = incar[k] * incar_multiply[k]

        if incar_dictmod:
            apply_mod(incar_dictmod, incar)

        incar.write_file(self.get("output_filename", "INCAR"))
Ejemplo n.º 12
0
    def append_wf(self, new_wf, fw_ids, detour=False, pull_spec_mods=False):
        """
        Method to add a workflow as a child to a Firework
        Note: detours must have children that have STATE_RANK that is WAITING or below

        Args:
            new_wf (Workflow): New Workflow to add.
            fw_ids ([int]): ids of the parent Fireworks on which to add the Workflow.
            detour (bool): add children of the current Firework to the Workflow's leaves.
            pull_spec_mods (bool): pull spec mods of COMPLETED parents, refreshes the WF states.

        Returns:
            [int]: list of Firework ids that were updated or new
        """
        updated_ids = []

        root_ids = new_wf.root_fw_ids
        leaf_ids = new_wf.leaf_fw_ids

        # make sure detour runs do not link to ready/running/completed/etc. runs
        if detour:
            for fw_id in fw_ids:
                if fw_id in self.links:
                    # make sure all of these links are WAITING, else the DETOUR is not well defined
                    ready_run = [
                        (f >= 0
                         and Firework.STATE_RANKS[self.fw_states[f]] > 1)
                        for f in self.links[fw_id]
                    ]
                    if any(ready_run):
                        raise ValueError(
                            "fw_id: {}: Detour option only works if all children "
                            "of detours are not READY to run and have not "
                            "already run".format(fw_id))

        # make sure all new child fws have negative fw_id
        for new_fw in new_wf.fws:
            if new_fw.fw_id >= 0:  # note: this is also used later in the 'detour' code
                raise ValueError(
                    'FireWorks to add must use a negative fw_id! Got fw_id: {}'
                    .format(new_fw.fw_id))

        # completed checks - go ahead and append
        for new_fw in new_wf.fws:
            self.id_fw[new_fw.fw_id] = new_fw  # add new_fw to id_fw

            if new_fw.fw_id in leaf_ids:
                if detour:
                    for fw_id in fw_ids:
                        # add children of current FW to new FW
                        self.links[new_fw.fw_id] = [
                            f for f in self.links[fw_id] if f >= 0
                        ]
                else:
                    self.links[new_fw.fw_id] = []
            else:
                self.links[new_fw.fw_id] = new_wf.links[new_fw.fw_id]
            updated_ids.append(new_fw.fw_id)

        for fw_id in fw_ids:
            for root_id in root_ids:
                self.links[fw_id].append(
                    root_id)  # add the root id as my child
                if pull_spec_mods:  # re-apply some actions of the parent
                    m_fw = self.id_fw[fw_id]  # get the parent FW
                    m_launch = self._get_representative_launch(
                        m_fw)  # get Launch of parent
                    if m_launch:
                        # pull spec update
                        if m_launch.state == 'COMPLETED' and m_launch.action.update_spec:
                            new_wf.id_fw[root_id].spec.update(
                                m_launch.action.update_spec)
                        # pull spec mods
                        if m_launch.state == 'COMPLETED' and m_launch.action.mod_spec:
                            for mod in m_launch.action.mod_spec:
                                apply_mod(mod, new_wf.id_fw[root_id].spec)

        for new_fw in new_wf.fws:
            updated_ids = self.refresh(new_fw.fw_id, set(updated_ids))

        return updated_ids
Ejemplo n.º 13
0
    def run(self, pdb_on_exception=False):
        """
        Run the rocket (check out a job from the database and execute it)

        Args:
            pdb_on_exception (bool): whether to invoke the debugger on
                a caught exception.  Default False.
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())
        logdir = lp.get_logdir() if lp else None
        l_logger = get_fw_logger('rocket.launcher', l_dir=logdir,
                                 stream_level=ROCKET_STREAM_LOGLEVEL)

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
        else:  # offline mode
            m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['started_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
            return False

        final_state = None
        ping_stop = None
        btask_stops = []

        try:
            if '_launch_dir' in m_fw.spec and lp:
                prev_dir = launch_dir
                launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
                if not os.path.abspath(launch_dir):
                    launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir))
                # thread-safe "mkdir -p"
                try:
                    os.makedirs(launch_dir)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
                os.chdir(launch_dir)

                if not os.path.samefile(launch_dir, prev_dir):
                    lp.change_launch_dir(launch_id, launch_dir)

                if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                    try:
                        os.rmdir(prev_dir)
                    except Exception:
                        pass

            recovery = m_fw.spec.get('_recovery', None)
            if recovery:
                recovery_dir = recovery.get('_prev_dir')
                recovery_mode = recovery.get('_mode')
                starting_task = recovery.get('_task_n')
                all_stored_data.update(recovery.get('_all_stored_data'))
                all_update_spec.update(recovery.get('_all_update_spec'))
                all_mod_spec.extend(recovery.get('_all_mod_spec'))
                if lp:
                    l_logger.log(
                        logging.INFO,
                        'Recovering from task number {} in folder {}.'.format(starting_task,
                                                                              recovery_dir))
                if recovery_mode == 'cp' and launch_dir != recovery_dir:
                    if lp:
                        l_logger.log(
                            logging.INFO,
                            'Copying data from recovery folder {} to folder {}.'.format(recovery_dir,
                                                                                        launch_dir))
                    distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1)

            else:
                starting_task = 0
                files_in = m_fw.spec.get("_files_in", {})
                prev_files = m_fw.spec.get("_files_prev", {})
                for f in set(files_in.keys()).intersection(prev_files.keys()):
                    # We use zopen for the file objects for transparent handling
                    # of zipped files. shutil.copyfileobj does the actual copy
                    # in chunks that avoid memory issues.
                    with zopen(prev_files[f], "rb") as fin, zopen(files_in[f], "wb") as fout:
                        shutil.copyfileobj(fin, fout)

            if lp:
                message = 'RUNNING fw_id: {} in directory: {}'. \
                    format(m_fw.fw_id, os.getcwd())
                l_logger.log(logging.INFO, message)

            # write FW.json and/or FW.yaml to the directory
            if PRINT_FW_JSON:
                m_fw.to_file('FW.json', indent=4)
            if PRINT_FW_YAML:
                m_fw.to_file('FW.yaml')

            my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
            my_spec["_fw_env"] = self.fworker.env

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the Firetasks!
            for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
                checkpoint = {'_task_n': t_counter,
                              '_all_stored_data': all_stored_data,
                              '_all_update_spec': all_update_spec,
                              '_all_mod_spec': all_mod_spec}
                Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint)

                if lp:
                    l_logger.log(logging.INFO, "Task started: %s." % t.fw_name)

                if my_spec.get("_add_launchpad_and_fw_id"):
                    t.fw_id = m_fw.fw_id
                    if FWData().MULTIPROCESSING:
                        # hack because AutoProxy manager can't access attributes
                        t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict())
                    else:
                        t.launchpad = self.launchpad

                if my_spec.get("_add_fworker"):
                    t.fworker = self.fworker

                try:
                    m_action = t.run_task(my_spec)
                except BaseException as e:
                    traceback.print_exc()
                    tb = traceback.format_exc()
                    stop_backgrounds(ping_stop, btask_stops)
                    do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                    # If the exception is serializable, save its details
                    if pdb_on_exception:
                        pdb.post_mortem()
                    try:
                        exception_details = e.to_dict()
                    except AttributeError:
                        exception_details = None
                    except BaseException as e:
                        if lp:
                            l_logger.log(logging.WARNING,
                                         "Exception couldn't be serialized: %s " % e)
                        exception_details = None

                    try:
                        m_task = t.to_dict()
                    except Exception:
                        m_task = None

                    m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                                     '_task': m_task,
                                                     '_exception': {'_stacktrace': tb,
                                                                    '_details': exception_details}},
                                        exit=True)
                    m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

                    if lp:
                        final_state = 'FIZZLED'
                        lp.complete_launch(launch_id, m_action, final_state)
                    else:
                        fpath = zpath("FW_offline.json")
                        with zopen(fpath) as f_in:
                            d = json.loads(f_in.read())
                            d['fwaction'] = m_action.to_dict()
                            d['state'] = 'FIZZLED'
                            d['completed_on'] = datetime.utcnow().isoformat()
                            with zopen(fpath, "wt") as f_out:
                                f_out.write(json.dumps(d, ensure_ascii=False))

                    return True

                # read in a FWAction from a file, in case the task is not Python and cannot return
                # it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this
                # particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)
                if lp:
                    l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name)
                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

            if lp:
                final_state = 'COMPLETED'
                lp.complete_launch(launch_id, m_action, final_state)
            else:

                fpath = zpath("FW_offline.json")
                with zopen(fpath) as f_in:
                    d = json.loads(f_in.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    with zopen(fpath, "wt") as f_out:
                        f_out.write(json.dumps(d, ensure_ascii=False))

            return True

        except LockedWorkflowError as e:
            l_logger.log(logging.DEBUG, traceback.format_exc())
            l_logger.log(logging.WARNING,
                         "Firework {} reached final state {} but couldn't complete the update of "
                         "the database. Reason: {}\nRefresh the WF to recover the result "
                         "(lpad admin refresh -i {}).".format(
                             self.fw_id, final_state, e, self.fw_id))
            return True

        except Exception:
            # problems while processing the results. high probability of malformed data.
            traceback.print_exc()
            stop_backgrounds(ping_stop, btask_stops)
            # restore initial state to prevent the raise of further exceptions
            if lp:
                lp.restore_backup_data(launch_id, m_fw.fw_id)

            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # the action produced by the task is discarded
            m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None,
                                             '_exception': {'_stacktrace': traceback.format_exc(),
                                                            '_details': None}},
                                exit=True)

            try:
                m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
            except Exception:
                traceback.print_exc()

            if lp:
                try:
                    lp.complete_launch(launch_id, m_action, 'FIZZLED')
                except LockedWorkflowError as e:
                    l_logger.log(logging.DEBUG, traceback.format_exc())
                    l_logger.log(logging.WARNING,
                                 "Firework {} fizzled but couldn't complete the update of the database."
                                 " Reason: {}\nRefresh the WF to recover the result "
                                 "(lpad admin refresh -i {}).".format(
                                     self.fw_id, final_state, e, self.fw_id))
                    return True
            else:
                fpath = zpath("FW_offline.json")
                with zopen(fpath) as f_in:
                    d = json.loads(f_in.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    with zopen(fpath, "wt") as f_out:
                        f_out.write(json.dumps(d, ensure_ascii=False))

            return True
Ejemplo n.º 14
0
    def run(self):
        """
        Run the rocket (check out a job from the database and execute it)
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
        else:  # offline mode
            m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            with open('FW_offline.json', 'r+') as f:
                d = json.loads(f.read())
                d['started_on'] = datetime.utcnow().isoformat()
                f.seek(0)
                f.write(json.dumps(d))
                f.truncate()

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
            return False

        if '_launch_dir' in m_fw.spec:
            prev_dir = launch_dir
            os.chdir(m_fw.spec['_launch_dir'])
            launch_dir = os.path.abspath(os.getcwd())

            if lp:
                lp.change_launch_dir(launch_id, launch_dir)

            if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                try:
                    os.rmdir(prev_dir)
                except:
                    pass

        if lp:
            message = 'RUNNING fw_id: {} in directory: {}'.\
                format(m_fw.fw_id, os.getcwd())
            lp.log_message(logging.INFO, message)

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        try:
            my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
            my_spec["_fw_env"] = self.fworker.env

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            btask_stops = []
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the FireTasks!
            for t in m_fw.tasks:
                lp.log_message(logging.INFO, "Task started: %s." % t.fw_name)
                m_action = t.run_task(my_spec)

                # read in a FWAction from a file, in case the task is not Python and cannot return it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)
                lp.log_message(logging.INFO, "Task completed: %s " % t.fw_name)
                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            if lp:
                lp.complete_launch(launch_id, m_action, 'COMPLETED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True

        except:
            stop_backgrounds(ping_stop, btask_stops)
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            traceback.print_exc()
            try:
                m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': t.to_dict(),
                                             '_exception': traceback.format_exc()}, exit=True)
            except:
                m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None,
                                             '_exception': traceback.format_exc()}, exit=True)
            if lp:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True
Ejemplo n.º 15
0
    def run(self):
        """
        Run the rocket (check out a job from the database and execute it)
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir,
                                             self.fw_id)
        else:  # offline mode
            m_fw = FireWork.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            with open('FW_offline.json', 'r+') as f:
                d = json.loads(f.read())
                d['started_on'] = datetime.utcnow().isoformat()
                f.seek(0)
                f.write(json.dumps(d))
                f.truncate()

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(
                self.fworker.query))
            return False

        if '_launch_dir' in m_fw.spec:
            prev_dir = launch_dir
            os.chdir(m_fw.spec['_launch_dir'])
            launch_dir = os.path.abspath(os.getcwd())

            if lp:
                lp._change_launch_dir(launch_id, launch_dir)

            if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                try:
                    os.rmdir(prev_dir)
                except:
                    pass

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        try:
            my_spec = dict(
                m_fw.spec)  # make a copy of spec, don't override original

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            btask_stops = []
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the FireTasks!
            for my_task in m_fw.tasks:
                m_action = my_task.run_task(my_spec)

                # read in a FWAction from a file, in case the task is not Python and cannot return it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)

                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data[
                    'multiprocess_name'] = multiprocessing.current_process(
                    ).name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            if lp:
                lp.complete_launch(launch_id, m_action, 'COMPLETED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True

        except:
            stop_backgrounds(ping_stop, btask_stops)
            traceback.print_exc()
            try:
                m_action = FWAction(stored_data={
                    '_message': 'runtime error during task',
                    '_task': my_task.to_dict(),
                    '_exception': traceback.format_exc()
                },
                                    exit=True)
            except:
                m_action = FWAction(stored_data={
                    '_message': 'runtime error during task',
                    '_task': None,
                    '_exception': traceback.format_exc()
                },
                                    exit=True)
            if lp:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True
Ejemplo n.º 16
0
    def run(self, pdb_on_exception=False):
        """
        Run the rocket (check out a job from the database and execute it)

        Args:
            pdb_on_exception (bool): whether to invoke the debugger on
                a caught exception.  Default False.
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())
        logdir = lp.get_logdir() if lp else None
        l_logger = get_fw_logger('rocket.launcher', l_dir=logdir,
                                 stream_level=ROCKET_STREAM_LOGLEVEL)

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
        else:  # offline mode
            m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['started_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
            return False

        final_state = None
        ping_stop = None
        btask_stops = []

        try:
            if '_launch_dir' in m_fw.spec and lp:
                prev_dir = launch_dir
                launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
                if not os.path.abspath(launch_dir):
                    launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir))
                # thread-safe "mkdir -p"
                try:
                    os.makedirs(launch_dir)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
                os.chdir(launch_dir)

                if not os.path.samefile(launch_dir, prev_dir):
                    lp.change_launch_dir(launch_id, launch_dir)

                if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                    try:
                        os.rmdir(prev_dir)
                    except:
                        pass

            recovery = m_fw.spec.get('_recovery', None)
            if recovery:
                recovery_dir = recovery.get('_prev_dir')
                recovery_mode = recovery.get('_mode')
                starting_task = recovery.get('_task_n')
                all_stored_data.update(recovery.get('_all_stored_data'))
                all_update_spec.update(recovery.get('_all_update_spec'))
                all_mod_spec.extend(recovery.get('_all_mod_spec'))
                if lp:
                    l_logger.log(
                                logging.INFO,
                                'Recovering from task number {} in folder {}.'.format(starting_task,
                                                                                      recovery_dir))
                if recovery_mode == 'cp' and launch_dir != recovery_dir:
                    if lp:
                        l_logger.log(
                                    logging.INFO,
                                    'Copying data from recovery folder {} to folder {}.'.format(recovery_dir,
                                                                                                launch_dir))
                    distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1)

            else:
                starting_task = 0
                files_in = m_fw.spec.get("_files_in", {})
                prev_files = m_fw.spec.get("_files_prev", {})
                for f in set(files_in.keys()).intersection(prev_files.keys()):
                    # We use zopen for the file objects for transparent handling
                    # of zipped files. shutil.copyfileobj does the actual copy
                    # in chunks that avoid memory issues.
                    with zopen(prev_files[f], "rb") as fin, zopen(files_in[f], "wb") as fout:
                        shutil.copyfileobj(fin, fout)

            if lp:
                message = 'RUNNING fw_id: {} in directory: {}'.\
                    format(m_fw.fw_id, os.getcwd())
                l_logger.log(logging.INFO, message)

            # write FW.json and/or FW.yaml to the directory
            if PRINT_FW_JSON:
                m_fw.to_file('FW.json', indent=4)
            if PRINT_FW_YAML:
                m_fw.to_file('FW.yaml')

            my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
            my_spec["_fw_env"] = self.fworker.env

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the Firetasks!
            for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
                checkpoint = {'_task_n': t_counter,
                              '_all_stored_data': all_stored_data,
                              '_all_update_spec': all_update_spec,
                              '_all_mod_spec': all_mod_spec}
                Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint)
 
                if lp:
                   l_logger.log(logging.INFO, "Task started: %s." % t.fw_name)

                if my_spec.get("_add_launchpad_and_fw_id"):
                    t.fw_id = m_fw.fw_id
                    if FWData().MULTIPROCESSING:
                        # hack because AutoProxy manager can't access attributes
                        t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict())
                    else:
                        t.launchpad = self.launchpad

                if my_spec.get("_add_fworker"):
                    t.fworker = self.fworker

                try:
                    m_action = t.run_task(my_spec)
                except BaseException as e:
                    traceback.print_exc()
                    tb = traceback.format_exc()
                    stop_backgrounds(ping_stop, btask_stops)
                    do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                    # If the exception is serializable, save its details
                    if pdb_on_exception:
                        pdb.post_mortem()
                    try:
                        exception_details = e.to_dict()
                    except AttributeError:
                        exception_details = None
                    except BaseException as e:
                        if lp:
                            l_logger.log(logging.WARNING,
                                        "Exception couldn't be serialized: %s " % e)
                        exception_details = None

                    try:
                        m_task = t.to_dict()
                    except:
                        m_task = None

                    m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                                     '_task': m_task,
                                                     '_exception': {'_stacktrace': tb,
                                                                    '_details': exception_details}},
                                        exit=True)
                    m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

                    if lp:
                        final_state = 'FIZZLED'
                        lp.complete_launch(launch_id, m_action, final_state)
                    else:
                        fpath = zpath("FW_offline.json")
                        with zopen(fpath) as f_in:
                            d = json.loads(f_in.read())
                            d['fwaction'] = m_action.to_dict()
                            d['state'] = 'FIZZLED'
                            d['completed_on'] = datetime.utcnow().isoformat()
                            with zopen(fpath, "wt") as f_out:
                                f_out.write(json.dumps(d, ensure_ascii=False))

                    return True

                # read in a FWAction from a file, in case the task is not Python and cannot return
                # it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this
                # particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)
                if lp:
                    l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name)
                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

            if lp:
                final_state = 'COMPLETED'
                lp.complete_launch(launch_id, m_action, final_state)
            else:

                fpath = zpath("FW_offline.json")
                with zopen(fpath) as f_in:
                    d = json.loads(f_in.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    with zopen(fpath, "wt") as f_out:
                        f_out.write(json.dumps(d, ensure_ascii=False))

            return True

        except LockedWorkflowError as e:
            l_logger.log(logging.DEBUG, traceback.format_exc())
            l_logger.log(logging.WARNING,
                           "Firework {} reached final state {} but couldn't complete the update of "
                           "the database. Reason: {}\nRefresh the WF to recover the result "
                           "(lpad admin refresh -i {}).".format(
                               self.fw_id, final_state, e, self.fw_id))
            return True

        except:
            # problems while processing the results. high probability of malformed data.
            traceback.print_exc()
            stop_backgrounds(ping_stop, btask_stops)
            # restore initial state to prevent the raise of further exceptions
            if lp:
                lp.restore_backup_data(launch_id, m_fw.fw_id)

            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # the action produced by the task is discarded
            m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None,
                                             '_exception': {'_stacktrace': traceback.format_exc(),
                                                            '_details': None}},
                                exit=True)

            try:
                m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
            except:
                traceback.print_exc()

            if lp:
                try:
                    lp.complete_launch(launch_id, m_action, 'FIZZLED')
                except LockedWorkflowError as e:
                    l_logger.log(logging.DEBUG, traceback.format_exc())
                    l_logger.log(logging.WARNING,
                                   "Firework {} fizzled but couldn't complete the update of the database."
                                   " Reason: {}\nRefresh the WF to recover the result "
                                   "(lpad admin refresh -i {}).".format(
                                       self.fw_id, final_state, e, self.fw_id))
                    return True
            else:
                fpath = zpath("FW_offline.json")
                with zopen(fpath) as f_in:
                    d = json.loads(f_in.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    with zopen(fpath, "wt") as f_out:
                        f_out.write(json.dumps(d, ensure_ascii=False))

            return True
Ejemplo n.º 17
0
    def run(self):
        """
        Run the rocket (check out a job from the database and execute it)
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
        else:  # offline mode
            m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            with open('FW_offline.json', 'r+') as f:
                d = json.loads(f.read())
                d['started_on'] = datetime.utcnow().isoformat()
                f.seek(0)
                f.write(json.dumps(d))
                f.truncate()

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
            return False

        if lp:
            message = 'RUNNING fw_id: {} in directory: {}'.\
                format(m_fw.fw_id, os.getcwd())
            lp.log_message(logging.INFO, message)

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        try:
            if '_launch_dir' in m_fw.spec:
                prev_dir = launch_dir
                launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
                # thread-safe "mkdir -p"
                try:
                    os.makedirs(launch_dir)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
                os.chdir(launch_dir)
                launch_dir = os.path.abspath(os.getcwd())

                if lp:
                    lp.change_launch_dir(launch_id, launch_dir)

                if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                    try:
                        os.rmdir(prev_dir)
                    except:
                        pass

            if m_fw.spec.get('_recover_launch', None):
                launch_to_recover = lp.get_launch_by_id(m_fw.spec['_recover_launch']['_launch_id'])
                starting_task = launch_to_recover.action.stored_data.get('_exception', {}).get('_failed_task_n', 0)
                recover_launch_dir = launch_to_recover.launch_dir
                if lp:
                    lp.log_message(
                        logging.INFO,
                        'Recovering from task number {} in folder {}.'.format(starting_task, recover_launch_dir))
                if m_fw.spec['_recover_launch']['_recover_mode'] == 'cp' and launch_dir != recover_launch_dir:
                    if lp:
                        lp.log_message(
                            logging.INFO,
                            'Copying data from recovery folder {} to folder {}.'.format(recover_launch_dir, launch_dir))
                    distutils.dir_util.copy_tree(recover_launch_dir, launch_dir, update=1)

            else:
                starting_task = 0

            my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
            my_spec["_fw_env"] = self.fworker.env

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            btask_stops = []
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the FireTasks!
            for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
                if lp:
                    lp.log_message(logging.INFO, "Task started: %s." % t.fw_name)
                try:
                    m_action = t.run_task(my_spec)
                except BaseException as e:
                    traceback.print_exc()
                    tb = traceback.format_exc()
                    stop_backgrounds(ping_stop, btask_stops)
                    do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                    # If the exception is serializable, save its details
                    try:
                        exception_details = e.to_dict()
                    except AttributeError:
                        exception_details = None
                    except BaseException as e:
                        if lp:
                            lp.log_message(logging.WARNING, "Exception couldn't be serialized: %s " % e)
                        exception_details = None

                    try:
                        m_task = t.to_dict()
                    except:
                        m_task = None

                    m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': m_task,
                                                     '_exception': {'_stacktrace': tb, '_details': exception_details,
                                                                    '_failed_task_n': t_counter}}, exit=True)
                    if lp:
                        lp.complete_launch(launch_id, m_action, 'FIZZLED')
                    else:
                        with open('FW_offline.json', 'r+') as f:
                            d = json.loads(f.read())
                            d['fwaction'] = m_action.to_dict()
                            d['state'] = 'FIZZLED'
                            f.seek(0)
                            f.write(json.dumps(d))
                            f.truncate()

                    return True


                # read in a FWAction from a file, in case the task is not Python and cannot return it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)
                if lp:
                    lp.log_message(logging.INFO, "Task completed: %s " % t.fw_name)
                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            if lp:
                lp.complete_launch(launch_id, m_action, 'COMPLETED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True

        except:
            # problems while processing the results. high probability of malformed data.
            traceback.print_exc()
            stop_backgrounds(ping_stop, btask_stops)
            # restore initial state to prevent the raise of further exceptions
            if lp:
                lp.restore_backup_data(launch_id, m_fw.fw_id)

            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # the action produced by the task is discarded
            m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None,
                                             '_exception': {'_stacktrace': traceback.format_exc(),
                                             '_details': None}}, exit=True)
            if lp:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True
Ejemplo n.º 18
0
    def run(self):
        """
        Run the rocket (check out a job from the database and execute it)
        """
        all_stored_data = {}  # combined stored data for *all* the Tasks
        all_update_spec = {}  # combined update_spec for *all* the Tasks
        all_mod_spec = []  # combined mod_spec for *all* the Tasks

        lp = self.launchpad
        launch_dir = os.path.abspath(os.getcwd())

        # check a FW job out of the launchpad
        if lp:
            m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
        else:  # offline mode
            m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

            # set the run start time
            with open('FW_offline.json', 'r+') as f:
                d = json.loads(f.read())
                d['started_on'] = datetime.utcnow().isoformat()
                f.seek(0)
                f.write(json.dumps(d))
                f.truncate()

            launch_id = None  # we don't need this in offline mode...

        if not m_fw:
            print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
            return False

        try:
            if '_launch_dir' in m_fw.spec and lp:
                prev_dir = launch_dir
                launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
                if not os.path.abspath(launch_dir):
                    launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir))
                # thread-safe "mkdir -p"
                try:
                    os.makedirs(launch_dir)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
                os.chdir(launch_dir)

                if not os.path.samefile(launch_dir, prev_dir):
                    lp.change_launch_dir(launch_id, launch_dir)

                if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                    try:
                        os.rmdir(prev_dir)
                    except:
                        pass

            if m_fw.spec.get('_recover_launch', None):
                launch_to_recover = lp.get_launch_by_id(m_fw.spec['_recover_launch']['_launch_id'])
                starting_task = launch_to_recover.action.stored_data.get('_exception', {}).get('_failed_task_n', 0)
                recover_launch_dir = launch_to_recover.launch_dir
                if lp:
                    lp.log_message(
                        logging.INFO,
                        'Recovering from task number {} in folder {}.'.format(starting_task, recover_launch_dir))
                if m_fw.spec['_recover_launch']['_recover_mode'] == 'cp' and launch_dir != recover_launch_dir:
                    if lp:
                        lp.log_message(
                            logging.INFO,
                            'Copying data from recovery folder {} to folder {}.'.format(recover_launch_dir, launch_dir))
                    distutils.dir_util.copy_tree(recover_launch_dir, launch_dir, update=1)

            else:
                starting_task = 0

            if lp:
                message = 'RUNNING fw_id: {} in directory: {}'.\
                    format(m_fw.fw_id, os.getcwd())
                lp.log_message(logging.INFO, message)

            # write FW.json and/or FW.yaml to the directory
            if PRINT_FW_JSON:
                m_fw.to_file('FW.json', indent=4)
            if PRINT_FW_YAML:
                m_fw.to_file('FW.yaml')

            my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
            my_spec["_fw_env"] = self.fworker.env

            # set up heartbeat (pinging the server that we're still alive)
            ping_stop = start_ping_launch(lp, launch_id)

            # start background tasks
            btask_stops = []
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    btask_stops.append(start_background_task(bt, m_fw.spec))

            # execute the FireTasks!
            for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
                if lp:
                    lp.log_message(logging.INFO, "Task started: %s." % t.fw_name)

                if my_spec.get("_add_launchpad_and_fw_id"):
                    t.launchpad = self.launchpad
                    t.fw_id = m_fw.fw_id

                try:
                    m_action = t.run_task(my_spec)
                except BaseException as e:
                    traceback.print_exc()
                    tb = traceback.format_exc()
                    stop_backgrounds(ping_stop, btask_stops)
                    do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                    # If the exception is serializable, save its details
                    try:
                        exception_details = e.to_dict()
                    except AttributeError:
                        exception_details = None
                    except BaseException as e:
                        if lp:
                            lp.log_message(logging.WARNING, "Exception couldn't be serialized: %s " % e)
                        exception_details = None

                    try:
                        m_task = t.to_dict()
                    except:
                        m_task = None

                    m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': m_task,
                                                     '_exception': {'_stacktrace': tb, '_details': exception_details,
                                                                    '_failed_task_n': t_counter}}, exit=True)
                    m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

                    if lp:
                        lp.complete_launch(launch_id, m_action, 'FIZZLED')
                    else:
                        with open('FW_offline.json', 'r+') as f:
                            d = json.loads(f.read())
                            d['fwaction'] = m_action.to_dict()
                            d['state'] = 'FIZZLED'
                            f.seek(0)
                            f.write(json.dumps(d))
                            f.truncate()

                    return True

                # read in a FWAction from a file, in case the task is not Python and cannot return it explicitly
                if os.path.exists('FWAction.json'):
                    m_action = FWAction.from_file('FWAction.json')
                elif os.path.exists('FWAction.yaml'):
                    m_action = FWAction.from_file('FWAction.yaml')

                if not m_action:
                    m_action = FWAction()

                # update the global stored data with the data to store and update from this particular Task
                all_stored_data.update(m_action.stored_data)
                all_update_spec.update(m_action.update_spec)
                all_mod_spec.extend(m_action.mod_spec)

                # update spec for next task as well
                my_spec.update(m_action.update_spec)
                for mod in m_action.mod_spec:
                    apply_mod(mod, my_spec)
                if lp:
                    lp.log_message(logging.INFO, "Task completed: %s " % t.fw_name)
                if m_action.skip_remaining_tasks:
                    break

            # add job packing info if this is needed
            if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
                all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

            # perform finishing operation
            stop_backgrounds(ping_stop, btask_stops)
            for b in btask_stops:
                b.set()
            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # last background monitors
            if '_background_tasks' in my_spec:
                for bt in my_spec['_background_tasks']:
                    if bt.run_on_finish:
                        for task in bt.tasks:
                            task.run_task(m_fw.spec)

            m_action.stored_data = all_stored_data
            m_action.mod_spec = all_mod_spec
            m_action.update_spec = all_update_spec

            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

            if lp:
                lp.complete_launch(launch_id, m_action, 'COMPLETED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'COMPLETED'
                    d['completed_on'] = datetime.utcnow().isoformat()
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True

        except:
            # problems while processing the results. high probability of malformed data.
            traceback.print_exc()
            stop_backgrounds(ping_stop, btask_stops)
            # restore initial state to prevent the raise of further exceptions
            if lp:
                lp.restore_backup_data(launch_id, m_fw.fw_id)

            do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
            # the action produced by the task is discarded
            m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None,
                                             '_exception': {'_stacktrace': traceback.format_exc(),
                                             '_details': None}}, exit=True)

            try:
                m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
            except:
                traceback.print_exc()

            if lp:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            else:
                with open('FW_offline.json', 'r+') as f:
                    d = json.loads(f.read())
                    d['fwaction'] = m_action.to_dict()
                    d['state'] = 'FIZZLED'
                    f.seek(0)
                    f.write(json.dumps(d))
                    f.truncate()

            return True
Ejemplo n.º 19
0
    def append_wf(self, new_wf, fw_ids, detour=False, pull_spec_mods=False):
        """
        Method to add a workflow as a child to a Firework
        Note: detours must have children that have STATE_RANK that is WAITING or below

        Args:
            new_wf (Workflow): New Workflow to add.
            fw_ids ([int]): ids of the parent Fireworks on which to add the Workflow.
            detour (bool): add children of the current Firework to the Workflow's leaves.
            pull_spec_mods (bool): pull spec mods of COMPLETED parents, refreshes the WF states.

        Returns:
            [int]: list of Firework ids that were updated or new
        """
        updated_ids = []

        root_ids = new_wf.root_fw_ids
        leaf_ids = new_wf.leaf_fw_ids

        # make sure detour runs do not link to ready/running/completed/etc. runs
        if detour:
            for fw_id in fw_ids:
                if fw_id in self.links:
                    # make sure all of these links are WAITING, else the DETOUR is not well defined
                    ready_run = [(f >= 0 and Firework.STATE_RANKS[self.fw_states[f]] > 1)
                                 for f in self.links[fw_id]]
                    if any(ready_run):
                        raise ValueError("fw_id: {}: Detour option only works if all children "
                                         "of detours are not READY to run and have not "
                                         "already run".format(fw_id))

        # make sure all new child fws have negative fw_id
        for new_fw in new_wf.fws:
            if new_fw.fw_id >= 0:  # note: this is also used later in the 'detour' code
                raise ValueError(
                    'FireWorks to add must use a negative fw_id! Got fw_id: {}'.format(new_fw.fw_id))

        # completed checks - go ahead and append
        for new_fw in new_wf.fws:
            self.id_fw[new_fw.fw_id] = new_fw  # add new_fw to id_fw

            if new_fw.fw_id in leaf_ids:
                if detour:
                    for fw_id in fw_ids:
                        # add children of current FW to new FW
                        self.links[new_fw.fw_id] = [f for f in self.links[fw_id] if f >= 0]
                else:
                    self.links[new_fw.fw_id] = []
            else:
                self.links[new_fw.fw_id] = new_wf.links[new_fw.fw_id]
            updated_ids.append(new_fw.fw_id)

        for fw_id in fw_ids:
            for root_id in root_ids:
                self.links[fw_id].append(root_id)  # add the root id as my child
                if pull_spec_mods:  # re-apply some actions of the parent
                    m_fw = self.id_fw[fw_id]  # get the parent FW
                    m_launch = self._get_representative_launch(m_fw)  # get Launch of parent
                    if m_launch:
                        # pull spec update
                        if m_launch.state == 'COMPLETED' and m_launch.action.update_spec:
                            new_wf.id_fw[root_id].spec.update(m_launch.action.update_spec)
                        # pull spec mods
                        if m_launch.state == 'COMPLETED' and m_launch.action.mod_spec:
                            for mod in m_launch.action.mod_spec:
                                apply_mod(mod, new_wf.id_fw[root_id].spec)

        for new_fw in new_wf.fws:
            updated_ids = self.refresh(new_fw.fw_id, set(updated_ids))

        return updated_ids
Ejemplo n.º 20
0
    def add_wf_to_fws(self, new_wf, fw_ids, pull_spec_mods=True, detour=False):
        """
        Internal method to add a workflow as a child to a Firework
        Note: detours must have children that have STATE_RANK that is WAITING or below

        :param new_wf: (Workflow) New Workflow to add
        :param fw_ids: ([int]) ids of the parent Fireworks on which to add the Workflow
        :param pull_spec_mods: (bool) pull spec mods of COMPLETED parents
        :param detour: (bool) add children of the current Firework to the Workflow's leaves
        :return: ([int]) list of Firework ids that were updated or new
        """
        updated_ids = []

        root_ids = new_wf.root_fw_ids
        leaf_ids = new_wf.leaf_fw_ids

        for new_fw in new_wf.fws:
            if new_fw.fw_id >= 0:  # note - this is also used later in the 'detour' code
                raise ValueError(
                    'FireWorks to add must use a negative fw_id! Got fw_id: '
                    '{}'.format(new_fw.fw_id))

            self.id_fw[new_fw.fw_id] = new_fw  # add new_fw to id_fw

            for fw_id in fw_ids:
                if new_fw.fw_id in leaf_ids:
                    if detour:
                        # make sure all of these links are WAITING, else the DETOUR is not well defined
                        ready_run = [
                            (f >= 0
                             and Firework.STATE_RANKS[self.fw_states[f]] > 1)
                            for f in self.links[fw_id]
                        ]
                        if any(ready_run):
                            raise ValueError(
                                "fw_id: {}: Detour option only works if all children of detours are not READY to run and have not already run"
                                .format(fw_id))
                        self.links[new_fw.fw_id] = [
                            f for f in self.links[fw_id] if f >= 0
                        ]  # add children of current FW to new FW
                    else:
                        self.links[new_fw.fw_id] = []
                else:
                    self.links[new_fw.fw_id] = new_wf.links[new_fw.fw_id]
                updated_ids.append(new_fw.fw_id)

        for fw_id in fw_ids:
            for root_id in root_ids:
                self.links[fw_id].append(
                    root_id)  # add the root id as my child
                if pull_spec_mods:  # re-apply some actions of the parent
                    m_fw = self.id_fw[fw_id]  # get the parent FW
                    m_launch = self._get_representative_launch(
                        m_fw)  # get Launch of parent
                    if m_launch:
                        # pull spec update
                        if m_launch.state == 'COMPLETED' and m_launch.action.update_spec:
                            new_wf.id_fw[root_id].spec.update(
                                m_launch.action.update_spec)
                        # pull spec mods
                        if m_launch.state == 'COMPLETED' and m_launch.action.mod_spec:
                            for mod in m_launch.action.mod_spec:
                                apply_mod(mod, new_wf.id_fw[root_id].spec)

        for new_fw in new_wf.fws:
            updated_ids = self.refresh(new_fw.fw_id, set(updated_ids))

        return updated_ids