Exemple #1
0
    def _checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
        """
        (internal method) Selects a FireWork through ``_get_a_fw_to_run``, records a RUNNING
        Launch for it, flags the FireWork itself as RUNNING and hands it back. Actually
        running the FireWork is left to the caller.

        :param fworker: A FWorker instance
        :param launch_dir: the dir the FW will be run in (for creating a Launch object)
        :param fw_id: optional FireWork id, forwarded unchanged to ``_get_a_fw_to_run``
        :param host: the host making the request (for creating a Launch object)
        :param ip: the ip making the request (for creating a Launch object)
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """

        firework, existing_lid = self._get_a_fw_to_run(fworker, fw_id)
        if not firework:
            return None, None

        # reuse the launch id returned by the selection step, else allocate a fresh one
        launch_id = existing_lid or self.get_new_launch_id()
        running_launch = Launch('RUNNING', launch_dir, fworker, host, ip,
                                launch_id=launch_id, fw_id=firework.fw_id)
        self.launches.update({'launch_id': launch_id}, running_launch.to_db_dict(), upsert=True)
        self.m_logger.debug('Created/updated Launch with launch_id: {}'.format(launch_id))

        # attach the Launch to the FireWork
        if existing_lid:
            # an entry with this launch id already exists: swap it for the RUNNING one
            refreshed = []
            for old_launch in firework.launches:
                if old_launch.launch_id == running_launch.launch_id:
                    refreshed.append(running_launch)
                else:
                    refreshed.append(old_launch)
            firework.launches = refreshed
        else:
            # brand-new Launch for this FireWork
            firework.launches.append(running_launch)

        firework.state = 'RUNNING'
        self._upsert_fws([firework])
        self.m_logger.debug('Checked out FW with id: {}'.format(firework.fw_id))

        return firework, launch_id
Exemple #2
0
    def checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
        """
        Selects a FireWork through ``_get_a_fw_to_run``, records a RUNNING Launch for it,
        flags the FireWork as RUNNING and returns it. Running the FireWork is the
        caller's job.

        If the FireWork already carries a RESERVED launch, that launch's id and state
        history are reused so the reservation is overwritten instead of a second Launch
        being added.

        :param fworker: A FWorker instance; its ``query`` attribute drives the selection
        :param launch_dir: the dir the FW will be run in (for creating a Launch object)
        :param fw_id: optional FireWork id, forwarded unchanged to ``_get_a_fw_to_run``
        :param host: the host making the request (for creating a Launch object)
        :param ip: the ip making the request (for creating a Launch object)
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """

        # TODO: this method is confusing, says AJ of Xmas past. Clean it up, remove duplication, etc.

        firework = self._get_a_fw_to_run(fworker.query, fw_id)
        if not firework:
            return None, None

        # Was this FireWork previously reserved? If so, the first RESERVED launch is
        # overwritten; adding a new Launch is problematic from a duplicate run standpoint.
        reservations = [launch for launch in firework.launches if launch.state == 'RESERVED']
        reserved_launch = reservations[0] if reservations else None

        if reserved_launch:
            state_history = reserved_launch.state_history
            l_id = reserved_launch.launch_id
        else:
            state_history = None
            l_id = self.get_new_launch_id()

        trackers = None
        if '_trackers' in firework.spec:
            trackers = [Tracker.from_dict(t) for t in firework.spec['_trackers']]

        running_launch = Launch('RUNNING', launch_dir, fworker, host, ip,
                                trackers=trackers, state_history=state_history,
                                launch_id=l_id, fw_id=firework.fw_id)

        self.launches.find_and_modify({'launch_id': running_launch.launch_id},
                                      running_launch.to_db_dict(), upsert=True)

        self.m_logger.debug('Created/updated Launch with launch_id: {}'.format(l_id))

        if reserved_launch:
            # replace the reserved entry (same launch id) with the RUNNING launch
            updated_launches = []
            for launch in firework.launches:
                same_id = launch.launch_id == running_launch.launch_id
                updated_launches.append(running_launch if same_id else launch)
            firework.launches = updated_launches
        else:
            # no reservation existed: this is an additional Launch
            firework.launches.append(running_launch)

        firework.state = 'RUNNING'
        self._upsert_fws([firework])

        # update any duplicated runs: other FireWorks that reference this launch id
        duplicate_query = {'launches': l_id,
                           'state': {'$in': ['WAITING', 'READY', 'RESERVED', 'FIZZLED']}}
        for doc in self.fireworks.find(duplicate_query, {'fw_id': 1}):
            duplicate_fw = self.get_fw_by_id(doc['fw_id'])
            duplicate_fw.state = 'RUNNING'
            self._upsert_fws([duplicate_fw])

        self.m_logger.debug('Checked out FW with id: {}'.format(firework.fw_id))

        return firework, l_id
Exemple #3
0
    def checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
        """
        Finds a FireWork to run (via ``_get_a_fw_to_run``), stores a RUNNING Launch for it,
        marks the FireWork RUNNING and returns it. The caller is responsible for running it.

        A RESERVED launch already attached to the FireWork is overwritten: its launch id
        and state history are carried over into the RUNNING Launch.

        :param fworker: A FWorker instance; ``fworker.query`` is passed to the selection step
        :param launch_dir: the dir the FW will be run in (for creating a Launch object)
        :param fw_id: optional FireWork id, passed through to ``_get_a_fw_to_run``
        :param host: the host making the request (for creating a Launch object)
        :param ip: the ip making the request (for creating a Launch object)
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """

        # TODO: this method is confusing, says AJ of Xmas past. Clean it up, remove duplication, etc.

        picked_fw = self._get_a_fw_to_run(fworker.query, fw_id)
        if not picked_fw:
            return None, None

        # Overwrite an earlier reservation if there is one; adding a new Launch instead
        # is problematic from a duplicate run standpoint.
        reserved = [entry for entry in picked_fw.launches if entry.state == 'RESERVED']
        prior = reserved[0] if reserved else None

        history = prior.state_history if prior else None
        launch_id = prior.launch_id if prior else self.get_new_launch_id()

        if '_trackers' in picked_fw.spec:
            trackers = [Tracker.from_dict(spec) for spec in picked_fw.spec['_trackers']]
        else:
            trackers = None

        new_launch = Launch('RUNNING', launch_dir, fworker, host, ip,
                            trackers=trackers,
                            state_history=history,
                            launch_id=launch_id,
                            fw_id=picked_fw.fw_id)

        self.launches.find_and_modify({'launch_id': new_launch.launch_id},
                                      new_launch.to_db_dict(),
                                      upsert=True)

        self.m_logger.debug('Created/updated Launch with launch_id: {}'.format(launch_id))

        if prior:
            # the reserved entry shares our launch id: put the RUNNING launch in its place
            picked_fw.launches = [new_launch if entry.launch_id == new_launch.launch_id else entry
                                  for entry in picked_fw.launches]
        else:
            # nothing to overwrite, so the Launch is appended
            picked_fw.launches.append(new_launch)

        picked_fw.state = 'RUNNING'
        self._upsert_fws([picked_fw])

        # update any duplicated runs (other FireWorks listing this launch id)
        stale_states = ['WAITING', 'READY', 'RESERVED', 'FIZZLED']
        cursor = self.fireworks.find({'launches': launch_id, 'state': {'$in': stale_states}},
                                     {'fw_id': 1})
        for record in cursor:
            twin = self.get_fw_by_id(record['fw_id'])
            twin.state = 'RUNNING'
            self._upsert_fws([twin])

        self.m_logger.debug('Checked out FW with id: {}'.format(picked_fw.fw_id))

        return picked_fw, launch_id
Exemple #4
0
    def _checkout_fw(self,
                     fworker,
                     launch_dir,
                     fw_id=None,
                     host=None,
                     ip=None):
        """
        (internal method) Obtains a FireWork from ``_get_a_fw_to_run``, upserts a RUNNING
        Launch for it, sets the FireWork state to RUNNING and returns it. The caller is
        responsible for running the FireWork.

        :param fworker: A FWorker instance
        :param launch_dir: the dir the FW will be run in (for creating a Launch object)
        :param fw_id: optional FireWork id, passed through to ``_get_a_fw_to_run``
        :param host: the host making the request (for creating a Launch object)
        :param ip: the ip making the request (for creating a Launch object)
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """

        chosen_fw, earlier_lid = self._get_a_fw_to_run(fworker, fw_id)
        if not chosen_fw:
            return None, None

        # create or update a launch: keep the earlier launch id when one was returned
        if earlier_lid:
            lid = earlier_lid
        else:
            lid = self.get_new_launch_id()
        launch = Launch('RUNNING',
                        launch_dir,
                        fworker,
                        host,
                        ip,
                        launch_id=lid,
                        fw_id=chosen_fw.fw_id)
        self.launches.update({'launch_id': lid},
                             launch.to_db_dict(),
                             upsert=True)
        self.m_logger.debug(
            'Created/updated Launch with launch_id: {}'.format(lid))

        # add launch to FW
        if earlier_lid:
            # replace the existing entry that carries the same launch id
            chosen_fw.launches = [
                launch if item.launch_id == launch.launch_id else item
                for item in chosen_fw.launches
            ]
        else:
            # first time this launch is attached
            chosen_fw.launches.append(launch)
        chosen_fw.state = 'RUNNING'
        self._upsert_fws([chosen_fw])
        self.m_logger.debug('Checked out FW with id: {}'.format(
            chosen_fw.fw_id))

        return chosen_fw, lid
Exemple #5
0
    def _reserve_fw(self, fworker, launch_dir, host=None, ip=None):
        """
        (internal method) Selects a FireWork through ``_get_a_fw_to_run`` and reserves it:
        a new RESERVED Launch is inserted and the FireWork state is set to RESERVED.

        :param fworker: worker object, passed to the selection step and stored on the Launch
        :param launch_dir: directory recorded on the Launch
        :param host: host recorded on the Launch
        :param ip: ip recorded on the Launch
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """
        # the second element returned by the selection step is not needed for a reservation
        firework, _ = self._get_a_fw_to_run(fworker)
        if not firework:
            return None, None

        # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
        new_lid = self.get_new_launch_id()
        reservation = Launch('RESERVED', launch_dir, fworker, host, ip,
                             launch_id=new_lid, fw_id=firework.fw_id)
        self.launches.insert(reservation.to_db_dict())

        # attach the reservation to the FireWork and persist it
        firework.launches.append(reservation)
        firework.state = 'RESERVED'
        self._upsert_fws([firework])
        self.m_logger.debug('Reserved FW with id: {}'.format(firework.fw_id))

        return firework, new_lid
Exemple #6
0
    def reserve_fw(self, fworker, launch_dir, host=None, ip=None):
        """
        Selects a FireWork through ``_get_a_fw_to_run`` and reserves it: a RESERVED Launch
        is upserted into the launches collection and the FireWork state is set to RESERVED.

        :param fworker: worker object; ``fworker.query`` drives the selection and the
            worker itself is stored on the Launch
        :param launch_dir: directory recorded on the Launch
        :param host: host recorded on the Launch
        :param ip: ip recorded on the Launch
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """
        firework = self._get_a_fw_to_run(fworker.query)
        if not firework:
            return None, None

        # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
        new_lid = self.get_new_launch_id()

        # trackers are only built when the spec declares them
        trackers = None
        if '_trackers' in firework.spec:
            trackers = [Tracker.from_dict(t) for t in firework.spec['_trackers']]

        reservation = Launch('RESERVED', launch_dir, fworker, host, ip,
                             trackers=trackers, launch_id=new_lid, fw_id=firework.fw_id)
        self.launches.find_and_modify({'launch_id': reservation.launch_id},
                                      reservation.to_db_dict(), upsert=True)

        # attach the reservation to the FireWork and persist it
        firework.launches.append(reservation)
        firework.state = 'RESERVED'
        self._upsert_fws([firework])
        self.m_logger.debug('Reserved FW with id: {}'.format(firework.fw_id))

        return firework, new_lid
Exemple #7
0
    def _reserve_fw(self, fworker, launch_dir, host=None, ip=None):
        """
        (internal method) Picks a FireWork with ``_get_a_fw_to_run`` and reserves it by
        upserting a RESERVED Launch and setting the FireWork state to RESERVED.

        :param fworker: worker object; ``fworker.query`` is used for the selection and the
            worker itself is stored on the Launch
        :param launch_dir: directory recorded on the Launch
        :param host: host recorded on the Launch
        :param ip: ip recorded on the Launch
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """
        picked_fw = self._get_a_fw_to_run(fworker.query)
        if not picked_fw:
            return None, None

        # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
        lid = self.get_new_launch_id()
        if '_trackers' in picked_fw.spec:
            trackers = [Tracker.from_dict(entry) for entry in picked_fw.spec['_trackers']]
        else:
            trackers = None
        reserved_launch = Launch('RESERVED', launch_dir, fworker, host, ip,
                                 trackers=trackers,
                                 launch_id=lid,
                                 fw_id=picked_fw.fw_id)
        self.launches.find_and_modify({'launch_id': reserved_launch.launch_id},
                                      reserved_launch.to_db_dict(),
                                      upsert=True)

        # record the reservation on the FireWork itself
        picked_fw.launches.append(reserved_launch)
        picked_fw.state = 'RESERVED'
        self._upsert_fws([picked_fw])
        self.m_logger.debug('Reserved FW with id: {}'.format(picked_fw.fw_id))

        return picked_fw, lid
Exemple #8
0
    def get_launch_by_id(self, launch_id):
        """
        Look up a single Launch in the launches collection by its launch id.

        :param launch_id: launch id
        :return: Launch object built from the stored document
        :raises ValueError: if no document with that launch_id is found
        """
        launch_doc = self.launches.find_one({'launch_id': launch_id})
        if not launch_doc:
            raise ValueError('No Launch exists with launch_id: {}'.format(launch_id))
        return Launch.from_dict(launch_doc)
Exemple #9
0
    def get_launch_by_id(self, launch_id):
        """
        Fetch the stored document for a launch id and return it as a Launch.

        :param launch_id: launch id
        :return: Launch object
        :raises ValueError: when the launches collection has no matching document
        """
        query = {'launch_id': launch_id}
        stored = self.launches.find_one(query)
        if not stored:
            message = 'No Launch exists with launch_id: {}'.format(launch_id)
            raise ValueError(message)
        return Launch.from_dict(stored)
Exemple #10
0
    def _reserve_fw(self, fworker, launch_dir, host=None, ip=None):
        """
        (internal method) Gets a FireWork from ``_get_a_fw_to_run`` and reserves it by
        inserting a new RESERVED Launch and setting the FireWork state to RESERVED.

        :param fworker: worker object, passed to the selection step and stored on the Launch
        :param launch_dir: directory recorded on the Launch
        :param host: host recorded on the Launch
        :param ip: ip recorded on the Launch
        :return: a FireWork, launch_id tuple; (None, None) if no FireWork was selected
        """
        # only the FireWork is used here; the second returned value is ignored
        selected, _unused_lid = self._get_a_fw_to_run(fworker)
        if not selected:
            return None, None

        # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
        lid = self.get_new_launch_id()
        reserved_launch = Launch('RESERVED',
                                 launch_dir,
                                 fworker,
                                 host,
                                 ip,
                                 launch_id=lid,
                                 fw_id=selected.fw_id)
        self.launches.insert(reserved_launch.to_db_dict())

        # add launch to FW
        selected.launches.append(reserved_launch)
        selected.state = 'RESERVED'
        self._upsert_fws([selected])
        self.m_logger.debug('Reserved FW with id: {}'.format(selected.fw_id))

        return selected, lid
Exemple #11
0
def task_dict_to_wf(task_dict, launchpad):
    """
    Build a single-FireWork workflow in state COMPLETED from a legacy task document
    and add it, together with its one COMPLETED Launch, to the launchpad.

    Keys read from ``task_dict``: 'task_type', 'run_tags', 'snl', 'snlgroup_id',
    'dir_name_full', 'dir_name', 'completed_at' and 'pretty_formula'.

    :param task_dict: dict describing the legacy task (see the keys listed above)
    :param launchpad: launchpad object providing get_new_fw_id, get_new_launch_id,
        add_wf and _upsert_launch
    :return: the fw_id allocated for the new FireWork
    """
    fw_id = launchpad.get_new_fw_id()
    l_id = launchpad.get_new_launch_id()

    spec = {'task_type': task_dict['task_type'], 'run_tags': task_dict['run_tags'],
            'vaspinputset_name': None, 'vasp': None, 'mpsnl': task_dict['snl'],
            'snlgroup_id': task_dict['snlgroup_id']}
    tasks = [DummyLegacyTask()]

    launch_dir = task_dict['dir_name_full']

    # the action stored on the Launch carries the spec updates for downstream use
    stored_data = {'error_list': []}
    update_spec = {'prev_vasp_dir': task_dict['dir_name'],
                   'prev_task_type': spec['task_type'],
                   'mpsnl': spec['mpsnl'], 'snlgroup_id': spec['snlgroup_id'],
                   'run_tags': spec['run_tags']}

    fwaction = FWAction(stored_data=stored_data, update_spec=update_spec)

    # only tasks with a completion timestamp get a state-history entry
    if task_dict['completed_at']:
        complete_date = datetime.datetime.strptime(task_dict['completed_at'], "%Y-%m-%d %H:%M:%S")
        state_history = [{"created_on": complete_date, 'state': 'COMPLETED'}]
    else:
        state_history = []

    launches = [Launch('COMPLETED', launch_dir, fworker=None, host=None, ip=None, action=fwaction,
                       state_history=state_history, launch_id=l_id, fw_id=fw_id)]

    f = Composition.from_formula(task_dict['pretty_formula']).alphabetical_formula

    fw = FireWork(tasks, spec, name=get_slug(f + '--' + spec['task_type']), launches=launches,
                  state='COMPLETED', created_on=None, fw_id=fw_id)

    wf_meta = get_meta_from_structure(Structure.from_dict(task_dict['snl']))
    wf_meta['run_version'] = 'preproduction (0)'

    wf = Workflow.from_FireWork(fw, name=f, metadata=wf_meta)

    launchpad.add_wf(wf, reassign_all=False)
    launchpad._upsert_launch(launches[0])

    # single-argument print call: same output under Python 2 and Python 3
    print('ADDED {}'.format(fw_id))
    return fw_id
Exemple #12
0
    def recover_offline(self, launch_id, ignore_errors=False):
        """
        Bring the database record of a launch up to date from the FW_ping.json and
        FW_offline.json files found in its launch directory.

        :param launch_id: id of the launch to recover; the lookup happens outside the
            error handling, so a failure of ``get_launch_by_id`` propagates
        :param ignore_errors: if True, the traceback of a failed recovery is not printed
        :return: None if recovery succeeded, otherwise the fw_id of the launch
        """
        time_format = "%Y-%m-%dT%H:%M:%S.%f"
        find_launch = {'launch_id': launch_id}

        # get the launch directory
        m_launch = self.get_launch_by_id(launch_id)
        try:
            self.m_logger.debug("RECOVERING fw_id: {}".format(m_launch.fw_id))

            # look for ping file - update the FireWork if this is the case
            ping_loc = os.path.join(m_launch.launch_dir, "FW_ping.json")
            if os.path.exists(ping_loc):
                with open(ping_loc) as f:
                    ping_data = json.load(f)
                ping_time = datetime.datetime.strptime(ping_data['ping_time'], time_format)
                self.ping_launch(launch_id, ping_time)

            # look for action in FW_offline.json; a missing file is handled as an error below
            offline_loc = os.path.join(m_launch.launch_dir, "FW_offline.json")
            with open(offline_loc) as f:
                offline_data = json.load(f)

            if 'started_on' in offline_data:
                # the run started offline: record RUNNING with the recorded start time
                started_on = datetime.datetime.strptime(offline_data['started_on'], time_format)
                m_launch.state = 'RUNNING'
                for s in m_launch.state_history:
                    if s['state'] == 'RUNNING':
                        s['created_on'] = started_on
                self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                              m_launch.to_db_dict(), upsert=True)

            if 'fwaction' in offline_data:
                # the run finished offline: complete the launch, then backdate the history entry
                fwaction = FWAction.from_dict(offline_data['fwaction'])
                state = offline_data['state']
                m_launch = Launch.from_dict(
                    self.complete_launch(launch_id, fwaction, state))
                for s in m_launch.state_history:
                    if s['state'] == offline_data['state']:
                        s['created_on'] = datetime.datetime.strptime(offline_data['completed_on'],
                                                                     time_format)
                self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                              m_launch.to_db_dict(), upsert=True)
                self.offline_runs.update(find_launch, {"$set": {"completed": True}})

            # update the updated_on
            self.offline_runs.update(
                find_launch, {"$set": {"updated_on": datetime.datetime.utcnow().isoformat()}})
            return None
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt / SystemExit are not
            # swallowed and reported as a failed recovery.
            if not ignore_errors:
                traceback.print_exc()
            return m_launch.fw_id
Exemple #13
0
    def recover_offline(self, launch_id, ignore_errors=False):
        """
        Update the stored launch from the files written by an offline run
        (FW_ping.json and FW_offline.json inside the launch directory).

        :param launch_id: id of the launch to recover; if ``get_launch_by_id`` fails,
            that exception propagates because the lookup is outside the error handling
        :param ignore_errors: if True, do not print the traceback when recovery fails
        :return: None on success; the fw_id of the launch if recovery raised an exception
        """
        stamp_format = "%Y-%m-%dT%H:%M:%S.%f"

        # get the launch directory
        m_launch = self.get_launch_by_id(launch_id)
        try:
            self.m_logger.debug("RECOVERING fw_id: {}".format(m_launch.fw_id))

            # look for ping file - update the FireWork if this is the case
            ping_loc = os.path.join(m_launch.launch_dir, "FW_ping.json")
            if os.path.exists(ping_loc):
                with open(ping_loc) as f:
                    raw_ping = json.loads(f.read())
                self.ping_launch(
                    launch_id,
                    datetime.datetime.strptime(raw_ping['ping_time'], stamp_format))

            # look for action in FW_offline.json (opening a missing file raises and is
            # reported through the handler below)
            offline_loc = os.path.join(m_launch.launch_dir, "FW_offline.json")
            with open(offline_loc) as f:
                offline_data = json.loads(f.read())

            if 'started_on' in offline_data:
                # mark the launch RUNNING and stamp its RUNNING history entries
                m_launch.state = 'RUNNING'
                for s in m_launch.state_history:
                    if s['state'] == 'RUNNING':
                        s['created_on'] = datetime.datetime.strptime(
                            offline_data['started_on'], stamp_format)
                self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                              m_launch.to_db_dict(), upsert=True)

            if 'fwaction' in offline_data:
                # complete the launch with the recorded action, then stamp the final state
                fwaction = FWAction.from_dict(offline_data['fwaction'])
                state = offline_data['state']
                m_launch = Launch.from_dict(
                    self.complete_launch(launch_id, fwaction, state))
                for s in m_launch.state_history:
                    if s['state'] == offline_data['state']:
                        s['created_on'] = datetime.datetime.strptime(
                            offline_data['completed_on'], stamp_format)
                self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                              m_launch.to_db_dict(), upsert=True)
                self.offline_runs.update({"launch_id": launch_id},
                                         {"$set": {"completed": True}})

            # update the updated_on
            now_iso = datetime.datetime.utcnow().isoformat()
            self.offline_runs.update({"launch_id": launch_id},
                                     {"$set": {"updated_on": now_iso}})
            return None
        except Exception:
            # Catch Exception only: a bare except would also trap KeyboardInterrupt and
            # SystemExit and turn them into a "failed fw_id" return value.
            if not ignore_errors:
                traceback.print_exc()
            return m_launch.fw_id