def parse_wf_for_latt_constants(wf_id):
    # from_file is a classmethod; no need to instantiate LaunchPad first
    lpad = LaunchPad.from_file(lpad_file_path)
    wf = lpad.get_wf_by_fw_id(wf_id)

    lattdata = {}
    print('{} workflow retrieved with {} fws in it'.format(wf.name, len(wf.fws)))
    for fw in wf.fws:
        print('\t{}'.format(fw.name))
        if 'structure optimization' not in fw.name:
            raise ValueError("Not a recognized firework!")
        elif fw.state != 'COMPLETED':
            print('\t\tstatus = {}, so skipping'.format(fw.state))
            continue

        pat = fw.launches[-1].launch_dir
        s = Poscar.from_file(os.path.join(pat, 'CONTCAR.relax2.gz')).structure
        nom = s.composition.reduced_formula
        if nom in lattdata:
            raise ValueError("{} already exists in lattdata??".format(nom))
        elif (max(s.lattice.abc) - min(s.lattice.abc)) > 0.00001 or (
                max(s.lattice.angles) - min(s.lattice.angles)) > 0.00001:
            raise ValueError("Error occurred with lattice relaxation?? {}".format(s.lattice))
        else:
            lattdata.update({nom: s.lattice.abc[0]})

    print('\nFinalized lattice constant set:\n{}'.format(lattdata))
    return lattdata
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir, 'my_qadapter.yaml')
    elif not args.queueadapter_file:
        args.queueadapter_file = QUEUEADAPTER_LOC

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
        else LaunchPad(strm_lvl=args.loglvl)
    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, qadapter=queueadapter,
                  launch_dir=args.launch_dir, nlaunches=args.nlaunches,
                  njobs_queue=args.maxjobs_queue, njobs_block=args.maxjobs_block,
                  sleep_time=args.sleep, reserve=args.reserve, strm_lvl=args.loglvl,
                  timeout=args.timeout, fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter, args.launch_dir,
                               args.reserve, args.loglvl, False, args.fill_mode,
                               args.fw_id)
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')
    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir, 'my_qadapter.yaml')

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
        else LaunchPad(strm_lvl=args.loglvl)
    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, queueadapter, args.launch_dir, args.nlaunches,
                  args.maxjobs_queue, args.maxjobs_block, args.sleep, args.reserve,
                  args.loglvl)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter, args.launch_dir,
                               args.reserve, args.loglvl, False)
def launchpad():
    """Get a launchpad"""
    # Manually add the package to be included
    fw_config.USER_PACKAGES = [
        'fireworks.user_objects', 'fireworks.utilities.tests',
        'fw_tutorials', 'fireworks.features'
    ]
    lpd = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
    lpd.reset(password=None, require_password=False)
    yield lpd
    lpd.connection.drop_database(TESTDB_NAME)
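# Hedged usage sketch (assumption, not from the source): assuming the generator
# above is registered with @pytest.fixture, a test consumes it by naming it as
# an argument; pytest runs the code up to `yield`, hands the LaunchPad to the
# test, then drops the test database. "test_lpad_roundtrip" is hypothetical.
from fireworks import Firework, ScriptTask, Workflow

def test_lpad_roundtrip(launchpad):  # hypothetical test, for illustration only
    launchpad.add_wf(Workflow([Firework(ScriptTask.from_str('echo "hello"'))]))
    assert launchpad.get_fw_ids()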
def perturb_wf_setup(perovskite, structure_type='111', Nstruct=100,
                     perturbamnt=None, max_strain=0.06, nimages=8, tags=None):
    tags = tags if tags is not None else []  # avoid a shared mutable default argument
    if perturbamnt is None:
        perturbamnt = perovskite.lattconst * 0.04

    print("Setting up {} different perturbation polarization approaches\nMax strain = {}, "
          "Perturbation amount = {}".format(Nstruct, max_strain, perturbamnt))

    allowed_struct_type = ['111', '211', 's2s21', 's2s22']
    if structure_type not in allowed_struct_type:
        raise ValueError("{} not in {}".format(structure_type, allowed_struct_type))

    fws = []
    pert_N_structs = [perovskite.get_struct_from_structure_type(structure_type).as_dict()]
    user_incar_settings = {"ADDGRID": True, 'EDIFF': 1e-8, "NELMIN": 6}
    for nind in range(Nstruct):
        sclass = PerfectPerovskite(Asite=perovskite.eltA, Bsite=perovskite.eltB,
                                   Osite=perovskite.eltC,
                                   lattconst=perovskite.lattconst)
        strain_class = StrainedPerovskite.generate_random_strain(
            sclass, structure_type=structure_type, max_strain=max_strain,
            perturb_amnt=perturbamnt)

        tmp_wf = polarization_wf(strain_class.structure, strain_class.base,
                                 submit=False, nimages=nimages,
                                 user_incar_settings=user_incar_settings, tags=tags)
        fws.extend(tmp_wf.fws)
        pert_N_structs.append(strain_class.structure.as_dict())

    print("Submitting Polarization workflow with {} fireworks".format(len(fws)))
    wf = Workflow(fws)
    lp = LaunchPad.from_file(lpad_file_path)  # from_file is a classmethod
    lp.add_wf(wf)
def mlaunch():
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job '
                        '(int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)',
                        default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing '
                        'the node file name (for populating FWData only)',
                        default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                        default=1, type=int)

    args = parser.parse_args()

    if not args.launchpad_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
        else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, total_node_list, args.ppn)
def born():
    fw = Firework(
        [
            Cry(),
            Eat(),
            Sleep(),
            Poop(),
        ],
        spec={"name": "BabyJain"}
    )
    launchpad = LaunchPad()
    launchpad.add_wf(Workflow([fw]))
def clear_env():
    sma = SubmissionMongoAdapter.auto_load()
    lp = LaunchPad.auto_load()
    snl = SNLMongoAdapter.auto_load()

    db_dir = os.environ['DB_LOC']
    db_path = os.path.join(db_dir, 'tasks_db.json')
    with open(db_path) as f:
        db_creds = json.load(f)

    sma._reset()
    lp.reset('', require_password=False)
    snl._reset()

    conn = MongoClient(db_creds['host'], db_creds['port'])
    db = conn[db_creds['database']]
    db.authenticate(db_creds['admin_user'], db_creds['admin_password'])
    db.tasks.remove()
    db.boltztrap.remove()
    db.counter.remove()
    # drop the chunks/files pair for each GridFS collection, mirroring dos_fs
    # (the original repeated 'band_structure_fs.files'; chunks assumed intended)
    db['dos_fs.chunks'].remove()
    db['dos_fs.files'].remove()
    db['band_structure_fs.chunks'].remove()
    db['band_structure_fs.files'].remove()
def run_task(self, fw_spec):
    # the FW.json/yaml file is mandatory to get the fw_id
    # no need to deserialize the whole FW
    if '_add_launchpad_and_fw_id' in fw_spec:
        lp = self.launchpad
        fw_id = self.fw_id
    else:
        try:
            fw_dict = loadfn('FW.json')
        except IOError:
            try:
                fw_dict = loadfn('FW.yaml')
            except IOError:
                raise RuntimeError("Launchpad/fw_id not present in spec and No FW.json nor FW.yaml file present: "
                                   "impossible to determine fw_id")
        lp = LaunchPad.auto_load()
        fw_id = fw_dict['fw_id']

    wf = lp.get_wf_by_fw_id_lzyfw(fw_id)

    deleted_files = []
    # iterate over all the fws and launches
    for fw_id, fw in wf.id_fw.items():
        for l in fw.launches + fw.archived_launches:
            l_dir = l.launch_dir
            deleted_files.extend(self.delete_files(os.path.join(l_dir, TMPDIR_NAME)))
            deleted_files.extend(self.delete_files(os.path.join(l_dir, INDIR_NAME)))
            deleted_files.extend(self.delete_files(os.path.join(l_dir, OUTDIR_NAME), self.out_exts))

    logging.info("Deleted files:\n {}".format("\n".join(deleted_files)))
    return FWAction(stored_data={'deleted_files': deleted_files})
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir, 'my_qadapter.yaml')
    elif not args.queueadapter_file:
        args.queueadapter_file = QUEUEADAPTER_LOC

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
        else LaunchPad(strm_lvl=args.loglvl)
    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, qadapter=queueadapter,
                  launch_dir=args.launch_dir, nlaunches=args.nlaunches,
                  njobs_queue=args.maxjobs_queue, njobs_block=args.maxjobs_block,
                  sleep_time=args.sleep, reserve=args.reserve, strm_lvl=args.loglvl,
                  timeout=args.timeout, fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter, args.launch_dir,
                               args.reserve, args.loglvl, False, args.fill_mode)
def run_task(self, fw_spec):
    self.db_data.connect_mongoengine()

    try:
        fw_dict = loadfn('FW.json')
    except IOError:
        try:
            fw_dict = loadfn('FW.yaml')
        except IOError:
            raise RuntimeError("No FW.json nor FW.yaml file present: impossible to determine fw_id")

    fw_id = fw_dict['fw_id']
    lp = LaunchPad.auto_load()
    wf = lp.get_wf_by_fw_id_lzyfw(fw_id)

    wf_module = importlib.import_module(wf.metadata['workflow_module'])
    wf_class = getattr(wf_module, wf.metadata['workflow_class'])

    get_results_method = getattr(wf_class, 'get_mongoengine_results')
    # TODO: extend for multiple documents?
    document = get_results_method(wf)

    with self.db_data.switch_collection(document.__class__) as document.__class__:
        # TODO: it would be better to try to remove automatically the FileFields
        # already saved if the save of the document fails.
        document.save()
def get_lp(args):
    try:
        if not args.launchpad_file and os.path.exists(
                os.path.join(args.config_dir, DEFAULT_LPAD_YAML)):
            args.launchpad_file = os.path.join(args.config_dir, DEFAULT_LPAD_YAML)

        if args.launchpad_file:
            return LaunchPad.from_file(args.launchpad_file)
        else:
            args.loglvl = 'CRITICAL' if args.silencer else args.loglvl
            return LaunchPad(logdir=args.logdir, strm_lvl=args.loglvl)
    except:
        traceback.print_exc()
        err_message = ('FireWorks was not able to connect to MongoDB. Is the server running? '
                       'The database file specified was {}.'.format(args.launchpad_file))
        if not args.launchpad_file:
            err_message += (' Type "lpad init" if you would like to set up a file that specifies '
                            'location and credentials of your Mongo database (otherwise use default '
                            'localhost configuration).')
        raise ValueError(err_message)
def run_task(self, fw_spec):
    # the FW.json/yaml file is mandatory to get the fw_id
    # no need to deserialize the whole FW
    try:
        fw_dict = loadfn('FW.json')
    except IOError:
        try:
            fw_dict = loadfn('FW.yaml')
        except IOError:
            raise RuntimeError("No FW.json nor FW.yaml file present: impossible to determine fw_id")

    fw_id = fw_dict['fw_id']
    lp = LaunchPad.auto_load()
    wf = lp.get_wf_by_fw_id_lzyfw(fw_id)

    wf_module = importlib.import_module(wf.metadata['workflow_module'])
    wf_class = getattr(wf_module, wf.metadata['workflow_class'])

    get_results_method = getattr(wf_class, 'get_final_structure_and_history')
    # TODO: make this more general ... just to test right now ...
    results = get_results_method(wf)

    database = MongoDatabase.from_dict(fw_spec['mongo_database'])
    database.insert_entry({'structure': results['structure'],
                           'history': results['history']})

    logging.info("Inserted data:\n something")
    return FWAction()
def rlaunch():
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the central database and ' \
                    'runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)',
                              default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
            else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep, args.loglvl)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def get_lp_and_fw_id_from_task(task, fw_spec):
    """
    Given an instance of a running task and its spec, tries to load the LaunchPad and the current fw_id.
    It will first check for "_add_launchpad_and_fw_id", then try to load from the FW.json/FW.yaml file.

    Should be used inside tasks that require access to the LaunchPad and to the whole workflow.

    Args:
        task: An instance of a running task
        fw_spec: The spec of the task

    Returns:
        an instance of LaunchPad and the fw_id of the current task
    """
    if '_add_launchpad_and_fw_id' in fw_spec:
        lp = task.launchpad
        fw_id = task.fw_id

        # lp may be None in offline mode
        if lp is None:
            raise RuntimeError("The LaunchPad in spec is None.")
    else:
        try:
            with open('FW.json', "rt") as fh:
                fw_dict = json.load(fh, cls=MontyDecoder)
        except IOError:
            try:
                with open('FW.yaml', "rt") as fh:
                    fw_dict = yaml.load(fh)
            except IOError:
                raise RuntimeError("Launchpad/fw_id not present in spec and no FW.json nor FW.yaml file present: "
                                   "impossible to determine fw_id")

        logger.warning("LaunchPad not available from spec. Generated with auto_load.")
        lp = LaunchPad.auto_load()
        fw_id = fw_dict['fw_id']

        # since it is not given that the LaunchPad is the correct one, try to verify if the workflow
        # and the fw_id are being accessed correctly
        try:
            fw = lp.get_fw_by_id(fw_id)
        except ValueError as e:
            traceback.print_exc()
            raise RuntimeError("The firework with id {} is not present in the LaunchPad {}. The LaunchPad is "
                               "probably incorrect.".format(fw_id, lp))

        if fw.state != "RUNNING":
            raise RuntimeError("The firework with id {} from LaunchPad {} is {}. There might be an error in the "
                               "selection of the LaunchPad".format(fw_id, lp, fw.state))

        if len(fw.tasks) != len(fw_dict['spec']['_tasks']):
            raise RuntimeError("The firework with id {} from LaunchPad {} has a different number of tasks "
                               "from the current one.".format(fw_id, lp))

        for db_t, dict_t in zip(fw.tasks, fw_dict['spec']['_tasks']):
            if db_t.fw_name != dict_t['_fw_name']:
                raise RuntimeError("The firework with id {} from LaunchPad {} has tasks that don't match: "
                                   "{} and {}.".format(fw_id, lp, db_t.fw_name, dict_t['_fw_name']))

    return lp, fw_id
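# Hedged usage sketch (assumption, not from the source): a minimal FiretaskBase
# subclass showing where get_lp_and_fw_id_from_task would typically be called.
# "CountFwsTask" is a hypothetical name, for illustration only.
from fireworks import FiretaskBase, FWAction, explicit_serialize

@explicit_serialize
class CountFwsTask(FiretaskBase):
    def run_task(self, fw_spec):
        lp, fw_id = get_lp_and_fw_id_from_task(self, fw_spec)
        # once the LaunchPad has been verified, the whole workflow is reachable
        wf = lp.get_wf_by_fw_id(fw_id)
        return FWAction(stored_data={'n_fws_in_wf': len(wf.fws)})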
def auto_load(cls):
    sma = SubmissionMongoAdapter.auto_load()

    l_dir = FWConfig().CONFIG_FILE_DIR
    l_file = os.path.join(l_dir, 'my_launchpad.yaml')
    lp = LaunchPad.from_file(l_file)

    return SubmissionProcessor(sma, lp)
def setUpClass(cls):
    cls.lp = None
    cls.fworker = FWorker()
    try:
        cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
        cls.lp.reset(password=None, require_password=False)
    except:
        raise unittest.SkipTest('MongoDB is not running in localhost:27017! Skipping tests.')
def polarization_wf(polar_structure, nonpolar_structure, submit=False, nimages=8,
                    user_incar_settings=None, tags=None):
    """
    :param polar_structure: structure of polar structure
    :param nonpolar_structure: structure of nonpolar structure
    :param submit: boolean for submitting
    :param nimages: number of interpolation images
    :param user_incar_settings: dict of additional INCAR settings
    :param tags: list of string tags
    :return: the Workflow (when submit is False)
    """
    # avoid shared mutable default arguments
    user_incar_settings = user_incar_settings if user_incar_settings is not None else {}
    tags = tags if tags is not None else []

    if polar_structure.species != nonpolar_structure.species:
        raise ValueError("WRONG ORDER OF SPECIES: {} vs {}".format(
            polar_structure.species, nonpolar_structure.species))

    vasp_input_set_params = {'user_incar_settings': user_incar_settings}
    wf = get_wf_ferroelectric(
        polar_structure,
        nonpolar_structure,
        vasp_cmd=">>vasp_cmd<<",
        db_file='>>db_file<<',
        vasp_input_set_polar="MPStaticSet",
        vasp_input_set_nonpolar="MPStaticSet",
        relax=False,
        vasp_relax_input_set_polar=vasp_input_set_params,
        vasp_relax_input_set_nonpolar=vasp_input_set_params,
        nimages=nimages,
        hse=False,
        add_analysis_task=True,
        tags=tags)
    print('workflow created with {} fws'.format(len(wf.fws)))

    if submit:
        print("\tSubmitting Polarization workflow")
        lp = LaunchPad.from_file(lpad_file_path)  # from_file is a classmethod
        lp.add_wf(wf)
    else:
        return wf
def update_checkpoint(launchpad: LaunchPad, launch_dir: str, launch_id: int,
                      checkpoint: Dict[str, Any]) -> None:
    """
    Helper function to update a checkpoint.

    Args:
        launchpad (LaunchPad): LaunchPad to ping with checkpoint data
        launch_dir (str): directory in which FW_offline.json was created
        launch_id (int): launch id to update
        checkpoint (dict): checkpoint data
    """
    if launchpad:
        launchpad.ping_launch(launch_id, checkpoint=checkpoint)
    else:
        fpath = zpath(os.path.join(launch_dir, "FW_offline.json"))
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d["checkpoint"] = checkpoint
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))
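# Hedged usage sketch (assumption, not from the source): with no LaunchPad
# (offline mode) the checkpoint is merged into FW_offline.json on disk; with a
# LaunchPad it is pinged straight to the database. The path is a placeholder,
# and the checkpoint keys mirror the ones Rocket.run() records.
update_checkpoint(None, launch_dir="/path/to/launch_dir", launch_id=0,
                  checkpoint={"_task_n": 1, "_all_stored_data": {},
                              "_all_update_spec": {}, "_all_mod_spec": []})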
def bootstrap_app(*args, **kwargs):
    """Pass instead of `app` to a forking process.

    This is so a server process will re-initialize a MongoDB client connection
    after forking. This is useful to avoid deadlock when using pymongo with
    multiprocessing.
    """
    import fireworks.flask_site.app
    fireworks.flask_site.app.lp = LaunchPad.from_dict(
        json.loads(os.environ["FWDB_CONFIG"]))
    return app(*args, **kwargs)
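# Hedged usage sketch (assumption, not from the source): FWDB_CONFIG is
# expected to hold the LaunchPad serialized as a JSON string, set before the
# forking server imports bootstrap_app. The exact server command depends on
# your deployment.
import json
import os
from fireworks.core.launchpad import LaunchPad

os.environ["FWDB_CONFIG"] = json.dumps(LaunchPad.auto_load().to_dict())
# then point the forking server (e.g. gunicorn) at bootstrap_app instead of app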
def __init__(self, launchpad=None):
    super().__init__()

    # Store the launchpad in the class attribute so it can be reused
    if launchpad is not None:
        self.lpad = launchpad
        # Keep the launchpad
        FwScheduler._lpad = launchpad
    else:
        # Create and save the launchpad
        if FwScheduler._lpad is None:
            FwScheduler._lpad = LaunchPad.from_file(LAUNCHPAD_LOC)
        self.lpad = FwScheduler._lpad
def setup_fireworks(cls):
    """
    Sets up the fworker and launchpad if a connection to a local mongodb is available.
    cls.lp is set to None if not available.
    """
    cls.fworker = FWorker()
    try:
        cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
        cls.lp.reset(password=None, require_password=False)
    except:
        cls.lp = None
def setUp(self):
    if os.path.exists(self.scratch_dir):
        shutil.rmtree(self.scratch_dir)
    os.makedirs(self.scratch_dir)
    os.chdir(self.scratch_dir)
    try:
        self.lp = LaunchPad.from_file(os.path.join(db_dir, "my_launchpad.yaml"))
        self.lp.reset("", require_password=False)
    except:
        raise unittest.SkipTest('Cannot connect to MongoDB! Is the database server running? '
                                'Are the credentials correct?')
def init_yaml(args):
    fields = (("host", "localhost"),
              ("port", 27017),
              ("name", "fireworks"),
              ("username", None),
              ("password", None))
    doc = {}
    print("Please supply the following configuration values")
    print("(press Enter if you want to accept the defaults)\n")
    for k, v in fields:
        val = input("Enter {} (default: {}) : ".format(k, v))
        doc[k] = val if val else v
    doc["port"] = int(doc["port"])  # enforce the port as an int

    import yaml
    with open(args.config_file, "w") as f:
        yaml.dump(LaunchPad.from_dict(doc).to_dict(), f)
    print("\nConfiguration written to {}!".format(args.config_file))
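# For reference, the YAML written above with all defaults accepted would look
# roughly like this (abridged; LaunchPad.to_dict() adds further keys such as
# logdir and strm_lvl):
#
#   host: localhost
#   port: 27017
#   name: fireworks
#   username: null
#   password: null
#
# It can be loaded back with LaunchPad.from_file(args.config_file).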
def mlaunch():
    m_description = 'This program launches multiple Rockets simultaneously'
    parser = ArgumentParser(description=m_description)

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job '
                        '(int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)',
                        default=None, type=int)
    parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                        default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing '
                        'the node file name (for populating FWData only)',
                        default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                        default=1, type=int)

    args = parser.parse_args()

    if not args.launchpad_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and args.config_dir and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
        else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                        args.num_jobs, args.sleep, total_node_list, args.ppn,
                        timeout=args.timeout)
def restart_fizzled():
    module_dir = os.path.dirname(os.path.abspath(__file__))
    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)

    for fw in lpdb.fireworks.find({"state": "FIZZLED"}, {"fw_id": 1, "spec.task_type": 1}):
        fw_id = fw['fw_id']
        task_type = fw['spec']['task_type']
        restart_id = fw_id
        if 'VASP db insertion' in task_type:
            restart_id = fw_id - 1
        elif 'Controller' in task_type:
            restart_id = fw_id - 2
        lpdb.rerun_fw(restart_id)
def run_task(self, fw_spec):
    # the FW.json/yaml file is mandatory to get the fw_id
    # no need to deserialize the whole FW
    if '_add_launchpad_and_fw_id' in fw_spec:
        lp = self.launchpad
        fw_id = self.fw_id
    else:
        try:
            fw_dict = loadfn('FW.json')
        except IOError:
            try:
                fw_dict = loadfn('FW.yaml')
            except IOError:
                raise RuntimeError("Launchpad/fw_id not present in spec and No FW.json nor FW.yaml file present: "
                                   "impossible to determine fw_id")
        lp = LaunchPad.auto_load()
        fw_id = fw_dict['fw_id']

    wf = lp.get_wf_by_fw_id(fw_id)
    wf_module = importlib.import_module(wf.metadata['workflow_module'])
    wf_class = getattr(wf_module, wf.metadata['workflow_class'])

    database = fw_spec['mongo_database']
    if self.criteria is not None:
        entry = database.get_entry(criteria=self.criteria)
    else:
        entry = {}

    inserted = []
    for root_key, method_name in self.insertion_data.items():
        get_results_method = getattr(wf_class, method_name)
        results = get_results_method(wf)
        for key, val in results.items():
            entry[key] = jsanitize(val)
            inserted.append(key)

    if self.criteria is not None:
        database.save_entry(entry=entry)
    else:
        database.insert_entry(entry=entry)

    # list one inserted key per line (the original joined with an unformatted template)
    logging.info("Inserted data:\n{}".format('\n'.join('- {}'.format(k) for k in inserted)))
    return FWAction()
def archive_deprecated_fws():
    # find all snlgroups that are deprecated, and archive all WFs that have
    # deprecated fw_ids so we don't run them
    module_dir = os.path.dirname(os.path.abspath(__file__))

    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)
    snlgroups = snldb.snlgroups

    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)

    for g in snlgroups.find({'canonical_snl.about.remarks': 'DEPRECATED'}, {'snlgroup_id': 1}):
        while lpdb.fireworks.find_one({'spec.snlgroup_id': g['snlgroup_id'],
                                       'state': {'$ne': 'ARCHIVED'}}, {'fw_id': 1}):
            fw = lpdb.fireworks.find_one({'spec.snlgroup_id': g['snlgroup_id'],
                                          'state': {'$ne': 'ARCHIVED'}}, {'fw_id': 1})
            print(fw['fw_id'])
            lpdb.archive_wf(fw['fw_id'])

    print('DONE')
def get_colls():
    # namedtuple class used as a simple namespace: collections are attached as
    # class attributes and no instance is ever created (field list completed
    # here to cover all attached attributes)
    colls = namedtuple('Collections', ['snl', 'snlgroups', 'fireworks', 'launches', 'tasks'])
    sma = SNLMongoAdapter.from_file(snl_f)
    lp = LaunchPad.from_file(fw_f)

    colls.snl = sma.snl
    colls.snlgroups = sma.snlgroups
    colls.fireworks = lp.fireworks
    colls.launches = lp.launches

    with open(tasks_f) as f2:
        task_creds = yaml.load(f2)

    mc = MongoClient(task_creds['host'], task_creds['port'])
    db = mc[task_creds['database']]
    db.authenticate(task_creds['admin_user'], task_creds['admin_password'])
    colls.tasks = db['tasks']

    return colls
def process_task(self, data):
    try:
        dir_name = data[0]
        parse_dos = data[1]
        prev_info = self.tasks.find_one({'dir_name_full': dir_name},
                                        {'task_type': 1, 'snl_final': 1,
                                         'snlgroup_id_final': 1, 'snlgroup_changed': 1})
        drone = MPVaspDrone(
            host=self.host, port=self.port,
            database=self.database, user=self.admin_user,
            password=self.admin_password,
            collection=self.collection, parse_dos=parse_dos,
            additional_fields={}, update_duplicates=True)
        t_id, d = drone.assimilate(dir_name, launches_coll=LaunchPad.auto_load().launches)

        self.tasks.update({"task_id": t_id},
                          {"$set": {"snl_final": prev_info['snl_final'],
                                    "snlgroup_id_final": prev_info['snlgroup_id_final'],
                                    "snlgroup_changed": prev_info['snlgroup_changed']}})
        print('FINISHED', t_id)
    except:
        print('-----')
        print('ENCOUNTERED AN EXCEPTION!!!', data[0])
        traceback.print_exc()
        print('-----')
def get_warnings_and_errors(fw_id):  # NOTE: the original "def" line is missing from this snippet; name/signature assumed
    x = tdb.tasks.find_one({"fw_id": fw_id}, {"analysis": 1})
    warnings = x['analysis'].get('warnings', [])
    warnings.extend(x['analysis']['errors_MP']['signals'])
    errors = x['analysis'].get('errors', [])
    errors.extend(x['analysis']['errors_MP']['critical_signals'])

    warnings = set(warnings)
    errors = set(errors)
    warnings = warnings.difference(errors)
    return set(warnings), set(errors)


if __name__ == '__main__':
    module_dir = os.path.dirname(os.path.abspath(__file__))
    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)

    tasks_f = os.path.join(module_dir, 'tasks_read.yaml')
    creds = {}
    with open(tasks_f) as f:
        creds = yaml.load(f)

    connection = MongoClient(creds['host'], creds['port'])
    tdb = connection[creds['db']]
    tdb.authenticate(creds['username'], creds['password'])

    except_dict = defaultdict(int)
    fizzled_fws = []
def arlaunch():
    """
    Function rapid-fire job launching
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire', help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)',
                               action='store_true')
    single_parser.add_argument('--pdb', help='shortcut to invoke debugger on error',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    rapid_parser.add_argument(
        '--max_loops', help='after this many sleep loops, quit even in '
        'infinite nlaunches mode (default -1 is infinite loops)',
        default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)',
                              default=None, type=int)
    rapid_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument(
        '--sleep', help='sleep time between loops in infinite launch mode '
        '(secs)', default=None, type=int)
    multi_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    multi_parser.add_argument(
        '--nodefile', help='nodefile name or environment variable name '
        'containing the node file name (for populating '
        'FWData only)', default=None, type=str)
    multi_parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)',
        default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node "
                              "as compute node", action="store_true")
    multi_parser.add_argument(
        '--local_redirect', help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', required=True, help='path to fworker file')
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
            else LaunchPad(strm_lvl=args.loglvl)

    fworker = AiiDAFWorker.from_file(args.fworker_file)

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep,
                  strm_lvl=args.loglvl, timeout=args.timeout,
                  local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as fhandle:
                total_node_list = [line.strip() for line in fhandle.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list, args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl,
                      pdb_on_exception=args.pdb)
def rlaunch():
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'
    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser('multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops', help='after this many sleep loops, quit even in '
                              'infinite nlaunches mode (default -1 is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)',
                              default=None, type=int)

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)', default=0)
    multi_parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode '
                              '(secs)', default=None, type=int)
    multi_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile', help='nodefile name or environment variable name '
                              'containing the node file name (for populating '
                              'FWData only)', default=None, type=str)
    multi_parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                              default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node "
                              "as compute node", action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)', default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file \
            else LaunchPad(strm_lvl=args.loglvl)
    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep,
                  strm_lvl=args.loglvl, timeout=args.timeout)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list, args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
import os

import yaml
from fireworks.core.launchpad import LaunchPad
from fireworks.core.firework import Firework, Workflow
from mpworks.firetasks.controller_tasks import AddEStructureTask
from fireworks.utilities.fw_utilities import get_slug
from mpworks.snl_utils.snl_mongo import SNLMongoAdapter
from pymongo import MongoClient
from collections import Counter
from datetime import datetime
from fnmatch import fnmatch
from custodian.vasp.handlers import VaspErrorHandler

cwd = os.getcwd()

# DONE manually: "mp-987" -> fw_id: 119629
lpdb = LaunchPad.from_file(
    '/global/homes/m/matcomp/mp_prod/config/config_Mendel/my_launchpad.yaml')
spec = {
    'task_type': 'Controller: add Electronic Structure v2',
    '_priority': 100000
}
sma = SNLMongoAdapter.from_file(
    '/global/homes/m/matcomp/mp_prod/config/dbs/snl_db.yaml')
with open('/global/homes/m/matcomp/mp_prod/materials_db_prod.yaml') as f:
    creds = yaml.load(f)
client = MongoClient(creds['host'], creds['port'])
db = client[creds['db']]
db.authenticate(creds['username'], creds['password'])
materials = db['materials']
tasks = db['tasks']
print(materials.count())
from atomate.qchem.workflows.base.FF_then_fragment import get_wf_FF_then_fragment
from fireworks.core.launchpad import LaunchPad
from pymatgen.core import Molecule

mol = Molecule.from_file("BF4-.xyz")
wf = get_wf_FF_then_fragment(molecule=mol, max_cores=32)
lp = LaunchPad.auto_load()
lp.add_wf(wf)
def auto_load(cls):
    spsma = SPSubmissionsMongoAdapter.auto_load()
    lp = LaunchPad.auto_load()
    return SPSubmissionProcessor(spsma, lp)
def run(self, pdb_on_exception=False):
    """
    Run the rocket (check out a job from the database and execute it)

    Args:
        pdb_on_exception (bool): whether to invoke the debugger on
            a caught exception. Default False.
    """
    all_stored_data = {}  # combined stored data for *all* the Tasks
    all_update_spec = {}  # combined update_spec for *all* the Tasks
    all_mod_spec = []  # combined mod_spec for *all* the Tasks

    lp = self.launchpad
    launch_dir = os.path.abspath(os.getcwd())
    logdir = lp.get_logdir() if lp else None
    l_logger = get_fw_logger('rocket.launcher', l_dir=logdir,
                             stream_level=ROCKET_STREAM_LOGLEVEL)

    # check a FW job out of the launchpad
    if lp:
        m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
    else:  # offline mode
        m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

        # set the run start time
        fpath = zpath("FW_offline.json")
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d['started_on'] = datetime.utcnow().isoformat()
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))

        launch_id = None  # we don't need this in offline mode...

    if not m_fw:
        print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
        return False

    final_state = None
    ping_stop = None
    btask_stops = []

    try:
        if '_launch_dir' in m_fw.spec and lp:
            prev_dir = launch_dir
            launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
            # isabs assumed intended here; the original tested os.path.abspath,
            # which is always truthy and made this branch dead code
            if not os.path.isabs(launch_dir):
                launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir))

            # thread-safe "mkdir -p"
            try:
                os.makedirs(launch_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise

            os.chdir(launch_dir)

            if not os.path.samefile(launch_dir, prev_dir):
                lp.change_launch_dir(launch_id, launch_dir)

            if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                try:
                    os.rmdir(prev_dir)
                except:
                    pass

        recovery = m_fw.spec.get('_recovery', None)
        if recovery:
            recovery_dir = recovery.get('_prev_dir')
            recovery_mode = recovery.get('_mode')
            starting_task = recovery.get('_task_n')
            all_stored_data.update(recovery.get('_all_stored_data'))
            all_update_spec.update(recovery.get('_all_update_spec'))
            all_mod_spec.extend(recovery.get('_all_mod_spec'))
            if lp:
                l_logger.log(
                    logging.INFO,
                    'Recovering from task number {} in folder {}.'.format(starting_task,
                                                                          recovery_dir))
            if recovery_mode == 'cp' and launch_dir != recovery_dir:
                if lp:
                    l_logger.log(
                        logging.INFO,
                        'Copying data from recovery folder {} to folder {}.'.format(recovery_dir,
                                                                                    launch_dir))
                distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1)
        else:
            starting_task = 0
            files_in = m_fw.spec.get("_files_in", {})
            prev_files = m_fw.spec.get("_files_prev", {})
            for f in set(files_in.keys()).intersection(prev_files.keys()):
                # We use zopen for the file objects for transparent handling
                # of zipped files. shutil.copyfileobj does the actual copy
                # in chunks that avoid memory issues.
                with zopen(prev_files[f], "rb") as fin, zopen(files_in[f], "wb") as fout:
                    shutil.copyfileobj(fin, fout)

        if lp:
            message = 'RUNNING fw_id: {} in directory: {}'.format(m_fw.fw_id, os.getcwd())
            l_logger.log(logging.INFO, message)

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
        my_spec["_fw_env"] = self.fworker.env

        # set up heartbeat (pinging the server that we're still alive)
        ping_stop = start_ping_launch(lp, launch_id)

        # start background tasks
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                btask_stops.append(start_background_task(bt, m_fw.spec))

        # execute the Firetasks!
        for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
            checkpoint = {'_task_n': t_counter,
                          '_all_stored_data': all_stored_data,
                          '_all_update_spec': all_update_spec,
                          '_all_mod_spec': all_mod_spec}
            Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint)

            if lp:
                l_logger.log(logging.INFO, "Task started: %s." % t.fw_name)

            if my_spec.get("_add_launchpad_and_fw_id"):
                t.fw_id = m_fw.fw_id
                if FWData().MULTIPROCESSING:
                    # hack because AutoProxy manager can't access attributes
                    t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict())
                else:
                    t.launchpad = self.launchpad

            if my_spec.get("_add_fworker"):
                t.fworker = self.fworker

            try:
                m_action = t.run_task(my_spec)
            except BaseException as e:
                traceback.print_exc()
                tb = traceback.format_exc()
                stop_backgrounds(ping_stop, btask_stops)
                do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                # If the exception is serializable, save its details
                if pdb_on_exception:
                    pdb.post_mortem()
                try:
                    exception_details = e.to_dict()
                except AttributeError:
                    exception_details = None
                except BaseException as e:
                    if lp:
                        l_logger.log(logging.WARNING,
                                     "Exception couldn't be serialized: %s " % e)
                    exception_details = None

                try:
                    m_task = t.to_dict()
                except:
                    m_task = None

                m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                                 '_task': m_task,
                                                 '_exception': {'_stacktrace': tb,
                                                                '_details': exception_details}},
                                    exit=True)
                m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

                if lp:
                    final_state = 'FIZZLED'
                    lp.complete_launch(launch_id, m_action, final_state)
                else:
                    fpath = zpath("FW_offline.json")
                    with zopen(fpath) as f_in:
                        d = json.loads(f_in.read())
                        d['fwaction'] = m_action.to_dict()
                        d['state'] = 'FIZZLED'
                        d['completed_on'] = datetime.utcnow().isoformat()
                        with zopen(fpath, "wt") as f_out:
                            f_out.write(json.dumps(d, ensure_ascii=False))

                return True

            # read in a FWAction from a file, in case the task is not Python and cannot return
            # it explicitly
            if os.path.exists('FWAction.json'):
                m_action = FWAction.from_file('FWAction.json')
            elif os.path.exists('FWAction.yaml'):
                m_action = FWAction.from_file('FWAction.yaml')

            if not m_action:
                m_action = FWAction()

            # update the global stored data with the data to store and update from this
            # particular Task
            all_stored_data.update(m_action.stored_data)
            all_update_spec.update(m_action.update_spec)
            all_mod_spec.extend(m_action.mod_spec)

            # update spec for next task as well
            my_spec.update(m_action.update_spec)
            for mod in m_action.mod_spec:
                apply_mod(mod, my_spec)

            if lp:
                l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name)
            if m_action.skip_remaining_tasks:
                break

        # add job packing info if this is needed
        if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
            all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

        # perform finishing operation
        stop_backgrounds(ping_stop, btask_stops)
        for b in btask_stops:
            b.set()
        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
        # last background monitors
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                if bt.run_on_finish:
                    for task in bt.tasks:
                        task.run_task(m_fw.spec)

        m_action.stored_data = all_stored_data
        m_action.mod_spec = all_mod_spec
        m_action.update_spec = all_update_spec

        m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

        if lp:
            final_state = 'COMPLETED'
            lp.complete_launch(launch_id, m_action, final_state)
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'COMPLETED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))

        return True

    except LockedWorkflowError as e:
        l_logger.log(logging.DEBUG, traceback.format_exc())
        l_logger.log(logging.WARNING,
                     "Firework {} reached final state {} but couldn't complete the update of "
                     "the database. Reason: {}\nRefresh the WF to recover the result "
                     "(lpad admin refresh -i {}).".format(self.fw_id, final_state, e, self.fw_id))
        return True

    except:
        # problems while processing the results. high probability of malformed data.
        traceback.print_exc()
        stop_backgrounds(ping_stop, btask_stops)
        # restore initial state to prevent the raise of further exceptions
        if lp:
            lp.restore_backup_data(launch_id, m_fw.fw_id)

        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
        # the action produced by the task is discarded
        m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                         '_task': None,
                                         '_exception': {'_stacktrace': traceback.format_exc(),
                                                        '_details': None}},
                            exit=True)
        try:
            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
        except:
            traceback.print_exc()

        if lp:
            try:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            except LockedWorkflowError as e:
                l_logger.log(logging.DEBUG, traceback.format_exc())
                l_logger.log(logging.WARNING,
                             "Firework {} fizzled but couldn't complete the update of the database."
                             " Reason: {}\nRefresh the WF to recover the result "
                             "(lpad admin refresh -i {}).".format(self.fw_id, final_state, e,
                                                                  self.fw_id))
            return True
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'FIZZLED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))
            return True
def run_task(self, fw_spec):
    # Get the fw_id and launchpad
    if '_add_launchpad_and_fw_id' in fw_spec:
        lp = self.launchpad
        fw_id = self.fw_id
    else:
        try:
            fw_dict = loadfn('FW.json')
        except IOError:
            try:
                fw_dict = loadfn('FW.yaml')
            except IOError:
                raise RuntimeError("Launchpad/fw_id not present in spec and No FW.json nor FW.yaml file present: "
                                   "impossible to determine fw_id")
        lp = LaunchPad.auto_load()
        fw_id = fw_dict['fw_id']

    # Treat the case where there was some error that led to a fizzled state
    if '_fizzled_parents' in fw_spec:
        if len(fw_spec['_fizzled_parents']) != 1:
            raise ValueError('CheckTask\'s Firework should have exactly one parent firework')

        # Get the fizzled fw
        fizzled_fw_id = fw_spec['_fizzled_parents'][0]['fw_id']
        fizzled_fw = lp.get_fw_by_id(fizzled_fw_id)

        # Sort handlers by their priority
        sorted_handlers = sorted([h for h in self.handlers if h.allow_fizzled],
                                 key=lambda x: x.handler_priority)

        # Get the corrections for all the handlers
        corrections = []
        for handler in sorted_handlers:
            # Set needed data for the handlers (the spec of this check task/fw
            # and the fw that has to be checked)
            handler.src_setup(fw_spec=fw_spec, fw_to_check=fizzled_fw)
            if handler.check():
                corrections.append(handler.correct())
                if handler.skip_remaining_handlers:
                    break

        # In case of a fizzled parent, at least one correction is needed!
        if len(corrections) == 0:
            raise RuntimeError('No corrections found for fizzled firework ...')

        # Apply the corrections
        fw_action = self.apply_corrections(fw_to_correct=fizzled_fw, corrections=corrections)
        return fw_action
    # Treat the case where there was no fizzled parent => forward "needed" outputs
    # of the previous firework to the next one.
    else:
        # Get the previous fw
        this_lzy_wf = lp.get_wf_by_fw_id_lzyfw(fw_id)
        parents_fw_ids = this_lzy_wf.links.parent_links[fw_id]
        if len(parents_fw_ids) != 1:
            raise ValueError('CheckTask\'s Firework should have exactly one parent firework')
        run_fw = lp.get_fw_by_id(parents_fw_ids[0])

        # Sort handlers by their priority
        if self.handlers is not None:
            sorted_handlers = sorted([h for h in self.handlers if h.allow_completed],
                                     key=lambda x: x.handler_priority)
        else:
            sorted_handlers = []

        # Get the corrections for all the handlers
        corrections = []
        for handler in sorted_handlers:
            # Set needed data for the handlers (the spec of this check task/fw
            # and the fw that has to be checked)
            handler.src_setup(fw_spec=fw_spec, fw_to_check=run_fw)
            if handler.check():
                corrections.append(handler.correct())
                if handler.skip_remaining_handlers:
                    break

        # If some corrections are found, apply and return the FWAction
        if len(corrections) > 0:
            fw_action = self.apply_corrections(fw_to_correct=run_fw, corrections=corrections)
            return fw_action

        # Validate the results if no error was found
        validators = self.validators if self.validators is not None else []
        for validator in validators:
            if not validator.check():
                raise RuntimeError('Validator invalidates results ...')

        stored_data = {}
        update_spec = {}
        mod_spec = []
        for task_type, task_info in fw_spec['previous_fws'].items():
            mod_spec.append({'_push_all': {'previous_fws->' + task_type: task_info}})
        return FWAction(stored_data=stored_data, update_spec=update_spec, mod_spec=mod_spec)
def add_to_db(self, lpad=None):
    if not lpad:
        lpad = LaunchPad.auto_load()
    return lpad.add_wf(self.wf)
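# Hedged usage sketch (assumption, not from the source): pass an explicit
# LaunchPad to target a specific database; with no argument the auto-loaded
# default is used. The path and the "some_wf_generator" object are placeholders.
from fireworks.core.launchpad import LaunchPad

lpad = LaunchPad.from_file('/path/to/my_launchpad.yaml')
fw_mapping = some_wf_generator.add_to_db(lpad=lpad)  # add_wf returns a dict of old->new fw_ids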
def add_to_db(self):
    from fireworks.core.launchpad import LaunchPad
    launchpad_file = os.path.join(os.environ['FW_CONFIG_DIR'], 'my_launchpad.yaml')
    lp = LaunchPad.from_file(launchpad_file)
    lp.add_wf(self.wf)
def auto_load(cls):
    sma = SubmissionMongoAdapterEG.auto_load()
    lp = LaunchPad.auto_load()
    return SubmissionProcessorEG(sma, lp)
def random_launch(lp_creds):
    lp = LaunchPad.from_dict(lp_creds)
    while lp.run_exists(None):
        launch_rocket(lp)
        time.sleep(random.random() / 3 + 0.1)
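# Hedged usage sketch (assumption, not from the source): because the LaunchPad
# is rebuilt from a plain dict inside each process, random_launch can be handed
# to multiprocessing workers without sharing a pymongo connection across forks.
from multiprocessing import Process
from fireworks.core.launchpad import LaunchPad

lp_creds = LaunchPad.auto_load().to_dict()
procs = [Process(target=random_launch, args=(lp_creds,)) for _ in range(4)]
for p in procs:
    p.start()
for p in procs:
    p.join()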
def launch_workflow(
    self,
    launchpad_dir="",
    k_product=50,
    job=None,
    user_incar_settings=None,
    potcar_functional="PBE",
    additional_handlers=None,
):
    """
    Creates a list of Fireworks. Each Firework represents calculations
    that will be done on a slab system of a compound in a specific
    orientation. Each Firework contains an oriented unit cell relaxation job
    and a WriteSlabVaspInputs task, which creates additional Firework(s)
    depending on whether or not Termination=True. Vasp outputs from all slab
    and oriented unit cell calculations will then be inserted into a
    database.

    Args:
        launchpad_dir (str path): The path to my_launchpad.yaml. Defaults
            to the current working directory containing your runs
        k_product: kpts[0][0]*a. Decide k density without kpoint0,
            default to 50
        cwd: (str path): The current working directory. Location of where
            you want your vasp outputs to be.
        job (VaspJob): The command (cmd) entered into VaspJob object.
            Default is specifically set for running vasp jobs on Carver at
            NERSC (use aprun for Hopper or Edison).
        user_incar_settings(dict): A dict specifying additional incar
            settings, default to None (ediff_per_atom=False)
        potcar_functional (str): default to PBE
    """
    launchpad = LaunchPad.from_file(os.path.join(os.environ["HOME"],
                                                 launchpad_dir,
                                                 "my_launchpad.yaml"))
    if self.reset:
        launchpad.reset("", require_password=False)

    # Scratch directory referred to by custodian.
    # May be different on non-Nersc systems.
    if not job:
        job = VaspJob(["mpirun", "-n", "64", "vasp"],
                      auto_npar=False, copy_magmom=True)

    handlers = [
        VaspErrorHandler(),
        NonConvergingErrorHandler(),
        UnconvergedErrorHandler(),
        PotimErrorHandler(),
        PositiveEnergyErrorHandler(),
        FrozenJobErrorHandler(timeout=3600),
    ]
    if additional_handlers:
        handlers.extend(additional_handlers)

    cust_params = {
        "custodian_params": {"scratch_dir": os.path.join("/global/scratch2/sd/",
                                                         os.environ["USER"])},
        # double_relaxation_run will return a list of jobs
        # instead of just being one job
        "jobs": job.double_relaxation_run(job.vasp_cmd, auto_npar=False),
        "handlers": handlers,
        "max_errors": 100,
    }

    fws = []
    for key in self.miller_dict.keys():
        # Enumerate through all compounds in the dictionary,
        # the key is the compositional formula of the compound
        print(key)
        for miller_index in self.miller_dict[key]:
            # Enumerates through all miller indices we
            # want to create slabs of that compound from
            print(str(miller_index))

            max_norm = max(miller_index) if self.max_normal_search else None
            # Whether or not we want to use the
            # max_normal_search algorithm from surface.py
            print("true or false max norm is", max_norm, self.max_normal_search)

            slab = SlabGenerator(self.unit_cells_dict[key][0], miller_index,
                                 self.ssize, self.vsize,
                                 max_normal_search=max_norm)
            oriented_uc = slab.oriented_unit_cell

            if self.fail_safe and len(oriented_uc) > 199:
                break

            # This method only creates the oriented unit cell, the
            # slabs are created in the WriteSlabVaspInputs task.
# WriteSlabVaspInputs will create the slabs from # the contcar of the oriented unit cell calculation handler = [] tasks = [] folderbulk = "/%s_%s_k%s_s%sv%s_%s%s%s" % ( oriented_uc.composition.reduced_formula, "bulk", k_product, self.ssize, self.vsize, str(miller_index[0]), str(miller_index[1]), str(miller_index[2]), ) cwd = os.getcwd() if self.get_bulk_e: tasks.extend( [ WriteUCVaspInputs( oriented_ucell=oriented_uc, folder=folderbulk, cwd=cwd, user_incar_settings=user_incar_settings, potcar_functional=potcar_functional, k_product=k_product, ), RunCustodianTask(dir=folderbulk, cwd=cwd, **cust_params), VaspSlabDBInsertTask( struct_type="oriented_unit_cell", loc=folderbulk, cwd=cwd, miller_index=miller_index, **self.vaspdbinsert_params ), ] ) # Slab will inherit average final magnetic moment # of the bulk from outcar, will have to generalize # this for systems with different elements later # element = oriented_uc.species[0] # out = Outcar(cwd+folderbulk) # out_mag = out.magnetization # tot_mag = [mag['tot'] for mag in out_mag] # magmom = np.mean(tot_mag) # user_incar_settings['MAGMOM'] = {element: magmom} tasks.append( WriteSlabVaspInputs( folder=folderbulk, cwd=cwd, user_incar_settings=user_incar_settings, terminations=self.terminations, custodian_params=cust_params, vaspdbinsert_parameters=self.vaspdbinsert_params, potcar_functional=potcar_functional, k_product=k_product, miller_index=miller_index, min_slab_size=self.ssize, min_vacuum_size=self.vsize, ucell=self.unit_cells_dict[key][0], ) ) fw = Firework(tasks, name=folderbulk) fws.append(fw) wf = Workflow(fws, name="Surface Calculations") launchpad.add_wf(wf)
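The folderbulk format string above doubles as the Firework name, so it is worth seeing what it actually produces. A quick standalone check (the formula, k_product, ssize, vsize, and miller index values are illustrative):

folderbulk = "/%s_%s_k%s_s%sv%s_%s%s%s" % ("Fe", "bulk", 50, 10, 10, 1, 1, 0)
print(folderbulk)  # -> /Fe_bulk_k50_s10v10_110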
def fix():
    # initialize databases
    module_dir = os.path.dirname(os.path.abspath(__file__))

    snl_f = os.path.join(module_dir, 'snl.yaml')
    snldb = SNLMongoAdapter.from_file(snl_f)
    snl = snldb.snl
    snlgroups = snldb.snlgroups

    tasks_f = os.path.join(module_dir, 'tasks.yaml')
    with open(tasks_f) as f2:
        task_creds = yaml.safe_load(f2)
    mc = MongoClient(task_creds['host'], task_creds['port'])
    db = mc[task_creds['database']]
    db.authenticate(task_creds['admin_user'], task_creds['admin_password'])
    tasks = db['tasks']

    lp_f = os.path.join(module_dir, 'my_launchpad.yaml')
    lpdb = LaunchPad.from_file(lp_f)
    fws = lpdb.fireworks
    launches = lpdb.launches

    sb_f = os.path.join(module_dir, 'submission.yaml')
    sbdb = SubmissionMongoAdapter.from_file(sb_f)
    submissions = sbdb.jobs

    bad_crystal_ids = []
    crystals_file = os.path.join(module_dir, 'bad_crystals.txt')
    with open(crystals_file) as f:
        for line in f:
            bad_crystal_ids.append(int(line.strip()))

    for c_id in bad_crystal_ids:
        if c_id == 100892 or c_id == 100202:
            print('SKIP')
        else:
            # FIX SNL
            for s in snl.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                              {'snl_id': 1}):
                snl.update({'snl_id': s['snl_id']},
                           {'$pushAll': {'about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX SNLGROUPS
            for s in snlgroups.find({'canonical_snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                                    {'snlgroup_id': 1}):
                snlgroups.update({'snlgroup_id': s['snlgroup_id']},
                                 {'$pushAll': {'canonical_snl.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX FWs pt 1
            for s in fws.find({'spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                              {'fw_id': 1}):
                fws.update({'fw_id': s['fw_id']},
                           {'$pushAll': {'spec.mpsnl.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX FWs pt 2
            for s in fws.find({'spec.force_mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                              {'fw_id': 1}):
                fws.update({'fw_id': s['fw_id']},
                           {'$pushAll': {'spec.force_mpsnl.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX Launches
            for s in launches.find({'action.update_spec.mpsnl.about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                                   {'launch_id': 1}):
                launches.update({'launch_id': s['launch_id']},
                                {'$pushAll': {'action.update_spec.mpsnl.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX TASKS
            for s in tasks.find({'snl.about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                                {'task_id': 1}):
                tasks.update({'task_id': s['task_id']},
                             {'$pushAll': {'snl.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
                tasks.update({'task_id': s['task_id']},
                             {'$pushAll': {'snl_final.about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            # FIX SUBMISSIONS
            for s in submissions.find({'about._materialsproject.deprecated.crystal_id_deprecated': c_id},
                                      {'submission_id': 1}):
                submissions.update({'submission_id': s['submission_id']},
                                   {'$pushAll': {'about.remarks': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}})
            print('FIXED', c_id)
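One caveat worth noting: the $pushAll update operator used throughout fix() was deprecated in MongoDB 2.4 and removed in 3.6, and Collection.update() is likewise deprecated in pymongo 3+. On a modern stack the same multi-value append would be written with $push plus $each via update_one, e.g. for the SNL case:

snl.update_one(
    {'snl_id': s['snl_id']},
    {'$push': {'about.remarks': {'$each': ['DEPRECATED', 'SEVERE BUG IN ICSD CONVERSION']}}})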
from flask import Flask, render_template, request, jsonify
from flask import redirect, url_for, abort
from fireworks import Firework
from fireworks.utilities.fw_serializers import DATETIME_HANDLER
from pymongo import DESCENDING
import os, json
from fireworks.core.launchpad import LaunchPad
from flask_paginate import Pagination  # "flask.ext.*" imports were removed in Flask 1.0

app = Flask(__name__)
app.use_reloader = True
hello = __name__
lp = LaunchPad.from_dict(json.loads(os.environ["FWDB_CONFIG"]))
PER_PAGE = 20
STATES = Firework.STATE_RANKS.keys()


@app.template_filter('datetime')
def datetime(value):
    import datetime as dt
    date = dt.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%f')
    return date.strftime('%m/%d/%Y')


@app.template_filter('pluralize')
def pluralize(number, singular='', plural='s'):
    if number == 1:
        return singular
    else:
        return plural
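A minimal way to serve this app for local inspection; the host and port here are arbitrary choices, not taken from the original, and FWDB_CONFIG must hold the JSON-serialized LaunchPad credentials before the module is imported.

if __name__ == "__main__":
    # debug=True enables the reloader and interactive traceback page.
    app.run(host="127.0.0.1", port=5000, debug=True)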
def run(self, pdb_on_exception=False):
    """
    Run the rocket (check out a job from the database and execute it)

    Args:
        pdb_on_exception (bool): whether to invoke the debugger on
            a caught exception. Default False.
    """
    all_stored_data = {}  # combined stored data for *all* the Tasks
    all_update_spec = {}  # combined update_spec for *all* the Tasks
    all_mod_spec = []  # combined mod_spec for *all* the Tasks

    lp = self.launchpad
    launch_dir = os.path.abspath(os.getcwd())
    logdir = lp.get_logdir() if lp else None
    l_logger = get_fw_logger('rocket.launcher', l_dir=logdir,
                             stream_level=ROCKET_STREAM_LOGLEVEL)

    # check a FW job out of the launchpad
    if lp:
        m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
    else:  # offline mode
        m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

        # set the run start time
        fpath = zpath("FW_offline.json")
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d['started_on'] = datetime.utcnow().isoformat()
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))

        launch_id = None  # we don't need this in offline mode...

    if not m_fw:
        print("No FireWorks are ready to run and match query! {}".format(self.fworker.query))
        return False

    final_state = None
    ping_stop = None
    btask_stops = []

    try:
        if '_launch_dir' in m_fw.spec and lp:
            prev_dir = launch_dir
            launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
            # bug fix: os.path.abspath() always returns a truthy string;
            # os.path.isabs() is the intended check here
            if not os.path.isabs(launch_dir):
                launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir))

            # thread-safe "mkdir -p"
            try:
                os.makedirs(launch_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise

            os.chdir(launch_dir)

            if not os.path.samefile(launch_dir, prev_dir):
                lp.change_launch_dir(launch_id, launch_dir)

            if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                try:
                    os.rmdir(prev_dir)
                except Exception:
                    pass

        recovery = m_fw.spec.get('_recovery', None)
        if recovery:
            recovery_dir = recovery.get('_prev_dir')
            recovery_mode = recovery.get('_mode')
            starting_task = recovery.get('_task_n')
            all_stored_data.update(recovery.get('_all_stored_data'))
            all_update_spec.update(recovery.get('_all_update_spec'))
            all_mod_spec.extend(recovery.get('_all_mod_spec'))
            if lp:
                l_logger.log(
                    logging.INFO,
                    'Recovering from task number {} in folder {}.'.format(starting_task,
                                                                          recovery_dir))
            if recovery_mode == 'cp' and launch_dir != recovery_dir:
                if lp:
                    l_logger.log(
                        logging.INFO,
                        'Copying data from recovery folder {} to folder {}.'.format(recovery_dir,
                                                                                    launch_dir))
                distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1)
        else:
            starting_task = 0
            files_in = m_fw.spec.get("_files_in", {})
            prev_files = m_fw.spec.get("_files_prev", {})
            for f in set(files_in.keys()).intersection(prev_files.keys()):
                # We use zopen for the file objects for transparent handling
                # of zipped files. shutil.copyfileobj does the actual copy
                # in chunks that avoid memory issues.
                with zopen(prev_files[f], "rb") as fin, zopen(files_in[f], "wb") as fout:
                    shutil.copyfileobj(fin, fout)

        if lp:
            message = 'RUNNING fw_id: {} in directory: {}'.format(m_fw.fw_id, os.getcwd())
            l_logger.log(logging.INFO, message)

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
        my_spec["_fw_env"] = self.fworker.env

        # set up heartbeat (pinging the server that we're still alive)
        ping_stop = start_ping_launch(lp, launch_id)

        # start background tasks
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                btask_stops.append(start_background_task(bt, m_fw.spec))

        # execute the Firetasks!
        for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task):
            checkpoint = {'_task_n': t_counter,
                          '_all_stored_data': all_stored_data,
                          '_all_update_spec': all_update_spec,
                          '_all_mod_spec': all_mod_spec}
            Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint)

            if lp:
                l_logger.log(logging.INFO, "Task started: %s." % t.fw_name)

            if my_spec.get("_add_launchpad_and_fw_id"):
                t.fw_id = m_fw.fw_id
                if FWData().MULTIPROCESSING:
                    # hack because AutoProxy manager can't access attributes
                    t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict())
                else:
                    t.launchpad = self.launchpad

            if my_spec.get("_add_fworker"):
                t.fworker = self.fworker

            try:
                m_action = t.run_task(my_spec)
            except BaseException as e:
                traceback.print_exc()
                tb = traceback.format_exc()
                stop_backgrounds(ping_stop, btask_stops)
                do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                if pdb_on_exception:
                    pdb.post_mortem()
                # If the exception is serializable, save its details
                try:
                    exception_details = e.to_dict()
                except AttributeError:
                    exception_details = None
                except BaseException as e:
                    if lp:
                        l_logger.log(logging.WARNING,
                                     "Exception couldn't be serialized: %s " % e)
                    exception_details = None

                try:
                    m_task = t.to_dict()
                except Exception:
                    m_task = None

                m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                                 '_task': m_task,
                                                 '_exception': {'_stacktrace': tb,
                                                                '_details': exception_details}},
                                    exit=True)
                m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

                if lp:
                    final_state = 'FIZZLED'
                    lp.complete_launch(launch_id, m_action, final_state)
                else:
                    fpath = zpath("FW_offline.json")
                    with zopen(fpath) as f_in:
                        d = json.loads(f_in.read())
                        d['fwaction'] = m_action.to_dict()
                        d['state'] = 'FIZZLED'
                        d['completed_on'] = datetime.utcnow().isoformat()
                        with zopen(fpath, "wt") as f_out:
                            f_out.write(json.dumps(d, ensure_ascii=False))

                return True

            # read in a FWAction from a file, in case the task is not Python
            # and cannot return it explicitly
            if os.path.exists('FWAction.json'):
                m_action = FWAction.from_file('FWAction.json')
            elif os.path.exists('FWAction.yaml'):
                m_action = FWAction.from_file('FWAction.yaml')

            if not m_action:
                m_action = FWAction()

            # update the global stored data with the data to store and update
            # from this particular Task
            all_stored_data.update(m_action.stored_data)
            all_update_spec.update(m_action.update_spec)
            all_mod_spec.extend(m_action.mod_spec)

            # update spec for next task as well
            my_spec.update(m_action.update_spec)
            for mod in m_action.mod_spec:
                apply_mod(mod, my_spec)
            if lp:
                l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name)
            if m_action.skip_remaining_tasks:
                break

        # add job packing info if this is needed
        if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
            all_stored_data['multiprocess_name'] = multiprocessing.current_process().name

        # perform finishing operation
        stop_backgrounds(ping_stop, btask_stops)
        for b in btask_stops:
            b.set()
        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
        # last background monitors
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                if bt.run_on_finish:
                    for task in bt.tasks:
                        task.run_task(m_fw.spec)

        m_action.stored_data = all_stored_data
        m_action.mod_spec = all_mod_spec
        m_action.update_spec = all_update_spec

        m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

        if lp:
            final_state = 'COMPLETED'
            lp.complete_launch(launch_id, m_action, final_state)
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'COMPLETED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))

        return True

    except LockedWorkflowError as e:
        l_logger.log(logging.DEBUG, traceback.format_exc())
        l_logger.log(logging.WARNING,
                     "Firework {} reached final state {} but couldn't complete the update of "
                     "the database. Reason: {}\nRefresh the WF to recover the result "
                     "(lpad admin refresh -i {}).".format(self.fw_id, final_state, e, self.fw_id))
        return True

    except Exception:
        # problems while processing the results. high probability of malformed data.
        traceback.print_exc()
        stop_backgrounds(ping_stop, btask_stops)
        # restore initial state to prevent the raise of further exceptions
        if lp:
            lp.restore_backup_data(launch_id, m_fw.fw_id)

        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
        # the action produced by the task is discarded
        m_action = FWAction(stored_data={'_message': 'runtime error during task',
                                         '_task': None,
                                         '_exception': {'_stacktrace': traceback.format_exc(),
                                                        '_details': None}},
                            exit=True)

        try:
            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
        except Exception:
            traceback.print_exc()

        if lp:
            try:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            except LockedWorkflowError as e:
                l_logger.log(logging.DEBUG, traceback.format_exc())
                l_logger.log(logging.WARNING,
                             "Firework {} fizzled but couldn't complete the update of the database."
                             " Reason: {}\nRefresh the WF to recover the result "
                             "(lpad admin refresh -i {}).".format(self.fw_id, final_state, e, self.fw_id))
                return True
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'FIZZLED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))
        return True
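For orientation, run() above is normally reached through the launcher helpers rather than called directly, but a Rocket can be driven by hand. A hedged sketch; the Rocket constructor signature here is taken from memory of the FireWorks source, so check it against your installed version.

from fireworks.core.launchpad import LaunchPad
from fireworks.core.fworker import FWorker
from fireworks.core.rocket import Rocket

lpad = LaunchPad.auto_load()
# fw_id=None lets checkout_fw pick the first ready Firework.
rocket = Rocket(lpad, FWorker(), None)
completed = rocket.run()  # returns False if nothing was ready to run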