def test_has_inserted(self):
    """Run the early-stop workflow, then the full workflow, checking stored formulas.

    After each rapidfire pass, the distinct ``formula_pretty`` values found in
    the ``tasks`` collection are compared with the expected set.
    """

    def _fire():
        # Every pass uses a newly built FWorker carrying the same environment.
        worker_env = {
            "db_file": os.path.join(db_dir, "db.json"),
            "vasp_cmd": ["echo", "fake"],
        }
        rapidfire(self.lp, fworker=FWorker(env=worker_env))

    def _stored_formulas():
        tasks = self.get_task_collection(coll_name="tasks")
        return set(tasks.distinct("formula_pretty"))

    self.lp.add_wf(self.wf_stop_early)
    _fire()
    self.assertEqual(_stored_formulas(), {"Y2Mg(PO4)2", "YPO4"})

    self.lp.add_wf(self.wf)
    _fire()
    # Check that all of the inserted pretty formulas are present
    self.assertEqual(
        _stored_formulas(),
        {"Y2Mg(PO4)2", "Y2Mg3(PO4)2", "YMg2PO4", "YMgPO4", "YPO4"},
    )
def test_category_pt2(self):
    """Check ``run_exists`` for a workflow whose Fireworks are created without a category."""
    first = Firework(ScriptTask.from_str('echo "Task 1"'), fw_id=1, name='Task 1')
    second = Firework(ScriptTask.from_str('echo "Task 2"'), fw_id=2, name='Task 2')
    self.lp.add_wf(Workflow([first, second]))

    def runnable(worker):
        return self.lp.run_exists(worker)

    self.assertFalse(runnable(FWorker(category="dummy_category")))
    self.assertTrue(runnable(FWorker(category="__none__")))
    self.assertTrue(runnable(FWorker()))  # can run any category
    self.assertFalse(
        runnable(FWorker(category=["dummy_category", "other category"])))
def test_wf(self):
    """Simulate workflows 1-6, run them all, and assert every Firework completed."""
    source_wfs = (self.wf_1, self.wf_2, self.wf_3,
                  self.wf_4, self.wf_5, self.wf_6)
    simulated = [get_simulated_wf(wf) for wf in source_wfs]
    id_maps = [self.lp.add_wf(wf) for wf in simulated]

    # get fw ids that can be used to identify the workflows from the DB
    fw_ids = [list(id_map.values())[0] for id_map in id_maps]

    # Use scratch directory as destination directory for testing
    fworker = FWorker(env={"run_dest_root": self.scratch_dir})
    rapidfire(self.lp, fworker=fworker)

    for fw_id in fw_ids:
        wf = self.lp.get_wf_by_fw_id(fw_id)
        self.assertTrue(
            all(state == "COMPLETED" for state in wf.fw_states.values()))
def test_xas_wflow_abatom_by_idx(self):
    """Build a one-Firework XANES workflow, run it, and check the stored task.

    When ``FEFF_CMD`` is falsy, the FEFF command is replaced by a ``cp`` of a
    reference ``xmu.dat`` into the current directory.
    """
    if FEFF_CMD:
        feff_bin = FEFF_CMD
    else:
        # fake run
        reference_xmu = os.path.abspath(
            os.path.join(module_dir, "../../test_files/xmu.dat"))
        feff_bin = "cp {} .".format(reference_xmu)

    wf = get_wf_xas(
        self.absorbing_atom,
        self.structure,
        feff_input_set="XANES",
        edge="K",
        feff_cmd=feff_bin,
        db_file=">>db_file<<",
        use_primitive=False,
        user_tag_settings=self.user_tag_settings,
    )
    self.assertEqual(len(wf.as_dict()["fws"]), 1)

    self.lp.add_wf(wf)
    # run
    worker = FWorker(env={"db_file": os.path.join(db_dir, "db.json")})
    rapidfire(self.lp, fworker=worker)

    task_doc = self.get_task_collection().find_one({"spectrum_type": "XANES"})
    self._check_run(task_doc)
def do_launch(args):
    """Resolve config files from ``args`` and submit to the queue.

    For each of the launchpad, fworker and queue adapter files, a path given
    on ``args`` wins; otherwise the matching ``my_*.yaml`` inside
    ``args.config_dir`` is used when it exists.

    Args:
        args: parsed command-line namespace. Reads ``launchpad_file``,
            ``fworker_file``, ``queueadapter_file``, ``config_dir``,
            ``loglvl``, ``silencer``, ``command``, ``launch_dir``,
            ``reserve`` and, for ``rapidfire``, ``nlaunches``,
            ``maxjobs_queue``, ``maxjobs_block`` and ``sleep``.
    """
    config_files = (
        ('launchpad_file', 'my_launchpad.yaml'),
        ('fworker_file', 'my_fworker.yaml'),
        ('queueadapter_file', 'my_qadapter.yaml'),
    )
    for attr, filename in config_files:
        # Only look in config_dir when no explicit path was supplied.
        if not getattr(args, attr):
            candidate = os.path.join(args.config_dir, filename)
            if os.path.exists(candidate):
                setattr(args, attr, candidate)

    # Apply the silencer BEFORE building the default LaunchPad; previously the
    # LaunchPad was constructed with the un-silenced level, so -s had no
    # effect on it.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(
        args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, queueadapter, args.launch_dir,
                  args.nlaunches, args.maxjobs_queue, args.maxjobs_block,
                  args.sleep, args.reserve, args.loglvl)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl,
                               False)
def test_eels_wflow_abatom_by_idx(self):
    """Build a one-Firework ELNES workflow, run it, and check the stored task."""
    # for the sake of test just copy xmu to eels
    reference_xmu = os.path.abspath(
        os.path.join(module_dir, "../../test_files/xmu.dat"))
    fake_feff = f"cp {reference_xmu} eels.dat"

    wf = get_wf_eels(
        self.absorbing_atom,
        self.structure,
        feff_input_set="ELNES",
        edge="L1",
        user_tag_settings=self.user_tag_settings,
        use_primitive=False,
        feff_cmd=fake_feff,
        db_file=">>db_file<<",
    )
    self.assertEqual(len(wf.as_dict()["fws"]), 1)

    self.lp.add_wf(wf)
    # run
    worker = FWorker(env={"db_file": os.path.join(db_dir, "db.json")})
    rapidfire(self.lp, fworker=worker)

    task_doc = self.get_task_collection().find_one({"spectrum_type": "ELNES"})
    self._check_run(task_doc)
def __init__(self, state, launch_dir, fworker=None, host=None, ip=None, trackers=None,
             action=None, state_history=None, launch_id=None, fw_id=None):
    """
    Build a Launch record; falsy optional arguments are replaced by defaults.

    Args:
        state (str): the state of the Launch (e.g. RUNNING, COMPLETED); must be a
            key of Firework.STATE_RANKS
        launch_dir (str): the directory where the Launch takes place
        fworker (FWorker): The FireWorker running the Launch (a new FWorker if falsy)
        host (str): the hostname where the launch took place (get_my_host() if falsy)
        ip (str): the IP address where the launch took place (get_my_ip() if falsy)
        trackers ([Tracker]): File Trackers for this Launch (empty list if falsy)
        action (FWAction): the output of the Launch (None if falsy)
        state_history ([dict]): a history of all states of the Launch and when they
            occurred (empty list if falsy)
        launch_id (int): launch_id set by the LaunchPad
        fw_id (int): id of the Firework this Launch is running

    Raises:
        ValueError: if state is not a known Firework state.
    """
    if state not in Firework.STATE_RANKS:
        raise ValueError("Invalid launch state: {}".format(state))

    self.launch_dir = launch_dir
    self.fworker = fworker or FWorker()
    self.host = host or get_my_host()
    self.ip = ip or get_my_ip()
    self.trackers = trackers or []
    self.action = action or None
    self.state_history = state_history or []
    # state is assigned only after state_history, as in the original ordering
    self.state = state
    self.launch_id = launch_id
    self.fw_id = fw_id
def do_launch(args):
    """Resolve config files from ``args`` and submit to the queue.

    For each of the launchpad, fworker and queue adapter files, a path given
    on ``args`` wins; otherwise the matching ``my_*.yaml`` inside
    ``args.config_dir`` is used when it exists, and failing that the
    module-level ``*_LOC`` default is used.

    Args:
        args: parsed command-line namespace. Reads ``launchpad_file``,
            ``fworker_file``, ``queueadapter_file``, ``config_dir``,
            ``loglvl``, ``silencer``, ``command``, ``launch_dir``,
            ``reserve``, ``fill_mode`` and, depending on the command,
            ``nlaunches``, ``maxjobs_queue``, ``maxjobs_block``, ``sleep``,
            ``timeout`` or ``fw_id``.
    """
    config_defaults = (
        ('launchpad_file', 'my_launchpad.yaml', LAUNCHPAD_LOC),
        ('fworker_file', 'my_fworker.yaml', FWORKER_LOC),
        ('queueadapter_file', 'my_qadapter.yaml', QUEUEADAPTER_LOC),
    )
    for attr, filename, fallback in config_defaults:
        # Only look in config_dir when no explicit path was supplied.
        if not getattr(args, attr):
            candidate = os.path.join(args.config_dir, filename)
            setattr(args, attr,
                    candidate if os.path.exists(candidate) else fallback)

    # Apply the silencer BEFORE building the default LaunchPad; previously the
    # LaunchPad was constructed with the un-silenced level, so -s had no
    # effect on it.
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # The fallback locations may themselves be unset, hence the checks.
    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(
        args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, qadapter=queueadapter,
                  launch_dir=args.launch_dir, nlaunches=args.nlaunches,
                  njobs_queue=args.maxjobs_queue,
                  njobs_block=args.maxjobs_block, sleep_time=args.sleep,
                  reserve=args.reserve, strm_lvl=args.loglvl,
                  timeout=args.timeout, fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl,
                               False, args.fill_mode, args.fw_id)
def rlaunch():
    """Command-line entry point: parse arguments and launch one or many Rockets.

    ``singleshot`` runs a single Rocket (optionally offline, in which case no
    LaunchPad is created); ``rapidfire`` hands control to ``rapidfire``.
    """
    m_description = (
        'This program launches one or more Rockets. A Rocket grabs a job from the central database and '
        'runs it. The "single-shot" option launches a single Rocket, '
        'whereas the "rapidfire" option loops until all FireWorks are completed.'
    )

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire', help='launch multiple Rockets (loop until all FireWorks complete)')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument(
        '--nlaunches',
        help='num_launches (int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)',
                              default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument(
        '-c', '--config_dir',
        help='path to a directory containing the config file (used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Fall back to the files in config_dir when no explicit path was given.
    if not args.launchpad_file:
        lp_candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(lp_candidate):
            args.launchpad_file = lp_candidate

    if not args.fworker_file:
        fw_candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(fw_candidate):
            args.fworker_file = fw_candidate

    if args.silencer:
        args.loglvl = 'CRITICAL'

    # --offline exists only on the singleshot sub-parser, so test the command first.
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    elif args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep, args.loglvl)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def setUpClass(cls):
    """Create the shared FWorker and a freshly reset test LaunchPad.

    Raises:
        unittest.SkipTest: if the LaunchPad cannot be created or reset.
    """
    cls.lp = None
    cls.fworker = FWorker()
    try:
        cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
        cls.lp.reset(password=None, require_password=False)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not converted into a skip.
        raise unittest.SkipTest('MongoDB is not running in localhost:27017! Skipping tests.')
def get_fworker(fworker):
    """Return the FWorker to use.

    A truthy ``fworker`` is returned unchanged. Otherwise the worker is loaded
    from ``FWORKER_LOC`` when that is set, else a default ``FWorker`` is built.
    """
    if fworker:
        return fworker
    if FWORKER_LOC:
        return FWorker.from_file(FWORKER_LOC)
    return FWorker()
def rapidfire(launchpad, fworker=None, m_dir=None, nlaunches=0, max_loops=-1, sleep_time=None,
              strm_lvl='INFO'):
    """
    Keeps running Rockets in m_dir until we reach an error. Automatically creates
    subdirectories for each Rocket. Usually stops when we run out of FireWorks from
    the LaunchPad. The working directory is set back to m_dir (or the directory
    that was current on entry) when the function exits.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker object) a default FWorker is created if falsy
    :param m_dir: (str) the directory in which to loop Rocket running
    :param nlaunches: (int) 0 means 'until completion', -1 or "infinite" means to loop forever
    :param max_loops: (int) maximum number of loops
    :param sleep_time: (int) secs to sleep between rapidfire loop iterations
        (RAPIDFIRE_SLEEP_SECS if falsy)
    :param strm_lvl: (str) level at which to output logs to stdout
    """
    sleep_time = sleep_time if sleep_time else RAPIDFIRE_SLEEP_SECS
    curdir = m_dir if m_dir else os.getcwd()
    l_logger = get_fw_logger('rocket.launcher', l_dir=launchpad.get_logdir(),
                             stream_level=strm_lvl)
    nlaunches = -1 if nlaunches == 'infinite' else int(nlaunches)
    fworker = fworker if fworker else FWorker()

    num_launched = 0
    num_loops = 0

    try:
        while num_loops != max_loops:
            while launchpad.run_exists(fworker):
                os.chdir(curdir)
                launcher_dir = create_datestamp_dir(curdir, l_logger, prefix='launcher_')
                os.chdir(launcher_dir)
                rocket_ran = launch_rocket(launchpad, fworker, strm_lvl=strm_lvl)
                if rocket_ran:
                    num_launched += 1
                elif not os.listdir(launcher_dir):
                    # remove the empty shell of a directory
                    os.chdir(curdir)
                    os.rmdir(launcher_dir)
                if num_launched == nlaunches:
                    break
                # add a small amount of buffer breathing time for DB to refresh, etc.
                time.sleep(0.15)
            if num_launched == nlaunches or nlaunches == 0:
                break
            log_multi(l_logger, 'Sleeping for {} secs'.format(sleep_time))
            time.sleep(sleep_time)
            num_loops += 1
            log_multi(l_logger, 'Checking for FWs to run...')
    finally:
        # Do not leave the process sitting inside the last launcher directory.
        os.chdir(curdir)
def rapidfire(launchpad, fworker=None, m_dir=None, nlaunches=0, max_loops=-1, sleep_time=None,
              strm_lvl='INFO', timeout=None):
    """
    Run Rockets one after another inside m_dir, creating a datestamped
    sub-directory per Rocket, until no runnable FireWorks remain or one of the
    limits below is hit. The working directory is set back to m_dir (or the
    directory that was current on entry) at the end.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker object) a default FWorker is created if falsy
    :param m_dir: (str) the directory in which to loop Rocket running
    :param nlaunches: (int) 0 means 'until completion', -1 or "infinite" means to loop until max_loops
    :param max_loops: (int) maximum number of loops (default -1 is infinite)
    :param sleep_time: (int) secs to sleep between rapidfire loop iterations
        (RAPIDFIRE_SLEEP_SECS if falsy)
    :param strm_lvl: (str) level at which to output logs to stdout
    :param timeout: (int) # of seconds after which to stop the rapidfire process
    """
    sleep_time = sleep_time or RAPIDFIRE_SLEEP_SECS
    curdir = m_dir or os.getcwd()
    l_logger = get_fw_logger('rocket.launcher', l_dir=launchpad.get_logdir(),
                             stream_level=strm_lvl)
    if nlaunches == 'infinite':
        nlaunches = -1
    else:
        nlaunches = int(nlaunches)
    fworker = fworker or FWorker()

    num_launched = 0
    start_time = datetime.now()
    num_loops = 0

    def time_remains():
        # With no timeout configured there is always time left.
        return not timeout or (datetime.now() - start_time).total_seconds() < timeout

    while num_loops != max_loops and time_remains():
        skip_check = False  # this is used to speed operation
        while (skip_check or launchpad.run_exists(fworker)) and time_remains():
            os.chdir(curdir)
            launcher_dir = create_datestamp_dir(curdir, l_logger, prefix='launcher_')
            os.chdir(launcher_dir)

            if launch_rocket(launchpad, fworker, strm_lvl=strm_lvl):
                num_launched += 1
            elif not os.listdir(launcher_dir):
                # remove the empty shell of a directory
                os.chdir(curdir)
                os.rmdir(launcher_dir)

            if num_launched == nlaunches:
                break

            # don't wait when more work is ready: pull the next FW right away
            skip_check = bool(launchpad.run_exists(fworker))
            if not skip_check:
                # add a small amount of buffer breathing time for DB to refresh
                # in case we have a dynamic WF
                time.sleep(0.15)

        if nlaunches == 0 or num_launched == nlaunches:
            break

        log_multi(l_logger, 'Sleeping for {} secs'.format(sleep_time))
        time.sleep(sleep_time)
        num_loops += 1
        log_multi(l_logger, 'Checking for FWs to run...')

    os.chdir(curdir)
def test_fw_env(self):
    """Launch DummyFWEnvTask with two workers and check the stored ``data`` value.

    The launch with ``self.fworker`` must store "hello"; the launch with a
    worker whose env maps "hello" to "world" must store "world".
    """
    fw = Firework(DummyFWEnvTask())

    def _launch_and_read(worker, launch_id):
        self.lp.add_wf(fw)
        launch_rocket(self.lp, worker)
        return self.lp.get_launch_by_id(launch_id).action.stored_data['data']

    self.assertEqual(_launch_and_read(self.fworker, 1), "hello")
    self.assertEqual(
        _launch_and_read(FWorker(env={"hello": "world"}), 2), "world")
def setup_fireworks(cls):
    """
    Sets up the fworker and launchpad if a connection to a local mongodb is
    available. cls.lp is set to None if the LaunchPad cannot be created or reset.
    """
    cls.fworker = FWorker()
    try:
        cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
        cls.lp.reset(password=None, require_password=False)
    except Exception:
        # Best-effort setup: any ordinary failure just disables the LaunchPad.
        # Exception (not a bare except) lets KeyboardInterrupt / SystemExit
        # propagate instead of being silently swallowed.
        cls.lp = None
def basic_fw_ex():
    """Add a one-task Firework to the LaunchPad from ``setup()`` and launch one Rocket."""
    print("--- BASIC FIREWORK EXAMPLE ---")

    # setup
    lpad = setup()

    # add Firework
    greeting = ScriptTask.from_str('echo "howdy, your job launched successfully!"')
    lpad.add_wf(Firework(greeting))

    # launch Rocket
    launch_rocket(lpad, FWorker())
def mlaunch():
    """Command-line entry point: parse arguments and run ``launch_multiprocess``."""
    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument(
        '--nlaunches',
        help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    parser.add_argument(
        '--sleep', help='sleep time between loops in infinite launch mode (secs)',
        default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument(
        '-c', '--config_dir',
        help='path to a directory containing the config file (used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')
    parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name containing the node file name (for populating FWData only)',
        default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                        default=1, type=int)

    args = parser.parse_args()

    # Fall back to the files in config_dir when no explicit path was given.
    if not args.launchpad_file and args.config_dir:
        lp_candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(lp_candidate):
            args.launchpad_file = lp_candidate

    if not args.fworker_file and args.config_dir:
        fw_candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(fw_candidate):
            args.fworker_file = fw_candidate

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the real path.
        args.nodefile = os.environ.get(args.nodefile, args.nodefile)
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn)
def rapidfire(launchpad, fworker=None, m_dir=None, logdir=None, strm_lvl=None, nlaunches=0,
              sleep_time=60, max_loops=-1):
    """
    Keeps running Rockets in m_dir until we reach an error. Automatically creates
    subdirectories for each Rocket. Usually stops when we run out of FireWorks from
    the LaunchPad. The working directory is always set back to m_dir (or the
    directory that was current on entry) on exit, including when an exception
    propagates.

    :param launchpad: a LaunchPad object
    :param fworker: a FWorker object (a default FWorker is created if falsy)
    :param m_dir: the directory in which to loop Rocket running
    :param logdir: directory passed to the logger and to launch_rocket
    :param strm_lvl: stream log level passed to the logger and to launch_rocket
    :param nlaunches: 0 means 'until completion', -1 (or 'infinite') means 'infinity'
    :param sleep_time: secs to sleep between loop iterations
    :param max_loops: maximum number of sleep loops (-1 never matches, i.e. no limit)
    """
    curdir = m_dir if m_dir else os.getcwd()
    fworker = fworker if fworker else FWorker()

    # initialize logger
    l_logger = get_fw_logger('rocket.launcher', l_dir=logdir, stream_level=strm_lvl)
    nlaunches = -1 if nlaunches == 'infinite' else int(nlaunches)

    # TODO: wrap in try-except. Use log_exception for exceptions EXCEPT running out of jobs.
    # TODO: delete the chdir cruft from MongoTests now that the cwd is restored here

    num_launched = 0
    num_loops = 0

    try:
        while num_loops != max_loops:
            while launchpad.run_exists():
                os.chdir(curdir)
                launcher_dir = create_datestamp_dir(curdir, l_logger, prefix='launcher_')
                os.chdir(launcher_dir)
                launch_rocket(launchpad, fworker, logdir, strm_lvl)
                num_launched += 1
                if num_launched == nlaunches:
                    break
                # add a small amount of buffer breathing time for DB to refresh, etc.
                time.sleep(0.1)
            if num_launched == nlaunches or nlaunches == 0:
                break
            l_logger.info('Sleeping for {} secs'.format(sleep_time))
            time.sleep(sleep_time)
            num_loops += 1
            l_logger.info('Checking for FWs to run...')
    finally:
        # always chdir() back to curdir when finished
        os.chdir(curdir)
def multiple_tasks_ex():
    """Add one Firework made of three script tasks and run it via rapidfire."""
    print("--- MULTIPLE FIRETASKS EXAMPLE ---")

    # setup
    lpad = setup()

    # add FireWorks
    commands = (
        'echo "This is TASK #1"',
        'echo "This is TASK #2"',
        'echo "This is TASK #3"',
    )
    tasks = [ScriptTask.from_str(cmd) for cmd in commands]
    lpad.add_wf(Firework(tasks))

    # launch Rocket
    rapidfire(lpad, FWorker())
def launch_rocket(launchpad, fworker=None, fw_id=None, strm_lvl='INFO'):
    """
    Run a single rocket in the current directory

    :param launchpad: (LaunchPad) may be falsy, in which case no log directory is used
    :param fworker: (FWorker) a default FWorker is created if falsy
    :param fw_id: (int) if set, a particular Firework to run
    :param strm_lvl: (str) level at which to output logs to stdout
    :return: whatever Rocket.run() returns
    """
    if not fworker:
        fworker = FWorker()

    log_dir = None
    if launchpad:
        log_dir = launchpad.get_logdir()
    l_logger = get_fw_logger('rocket.launcher', l_dir=log_dir, stream_level=strm_lvl)

    log_multi(l_logger, 'Launching Rocket')
    ran = Rocket(launchpad, fworker, fw_id).run()
    log_multi(l_logger, 'Rocket finished')
    return ran
def launch_rocket(launchpad, fworker=None, logdir=None, strm_lvl=None, fw_id=None):
    """
    Run a single rocket in the current directory

    :param launchpad: a LaunchPad object
    :param fworker: a FWorker object (a default FWorker is created if falsy)
    :param logdir: directory handed to the logger
    :param strm_lvl: stream log level handed to the logger
    :param fw_id: Firework id handed to the Rocket
    """
    if not fworker:
        fworker = FWorker()

    l_logger = get_fw_logger('rocket.launcher', l_dir=logdir, stream_level=strm_lvl)

    l_logger.info('Launching Rocket')
    Rocket(launchpad, fworker, fw_id).run()
    l_logger.info('Rocket finished')
def test_basic_fw_offline(self):
    """Reserve a Firework, run it offline, and recover the result into the LaunchPad.

    Checks the ``FW_offline.json`` and ``FW_ping.json`` files written in the
    current directory, then recovers the offline run and checks the stored
    stdout of launch 1.
    """
    test1 = ScriptTask.from_str("python -c 'print(\"test1\")'",
                                {'store_stdout': True})
    fw = Firework(test1)
    self.lp.add_wf(fw)

    fw, launch_id = self.lp.reserve_fw(FWorker(), os.getcwd())
    setup_offline_job(self.lp, fw, launch_id)

    # No LaunchPad is passed to the rocket for the offline run.
    launch_rocket(None, self.fworker)

    with open(os.path.join(os.getcwd(), "FW_offline.json")) as f:
        fwo = json.load(f)

    # assertEqual replaces the assertEquals alias, which was removed in Python 3.12.
    self.assertEqual(fwo["state"], "COMPLETED")
    self.assertEqual(fwo["launch_id"], 1)
    self.assertEqual(
        fwo["fwaction"],
        {
            'update_spec': {},
            'mod_spec': [],
            'stored_data': {
                'returncode': 0,
                'stdout': u'test1\n',
                'all_returncodes': [0],
            },
            'exit': False,
            'detours': [],
            'additions': [],
            'defuse_children': False,
        })

    with open(os.path.join(os.getcwd(), "FW_ping.json")) as f:
        fwp = json.load(f)
    self.assertIsNotNone(fwp["ping_time"])

    offline_run = self.lp.offline_runs.find_one(
        {"completed": False, "deprecated": False},
        {"launch_id": 1})
    self.lp.recover_offline(offline_run['launch_id'])
    self.assertEqual(
        self.lp.get_launch_by_id(1).action.stored_data['stdout'], 'test1\n')
def rapid_fire_ex():
    """Add three single-task Fireworks sharing one task and run them via rapidfire."""
    print("--- RAPIDFIRE EXAMPLE ---")

    # setup
    lpad = setup()

    # add FireWorks
    firetask = ScriptTask.from_str('echo "howdy, your job launched successfully!"')
    # the same task is wrapped in a new Firework and re-added each time
    for _ in range(3):
        lpad.add_wf(Firework(firetask))

    # launch Rocket
    rapidfire(lpad, FWorker())
def test_wf(self):
    """Simulate and run workflows 1-6; the workflow holding fw_id 1 must fully complete."""
    attr_names = ["wf_{}".format(idx) for idx in range(1, 7)]

    # Replace every stored workflow with its simulated counterpart first...
    for attr in attr_names:
        setattr(self, attr, self._simulate_vasprun(getattr(self, attr)))

    # ...then register all of them with the LaunchPad.
    for attr in attr_names:
        self.lp.add_wf(getattr(self, attr))

    # Use scratch directory as destination directory for testing
    worker = FWorker(env={"run_dest_root": self.scratch_dir})
    rapidfire(self.lp, fworker=worker)

    wf = self.lp.get_wf_by_fw_id(1)
    self.assertTrue(all(state == 'COMPLETED' for state in wf.fw_states.values()))
def test_multi_fw_complex(self):
    """Write a templated file, copy it, and check both files' contents.

    Both output files live next to this test module and are removed via
    ``self._teardown`` before and after the run.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    dest1 = os.path.join(here, 'inputs.txt')
    dest2 = os.path.join(here, 'temp_file.txt')
    outputs = [dest1, dest2]

    self._teardown(outputs)
    try:
        # create the Firework consisting of multiple tasks
        write_template = TemplateWriterTask({
            'context': {'opt1': 5.0, 'opt2': 'fast method'},
            'template_file': 'simple_template.txt',
            'output_file': dest1,
        })
        copy_file = FileTransferTask({
            'files': [{'src': dest1, 'dest': dest2}],
            'mode': 'copy',
        })
        fw = Firework([write_template, copy_file])

        # store workflow and launch it locally, single shot
        self.lp.add_wf(fw)
        launch_rocket(self.lp, FWorker())

        # read both the written file and its copy
        for path in outputs:
            with open(path) as f:
                self.assertEqual(f.read(), 'option1 = 5.0\noption2 = fast method')
    finally:
        self._teardown(outputs)
def basic_wf_ex():
    """Add a four-Firework workflow (1 -> 2, 3 -> 4) and run it via rapidfire."""
    print("--- BASIC WORKFLOW EXAMPLE ---")

    # setup
    lpad = setup()

    # add FireWorks
    commands = (
        'echo "Ingrid is the CEO."',
        'echo "Jill is a manager."',
        'echo "Jack is a manager."',
        'echo "Kip is an intern."',
    )
    tasks = [ScriptTask.from_str(cmd) for cmd in commands]
    # fw_ids run from 1 to 4, in the same order as the commands
    fireworks = [Firework(task, fw_id=idx) for idx, task in enumerate(tasks, start=1)]

    # make workflow
    links = {1: [2, 3], 2: [4], 3: [4]}
    lpad.add_wf(Workflow(fireworks, links))

    # launch Rocket
    rapidfire(lpad, FWorker())
def rlaunch():
    """Command-line entry point: parse arguments and launch Rockets.

    Sub-commands:
        singleshot: run one Rocket via ``launch_rocket`` (with ``--offline``
            no LaunchPad is created).
        rapidfire: hand control to ``rapidfire``.
        multi: run several jobs in parallel via ``launch_multiprocess``.

    The launchpad and fworker files come from ``-l`` / ``-w`` when given,
    otherwise from ``my_launchpad.yaml`` / ``my_fworker.yaml`` inside the
    config directory when those files exist.
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire', help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run',
                               default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; '
                                   'default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    rapid_parser.add_argument(
        '--max_loops',
        help='after this many sleep loops, quit even in '
             'infinite nlaunches mode (default -1 is infinite loops)',
        default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)',
                              default=None, type=int)
    rapid_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches',
                              help='number of FireWorks to run in series per '
                                   'parallel job (int or "infinite"; default 0 is '
                                   'all jobs in DB)',
                              default=0)
    # Fix: the adjacent literals used to join as "...launch mode(secs)".
    multi_parser.add_argument(
        '--sleep',
        help='sleep time between loops in infinite launch mode '
             '(secs)',
        default=None, type=int)
    multi_parser.add_argument(
        '--timeout', help='timeout (secs) after which to quit (default None)',
        default=None, type=int)
    multi_parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name '
             'containing the node file name (for populating'
             ' FWData only)',
        default=None, type=str)
    multi_parser.add_argument(
        '--ppn', help='processors per node (for populating FWData only)',
        default=1, type=int)
    # Fix: the adjacent literals used to join as "...launching nodeas compute node".
    multi_parser.add_argument('--exclude_current_node',
                              help="Don't use the script launching node "
                                   "as compute node",
                              action="store_true")
    multi_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir',
                        help='path to a directory containing the config file '
                             '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages',
                        action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # --offline exists only on the singleshot sub-parser, so test the command first.
    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout, local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable that holds the real path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches,
                            args.num_jobs, args.sleep, total_node_list, args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
def launch_rocket_to_queue(launchpad, fworker, qadapter, launcher_dir='.', reserve=False, strm_lvl='INFO', create_launcher_dir=False, fill_mode=False, fw_id=None):
    """
    Submit a single job to the queue.

    Args:
        launchpad (LaunchPad)
        fworker (FWorker): a default FWorker is created if falsy
        qadapter (QueueAdapterBase): copied before use; the caller's object is
            not the one that gets updated
        launcher_dir (str): The directory where to submit the job
        reserve (bool): Whether to queue in reservation mode
        strm_lvl (str): level at which to stream log messages
        create_launcher_dir (bool): Whether to create a subfolder launcher+timestamp, if needed
        fill_mode (bool): whether to submit jobs even when there is nothing to run
            (only in non-reservation mode)
        fw_id (int): specific fw_id to reserve (reservation mode only)

    Returns:
        The reservation id returned by the queue adapter on success; False if
        no Firework could be reserved or if any error occurred while
        preparing/submitting; None if there was nothing to run.

    Raises:
        ValueError: if launcher_dir does not exist, or the combination of
            reserve / fill_mode / fw_id / the adapter's rocket_launch command
            is not allowed (see the checks below).
    """
    fworker = fworker if fworker else FWorker()
    launcher_dir = os.path.abspath(launcher_dir)
    l_logger = get_fw_logger('queue.launcher', l_dir=launchpad.logdir, stream_level=strm_lvl)

    l_logger.debug('getting queue adapter')
    qadapter = load_object(qadapter.to_dict( ))  # make a defensive copy, mainly for reservation mode

    fw, launch_id = None, None  # only needed in reservation mode

    # --- argument validation: these errors propagate to the caller ---
    if not os.path.exists(launcher_dir):
        raise ValueError( 'Desired launch directory {} does not exist!'.format(launcher_dir))

    if '--offline' in qadapter['rocket_launch'] and not reserve:
        raise ValueError("Must use reservation mode (-r option) of qlaunch " "when using offline option of rlaunch!!")

    if reserve and 'singleshot' not in qadapter.get('rocket_launch', ''):
        raise ValueError( 'Reservation mode of queue launcher only works for singleshot Rocket Launcher!' )

    if fill_mode and reserve:
        raise ValueError( "Fill_mode cannot be used in conjunction with reserve mode!")

    if fw_id and not reserve:
        raise ValueError( "qlaunch for specific fireworks may only be used in reservation mode." )

    # fill_mode submits even when the LaunchPad reports nothing runnable
    if fill_mode or launchpad.run_exists(fworker):
        launch_id = None
        try:
            if reserve:
                if fw_id:
                    l_logger.debug('finding a FW to reserve...')
                # NOTE(review): the reservation is assumed to sit outside the
                # `if fw_id` above (reserve mode without fw_id needs it too) -
                # confirm against the original layout.
                fw, launch_id = launchpad.reserve_fw(fworker, launcher_dir, fw_id=fw_id)
                if not fw:
                    l_logger.info( 'No jobs exist in the LaunchPad for submission to queue!' )
                    return False
                l_logger.info('reserved FW with fw_id: {}'.format(fw.fw_id))

                # update qadapter job_name based on FW name
                job_name = get_slug(fw.name)[0:QUEUE_JOBNAME_MAXLEN]
                qadapter.update({'job_name': job_name})

                if '_queueadapter' in fw.spec:
                    l_logger.debug( 'updating queue params using Firework spec..')
                    qadapter.update(fw.spec['_queueadapter'])

                # reservation mode includes --fw_id in rocket launch
                qadapter['rocket_launch'] += ' --fw_id {}'.format(fw.fw_id)

                # update launcher_dir if _launch_dir is selected in reserved fw
                if '_launch_dir' in fw.spec:
                    fw_launch_dir = os.path.expandvars(fw.spec['_launch_dir'])
                    # a relative _launch_dir is resolved against launcher_dir
                    if not os.path.isabs(fw_launch_dir):
                        fw_launch_dir = os.path.join(launcher_dir, fw_launch_dir)
                    launcher_dir = fw_launch_dir
                    makedirs_p(launcher_dir)
                    launchpad.change_launch_dir(launch_id, launcher_dir)
                elif create_launcher_dir:
                    # create launcher_dir
                    launcher_dir = create_datestamp_dir(launcher_dir, l_logger, prefix='launcher_')
                    launchpad.change_launch_dir(launch_id, launcher_dir)

            elif create_launcher_dir:
                # create launcher_dir
                launcher_dir = create_datestamp_dir(launcher_dir, l_logger, prefix='launcher_')

            # move to the launch directory
            l_logger.info('moving to launch_dir {}'.format(launcher_dir))

            with cd(launcher_dir):
                if '--offline' in qadapter['rocket_launch']:
                    setup_offline_job(launchpad, fw, launch_id)

                l_logger.debug('writing queue script')
                with open(SUBMIT_SCRIPT_NAME, 'w') as f:
                    queue_script = qadapter.get_script_str(launcher_dir)
                    f.write(queue_script)

                l_logger.info('submitting queue script')
                reservation_id = qadapter.submit_to_queue(SUBMIT_SCRIPT_NAME)
                if not reservation_id:
                    # caught by the bare except below, so callers see False
                    # rather than this RuntimeError
                    raise RuntimeError( 'queue script could not be submitted, check queue ' 'script/queue adapter/queue server status!')
                elif reserve:
                    launchpad.set_reservation_id(launch_id, reservation_id)
            return reservation_id

        except:
            # Bare except: every failure above (including interrupts) is
            # logged, the reservation is rolled back if one was made, and
            # False is returned.
            log_exception(l_logger, 'Error writing/submitting queue script!')
            if reserve and launch_id is not None:
                try:
                    l_logger.info( 'Un-reserving FW with fw_id, launch_id: {}, {}'.format( fw.fw_id, launch_id))
                    launchpad.cancel_reservation(launch_id)
                    launchpad.forget_offline(launch_id)
                except:
                    log_exception( l_logger, 'Error unreserving FW with fw_id {}'.format(fw.fw_id))

            return False

    else:
        l_logger.info( 'No jobs exist in the LaunchPad for submission to queue!')
        return None  # note: this is a hack (rather than False) to indicate a soft failure to rapidfire()
def launch_rocket_to_queue(launchpad, fworker, qadapter, launcher_dir='.',
                           reserve=False, strm_lvl='INFO',
                           create_launcher_dir=False):
    """
    Submit a single job to the queue.

    The queue adapter is copied before use, so the per-Firework overrides
    applied in reservation mode are made on the copy rather than on the
    object passed in by the caller.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker) a default FWorker() is used when this is falsy
    :param qadapter: (QueueAdapterBase)
    :param launcher_dir: (str) The directory where to submit the job
    :param reserve: (bool) Whether to queue in reservation mode
    :param strm_lvl: (str) level at which to stream log messages
    :param create_launcher_dir: (bool) Whether to create a subfolder
        launcher+timestamp, if needed
    :return: the value returned by qadapter.submit_to_queue() on success;
        False when no job is available or when writing/submitting fails
    :raises ValueError: if launcher_dir does not exist, if the offline option
        is requested without reservation mode, or if reservation mode is
        requested with a rocket launch command that is not 'singleshot'
    """
    fworker = fworker if fworker else FWorker()
    launcher_dir = os.path.abspath(launcher_dir)
    l_logger = get_fw_logger('queue.launcher', l_dir=launchpad.logdir,
                             stream_level=strm_lvl)

    l_logger.debug('getting queue adapter')
    # make a defensive copy, mainly for reservation mode
    qadapter = load_object(qadapter.to_dict())

    # Only populated in reservation mode. They are bound before the try block
    # so the exception handler can always tell whether a reservation exists.
    fw, launch_id = None, None

    if not os.path.exists(launcher_dir):
        raise ValueError(
            'Desired launch directory {} does not exist!'.format(launcher_dir))

    if '--offline' in qadapter['rocket_launch'] and not reserve:
        raise ValueError(
            "Must use reservation mode (-r option) of qlaunch when using "
            "offline option of rlaunch!!")

    if reserve and 'singleshot' not in qadapter.get('rocket_launch', ''):
        raise ValueError(
            'Reservation mode of queue launcher only works for singleshot '
            'Rocket Launcher!')

    if not launchpad.run_exists(fworker):
        l_logger.info(
            'No jobs exist in the LaunchPad for submission to queue!')
        return False

    try:
        if reserve:
            l_logger.debug('finding a FW to reserve...')
            fw, launch_id = launchpad.reserve_fw(fworker, launcher_dir)
            if not fw:
                l_logger.info(
                    'No jobs exist in the LaunchPad for submission to queue!')
                return False
            l_logger.info('reserved FW with fw_id: {}'.format(fw.fw_id))

            # update qadapter job_name based on FW name
            job_name = get_slug(fw.name)[0:QUEUE_JOBNAME_MAXLEN]
            qadapter.update({'job_name': job_name})

            if '_queueadapter' in fw.spec:
                l_logger.debug('updating queue params using Firework spec..')
                qadapter.update(fw.spec['_queueadapter'])

            # reservation mode includes --fw_id in rocket launch
            qadapter['rocket_launch'] += ' --fw_id {}'.format(fw.fw_id)

            # update launcher_dir if _launch_dir is selected in reserved fw
            if '_launch_dir' in fw.spec:
                fw_launch_dir = os.path.expandvars(fw.spec['_launch_dir'])

                # relative _launch_dir values are resolved against the
                # launcher_dir that was passed in
                if not os.path.isabs(fw_launch_dir):
                    fw_launch_dir = os.path.join(launcher_dir, fw_launch_dir)

                launcher_dir = fw_launch_dir

                # an already-existing directory is fine; anything else is not
                try:
                    os.makedirs(launcher_dir)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise

                launchpad.change_launch_dir(launch_id, launcher_dir)
            elif create_launcher_dir:
                # create launcher_dir
                launcher_dir = create_datestamp_dir(launcher_dir, l_logger,
                                                    prefix='launcher_')
                launchpad.change_launch_dir(launch_id, launcher_dir)

        elif create_launcher_dir:
            # create launcher_dir
            launcher_dir = create_datestamp_dir(launcher_dir, l_logger,
                                                prefix='launcher_')

        # move to the launch directory
        l_logger.info('moving to launch_dir {}'.format(launcher_dir))

        with cd(launcher_dir):
            if '--offline' in qadapter['rocket_launch']:
                setup_offline_job(launchpad, fw, launch_id)

            l_logger.debug('writing queue script')
            with open(SUBMIT_SCRIPT_NAME, 'w') as f:
                queue_script = qadapter.get_script_str(launcher_dir)
                f.write(queue_script)

            l_logger.info('submitting queue script')
            reservation_id = qadapter.submit_to_queue(SUBMIT_SCRIPT_NAME)
            if not reservation_id:
                # the handler below releases the reservation, if there is one
                raise RuntimeError(
                    'queue script could not be submitted, check queue '
                    'script/queue adapter/queue server status!')
            elif reserve:
                launchpad.set_reservation_id(launch_id, reservation_id)
            return reservation_id

    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into a `False` return value.
        log_exception(l_logger, 'Error writing/submitting queue script!')

        # Release the reservation on *any* failure after reserve_fw(), not
        # only when the queue adapter returns an empty reservation id.
        if reserve and launch_id is not None:
            reserved_fw_id = getattr(fw, 'fw_id', None)
            try:
                l_logger.info(
                    'Un-reserving FW with fw_id, launch_id: {}, {}'.format(
                        reserved_fw_id, launch_id))
                launchpad.cancel_reservation(launch_id)
            except Exception:
                log_exception(
                    l_logger,
                    'Error unreserving FW with fw_id {}'.format(
                        reserved_fw_id))

        return False
def launch_rocket_to_queue(launchpad, fworker, qadapter, launcher_dir='.',
                           reserve=False, strm_lvl='INFO'):
    """
    Submit a single job to the queue.

    Note that this function changes the process working directory to
    launcher_dir (via os.chdir) and leaves it there when it returns.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker) a default FWorker() is used when this is falsy
    :param qadapter: (QueueAdapterBase)
    :param launcher_dir: (str) The directory where to submit the job
    :param reserve: (bool) Whether to queue in reservation mode
    :param strm_lvl: (str) level at which to stream log messages
    :return: the value returned by qadapter.submit_to_queue() on success;
        False when no job is available or when any error occurs while
        reserving, writing or submitting (such errors are logged, not raised)
    :raises ValueError: if launcher_dir does not exist
    """
    fworker = fworker if fworker else FWorker()
    launcher_dir = os.path.abspath(launcher_dir)
    l_logger = get_fw_logger('queue.launcher', l_dir=launchpad.logdir,
                             stream_level=strm_lvl)

    # get the queue adapter
    l_logger.debug('getting queue adapter')
    # make a defensive copy, mainly for reservation mode
    qadapter = load_object(qadapter.to_dict())

    # make sure launch_dir exists:
    if not os.path.exists(launcher_dir):
        raise ValueError(
            'Desired launch directory {} does not exist!'.format(launcher_dir))

    if not launchpad.run_exists(fworker):
        l_logger.info(
            'No jobs exist in the LaunchPad for submission to queue!')
        return False

    # Bound before the try block: the finally clause reads this name, and it
    # must exist even when the very first statements inside the try raise.
    oldlaunch_dir = None

    try:
        # move to the launch directory
        l_logger.info('moving to launch_dir {}'.format(launcher_dir))
        os.chdir(launcher_dir)

        if '--offline' in qadapter['rocket_launch'] and not reserve:
            raise ValueError(
                "Must use reservation mode (-r option) of qlaunch when using "
                "offline mode (--offline option) of rlaunch!!")
        elif reserve:
            l_logger.debug('finding a FW to reserve...')
            fw, launch_id = launchpad._reserve_fw(fworker, launcher_dir)
            if not fw:
                l_logger.info(
                    'No jobs exist in the LaunchPad for submission to queue!')
                return False
            l_logger.info('reserved FW with fw_id: {}'.format(fw.fw_id))

            # set the job name to the FW name, capped at 20 characters
            job_name = get_slug(fw.name)[0:20]
            qadapter.update({'job_name': job_name})

            if '_queueadapter' in fw.spec:
                l_logger.debug('updating queue params using FireWork spec..')
                qadapter.update(fw.spec['_queueadapter'])

            # update the exe to include the FW_id
            if 'singleshot' not in qadapter.get('rocket_launch', ''):
                raise ValueError(
                    'Reservation mode of queue launcher only works for '
                    'singleshot Rocket Launcher!')
            qadapter['rocket_launch'] += ' --fw_id {}'.format(fw.fw_id)

        # Only reachable in reservation mode (offline without reserve raised
        # above), so fw and launch_id are bound whenever this branch runs.
        if '--offline' in qadapter['rocket_launch']:
            # handle _launch_dir parameter early...
            if '_launch_dir' in fw.spec:
                os.chdir(fw.spec['_launch_dir'])
                oldlaunch_dir = launcher_dir
                launcher_dir = os.path.abspath(os.getcwd())
                launchpad._change_launch_dir(launch_id, launcher_dir)

            # write FW.json
            fw.to_file("FW.json")
            # write Launchid
            with open('FW_offline.json', 'w') as f:
                f.write('{"launch_id":%s}' % launch_id)

            launchpad.add_offline_run(launch_id, fw.fw_id, fw.name)

        # write and submit the queue script using the queue adapter
        l_logger.debug('writing queue script')
        with open(SUBMIT_SCRIPT_NAME, 'w') as f:
            queue_script = qadapter.get_script_str(launcher_dir)
            f.write(queue_script)

        l_logger.info('submitting queue script')
        reservation_id = qadapter.submit_to_queue(SUBMIT_SCRIPT_NAME)
        if not reservation_id:
            raise RuntimeError(
                'queue script could not be submitted, check queue adapter '
                'and queue server status!')
        elif reserve:
            launchpad.set_reservation_id(launch_id, reservation_id)
        return reservation_id

    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into a `False` return value.
        log_exception(l_logger, 'Error writing/submitting queue script!')
        return False
    finally:
        if oldlaunch_dir:
            # this only matters in --offline mode with _launch_dir!
            os.chdir(oldlaunch_dir)