Example #1
0
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir,
                                              'my_qadapter.yaml')

    launchpad = LaunchPad.from_file(
        args.launchpad_file) if args.launchpad_file else LaunchPad(
        strm_lvl=args.loglvl)
    fworker = FWorker.from_file(
        args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, queueadapter, args.launch_dir,
                  args.nlaunches, args.maxjobs_queue,
                  args.maxjobs_block, args.sleep, args.reserve, args.loglvl)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl, False)
Example #2
0
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir, 'my_qadapter.yaml')
    elif not args.queueadapter_file:
        args.queueadapter_file = QUEUEADAPTER_LOC

    launchpad = LaunchPad.from_file(
        args.launchpad_file) if args.launchpad_file else LaunchPad(
        strm_lvl=args.loglvl)
    fworker = FWorker.from_file(
        args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, qadapter=queueadapter, launch_dir=args.launch_dir,
                  nlaunches=args.nlaunches, njobs_queue=args.maxjobs_queue,
                  njobs_block=args.maxjobs_block, sleep_time=args.sleep,
                  reserve=args.reserve, strm_lvl=args.loglvl, timeout=args.timeout, fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl, False, args.fill_mode, args.fw_id)
Example #3
0
    def test_has_inserted(self):
        self.lp.add_wf(self.wf_stop_early)
        rapidfire(
            self.lp,
            fworker=FWorker(
                env={
                    "db_file": os.path.join(db_dir, "db.json"),
                    "vasp_cmd": ["echo", "fake"],
                }
            ),
        )
        formula = self.get_task_collection(coll_name="tasks").distinct("formula_pretty")
        self.assertEqual(set(formula), {"Y2Mg(PO4)2", "YPO4"})

        self.lp.add_wf(self.wf)
        rapidfire(
            self.lp,
            fworker=FWorker(
                env={
                    "db_file": os.path.join(db_dir, "db.json"),
                    "vasp_cmd": ["echo", "fake"],
                }
            ),
        )
        # Check that all of the inserted pretty formulas are present
        formula = self.get_task_collection(coll_name="tasks").distinct("formula_pretty")
        self.assertEqual(
            set(formula), {"Y2Mg(PO4)2", "Y2Mg3(PO4)2", "YMg2PO4", "YMgPO4", "YPO4"}
        )
Example #4
0
def rlaunch():

    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the central database and ' \
                    'runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exist on ^C

    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep, args.loglvl)

    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
Example #5
0
def get_fworker(fworker):
    if fworker:
        my_fwkr = fworker
    elif FWORKER_LOC:
        my_fwkr = FWorker.from_file(FWORKER_LOC)
    else:
        my_fwkr = FWorker()
    return my_fwkr
Example #6
0
    def test_category_pt2(self):
        task1 = ScriptTask.from_str('echo "Task 1"')
        task2 = ScriptTask.from_str('echo "Task 2"')

        fw1 = Firework(task1, fw_id=1, name='Task 1')
        fw2 = Firework(task2, fw_id=2, name='Task 2')

        self.lp.add_wf(Workflow([fw1, fw2]))

        self.assertFalse(self.lp.run_exists(FWorker(category="dummy_category")))
        self.assertTrue(self.lp.run_exists(FWorker(category="__none__")))
        self.assertTrue(self.lp.run_exists(FWorker())) # can run any category
        self.assertFalse(self.lp.run_exists(FWorker(category=["dummy_category",
                                                             "other category"])))
Example #7
0
def mlaunch():

    m_description = 'This program launches multiple Rockets simultaneously'

    parser = ArgumentParser(description=m_description)

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)', default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)', default=1, type=int)

    args = parser.parse_args()

    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn)
Example #8
0
def do_launch(args):
    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')
    elif not args.launchpad_file:
        args.launchpad_file = LAUNCHPAD_LOC

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')
    elif not args.fworker_file:
        args.fworker_file = FWORKER_LOC

    if not args.queueadapter_file and os.path.exists(
            os.path.join(args.config_dir, 'my_qadapter.yaml')):
        args.queueadapter_file = os.path.join(args.config_dir, 'my_qadapter.yaml')
    elif not args.queueadapter_file:
        args.queueadapter_file = QUEUEADAPTER_LOC

    launchpad = LaunchPad.from_file(
        args.launchpad_file) if args.launchpad_file else LaunchPad(
        strm_lvl=args.loglvl)
    fworker = FWorker.from_file(
        args.fworker_file) if args.fworker_file else FWorker()
    queueadapter = load_object_from_file(args.queueadapter_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, qadapter=queueadapter, launch_dir=args.launch_dir,
                  nlaunches=args.nlaunches, njobs_queue=args.maxjobs_queue,
                  njobs_block=args.maxjobs_block, sleep_time=args.sleep,
                  reserve=args.reserve, strm_lvl=args.loglvl, timeout=args.timeout, fill_mode=args.fill_mode)
    else:
        launch_rocket_to_queue(launchpad, fworker, queueadapter,
                               args.launch_dir, args.reserve, args.loglvl, False, args.fill_mode)
Example #9
0
    def test_wf(self):
        wf_1 = get_simulated_wf(self.wf_1)
        wf_2 = get_simulated_wf(self.wf_2)
        wf_3 = get_simulated_wf(self.wf_3)
        wf_4 = get_simulated_wf(self.wf_4)
        wf_5 = get_simulated_wf(self.wf_5)
        wf_6 = get_simulated_wf(self.wf_6)

        wf_1_ids = self.lp.add_wf(wf_1)
        wf_2_ids = self.lp.add_wf(wf_2)
        wf_3_ids = self.lp.add_wf(wf_3)
        wf_4_ids = self.lp.add_wf(wf_4)
        wf_5_ids = self.lp.add_wf(wf_5)
        wf_6_ids = self.lp.add_wf(wf_6)

        # get fw ids that can be used to identify the workflows from the DB
        fw_wf_1 = list(wf_1_ids.values())[0]
        fw_wf_2 = list(wf_2_ids.values())[0]
        fw_wf_3 = list(wf_3_ids.values())[0]
        fw_wf_4 = list(wf_4_ids.values())[0]
        fw_wf_5 = list(wf_5_ids.values())[0]
        fw_wf_6 = list(wf_6_ids.values())[0]

        fw_ids = [fw_wf_1, fw_wf_2, fw_wf_3, fw_wf_4, fw_wf_5, fw_wf_6]

        # Use scratch directory as destination directory for testing
        fworker = FWorker(env={"run_dest_root": self.scratch_dir})
        rapidfire(self.lp, fworker=fworker)

        for i in fw_ids:
            wf = self.lp.get_wf_by_fw_id(i)
            is_completed = [s == "COMPLETED" for s in wf.fw_states.values()]
            self.assertTrue(all(is_completed))
Example #10
0
    def test_eels_wflow_abatom_by_idx(self):
        # for the sake of test just copy xmu to eels
        xmu_file_path = os.path.abspath(
            os.path.join(module_dir, "../../test_files/xmu.dat"))
        feff_bin = f"cp {xmu_file_path} eels.dat"
        wf = get_wf_eels(
            self.absorbing_atom,
            self.structure,
            feff_input_set="ELNES",
            edge="L1",
            user_tag_settings=self.user_tag_settings,
            use_primitive=False,
            feff_cmd=feff_bin,
            db_file=">>db_file<<",
        )
        self.assertEqual(len(wf.as_dict()["fws"]), 1)

        self.lp.add_wf(wf)
        # run
        rapidfire(
            self.lp,
            fworker=FWorker(env={"db_file": os.path.join(db_dir, "db.json")}))

        d = self.get_task_collection().find_one({"spectrum_type": "ELNES"})
        self._check_run(d)
Example #11
0
 def from_dict(cls, m_dict):
     fworker = FWorker.from_dict(m_dict['fworker']) if m_dict['fworker'] else None
     action = FWAction.from_dict(m_dict['action']) if m_dict.get('action') else None
     trackers = [Tracker.from_dict(f) for f in m_dict['trackers']] if m_dict.get('trackers') else None
     return Launch(m_dict['state'], m_dict['launch_dir'], fworker,
                   m_dict['host'], m_dict['ip'], trackers, action,
                   m_dict['state_history'], m_dict['launch_id'], m_dict['fw_id'])
    def test_xas_wflow_abatom_by_idx(self):
        if not FEFF_CMD:
            # fake run
            xmu_file_path = os.path.abspath(
                os.path.join(module_dir, "../../test_files/xmu.dat"))
            feff_bin = "cp {} .".format(xmu_file_path)
        else:
            feff_bin = FEFF_CMD

        wf = get_wf_xas(self.absorbing_atom,
                        self.structure,
                        feff_input_set="XANES",
                        edge="K",
                        feff_cmd=feff_bin,
                        db_file=">>db_file<<",
                        use_primitive=False,
                        user_tag_settings=self.user_tag_settings)
        self.assertEqual(len(wf.as_dict()["fws"]), 1)

        self.lp.add_wf(wf)
        # run
        rapidfire(
            self.lp,
            fworker=FWorker(env={"db_file": os.path.join(db_dir, "db.json")}))

        d = self.get_task_collection().find_one({"spectrum_type": "XANES"})
        self._check_run(d)
Example #13
0
 def from_dict(cls, m_dict):
     fworker = FWorker.from_dict(m_dict['fworker']) if m_dict['fworker'] else None
     action = FWAction.from_dict(m_dict['action']) if m_dict.get('action') else None
     trackers = [Tracker.from_dict(f) for f in m_dict['trackers']] if m_dict.get('trackers') else None
     return Launch(m_dict['state'], m_dict['launch_dir'], fworker,
                   m_dict['host'], m_dict['ip'], trackers, action,
                   m_dict['state_history'], m_dict['launch_id'], m_dict['fw_id'])
Example #14
0
 def __init__(self, state, launch_dir, fworker=None, host=None, ip=None, trackers=None,
              action=None, state_history=None, launch_id=None, fw_id=None):
     """
     Args:
         state (str): the state of the Launch (e.g. RUNNING, COMPLETED)
         launch_dir (str): the directory where the Launch takes place
         fworker (FWorker): The FireWorker running the Launch
         host (str): the hostname where the launch took place (set automatically if None)
         ip (str): the IP address where the launch took place (set automatically if None)
         trackers ([Tracker]): File Trackers for this Launch
         action (FWAction): the output of the Launch
         state_history ([dict]): a history of all states of the Launch and when they occurred
         launch_id (int): launch_id set by the LaunchPad
         fw_id (int): id of the Firework this Launch is running
     """
     if state not in Firework.STATE_RANKS:
         raise ValueError("Invalid launch state: {}".format(state))
     self.launch_dir = launch_dir
     self.fworker = fworker or FWorker()
     self.host = host or get_my_host()
     self.ip = ip or get_my_ip()
     self.trackers = trackers if trackers else []
     self.action = action if action else None
     self.state_history = state_history if state_history else []
     self.state = state
     self.launch_id = launch_id
     self.fw_id = fw_id
Example #15
0
def recover_offline(args):
    lp = get_lp(args)
    fworker_name = FWorker.from_file(
        args.fworker_file).name if args.fworker_file else None
    failed_fws = []
    recovered_fws = []

    for l in lp.offline_runs.find({
            "completed": False,
            "deprecated": False
    }, {
            "launch_id": 1,
            "fw_id": 1
    }):
        if fworker_name and lp.launches.count({
                "launch_id": l["launch_id"],
                "fworker.name": fworker_name
        }) == 0:
            continue
        fw = lp.recover_offline(l['launch_id'], args.ignore_errors,
                                args.print_errors)
        if fw:
            failed_fws.append(l['fw_id'])
        else:
            recovered_fws.append(l['fw_id'])

    lp.m_logger.info(
        "FINISHED recovering offline runs. {} job(s) recovered: {}".format(
            len(recovered_fws), recovered_fws))
    if failed_fws:
        lp.m_logger.info(
            "FAILED to recover offline fw_ids: {}".format(failed_fws))
Example #16
0
def rlaunch():

    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the central database and ' \
                    'runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exist on ^C

    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.nlaunches, -1, args.sleep, args.loglvl)

    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
Example #17
0
 def from_dict(cls, m_dict):
     fworker = FWorker.from_dict(m_dict['fworker'])
     action = FWAction.from_dict(
         m_dict['action']) if m_dict.get('action') else None
     return Launch(m_dict['state'], m_dict['launch_dir'], fworker,
                   m_dict['host'], m_dict['ip'], action,
                   m_dict['state_history'], m_dict['launch_id'],
                   m_dict['fw_id'])
Example #18
0
 def setUpClass(cls):
     cls.lp = None
     cls.fworker = FWorker()
     try:
         cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
         cls.lp.reset(password=None, require_password=False)
     except:
         raise unittest.SkipTest('MongoDB is not running in localhost:27017! Skipping tests.')
Example #19
0
def rapidfire(launchpad,
              fworker=None,
              m_dir=None,
              nlaunches=0,
              max_loops=-1,
              sleep_time=None,
              strm_lvl='INFO'):
    """
    Keeps running Rockets in m_dir until we reach an error. Automatically creates subdirectories for each Rocket.
    Usually stops when we run out of FireWorks from the LaunchPad.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker object)
    :param m_dir: (str) the directory in which to loop Rocket running
    :param nlaunches: (int) 0 means 'until completion', -1 or "infinite" means to loop forever
    :param max_loops: (int) maximum number of loops
    :param sleep_time: (int) secs to sleep between rapidfire loop iterations
    :param strm_lvl: (str) level at which to output logs to stdout
    """

    sleep_time = sleep_time if sleep_time else RAPIDFIRE_SLEEP_SECS
    curdir = m_dir if m_dir else os.getcwd()
    l_logger = get_fw_logger('rocket.launcher',
                             l_dir=launchpad.get_logdir(),
                             stream_level=strm_lvl)
    nlaunches = -1 if nlaunches == 'infinite' else int(nlaunches)
    fworker = fworker if fworker else FWorker()

    num_launched = 0
    num_loops = 0

    while num_loops != max_loops:
        while launchpad.run_exists(fworker):
            os.chdir(curdir)
            launcher_dir = create_datestamp_dir(curdir,
                                                l_logger,
                                                prefix='launcher_')
            os.chdir(launcher_dir)
            rocket_ran = launch_rocket(launchpad, fworker, strm_lvl=strm_lvl)
            if rocket_ran:
                num_launched += 1
            elif not os.listdir(launcher_dir):
                # remove the empty shell of a directory
                os.chdir(curdir)
                os.rmdir(launcher_dir)
            if num_launched == nlaunches:
                break
            time.sleep(
                0.15
            )  # add a small amount of buffer breathing time for DB to refresh, etc.
        if num_launched == nlaunches or nlaunches == 0:
            break
        log_multi(l_logger, 'Sleeping for {} secs'.format(sleep_time))
        time.sleep(sleep_time)
        num_loops += 1
        log_multi(l_logger, 'Checking for FWs to run...'.format(sleep_time))
Example #20
0
 def test_fw_env(self):
     t = DummyFWEnvTask()
     fw = Firework(t)
     self.lp.add_wf(fw)
     launch_rocket(self.lp, self.fworker)
     self.assertEqual(self.lp.get_launch_by_id(1).action.stored_data['data'], "hello")
     self.lp.add_wf(fw)
     launch_rocket(self.lp, FWorker(env={"hello": "world"}))
     self.assertEqual(self.lp.get_launch_by_id(2).action.stored_data[
                          'data'],
                      "world")
Example #21
0
def rapidfire(launchpad, fworker=None, m_dir=None, nlaunches=0, max_loops=-1,
              sleep_time=None, strm_lvl='INFO', timeout=None):
    """
    Keeps running Rockets in m_dir until we reach an error. Automatically creates subdirectories for each Rocket.
    Usually stops when we run out of FireWorks from the LaunchPad.

    :param launchpad: (LaunchPad)
    :param fworker: (FWorker object)
    :param m_dir: (str) the directory in which to loop Rocket running
    :param nlaunches: (int) 0 means 'until completion', -1 or "infinite" means to loop until max_loops
    :param max_loops: (int) maximum number of loops (default -1 is infinite)
    :param sleep_time: (int) secs to sleep between rapidfire loop iterations
    :param strm_lvl: (str) level at which to output logs to stdout
    :param timeout: (int) # of seconds after which to stop the rapidfire process
    """

    sleep_time = sleep_time if sleep_time else RAPIDFIRE_SLEEP_SECS
    curdir = m_dir if m_dir else os.getcwd()
    l_logger = get_fw_logger('rocket.launcher', l_dir=launchpad.get_logdir(), stream_level=strm_lvl)
    nlaunches = -1 if nlaunches == 'infinite' else int(nlaunches)
    fworker = fworker if fworker else FWorker()

    num_launched = 0
    start_time = datetime.now()
    num_loops = 0

    while num_loops != max_loops and (not timeout or (datetime.now() - start_time).total_seconds() < timeout):
        skip_check = False  # this is used to speed operation
        while (skip_check or launchpad.run_exists(fworker)) and \
                (not timeout or (datetime.now() - start_time).total_seconds() < timeout):
            os.chdir(curdir)
            launcher_dir = create_datestamp_dir(curdir, l_logger, prefix='launcher_')
            os.chdir(launcher_dir)
            rocket_ran = launch_rocket(launchpad, fworker, strm_lvl=strm_lvl)
            if rocket_ran:
                num_launched += 1
            elif not os.listdir(launcher_dir):
                # remove the empty shell of a directory
                os.chdir(curdir)
                os.rmdir(launcher_dir)
            if num_launched == nlaunches:
                break
            if launchpad.run_exists(fworker):
                skip_check = True  # don't wait, pull the next FW right away
            else:
                time.sleep(0.15)  # add a small amount of buffer breathing time for DB to refresh in case we have a dynamic WF
                skip_check = False
        if num_launched == nlaunches or nlaunches == 0:
            break
        log_multi(l_logger, 'Sleeping for {} secs'.format(sleep_time))
        time.sleep(sleep_time)
        num_loops += 1
        log_multi(l_logger, 'Checking for FWs to run...'.format(sleep_time))
    os.chdir(curdir)
Example #22
0
    def setup_fireworks(cls):
        """
        Sets up the fworker and launchpad if a connection to a local mongodb is available.
        cls.lp is set to None if not available
        """

        cls.fworker = FWorker()
        try:
            cls.lp = LaunchPad(name=TESTDB_NAME, strm_lvl='ERROR')
            cls.lp.reset(password=None, require_password=False)
        except:
            cls.lp = None
Example #23
0
def basic_fw_ex():
    print("--- BASIC FIREWORK EXAMPLE ---")

    # setup
    launchpad = setup()

    # add Firework
    firetask = ScriptTask.from_str('echo "howdy, your job launched successfully!"')
    firework = Firework(firetask)
    launchpad.add_wf(firework)

    # launch Rocket
    launch_rocket(launchpad, FWorker())
Example #24
0
def mlaunch():

    m_description = 'This program launches multiple Rockets simultaneously'

    parser = ArgumentParser(description=m_description)

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)', default=None, type=int)
    parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)', default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file (used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing the node file name (for populating FWData only)', default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)', default=1, type=int)

    args = parser.parse_args()

    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout)
Example #25
0
def rapidfire(launchpad,
              fworker=None,
              m_dir=None,
              logdir=None,
              strm_lvl=None,
              nlaunches=0,
              sleep_time=60,
              max_loops=-1):
    """
    Keeps running Rockets in m_dir until we reach an error. Automatically creates subdirectories for each Rocket.
    Usually stops when we run out of FireWorks from the LaunchPad.

    :param launchpad: a LaunchPad object
    :param fworker: a FWorker object
    :param m_dir: the directory in which to loop Rocket running
    :param nlaunches: 0 means 'until completion', -1 means 'infinity'
    """
    curdir = m_dir if m_dir else os.getcwd()
    fworker = fworker if fworker else FWorker()
    # initialize logger
    l_logger = get_fw_logger('rocket.launcher',
                             l_dir=logdir,
                             stream_level=strm_lvl)
    nlaunches = -1 if nlaunches == 'infinite' else int(nlaunches)

    # TODO: wrap in try-except. Use log_exception for exceptions EXCEPT running out of jobs.
    # TODO: always chdir() back to curdir when finished...then delete cruft from MongoTests
    num_launched = 0
    num_loops = 0
    while num_loops != max_loops:
        while launchpad.run_exists():
            os.chdir(curdir)
            launcher_dir = create_datestamp_dir(curdir,
                                                l_logger,
                                                prefix='launcher_')
            os.chdir(launcher_dir)
            launch_rocket(launchpad, fworker, logdir, strm_lvl)
            num_launched += 1
            if num_launched == nlaunches:
                break
            time.sleep(
                0.1
            )  # add a small amount of buffer breathing time for DB to refresh, etc.
        if num_launched == nlaunches or nlaunches == 0:
            break
        l_logger.info('Sleeping for {} secs'.format(sleep_time))
        time.sleep(sleep_time)
        num_loops += 1
        l_logger.info('Checking for FWs to run...'.format(sleep_time))
Example #26
0
def multiple_tasks_ex():
    print("--- MULTIPLE FIRETASKS EXAMPLE ---")

    # setup
    launchpad = setup()

    # add FireWorks
    firetask1 = ScriptTask.from_str('echo "This is TASK #1"')
    firetask2 = ScriptTask.from_str('echo "This is TASK #2"')
    firetask3 = ScriptTask.from_str('echo "This is TASK #3"')
    fw = Firework([firetask1, firetask2, firetask3])
    launchpad.add_wf(fw)

    # launch Rocket
    rapidfire(launchpad, FWorker())
Example #27
0
 def from_dict(cls, m_dict):
     fworker = FWorker.from_dict(m_dict["fworker"]) if m_dict["fworker"] else None
     action = FWAction.from_dict(m_dict["action"]) if m_dict.get("action") else None
     trackers = [Tracker.from_dict(f) for f in m_dict["trackers"]] if m_dict.get("trackers") else None
     return Launch(
         m_dict["state"],
         m_dict["launch_dir"],
         fworker,
         m_dict["host"],
         m_dict["ip"],
         trackers,
         action,
         m_dict["state_history"],
         m_dict["launch_id"],
         m_dict["fw_id"],
     )
Example #28
0
def launch_rocket(launchpad, fworker=None, fw_id=None, strm_lvl='INFO'):
    """
    Run a single rocket in the current directory
    :param launchpad: (LaunchPad)
    :param fworker: (FWorker)
    :param fw_id: (int) if set, a particular Firework to run
    :param strm_lvl: (str) level at which to output logs to stdout
    """
    fworker = fworker if fworker else FWorker()
    l_dir = launchpad.get_logdir() if launchpad else None
    l_logger = get_fw_logger('rocket.launcher', l_dir=l_dir, stream_level=strm_lvl)

    log_multi(l_logger, 'Launching Rocket')
    rocket = Rocket(launchpad, fworker, fw_id)
    rocket_ran = rocket.run()
    log_multi(l_logger, 'Rocket finished')
    return rocket_ran
Example #29
0
def launch_rocket(launchpad,
                  fworker=None,
                  logdir=None,
                  strm_lvl=None,
                  fw_id=None):
    """
    Run a single rocket in the current directory
    :param launchpad: a LaunchPad object
    :param fworker: a FWorker object
    """
    fworker = fworker if fworker else FWorker()
    l_logger = get_fw_logger('rocket.launcher',
                             l_dir=logdir,
                             stream_level=strm_lvl)
    l_logger.info('Launching Rocket')
    rocket = Rocket(launchpad, fworker, fw_id)
    rocket.run()
    l_logger.info('Rocket finished')
Example #30
0
def recover_offline(args):
    lp = get_lp(args)
    fworker_name = FWorker.from_file(args.fworker_file).name if args.fworker_file else None
    failed_fws = []
    recovered_fws = []

    for l in lp.offline_runs.find({"completed": False, "deprecated": False}, {"launch_id": 1, "fw_id":1}):
        if fworker_name and lp.launches.count({"launch_id": l["launch_id"], "fworker.name": fworker_name}) == 0:
            continue
        fw = lp.recover_offline(l['launch_id'], args.ignore_errors, args.print_errors)
        if fw:
            failed_fws.append(l['fw_id'])
        else:
            recovered_fws.append(l['fw_id'])

    lp.m_logger.info("FINISHED recovering offline runs. {} job(s) recovered: {}".format(len(recovered_fws), recovered_fws))
    if failed_fws:
        lp.m_logger.info("FAILED to recover offline fw_ids: {}".format(failed_fws))
Example #31
0
    def test_basic_fw_offline(self):
        test1 = ScriptTask.from_str("python -c 'print(\"test1\")'",
                                    {'store_stdout': True})
        fw = Firework(test1)
        self.lp.add_wf(fw)

        fw, launch_id = self.lp.reserve_fw(FWorker(), os.getcwd())

        setup_offline_job(self.lp, fw, launch_id)

        launch_rocket(None, self.fworker)

        with open(os.path.join(os.getcwd(), "FW_offline.json")) as f:
            fwo = json.load(f)
            self.assertEquals(fwo["state"], "COMPLETED")
            self.assertEquals(fwo["launch_id"], 1)
            self.assertEquals(
                fwo["fwaction"], {
                    'update_spec': {},
                    'mod_spec': [],
                    'stored_data': {
                        'returncode': 0,
                        'stdout': u'test1\n',
                        'all_returncodes': [0]
                    },
                    'exit': False,
                    'detours': [],
                    'additions': [],
                    'defuse_children': False
                })

        with open(os.path.join(os.getcwd(), "FW_ping.json")) as f:
            fwp = json.load(f)
            self.assertIsNotNone(fwp["ping_time"])

        l = self.lp.offline_runs.find_one(
            {
                "completed": False,
                "deprecated": False
            }, {"launch_id": 1})
        self.lp.recover_offline(l['launch_id'])
        self.assertEqual(
            self.lp.get_launch_by_id(1).action.stored_data['stdout'],
            'test1\n')
Example #32
0
def rapid_fire_ex():
    print("--- RAPIDFIRE EXAMPLE ---")

    # setup
    launchpad = setup()

    # add FireWorks
    firetask = ScriptTask.from_str('echo "howdy, your job launched successfully!"')
    fw1 = Firework(firetask)
    launchpad.add_wf(fw1)

    # re-add multiple times
    fw2 = Firework(firetask)
    launchpad.add_wf(fw2)
    fw3 = Firework(firetask)
    launchpad.add_wf(fw3)

    # launch Rocket
    rapidfire(launchpad, FWorker())
Example #33
0
    def test_wf(self):
        self.wf_1 = self._simulate_vasprun(self.wf_1)
        self.wf_2 = self._simulate_vasprun(self.wf_2)
        self.wf_3 = self._simulate_vasprun(self.wf_3)
        self.wf_4 = self._simulate_vasprun(self.wf_4)
        self.wf_5 = self._simulate_vasprun(self.wf_5)
        self.wf_6 = self._simulate_vasprun(self.wf_6)

        self.lp.add_wf(self.wf_1)
        self.lp.add_wf(self.wf_2)
        self.lp.add_wf(self.wf_3)
        self.lp.add_wf(self.wf_4)
        self.lp.add_wf(self.wf_5)
        self.lp.add_wf(self.wf_6)

        # Use scratch directory as destination directory for testing
        rapidfire(self.lp, fworker=FWorker(env={"run_dest_root": self.scratch_dir}))

        wf = self.lp.get_wf_by_fw_id(1)
        self.assertTrue(all([s == 'COMPLETED' for s in wf.fw_states.values()]))
Example #34
0
    def test_multi_fw_complex(self):

        dest1 = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'inputs.txt')
        dest2 = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'temp_file.txt')
        self._teardown([dest1, dest2])
        try:
            # create the Firework consisting of multiple tasks
            firetask1 = TemplateWriterTask({
                'context': {
                    'opt1': 5.0,
                    'opt2': 'fast method'
                },
                'template_file': 'simple_template.txt',
                'output_file': dest1
            })
            firetask2 = FileTransferTask({
                'files': [{
                    'src': dest1,
                    'dest': dest2
                }],
                'mode':
                'copy'
            })
            fw = Firework([firetask1, firetask2])

            # store workflow and launch it locally, single shot
            self.lp.add_wf(fw)
            launch_rocket(self.lp, FWorker())

            # read inputs.txt, words.txt, dest
            for d in [dest1, dest2]:
                with open(d) as f:
                    self.assertEqual(f.read(),
                                     'option1 = 5.0\noption2 = fast method')

        finally:
            self._teardown([dest1, dest2])
Example #35
0
def basic_wf_ex():
    print("--- BASIC WORKFLOW EXAMPLE ---")

    # setup
    launchpad = setup()

    # add FireWorks
    task1 = ScriptTask.from_str('echo "Ingrid is the CEO."')
    task2 = ScriptTask.from_str('echo "Jill is a manager."')
    task3 = ScriptTask.from_str('echo "Jack is a manager."')
    task4 = ScriptTask.from_str('echo "Kip is an intern."')

    fw1 = Firework(task1, fw_id=1)
    fw2 = Firework(task2, fw_id=2)
    fw3 = Firework(task3, fw_id=3)
    fw4 = Firework(task4, fw_id=4)

    # make workflow
    workflow = Workflow([fw1, fw2, fw3, fw4], {1: [2, 3], 2: [4], 3: [4]})
    launchpad.add_wf(workflow)

    # launch Rocket
    rapidfire(launchpad, FWorker())
Example #36
0
def rlaunch():

    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot',
                                          help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire',
        help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f',
                               '--fw_id',
                               help='specific fw_id to run',
                               default=None,
                               type=int)
    single_parser.add_argument('--offline',
                               help='run in offline mode (FW.json required)',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)
    rapid_parser.add_argument(
        '--max_loops',
        help='after this many sleep loops, quit even in '
        'infinite nlaunches mode (default -1 is infinite loops)',
        default=-1,
        type=int)
    rapid_parser.add_argument('--sleep',
                              help='sleep time between loops (secs)',
                              default=None,
                              type=int)
    rapid_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument('num_jobs',
                              help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches',
                              help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)',
                              default=0)
    multi_parser.add_argument(
        '--sleep',
        help='sleep time between loops in infinite launch mode'
        '(secs)',
        default=None,
        type=int)
    multi_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)
    multi_parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name '
        'containing the node file name (for populating'
        ' FWData only)',
        default=None,
        type=str)
    multi_parser.add_argument(
        '--ppn',
        help='processors per node (for populating FWData only)',
        default=1,
        type=int)
    multi_parser.add_argument('--exclude_current_node',
                              help="Don't use the script launching node"
                              "as compute node",
                              action="store_true")
    multi_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument('-l',
                        '--launchpad_file',
                        help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w',
                        '--fworker_file',
                        help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c',
                        '--config_dir',
                        help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl',
                        help='level to print log messages',
                        default='INFO')
    parser.add_argument('-s',
                        '--silencer',
                        help='shortcut to mute log messages',
                        action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(
            os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir,
                                           'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(
            os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad,
                  fworker=fworker,
                  m_dir=None,
                  nlaunches=args.nlaunches,
                  max_loops=args.max_loops,
                  sleep_time=args.sleep,
                  strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]
        launch_multiprocess(launchpad,
                            fworker,
                            args.loglvl,
                            args.nlaunches,
                            args.num_jobs,
                            args.sleep,
                            total_node_list,
                            args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
Example #37
0
def mlaunch():

    m_description = 'This program launches multiple Rockets simultaneously'

    parser = ArgumentParser(description=m_description)

    parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    parser.add_argument('--nlaunches', help='number of FireWorks to run in series per parallel job '
                                            '(int or "infinite"; default 0 is all jobs in DB)', default=0)
    parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode (secs)',
                        default=None, type=int)
    parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                        default=None, type=int)

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file',
                        default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file',
                        default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                                                   '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    parser.add_argument('--nodefile', help='nodefile name or environment variable name containing '
                                           'the node file name (for populating FWData only)',
                        default=None, type=str)
    parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                        default=1, type=int)
    parser.add_argument('--exclude_current_node', help="Don't use the script launching node as compute node",
                        action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete mlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    if not args.launchpad_file and args.config_dir and os.path.exists(os.path.join(args.config_dir,
                                                                                   'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and args.config_dir and os.path.exists(os.path.join(args.config_dir,
                                                                                 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    total_node_list = None
    if args.nodefile:
        if args.nodefile in os.environ:
            args.nodefile = os.environ[args.nodefile]
        with open(args.nodefile, 'r') as f:
            total_node_list = [line.strip() for line in f.readlines()]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
Example #38
0
def rlaunch():

    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser('multi',
                                         help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                                                  'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops', help='after this many sleep loops, quit even in '
                                                  'infinite nlaunches mode (default -1 is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None,
                              type=int)

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                                                  'parallel job (int or "infinite"; default 0 is '
                                                  'all jobs in DB)',
                              default=0)
    multi_parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode'
                                              '(secs)',
                              default=None, type=int)
    multi_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile', help='nodefile name or environment variable name '
                                                 'containing the node file name (for populating'
                                                 ' FWData only)',
                              default=None, type=str)
    multi_parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                              default=1, type=int)
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node"
                                                             "as compute node",
                              action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                                                   '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')


    args = parser.parse_args()

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file and os.path.exists(os.path.join(args.config_dir, 'my_launchpad.yaml')):
        args.launchpad_file = os.path.join(args.config_dir, 'my_launchpad.yaml')

    if not args.fworker_file and os.path.exists(os.path.join(args.config_dir, 'my_fworker.yaml')):
        args.fworker_file = os.path.join(args.config_dir, 'my_fworker.yaml')

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f.readlines()]

        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                            args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
Example #39
0
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=None)

    parser.add_argument('--logdir', help='path to a directory for logging', default=None)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    if not args.launchpad_file and os.path.exists('my_launchpad.yaml'):
        args.launchpad_file = 'my_launchpad.yaml'

    if not args.fworker_file and os.path.exists('my_fworker.yaml'):
        args.fworker_file = 'my_fworker.yaml'

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(logdir=args.logdir, strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker, None, args.logdir, args.loglvl, args.nlaunches, args.sleep)

    else:
        launch_rocket(launchpad, fworker, args.logdir, args.loglvl, args.fw_id)
Example #40
0
    parser.add_argument('--logdir', help='path to a directory for logging', default=None)
    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('--silencer', help='shortcut to mute log messages', action='store_true')
    parser.add_argument('-r', '--reserve', help='reserve a fw', action='store_true')
    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=None)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=None)

    rapid_parser.add_argument('-q', '--njobs_queue', help='maximum jobs to keep in queue for this user', default=10, type=int)
    rapid_parser.add_argument('-b', '--njobs_block', help='maximum jobs to put in a block', default=500, type=int)
    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite")')
    rapid_parser.add_argument('--sleep', help='sleep time between loops', default=60, type=int)

    args = parser.parse_args()

    if not args.launchpad_file and os.path.exists('my_launchpad.yaml'):
        args.launchpad_file = 'my_launchpad.yaml'

    if not args.fworker_file and os.path.exists('my_fworker.yaml'):
        args.fworker_file = 'my_fworker.yaml'

    launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else None
    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()
    rocket_params = QueueParams.from_file(args.queue_params_file)
    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    # TODO: the number of arguments here is crazy!
    if args.command == 'rapidfire':
        rapidfire(rocket_params, args.launch_dir, args.njobs_queue, args.njobs_block, args.loglvl, args.nlaunches, args.sleep, launchpad, fworker, args.reserve)
    else:
        launch_rocket_to_queue(rocket_params, args.launch_dir, args.loglvl, launchpad, fworker, args.reserve)
Example #41
0
 def from_dict(cls, m_dict):
     fworker = FWorker.from_dict(m_dict['fworker'])
     action = FWAction.from_dict(m_dict['action']) if m_dict.get('action') else None
     return Launch(m_dict['state'], m_dict['launch_dir'], fworker, m_dict['host'], m_dict['ip'], action,
                   m_dict['state_history'], m_dict['launch_id'], m_dict['fw_id'])
import os, json
from pymongo import DESCENDING, ASCENDING
from fireworks.fw_config import CONFIG_FILE_DIR, SORT_FWS
from fireworks.core.fworker import FWorker
from fireworks.core.launchpad import LaunchPad
from pymongo import ReturnDocument

launchpad = LaunchPad.from_file(os.path.join(CONFIG_FILE_DIR, 'my_launchpad.yaml'))
fworker = FWorker.from_file(os.path.join(CONFIG_FILE_DIR, 'my_fworker.yaml'))
#print launchpad._get_a_fw_to_run(query=fworker.query, checkout=False)
m_query = dict(fworker.query)
m_query['state'] = 'READY'
sortby = [("spec._priority", DESCENDING)]
if SORT_FWS.upper() == "FIFO":
    sortby.append(("created_on", ASCENDING))
elif SORT_FWS.upper() == "FILO":
    sortby.append(("created_on", DESCENDING))
#print json.dumps(m_query, indent=4)
projection = {
    '_id': 0, 'fw_id': 1, 'spec._fworker': 1, 'spec.task_type': 1, 'spec._queueadapter': 1,
    'spec.mpsnl.about.remarks': 1, 'spec.snl.about.remarks': 1, 'spec.prev_vasp_dir': 1,
    'updated_on': 1, 'state': 1
}

fw_ids = []
for idoc, doc in enumerate(launchpad.fireworks.find(m_query, projection=projection, sort=sortby).limit(100)):
    #print doc
    if 'walltime' in doc['spec']['_queueadapter']:
        walltime = doc['spec']['_queueadapter']['walltime'] 
        if int(walltime.split(':')[0]) > 48:
            launchpad.fireworks.find_one_and_update(