Exemple #1
0
 def test_early_exit(self):
     """Run a diamond-shaped workflow with two parallel jobs and compare outputs.

     Fireworks 2 and 3 both depend on firework 1 and both feed firework 4.
     Every firework echoes its parent process id into ``task.out``. The test
     asserts that the output files of fireworks 2 and 3 are different.
     """
     os.chdir(MODULE_DIR)
     script_text = "echo hello from process $PPID; sleep 2"
     fireworks = [
         Firework(
             ScriptTask.from_str(shell_cmd=script_text,
                                 parameters={"stdout_file": "task.out"}),
             fw_id=fw_id)
         for fw_id in (1, 2, 3, 4)
     ]
     dependencies = {1: [2, 3], 2: [4], 3: [4]}
     self.lp.add_wf(Workflow(fireworks, dependencies))
     launch_multiprocess(self.lp, FWorker(), 'DEBUG', 0, 2, sleep_time=0.5)

     # Fetch the two middle fireworks again so their launch data is available.
     middle_fws = [self.lp.get_fw_by_id(fw_id) for fw_id in (2, 3)]
     outputs = []
     for fw in middle_fws:
         out_path = os.path.join(fw.launches[0].launch_dir, "task.out")
         with open(out_path) as f:
             outputs.append(f.read())
     fw2_text, fw3_text = outputs
     self.assertNotEqual(fw2_text, fw3_text)
Exemple #2
0
    def test_tracker_mlaunch(self):
        """
        Test the tracker for mlaunch

        Two workflows are added, each made of two chained Fireworks of 25
        ``echo`` tasks that append consecutive integers to a destination
        file. After launching with two parallel jobs, the text returned by
        each tracker's ``track_file()`` must equal the last two integers
        written to its file.
        """
        import traceback

        self._teardown([self.dest1, self.dest2])
        try:

            def make_fw(start, dest, tracker, fw_id, name):
                # One Firework of 25 tasks echoing start..start+24 into dest.
                fts = [
                    ScriptTask.from_str(
                        'echo "' + str(i) + '" >> ' + dest,
                        {'store_stdout': True})
                    for i in range(start, start + 25)
                ]
                return Firework(fts,
                                spec={'_trackers': [tracker]},
                                fw_id=fw_id,
                                name=name)

            def add_wf(j, dest, tracker, name):
                # Two chained Fireworks: the second runs after the first.
                fw1 = make_fw(j, dest, tracker, j + 1, name + '1')
                fw2 = make_fw(j + 25, dest, tracker, j + 2, name + '2')
                wf = Workflow([fw1, fw2], links_dict={fw1: [fw2]})
                self.lp.add_wf(wf)

            add_wf(0, self.dest1, self.tracker1, 'a_test')
            add_wf(50, self.dest2, self.tracker2, 'b_test')

            try:
                launch_multiprocess(self.lp,
                                    self.fworker,
                                    'ERROR',
                                    0,
                                    2,
                                    0,
                                    ppn=2)
            except Exception:
                # A launch error stays non-fatal (the assertions below decide
                # the outcome), but it is reported instead of being hidden,
                # and KeyboardInterrupt/SystemExit are no longer swallowed.
                traceback.print_exc()

            self.assertEqual('48\n49', self.tracker1.track_file())
            self.assertEqual('98\n99', self.tracker2.track_file())

        finally:
            self._teardown([self.dest1, self.dest2])
            # Remove the launcher_* directories left in the working directory.
            pwd = os.getcwd()
            for ldir in glob.glob(os.path.join(pwd, 'launcher_*')):
                shutil.rmtree(ldir)
Exemple #3
0
def mlaunch():
    """Command-line entry point that launches several Rockets in parallel.

    Parses the command line, resolves the launchpad and fworker files
    (falling back to ``my_launchpad.yaml`` / ``my_fworker.yaml`` inside the
    config directory when no file is set), optionally reads a node list from
    a node file, and passes everything to ``launch_multiprocess``.
    """
    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')

    parser.add_argument(
        'num_jobs',
        help='the number of jobs to run in parallel',
        type=int)
    parser.add_argument(
        '--nlaunches',
        help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    parser.add_argument(
        '--sleep',
        help='sleep time between loops in infinite launch mode (secs)',
        default=None,
        type=int)
    parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)

    parser.add_argument(
        '-l', '--launchpad_file',
        help='path to launchpad file',
        default=LAUNCHPAD_LOC)
    parser.add_argument(
        '-w', '--fworker_file',
        help='path to fworker file',
        default=FWORKER_LOC)
    parser.add_argument(
        '-c', '--config_dir',
        help='path to a directory containing the config file (used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)

    parser.add_argument(
        '--loglvl',
        help='level to print log messages',
        default='INFO')
    parser.add_argument(
        '-s', '--silencer',
        help='shortcut to mute log messages',
        action='store_true')

    parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name containing the node file name (for populating FWData only)',
        default=None,
        type=str)
    parser.add_argument(
        '--ppn',
        help='processors per node (for populating FWData only)',
        default=1,
        type=int)

    args = parser.parse_args()

    # Without an explicit file, look for the conventional names in config_dir.
    if not args.launchpad_file and args.config_dir:
        lp_candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(lp_candidate):
            args.launchpad_file = lp_candidate

    if not args.fworker_file and args.config_dir:
        fw_candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(fw_candidate):
            args.fworker_file = fw_candidate

    # The silencer flag overrides whatever log level was requested.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the path.
        nodefile_path = os.environ.get(args.nodefile, args.nodefile)
        with open(nodefile_path, 'r') as f:
            total_node_list = [line.strip() for line in f]

    launch_multiprocess(
        launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
        args.sleep, total_node_list, args.ppn, timeout=args.timeout)
Exemple #4
0
def mlaunch():
    """Parse the command line and launch multiple Rockets simultaneously.

    The launchpad and fworker are loaded from the files given on the command
    line, or from ``my_launchpad.yaml`` / ``my_fworker.yaml`` in the config
    directory when no file is set; otherwise default objects are created.
    An optional node file supplies the node list passed to
    ``launch_multiprocess``.
    """
    parser = ArgumentParser(description='This program launches multiple Rockets simultaneously')
    add = parser.add_argument  # shorthand; arguments are registered in the same order

    add('num_jobs', type=int, help='the number of jobs to run in parallel')
    add('--nlaunches', default=0,
        help='number of FireWorks to run in series per parallel job (int or "infinite"; default 0 is all jobs in DB)')
    add('--sleep', default=None, type=int,
        help='sleep time between loops in infinite launch mode (secs)')

    add('-l', '--launchpad_file', default=LAUNCHPAD_LOC, help='path to launchpad file')
    add('-w', '--fworker_file', default=FWORKER_LOC, help='path to fworker file')
    add('-c', '--config_dir', default=CONFIG_FILE_DIR,
        help='path to a directory containing the config file (used if -l, -w unspecified)')

    add('--loglvl', default='INFO', help='level to print log messages')
    add('-s', '--silencer', action='store_true', help='shortcut to mute log messages')

    add('--nodefile', default=None, type=str,
        help='nodefile name or environment variable name containing the node file name (for populating FWData only)')
    add('--ppn', default=1, type=int,
        help='processors per node (for populating FWData only)')

    args = parser.parse_args()

    # Fall back to the conventional file names inside the config directory.
    if not args.launchpad_file and args.config_dir:
        default_lp = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(default_lp):
            args.launchpad_file = default_lp

    if not args.fworker_file and args.config_dir:
        default_fw = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(default_fw):
            args.fworker_file = default_fw

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    total_node_list = None
    if args.nodefile:
        # The option may hold a path or the name of an env var holding one.
        node_path = args.nodefile
        if node_path in os.environ:
            node_path = os.environ[node_path]
        with open(node_path, 'r') as f:
            total_node_list = [line.strip() for line in f]

    launch_multiprocess(
        launchpad,
        fworker,
        args.loglvl,
        args.nlaunches,
        args.num_jobs,
        args.sleep,
        total_node_list,
        args.ppn)
Exemple #5
0
    def test_tracker_mlaunch(self):
        """
        Test the tracker for mlaunch

        Adds two workflows, each with two chained Fireworks of 25 ``echo``
        tasks appending consecutive integers to a destination file, launches
        them with two parallel jobs, and checks that each tracker's
        ``track_file()`` returns the last two integers written to its file.
        """
        import traceback

        self._teardown([self.dest1, self.dest2])
        try:
            def add_wf(j, dest, tracker, name):
                # First Firework writes j..j+24, the second j+25..j+49.
                fts = [ScriptTask.from_str('echo "' + str(i) + '" >> ' + dest,
                                           {'store_stdout': True})
                       for i in range(j, j + 25)]
                fw1 = Firework(fts, spec={'_trackers': [tracker]},
                               fw_id=j + 1, name=name + '1')

                fts = [ScriptTask.from_str('echo "' + str(i) + '" >> ' + dest,
                                           {'store_stdout': True})
                       for i in range(j + 25, j + 50)]
                fw2 = Firework(fts, spec={'_trackers': [tracker]},
                               fw_id=j + 2, name=name + '2')
                wf = Workflow([fw1, fw2], links_dict={fw1: [fw2]})
                self.lp.add_wf(wf)

            add_wf(0, self.dest1, self.tracker1, 'a_test')
            add_wf(50, self.dest2, self.tracker2, 'b_test')

            try:
                launch_multiprocess(self.lp, self.fworker, 'ERROR',
                                    0, 2, 0, ppn=2)
            except Exception:
                # Keep launch errors non-fatal (the assertions below decide
                # the outcome) but report them; KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                traceback.print_exc()

            self.assertEqual('48\n49', self.tracker1.track_file())
            self.assertEqual('98\n99', self.tracker2.track_file())

        finally:
            self._teardown([self.dest1, self.dest2])
            # Remove the launcher_* directories left in the working directory.
            pwd = os.getcwd()
            for ldir in glob.glob(os.path.join(pwd, 'launcher_*')):
                shutil.rmtree(ldir)
Exemple #6
0
 def test_checkout_fw(self):
     """Launch two single-task fireworks in parallel and verify their results.

     Each firework echoes a distinct greeting into ``task.out``. After the
     launch, both must have ``COMPLETED`` as the last state of their first
     launch, and each output file must hold exactly its own greeting.
     """
     os.chdir(MODULE_DIR)
     # (fw_id, shell command, expected lines of task.out)
     cases = (
         (1, 'echo "hello 1"', ['hello 1\n']),
         (2, 'echo "hello 2"', ['hello 2\n']),
     )
     for fw_id, command, _ in cases:
         task = ScriptTask.from_str(shell_cmd=command,
                                    parameters={"stdout_file": "task.out"})
         self.lp.add_wf(Firework(task, fw_id=fw_id))

     launch_multiprocess(self.lp, FWorker(), 'DEBUG', 0, 2, 10)

     finished = [self.lp.get_fw_by_id(fw_id) for fw_id, _, _ in cases]

     # Check all final states first, then the file contents.
     for fw in finished:
         last_state = fw.launches[0].state_history[-1]["state"]
         self.assertEqual(last_state, "COMPLETED")

     for fw, (_, _, expected_lines) in zip(finished, cases):
         out_path = os.path.join(fw.launches[0].launch_dir, "task.out")
         with open(out_path) as f:
             self.assertEqual(f.readlines(), expected_lines)
def arlaunch():
    """
    Command-line entry point for launching Rockets with an AiiDA fworker.

    Three sub-commands are offered: ``singleshot`` launches a single Rocket,
    ``rapidfire`` loops until all FireWorks are completed, and ``multi``
    launches several Rockets simultaneously. The fworker is always loaded
    from the file given with the required ``-w`` option. The launchpad file
    comes from ``-l``, else from ``my_launchpad.yaml`` in the config
    directory if that file exists, else from ``LAUNCHPAD_LOC``.

    Exits with a usage error when no sub-command is given.
    """
    m_description = 'This program launches one or more Rockets. A Rocket retrieves a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot',
                                          help='launch a single Rocket')
    rapid_parser = subparsers.add_parser(
        'rapidfire',
        help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser(
        'multi', help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f',
                               '--fw_id',
                               help='specific fw_id to run',
                               default=None,
                               type=int)
    single_parser.add_argument('--offline',
                               help='run in offline mode (FW.json required)',
                               action='store_true')
    single_parser.add_argument('--pdb',
                               help='shortcut to invoke debugger on error',
                               action='store_true')

    rapid_parser.add_argument('--nlaunches',
                              help='num_launches (int or "infinite"; '
                              'default 0 is all jobs in DB)',
                              default=0)
    rapid_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)
    rapid_parser.add_argument(
        '--max_loops',
        help='after this many sleep loops, quit even in '
        'infinite nlaunches mode (default -1 is infinite loops)',
        default=-1,
        type=int)
    rapid_parser.add_argument('--sleep',
                              help='sleep time between loops (secs)',
                              default=None,
                              type=int)
    rapid_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    multi_parser.add_argument('num_jobs',
                              help='the number of jobs to run in parallel',
                              type=int)
    multi_parser.add_argument('--nlaunches',
                              help='number of FireWorks to run in series per '
                              'parallel job (int or "infinite"; default 0 is '
                              'all jobs in DB)',
                              default=0)
    # The help text previously read "mode(secs)": the space was missing
    # between the two concatenated literals.
    multi_parser.add_argument(
        '--sleep',
        help='sleep time between loops in infinite launch mode '
        '(secs)',
        default=None,
        type=int)
    multi_parser.add_argument(
        '--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)
    multi_parser.add_argument(
        '--nodefile',
        help='nodefile name or environment variable name '
        'containing the node file name (for populating'
        ' FWData only)',
        default=None,
        type=str)
    multi_parser.add_argument(
        '--ppn',
        help='processors per node (for populating FWData only)',
        default=1,
        type=int)
    # The help text previously read "nodeas compute node" (missing space).
    multi_parser.add_argument('--exclude_current_node',
                              help="Don't use the script launching node "
                              "as compute node",
                              action="store_true")
    multi_parser.add_argument(
        '--local_redirect',
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    parser.add_argument('-l',
                        '--launchpad_file',
                        help='path to launchpad file')
    parser.add_argument('-w',
                        '--fworker_file',
                        required=True,
                        help='path to fworker file')
    parser.add_argument('-c',
                        '--config_dir',
                        help='path to a directory containing the config file '
                        '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl',
                        help='level to print log messages',
                        default='INFO')
    parser.add_argument('-s',
                        '--silencer',
                        help='shortcut to mute log messages',
                        action='store_true')

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # On Python 3 a sub-command is optional by default; without one the
    # namespace has no fw_id/pdb attributes and the singleshot branch below
    # would fail with an AttributeError. Report a usage error instead.
    if args.command is None:
        parser.error('a command is required: singleshot, rapidfire or multi')

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file:
        # Only build the candidate path when a config directory is known;
        # os.path.join(None, ...) would raise a TypeError.
        candidate = None
        if args.config_dir is not None:
            candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if candidate is not None and os.path.exists(candidate):
            args.launchpad_file = candidate
        else:
            args.launchpad_file = LAUNCHPAD_LOC

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        # Offline single-shot runs do not use a launchpad.
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(
            args.launchpad_file) if args.launchpad_file else LaunchPad(
                strm_lvl=args.loglvl)

    fworker = AiiDAFWorker.from_file(args.fworker_file)

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad,
                  fworker=fworker,
                  m_dir=None,
                  nlaunches=args.nlaunches,
                  max_loops=args.max_loops,
                  sleep_time=args.sleep,
                  strm_lvl=args.loglvl,
                  timeout=args.timeout,
                  local_redirect=args.local_redirect)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable holding the path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as fhandle:
                total_node_list = [line.strip() for line in fhandle]
        launch_multiprocess(launchpad,
                            fworker,
                            args.loglvl,
                            args.nlaunches,
                            args.num_jobs,
                            args.sleep,
                            total_node_list,
                            args.ppn,
                            timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node,
                            local_redirect=args.local_redirect)
    else:
        # Only 'singleshot' remains once the missing-command case is rejected.
        launch_rocket(launchpad,
                      fworker,
                      args.fw_id,
                      args.loglvl,
                      pdb_on_exception=args.pdb)
Exemple #8
0
def rlaunch():
    """
    Command-line entry point for launching one or more Rockets.

    Three sub-commands are offered: ``singleshot`` launches a single Rocket,
    ``rapidfire`` loops until all FireWorks are completed, and ``multi``
    launches several Rockets simultaneously. When no launchpad or fworker
    file is set, ``my_launchpad.yaml`` / ``my_fworker.yaml`` from the config
    directory are used if they exist; otherwise default objects are created.

    Exits with a usage error when no sub-command is given.
    """
    m_description = 'This program launches one or more Rockets. A Rocket grabs a job from the ' \
                    'central database and runs it. The "single-shot" option launches a single Rocket, ' \
                    'whereas the "rapidfire" option loops until all FireWorks are completed.'

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help='command', dest='command')
    single_parser = subparsers.add_parser('singleshot', help='launch a single Rocket')
    rapid_parser = subparsers.add_parser('rapidfire',
                                         help='launch multiple Rockets (loop until all FireWorks complete)')
    multi_parser = subparsers.add_parser('multi',
                                         help='launches multiple Rockets simultaneously')

    single_parser.add_argument('-f', '--fw_id', help='specific fw_id to run', default=None, type=int)
    single_parser.add_argument('--offline', help='run in offline mode (FW.json required)', action='store_true')

    rapid_parser.add_argument('--nlaunches', help='num_launches (int or "infinite"; '
                                                  'default 0 is all jobs in DB)', default=0)
    rapid_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    rapid_parser.add_argument('--max_loops', help='after this many sleep loops, quit even in '
                                                  'infinite nlaunches mode (default -1 is infinite loops)',
                              default=-1, type=int)
    rapid_parser.add_argument('--sleep', help='sleep time between loops (secs)', default=None,
                              type=int)

    multi_parser.add_argument('num_jobs', help='the number of jobs to run in parallel', type=int)
    multi_parser.add_argument('--nlaunches', help='number of FireWorks to run in series per '
                                                  'parallel job (int or "infinite"; default 0 is '
                                                  'all jobs in DB)',
                              default=0)
    # The help text previously read "mode(secs)": the space was missing
    # between the two concatenated literals.
    multi_parser.add_argument('--sleep', help='sleep time between loops in infinite launch mode '
                                              '(secs)',
                              default=None, type=int)
    multi_parser.add_argument('--timeout', help='timeout (secs) after which to quit (default None)',
                              default=None, type=int)
    multi_parser.add_argument('--nodefile', help='nodefile name or environment variable name '
                                                 'containing the node file name (for populating'
                                                 ' FWData only)',
                              default=None, type=str)
    multi_parser.add_argument('--ppn', help='processors per node (for populating FWData only)',
                              default=1, type=int)
    # The help text previously read "nodeas compute node" (missing space).
    multi_parser.add_argument('--exclude_current_node', help="Don't use the script launching node "
                                                             "as compute node",
                              action="store_true")

    parser.add_argument('-l', '--launchpad_file', help='path to launchpad file', default=LAUNCHPAD_LOC)
    parser.add_argument('-w', '--fworker_file', help='path to fworker file', default=FWORKER_LOC)
    parser.add_argument('-c', '--config_dir', help='path to a directory containing the config file '
                                                   '(used if -l, -w unspecified)',
                        default=CONFIG_FILE_DIR)

    parser.add_argument('--loglvl', help='level to print log messages', default='INFO')
    parser.add_argument('-s', '--silencer', help='shortcut to mute log messages', action='store_true')

    args = parser.parse_args()

    # On Python 3 a sub-command is optional by default; without one the
    # namespace has no fw_id attribute and the singleshot branch below would
    # fail with an AttributeError. Report a usage error instead.
    if args.command is None:
        parser.error('a command is required: singleshot, rapidfire or multi')

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    # Only probe the config directory when one is known;
    # os.path.join(None, ...) would raise a TypeError.
    if args.config_dir is not None:
        if not args.launchpad_file:
            lp_candidate = os.path.join(args.config_dir, 'my_launchpad.yaml')
            if os.path.exists(lp_candidate):
                args.launchpad_file = lp_candidate

        if not args.fworker_file:
            fw_candidate = os.path.join(args.config_dir, 'my_fworker.yaml')
            if os.path.exists(fw_candidate):
                args.fworker_file = fw_candidate

    args.loglvl = 'CRITICAL' if args.silencer else args.loglvl

    if args.command == 'singleshot' and args.offline:
        # Offline single-shot runs do not use a launchpad.
        launchpad = None
    else:
        launchpad = LaunchPad.from_file(args.launchpad_file) if args.launchpad_file else LaunchPad(
            strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == 'rapidfire':
        rapidfire(launchpad, fworker=fworker, m_dir=None, nlaunches=args.nlaunches,
                  max_loops=args.max_loops, sleep_time=args.sleep, strm_lvl=args.loglvl,
                  timeout=args.timeout)
    elif args.command == 'multi':
        total_node_list = None
        if args.nodefile:
            # --nodefile may name an environment variable holding the path.
            if args.nodefile in os.environ:
                args.nodefile = os.environ[args.nodefile]
            with open(args.nodefile, 'r') as f:
                total_node_list = [line.strip() for line in f]

        launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                            args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                            exclude_current_node=args.exclude_current_node)
    else:
        # Only 'singleshot' remains once the missing-command case is rejected.
        launch_rocket(launchpad, fworker, args.fw_id, args.loglvl)
Exemple #9
0
 def runParallel(self):
     """Launch jobs from ``self.launchpad`` with two jobs running in parallel."""
     log_level = 'INFO'
     n_launches = 0
     n_parallel_jobs = 2
     sleep_secs = 10
     launch_multiprocess(self.launchpad, self.worker, log_level,
                         n_launches, n_parallel_jobs, sleep_secs)
Exemple #10
0
def mlaunch():
    """Command-line entry point: launch multiple Rockets simultaneously.

    Reads the options, picks the launchpad and fworker files (falling back
    to ``my_launchpad.yaml`` / ``my_fworker.yaml`` in the config directory
    when no file is set), optionally loads a node list from a node file,
    and calls ``launch_multiprocess``.
    """
    parser = ArgumentParser(description='This program launches multiple Rockets simultaneously')

    parser.add_argument('num_jobs', type=int, help='the number of jobs to run in parallel')
    parser.add_argument('--nlaunches', default=0,
                        help='number of FireWorks to run in series per parallel job '
                             '(int or "infinite"; default 0 is all jobs in DB)')
    parser.add_argument('--sleep', default=None, type=int,
                        help='sleep time between loops in infinite launch mode (secs)')
    parser.add_argument('--timeout', default=None, type=int,
                        help='timeout (secs) after which to quit (default None)')

    parser.add_argument('-l', '--launchpad_file', default=LAUNCHPAD_LOC,
                        help='path to launchpad file')
    parser.add_argument('-w', '--fworker_file', default=FWORKER_LOC,
                        help='path to fworker file')
    parser.add_argument('-c', '--config_dir', default=CONFIG_FILE_DIR,
                        help='path to a directory containing the config file '
                             '(used if -l, -w unspecified)')

    parser.add_argument('--loglvl', default='INFO', help='level to print log messages')
    parser.add_argument('-s', '--silencer', action='store_true',
                        help='shortcut to mute log messages')

    parser.add_argument('--nodefile', default=None, type=str,
                        help='nodefile name or environment variable name containing '
                             'the node file name (for populating FWData only)')
    parser.add_argument('--ppn', default=1, type=int,
                        help='processors per node (for populating FWData only)')
    parser.add_argument('--exclude_current_node', action="store_true",
                        help="Don't use the script launching node as compute node")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete mlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # Fall back to the conventional file names inside the config directory.
    if not args.launchpad_file and args.config_dir:
        lp_path = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(lp_path):
            args.launchpad_file = lp_path

    if not args.fworker_file and args.config_dir:
        fw_path = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(fw_path):
            args.fworker_file = fw_path

    # The silencer flag overrides the requested log level.
    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    total_node_list = None
    if args.nodefile:
        # The option may hold a path or the name of an env var holding one.
        node_path = args.nodefile
        if node_path in os.environ:
            node_path = os.environ[node_path]
        with open(node_path, 'r') as f:
            total_node_list = [line.strip() for line in f]

    launch_multiprocess(launchpad, fworker, args.loglvl, args.nlaunches, args.num_jobs,
                        args.sleep, total_node_list, args.ppn, timeout=args.timeout,
                        exclude_current_node=args.exclude_current_node)
Exemple #11
0
def mlaunch():
    """Parse the ``mlaunch`` command line and start the parallel launcher.

    The launchpad and fworker come from the files given with ``-l`` / ``-w``
    or, when no file is set, from ``my_launchpad.yaml`` / ``my_fworker.yaml``
    in the config directory if present; otherwise default objects are built.
    A node file, given directly or through an environment variable, supplies
    the node list passed to ``launch_multiprocess``.
    """
    parser = ArgumentParser(
        description='This program launches multiple Rockets simultaneously')
    add = parser.add_argument  # shorthand; arguments are registered in the same order

    add('num_jobs',
        help='the number of jobs to run in parallel',
        type=int)
    add('--nlaunches',
        help='number of FireWorks to run in series per parallel job '
             '(int or "infinite"; default 0 is all jobs in DB)',
        default=0)
    add('--sleep',
        help='sleep time between loops in infinite launch mode (secs)',
        default=None,
        type=int)
    add('--timeout',
        help='timeout (secs) after which to quit (default None)',
        default=None,
        type=int)

    add('-l', '--launchpad_file',
        help='path to launchpad file',
        default=LAUNCHPAD_LOC)
    add('-w', '--fworker_file',
        help='path to fworker file',
        default=FWORKER_LOC)
    add('-c', '--config_dir',
        help='path to a directory containing the config file '
             '(used if -l, -w unspecified)',
        default=CONFIG_FILE_DIR)

    add('--loglvl',
        help='level to print log messages',
        default='INFO')
    add('-s', '--silencer',
        help='shortcut to mute log messages',
        action='store_true')

    add('--nodefile',
        help='nodefile name or environment variable name containing '
             'the node file name (for populating FWData only)',
        default=None,
        type=str)
    add('--ppn',
        help='processors per node (for populating FWData only)',
        default=1,
        type=int)
    add('--exclude_current_node',
        help="Don't use the script launching node as compute node",
        action="store_true")

    try:
        import argcomplete
        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete mlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # Without an explicit file, look for the conventional names in config_dir.
    if not args.launchpad_file and args.config_dir:
        default_lp = os.path.join(args.config_dir, 'my_launchpad.yaml')
        if os.path.exists(default_lp):
            args.launchpad_file = default_lp

    if not args.fworker_file and args.config_dir:
        default_fw = os.path.join(args.config_dir, 'my_fworker.yaml')
        if os.path.exists(default_fw):
            args.fworker_file = default_fw

    if args.silencer:
        args.loglvl = 'CRITICAL'

    if args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    fworker = FWorker.from_file(args.fworker_file) if args.fworker_file else FWorker()

    total_node_list = None
    if args.nodefile:
        # --nodefile may name an environment variable that holds the path.
        nodefile_path = os.environ.get(args.nodefile, args.nodefile)
        with open(nodefile_path, 'r') as f:
            total_node_list = [line.strip() for line in f]

    launch_multiprocess(
        launchpad,
        fworker,
        args.loglvl,
        args.nlaunches,
        args.num_jobs,
        args.sleep,
        total_node_list,
        args.ppn,
        timeout=args.timeout,
        exclude_current_node=args.exclude_current_node)
Exemple #12
0
def _rlaunch_parser():
    """Build the ``rlaunch`` argument parser with its three subcommands."""
    m_description = (
        "This program launches one or more Rockets. A Rocket retrieves a job from the "
        'central database and runs it. The "single-shot" option launches a single Rocket, '
        'whereas the "rapidfire" option loops until all FireWorks are completed.'
    )

    parser = ArgumentParser(description=m_description)
    subparsers = parser.add_subparsers(help="command", dest="command")
    single_parser = subparsers.add_parser("singleshot",
                                          help="launch a single Rocket")
    rapid_parser = subparsers.add_parser(
        "rapidfire",
        help="launch multiple Rockets (loop until all FireWorks complete)")
    multi_parser = subparsers.add_parser(
        "multi", help="launches multiple Rockets simultaneously")

    # --- singleshot options ---
    single_parser.add_argument("-f",
                               "--fw_id",
                               help="specific fw_id to run",
                               default=None,
                               type=int)
    single_parser.add_argument("--offline",
                               help="run in offline mode (FW.json required)",
                               action="store_true")
    single_parser.add_argument("--pdb",
                               help="shortcut to invoke debugger on error",
                               action="store_true")

    # --- rapidfire options ---
    # --nlaunches has no ``type`` because it accepts either an int or the
    # literal string "infinite"; it is passed through unchanged.
    rapid_parser.add_argument("--nlaunches",
                              help='num_launches (int or "infinite"; '
                              "default 0 is all jobs in DB)",
                              default=0)
    rapid_parser.add_argument(
        "--timeout",
        help="timeout (secs) after which to quit (default None)",
        default=None,
        type=int)
    rapid_parser.add_argument(
        "--max_loops",
        help=
        "after this many sleep loops, quit even in infinite nlaunches mode (default -1 is infinite loops)",
        default=-1,
        type=int,
    )
    rapid_parser.add_argument("--sleep",
                              help="sleep time between loops (secs)",
                              default=None,
                              type=int)
    rapid_parser.add_argument(
        "--local_redirect",
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    # --- multi options ---
    multi_parser.add_argument("num_jobs",
                              help="the number of jobs to run in parallel",
                              type=int)
    multi_parser.add_argument(
        "--nlaunches",
        help="number of FireWorks to run in series per "
        'parallel job (int or "infinite"; default 0 is '
        "all jobs in DB)",
        default=0,
    )
    multi_parser.add_argument(
        "--sleep",
        help="sleep time between loops in infinite launch mode (secs)",
        default=None,
        type=int)
    multi_parser.add_argument(
        "--timeout",
        help="timeout (secs) after which to quit (default None)",
        default=None,
        type=int)
    multi_parser.add_argument(
        "--nodefile",
        help="nodefile name or environment variable name "
        "containing the node file name (for populating"
        " FWData only)",
        default=None,
        type=str,
    )
    multi_parser.add_argument(
        "--ppn",
        help="processors per node (for populating FWData only)",
        default=1,
        type=int)
    multi_parser.add_argument(
        "--exclude_current_node",
        help="Don't use the script launching node as compute node",
        action="store_true")
    multi_parser.add_argument(
        "--local_redirect",
        help="Redirect stdout and stderr to the launch directory",
        action="store_true")

    # --- options shared by every subcommand ---
    parser.add_argument("-l",
                        "--launchpad_file",
                        help="path to launchpad file")
    parser.add_argument("-w", "--fworker_file", help="path to fworker file")
    parser.add_argument(
        "-c",
        "--config_dir",
        help=
        "path to a directory containing the config file (used if -l, -w unspecified)",
        default=CONFIG_FILE_DIR,
    )

    parser.add_argument("--loglvl",
                        help="level to print log messages",
                        default="INFO")
    parser.add_argument("-s",
                        "--silencer",
                        help="shortcut to mute log messages",
                        action="store_true")
    return parser


def _rlaunch_config_path(config_dir, filename, fallback):
    """Return ``config_dir/filename`` if that file exists, else ``fallback``."""
    # A missing (None) config directory cannot be joined into a path; skip
    # straight to the fallback instead of raising TypeError.
    if config_dir is not None:
        candidate = os.path.join(config_dir, filename)
        if os.path.exists(candidate):
            return candidate
    return fallback


def _rlaunch_node_list(nodefile):
    """Read stripped lines from ``nodefile``, or from the path held in the env var of that name."""
    if nodefile in os.environ:
        nodefile = os.environ[nodefile]
    with open(nodefile) as f:
        return [line.strip() for line in f]


def rlaunch():
    """
    Command-line entry point that launches one or more Rockets.

    Parses ``sys.argv`` and dispatches on the chosen subcommand:

    * ``singleshot`` -> ``launch_rocket``
    * ``rapidfire``  -> ``rapidfire``
    * ``multi``      -> ``launch_multiprocess``

    If ``-l`` / ``-w`` are not given, ``my_launchpad.yaml`` / ``my_fworker.yaml``
    inside ``--config_dir`` are used when present, otherwise ``LAUNCHPAD_LOC`` /
    ``FWORKER_LOC``. If no subcommand is supplied, the parser reports a usage
    error and exits instead of failing later on a missing attribute.
    """
    parser = _rlaunch_parser()

    try:
        import argcomplete

        argcomplete.autocomplete(parser)
        # This supports bash autocompletion. To enable this, pip install
        # argcomplete, activate global completion, or add
        #      eval "$(register-python-argcomplete rlaunch)"
        # into your .bash_profile or .bashrc
    except ImportError:
        pass

    args = parser.parse_args()

    # Subcommands are optional in Python 3's argparse, so ``command`` can be
    # None. Without this check control would reach the singleshot branch and
    # crash on ``args.fw_id``, which only the singleshot sub-parser defines.
    if args.command is None:
        parser.error(
            "a command is required: choose from singleshot, rapidfire, multi")

    signal.signal(signal.SIGINT, handle_interrupt)  # graceful exit on ^C

    if not args.launchpad_file:
        args.launchpad_file = _rlaunch_config_path(
            args.config_dir, "my_launchpad.yaml", LAUNCHPAD_LOC)

    if not args.fworker_file:
        args.fworker_file = _rlaunch_config_path(
            args.config_dir, "my_fworker.yaml", FWORKER_LOC)

    args.loglvl = "CRITICAL" if args.silencer else args.loglvl

    if args.command == "singleshot" and args.offline:
        # Offline singleshot runs without a LaunchPad.
        launchpad = None
    elif args.launchpad_file:
        launchpad = LaunchPad.from_file(args.launchpad_file)
    else:
        launchpad = LaunchPad(strm_lvl=args.loglvl)

    if args.fworker_file:
        fworker = FWorker.from_file(args.fworker_file)
    else:
        fworker = FWorker()

    # prime addr lookups
    # The stream level of this logger is fixed at INFO, independent of
    # --loglvl / --silencer.
    _log = get_fw_logger("rlaunch", stream_level="INFO")
    _log.info("Hostname/IP lookup (this will take a few seconds)")
    get_my_host()
    get_my_ip()

    if args.command == "rapidfire":
        rapidfire(
            launchpad,
            fworker=fworker,
            m_dir=None,
            nlaunches=args.nlaunches,
            max_loops=args.max_loops,
            sleep_time=args.sleep,
            strm_lvl=args.loglvl,
            timeout=args.timeout,
            local_redirect=args.local_redirect,
        )
    elif args.command == "multi":
        total_node_list = None
        if args.nodefile:
            total_node_list = _rlaunch_node_list(args.nodefile)
        launch_multiprocess(
            launchpad,
            fworker,
            args.loglvl,
            args.nlaunches,
            args.num_jobs,
            args.sleep,
            total_node_list,
            args.ppn,
            timeout=args.timeout,
            exclude_current_node=args.exclude_current_node,
            local_redirect=args.local_redirect,
        )
    else:
        # Only "singleshot" remains once the missing-command case is rejected.
        launch_rocket(launchpad,
                      fworker,
                      args.fw_id,
                      args.loglvl,
                      pdb_on_exception=args.pdb)