Beispiel #1
0
def submit(args):
    """Submit function of local jobs.

    Registers a thread-based submit callback with ``tracker.submit``.

    Parameters
    ----------
    args: parsed launcher options; this function reads ``command`` (a list of
        strings), ``local_num_attempt``, ``num_workers`` and ``num_servers``.
    """
    def mthread_submit(nworker, nserver, envs):
        """
        Customized submit callback that starts ``nworker + nserver`` threads,
        each running ``exec_cmd`` with the user command.

        Parameters
        ----------
        nworker: number of worker processes to start up
        nserver: number of server nodes to start up
        envs: environment variables to be added to the starting programs
        """
        procs = {}
        for i in range(nworker + nserver):
            # Task ids below nworker are workers; the remaining ids are servers.
            role = 'worker' if i < nworker else 'server'
            procs[i] = Thread(target=exec_cmd,
                              args=(args.command, args.local_num_attempt, role,
                                    i, envs))
            # Use the ``daemon`` attribute: ``Thread.setDaemon`` is deprecated.
            procs[i].daemon = True
            procs[i].start()

    # call submit with the node counts, the joined command line and the
    # submit callback defined above
    tracker.submit(args.num_workers,
                   args.num_servers,
                   fun_submit=mthread_submit,
                   pscmd=(' '.join(args.command)))
Beispiel #2
0
def submit(args):
    """Submit a job whose workers are reached through the hosts in a host file.

    Reads ``args.host_file`` (one ``ip:port`` entry per line, blank lines are
    ignored) and calls ``tracker.submit`` with a callback that starts one
    daemon thread per worker.

    Parameters
    ----------
    args: parsed launcher options; this function reads ``host_file``,
        ``num_workers``, ``num_servers`` and ``host_ip``.

    Raises
    ------
    AssertionError: if no host file is given or it contains no host entries.
    """
    assert args.host_file is not None
    with open(args.host_file) as f:
        hosts = [line.strip() for line in f if line.strip()]
    # Check the filtered list, not the raw lines: a file holding only blank
    # lines would otherwise pass here and fail later with an IndexError.
    assert len(hosts) > 0

    # When submit is called, the workers are assumed to have run 'grpc_worker.py'.
    def gRPC_submit(nworker, nserver, pass_envs):
        """Start one thread per worker that runs ``run(worker, dmlc_vars)``.

        ``nserver`` is accepted for the callback signature but not used here.
        """
        for i in range(nworker):
            # NOTE: raises IndexError when nworker exceeds the number of
            # entries in the host file.
            worker = hosts[i]
            print('connecting to worker | ip:port | -', worker)

            # Package dmlc variables into protobuf
            dmlc_vars = fxgb_pb2.DMLC_VARS(
                DMLC_TRACKER_URI=pass_envs['DMLC_TRACKER_URI'],
                DMLC_TRACKER_PORT=pass_envs['DMLC_TRACKER_PORT'],
                DMLC_ROLE='worker',
                # Host part of the entry: everything before the first ':'.
                # str.index raises ValueError if the entry has no ':'.
                DMLC_NODE_HOST=worker[:worker.index(':')],
                DMLC_NUM_WORKER=pass_envs['DMLC_NUM_WORKER'],
                DMLC_NUM_SERVER=pass_envs['DMLC_NUM_SERVER'],
            )

            # spawn thread to call RPC
            thread = Thread(target=run, args=(worker, dmlc_vars))
            # Use the ``daemon`` attribute: ``Thread.setDaemon`` is deprecated.
            thread.daemon = True
            thread.start()

    tracker.submit(
        args.num_workers,
        args.num_servers,
        fun_submit=gRPC_submit,
        hostIP=args.host_ip,
    )
Beispiel #3
0
def submit(args):
    """Submit a job by launching the command over ssh on the listed hosts.

    Reads ``args.host_file`` (one ``ip`` or ``ip:port`` entry per line, blank
    lines are ignored) and calls ``tracker.submit`` with an ssh-based submit
    callback.

    Parameters
    ----------
    args: parsed launcher options; this function reads ``host_file``,
        ``sync_dst_dir``, ``command``, ``num_workers``, ``num_servers`` and
        ``host_ip``.

    Raises
    ------
    AssertionError: if no host file is given or it contains no host entries.
    """
    assert args.host_file is not None
    with open(args.host_file) as f:
        lines = f.readlines()
    hosts = []
    for line in lines:
        entry = line.strip()
        if not entry:
            continue
        # parse addresses of the form ip:port; the port defaults to "22"
        node, sep, port = entry.partition(':')
        # hosts now contain the pair ip, port
        hosts.append((node, port if sep else '22'))
    # Check the parsed list, not the raw lines: a file holding only blank
    # lines would otherwise reach Pool(processes=0) or `i % 0` below.
    assert len(hosts) > 0

    def ssh_submit(nworker, nserver, pass_envs):
        """
        Customized submit script: optionally syncs the working directory to
        every host, then starts one ssh command per server/worker in its own
        daemon thread. Mutates ``pass_envs`` while building each command.
        """

        # thread func to run the job
        def run(prog):
            subprocess.check_call(prog, shell=True)

        # sync programs if necessary
        local_dir = os.getcwd() + '/'
        working_dir = local_dir
        if args.sync_dst_dir is not None and args.sync_dst_dir != 'None':
            working_dir = args.sync_dst_dir
            pool = Pool(processes=len(hosts))
            for host in hosts:
                # NOTE: the async results are never inspected, so a failing
                # sync_dir call is not reported here.
                pool.apply_async(sync_dir, args=(local_dir, host, working_dir))
            pool.close()
            pool.join()

        # launch jobs
        for i in range(nworker + nserver):
            # The first nserver task ids are servers, the rest are workers.
            pass_envs['DMLC_ROLE'] = 'server' if i < nserver else 'worker'
            # Hosts are assigned round-robin when there are more tasks than hosts.
            (node, port) = hosts[i % len(hosts)]
            pass_envs['DMLC_NODE_HOST'] = node
            # NOTE(review): hard-coded remote PYTHONPATH -- confirm it is intended.
            pass_envs['PYTHONPATH'] = '/root/singa/build/python/'
            prog = get_env(pass_envs) + ' cd ' + working_dir + '; ' + (
                ' '.join(args.command))
            # The remote command is wrapped in single quotes without escaping.
            prog = 'ssh -o StrictHostKeyChecking=no ' + node + ' -p ' + port + ' \'' + prog + '\''
            thread = Thread(target=run, args=(prog, ))
            # Use the ``daemon`` attribute: ``Thread.setDaemon`` is deprecated.
            thread.daemon = True
            thread.start()

        # Kept from the original: the callback returns itself.
        return ssh_submit

    tracker.submit(args.num_workers,
                   args.num_servers,
                   fun_submit=ssh_submit,
                   pscmd=(' '.join(args.command)),
                   hostIP=args.host_ip)
Beispiel #4
0
 def run(self):
     """Call ``tracker.config_logger``, then submit the job to the tracker.

     The value returned by ``self.submit()`` is passed as ``fun_submit`` and
     ``self.cmd`` as ``pscmd``. When ``self.args.localhost`` is truthy,
     ``hostIP='127.0.0.1'`` is passed as well.
     """
     tracker.config_logger(self.args)
     submit_kwargs = dict(fun_submit=self.submit(), pscmd=self.cmd)
     if self.args.localhost:
         submit_kwargs.update(hostIP='127.0.0.1')
     tracker.submit(self.args.num_workers, self.args.num_servers,
                    **submit_kwargs)
Beispiel #5
0
 def run(self):
     """Call ``tracker.config_logger``, then submit the job to the tracker.

     ``fun_submit`` is the value returned by ``self.submit()`` and ``pscmd``
     is ``self.cmd``. ``hostIP='127.0.0.1'`` is added only when
     ``self.args.localhost`` is truthy.
     """
     tracker.config_logger(self.args)
     fun_submit = self.submit()
     pscmd = self.cmd
     # Empty mapping when no localhost override is requested.
     host_override = {'hostIP': '127.0.0.1'} if self.args.localhost else {}
     tracker.submit(self.args.num_workers,
                    self.args.num_servers,
                    fun_submit=fun_submit,
                    pscmd=pscmd,
                    **host_override)
Beispiel #6
0
    for k, v in pass_env.items():
        env[k] = str(v)

    env['DMLC_CPU_VCORES'] = str(args.vcores)
    env['DMLC_MEMORY_MB'] = str(args.memory_mb)
    env['DMLC_NUM_WORKER'] = str(args.nworker)
    env['DMLC_NUM_SERVER'] = str(args.server_nodes)
    env['DMLC_HDFS_OPTS'] = str(args.libhdfs_opts)

    if args.files != None:
        for flst in args.files:
            for f in flst.split('#'):
                fset.add(f)
    for f in fset:
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
    cmd += ' -queue %s ' % args.queue
    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command))
    def run():
        if args.verbose != 0:
            print cmd    
        subprocess.check_call(cmd, shell = True, env = env)
    thread = Thread(target = run, args=())
    thread.setDaemon(True)
    thread.start()    


# Submit the job to the tracker with yarn_submit as the submit callback; the
# user command is joined into a single string for pscmd.
tracker.submit(args.nworker, args.server_nodes, fun_submit = yarn_submit,
               verbose = args.verbose, pscmd= (' '.join(args.command)))
Beispiel #7
0
 def run(self):
     """Call ``tracker.config_logger``, then submit the job to the tracker.

     The worker and server counts come from ``self.args``; ``fun_submit`` is
     the value returned by ``self.submit()`` and ``pscmd`` is ``self.cmd``.
     """
     tracker.config_logger(self.args)
     worker_count = self.args.num_workers
     server_count = self.args.num_servers
     tracker.submit(worker_count, server_count,
                    fun_submit=self.submit(), pscmd=self.cmd)
Beispiel #8
0
            for f in flst.split('#'):
                fset.add(f)
    for f in fset:
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
    cmd += ' -queue %s ' % args.queue
    if args.app_classpath:
        cmd += ' -appcp %s ' % args.app_classpath
    for entry in args.env:
        cmd += ' -env %s ' % entry
    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command))

    def run():
        logging.debug(cmd)
        subprocess.check_call(cmd, shell=True, env=env)

    if unknown:
        cmd += ' ' + ' '.join(unknown)
    thread = Thread(target=run, args=())
    thread.setDaemon(True)
    thread.start()


# Call tracker.config_logger with the parsed arguments, then submit the job
# with yarn_submit as the submit callback. pscmd is YARN_BOOT_PY followed by
# the user command and any unrecognized arguments, joined by spaces.
tracker.config_logger(args)
tracker.submit(args.nworker,
               args.server_nodes,
               fun_submit=yarn_submit,
               pscmd=(' '.join([YARN_BOOT_PY] + args.command) + ' ' +
                      ' '.join(unknown)))
Beispiel #9
0
    env = os.environ.copy()
    for k, v in pass_env.items():
        env[k] = str(v)

    env['DMLC_CPU_VCORES'] = str(args.vcores)
    env['DMLC_MEMORY_MB'] = str(args.memory_mb)
    env['DMLC_NUM_WORKER'] = str(args.nworker)
    env['DMLC_NUM_SERVER'] = str(args.server_nodes)
    env['DMLC_HDFS_OPTS'] = str(args.libhdfs_opts)

    if args.files != None:
        for flst in args.files:
            for f in flst.split('#'):
                fset.add(f)
    for f in fset:
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
    cmd += ' -queue %s ' % args.queue
    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command))
    def run():
        logging.debug(cmd)
        subprocess.check_call(cmd, shell = True, env = env)
    thread = Thread(target = run, args=())
    thread.setDaemon(True)
    thread.start()    

# Call tracker.config_logger with the parsed arguments, then submit the job
# with yarn_submit as the submit callback. pscmd is YARN_BOOT_PY followed by
# the user command, joined by spaces.
tracker.config_logger(args)
tracker.submit(args.nworker, args.server_nodes, fun_submit = yarn_submit,
               pscmd= (' '.join([YARN_BOOT_PY] + args.command)))
Beispiel #10
0
    env['DMLC_NUM_WORKER'] = str(args.nworker)
    env['DMLC_NUM_SERVER'] = str(args.server_nodes)
    env['DMLC_HDFS_OPTS'] = str(args.libhdfs_opts)

    if args.files != None:
        for flst in args.files:
            for f in flst.split('#'):
                fset.add(f)
    for f in fset:
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
    cmd += ' -queue %s ' % args.queue
    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command))

    def run():
        if args.verbose != 0:
            print cmd
        subprocess.check_call(cmd, shell=True, env=env)

    thread = Thread(target=run, args=())
    thread.setDaemon(True)
    thread.start()


# Submit the job to the tracker with yarn_submit as the submit callback; the
# user command is joined into a single string for pscmd.
tracker.submit(args.nworker,
               args.server_nodes,
               fun_submit=yarn_submit,
               verbose=args.verbose,
               pscmd=(' '.join(args.command)))
Beispiel #11
0
 def run(self):
     """Call ``tracker.config_logger``, then submit the job to the tracker.

     Passes ``self.args.num_workers`` and ``self.args.num_servers``
     positionally, the value returned by ``self.submit()`` as ``fun_submit``
     and ``self.cmd`` as ``pscmd``.
     """
     tracker.config_logger(self.args)
     node_counts = (self.args.num_workers, self.args.num_servers)
     tracker.submit(*node_counts, fun_submit=self.submit(), pscmd=self.cmd)
Beispiel #12
0
    """
    def run(prog):
        """"""
        subprocess.check_call(prog, shell = True)

    cmd = ''
    if args.hostfile is not None:
        cmd = '--hostfile %s' % (args.hostfile)
    cmd += ' ' + ' '.join(args.command) + ' ' + ' '.join(unknown)

    # start servers
    if nserver > 0:
        pass_envs['DMLC_ROLE'] = 'server'
        prog = 'mpirun -n %d %s %s' % (nserver, get_mpi_env(pass_envs), cmd)
        thread = Thread(target = run, args=(prog,))
        thread.setDaemon(True)
        thread.start()

    if nworker > 0:
        pass_envs['DMLC_ROLE'] = 'worker'
        prog = 'mpirun -n %d %s %s' % (nworker, get_mpi_env(pass_envs), cmd)
        thread = Thread(target = run, args=(prog,))
        thread.setDaemon(True)
        thread.start()

# Call tracker.config_logger with the parsed arguments.
tracker.config_logger(args)

# Submit the job with mpi_submit as the submit callback; pscmd is the user
# command followed by any unrecognized arguments, joined by spaces.
tracker.submit(args.nworker, args.server_nodes, fun_submit = mpi_submit,
               hostIP=args.host_ip,
               pscmd=(' '.join(args.command) + ' ' + ' '.join(unknown)))
Beispiel #13
0
 def run(self):
     """Call ``utils.config_logger``, then submit the job to the tracker.

     Passes ``self.num_workers`` positionally, the value returned by
     ``self.submit()`` as ``fun_submit`` and ``self.cmd`` as ``pscmd``.
     """
     utils.config_logger(self.args)
     worker_count = self.num_workers
     submit_fn = self.submit()
     tracker.submit(worker_count, fun_submit=submit_fn, pscmd=self.cmd)
Beispiel #14
0
        subprocess.check_call(prog, shell = True)

    cmd = ''
    if args.hostfile is not None:
        cmd = '--hostfile %s' % (args.hostfile)
    cmd += ' ' + ' '.join(args.command) + ' ' + ' '.join(unknown)

    pass_envs['SEMI_SYNC_MODE'] = 1
    pass_envs['SYNC_MODE'] = 1
    pass_envs['LEARNING_RATE'] = 0.01
    # start servers
    if nserver > 0:
        pass_envs['DMLC_ROLE'] = 'server'
        prog = 'mpirun -n %d %s %s' % (nserver, get_mpi_env(pass_envs), cmd)
        thread = Thread(target = run, args=(prog,))
        thread.setDaemon(True)
        thread.start()

    if nworker > 0:
        pass_envs['DMLC_ROLE'] = 'worker'
        prog = 'mpirun -n %d %s %s' % (nworker, get_mpi_env(pass_envs), cmd)
        thread = Thread(target = run, args=(prog,))
        thread.setDaemon(True)
        thread.start()

# Call tracker.config_logger with the parsed arguments.
tracker.config_logger(args)

# Submit the job with mpi_submit as the submit callback; pscmd is the user
# command followed by any unrecognized arguments, joined by spaces.
tracker.submit(args.nworker, args.server_nodes, fun_submit = mpi_submit,
               hostIP=args.host_ip,
               pscmd=(' '.join(args.command) + ' ' + ' '.join(unknown)))
Beispiel #15
0
    sargs = " ".join(args.command)
    if args.hostfile is None:
        cmd = "mpirun -n %d" % (nworker + nserver)
    else:
        cmd = "mpirun -n %d --hostfile %s " % (nworker + nserver, args.hostfile)

    for k, v in pass_envs.items():
        cmd += " -env %s %s" % (k, v)
        # cmd += ' -x %s' % k
    cmd += " "
    cmd += " ".join(args.command)
    cmd += " "
    cmd += " ".join(unknown)

    # print '%s' % cmd
    # known issue: results do not show in emacs eshell
    def run():
        subprocess.check_call(cmd, shell=True, env=env)

    thread = Thread(target=run, args=())
    thread.setDaemon(True)
    thread.start()


# Call tracker.config_logger with the parsed arguments.
tracker.config_logger(args)
# Call submit with the worker and server counts, the submit function and the
# command line (user command plus unrecognized arguments) as pscmd.
tracker.submit(
    args.nworker, args.server_nodes, fun_submit=mpi_submit, pscmd=(" ".join(args.command) + " " + " ".join(unknown))
)
Beispiel #16
0
                raise Exception('Get nonzero return code=%d' % ret)

#
#  Note: this submit script is only used for demo purposes.
#  It is a submission script using Python multi-threading.
#
def mthread_submit(nworker, nserver, envs):
    """
      Customized submit script that starts nworker + nserver threads, each
      running exec_cmd with the user command (args.command + unknown).
      Note this can be a lambda function containing additional parameters in input.

      Parameters
         nworker number of worker processes to start up
         nserver number of server nodes to start up
         envs environment variables to be added to the starting programs
    """
    procs = {}
    for i in range(nworker + nserver):
        # Task ids below nworker are workers; the remaining ids are servers.
        role = 'worker' if i < nworker else 'server'
        procs[i] = Thread(target = exec_cmd, args = (args.command + unknown, role, i, envs))
        # Use the ``daemon`` attribute: ``Thread.setDaemon`` is deprecated.
        procs[i].daemon = True
        procs[i].start()


# Call tracker.config_logger with the parsed arguments.
tracker.config_logger(args)
# Call submit with the worker and server counts, the submit function and the
# command line (user command plus unrecognized arguments) as pscmd.
tracker.submit(args.nworker, args.server_nodes, fun_submit = mthread_submit,
               pscmd= (' '.join(args.command) + ' ' + ' '.join(unknown)))
Beispiel #17
0
    env = os.environ.copy()
    for k, v in pass_env.items():
        env[k] = str(v)

    env['DMLC_CPU_VCORES'] = str(args.vcores)
    env['DMLC_MEMORY_MB'] = str(args.memory_mb)
    env['DMLC_NUM_WORKER'] = str(args.nworker)
    env['DMLC_NUM_SERVER'] = str(args.server_nodes)
    env['DMLC_HDFS_OPTS'] = str(args.libhdfs_opts)

    if args.files != None:
        for flst in args.files:
            for f in flst.split('#'):
                fset.add(f)
    for f in fset:
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
    cmd += ' -queue %s ' % args.queue
    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command))
    def run():
        logging.debug(cmd)
        subprocess.check_call(cmd, shell = True, env = env)
    thread = Thread(target = run, args=())
    thread.setDaemon(True)
    thread.start()    

# Call tracker.config_logger with the parsed arguments, then submit the job
# with yarn_submit as the submit callback. pscmd is the relative path
# '../yarn/run_hdfs_prog.py' followed by the user command, joined by spaces.
tracker.config_logger(args)
tracker.submit(args.nworker, args.server_nodes, fun_submit = yarn_submit,
               pscmd= (' '.join(['../yarn/run_hdfs_prog.py'] + args.command)))