Esempio n. 1
0
def main_basic(job, task):
    cfg = {"worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'worker', 0)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    host1 = Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)
    with tf.variable_scope('scope_test'):
        t0 = TensorVariable(VariableInfo(None, [1], tf.float32),
                            hmi.update(name='v0'))
        aop = tf.assign(t0.data, tf.constant([3.]))
        t1 = TensorNumpyNDArray([1.0], None,
                                hmi.update(name='v1'))
        t1c = t1.copy_to(host1)
        t1p = Tensor(t1c.data + 1, t1c.data_info,
                     t1c.graph_info.update(name='t1_plus'))
    make_distribute_session()
    if task == 0:
        ptensor(t1)
        Server.join()
    if task == 1:
        ptensor(t1)
        ptensor(t1c)
        ptensor(t1p)
        ptensor(t0)
        ThisSession.run(aop)
        ptensor(t0)
Esempio n. 2
0
def main_summation(job, task):
    cfg = {"master": ["localhost:2221"],
           "worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'master', 0)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    host0 = Host('worker', 0)
    host1 = Host('worker', 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)
    tm = TensorNumpyNDArray([1.0], None,
                            DistributeGraphInfo.from_graph_info(hmi, name='t0'))
    t0c = tm.copy_to(host0)
    t1c = tm.copy_to(host1)
    m_sum = Summation(name='summation', graph_info=DistributeGraphInfo(
        'summation', None, None, host0))([t0c, t1c])
    make_distribute_session()
    if task == 0:
        ptensor(tm)
        Server.join()
    if task == 1:
        ptensor(tm, 'tm')
        ptensor(t0c, 't0c')
        ptensor(t1c, 't1c')
        ptensor(m_sum)
Esempio n. 3
0
def main_sync(job, task):
    cfg = {"master": ["localhost:2221"],
           "worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'master', 0)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    host0 = Host('worker', 0)
    host1 = Host('worker', 1)

    def sleep(ips):
        for i in range(5, 0, -1):
            time.sleep(1)
        return 0
    # hmi = DistributeGraphInfo(None, None, None, master_host)
    tm = TensorNumpyNDArray([1.0], None,
                            DistributeGraphInfo.from_graph_info(hmi, name='t0'))
    tcs = []
    # t0c = tm.copy_to(host0)
    # t1c = tm.copy_to(host1)
    # m_sum = Summation(name='summation', graph_info=DistributeGraphInfo(
    #     'summation', None, None, host0))([t0c, t1c])
    ops = tf.FIFOQueue(2, tf.bool, shapes=[],
                       name='barrier', shared_name='barrier')
    # ptensor(tm)
    if ThisHost.host() == master_host:
        join = ops.dequeue_many(2)
    else:
        signal = ops.enqueue(False)
    no = tf.constant('tmp')
    ops = [tf.Print(no, data=[no], message='Done_{}'.format(i), name='p_{}'.format(i))
           for i in range(3)]
    # ops.enqueue()
    make_distribute_session()
    if ThisHost.host() == master_host:
        ThisSession.run(join)
        print('Joined.')
        time.sleep(2)
        ThisSession.run(ops[0])
        # Server.join()
    elif ThisHost.host() == host0:
        ThisSession.run(signal)
        ThisSession.run(ops[1])
    elif ThisHost.host() == host1:
        time.sleep(3)
        ThisSession.run(signal)
        ThisSession.run(ops[2])
Esempio n. 4
0
def main_sync_2(job, task):
    cfg = {"master": ["localhost:2221"],
           "worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'master', 0)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    hosts = [Host('worker', i) for i in range(2)]
    hmi = DistributeGraphInfo(None, None, None, master_host)
    tm = TensorNumpyNDArray([0.0], None,
                            DistributeGraphInfo.from_graph_info(hmi, name='tm'))
    t_local_var = []
    t_local_copied = []
    for h in hosts:
        ta, tv = tm.copy_to(h, True)
        t_local_copied.append(ta)
        t_local_var.append(tv)
    t_local_plus = [TensorTest.from_(t) for t in t_local_copied]
    for i in range(1):
        t_local_plus = [t.add_one() for t in t_local_plus]
    t_write_back = []
    for i in range(len(hosts)):
        t_write_back.append(t_local_var[i].assign(t_local_plus[i]))
    t_global_pluses = [t.copy_to(master_host) for t in t_local_var]
    sm = Summation(name='summation', graph_info=hmi.update(name='summatin'))
    t_res = sm(t_global_pluses)
    br = Barrier('barrier', hosts)
    # ops = tf.FIFOQueue(2, tf.bool, shapes=[],
                    #    name='barrier', shared_name='barrier')
    # join = ops.dequeue_many(2)
    # signal = ops.enqueue(False)
    make_distribute_session()
    if ThisHost.host() == master_host:
        # ThisSession.run(join)
        # ThisSession.run(br)
        br.run()
        ptensor(t_res)
    else:
        time.sleep(5)
        ptensor(t_local_plus[task])
        ptensor(t_write_back[task])
        # ThisSession.run(signal)
        # ThisSession.run(br)
        br.run()
    Server.join()
Esempio n. 5
0
def main(job, task):
    tf.logging.set_verbosity(0)
    cfg = {"worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'worker', 0)
    # # if task == 1:
    #     # time.sleep(10)
    # with tf.device(Master.master_host().device_prefix()):
    #     with tf.variable_scope('test'):
    #         t1 = tf.get_variable('var', [], tf.float32)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    host2 = Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)
    with tf.variable_scope('scope_test'):
        t0 = TensorVariable(VariableInfo(None, [1], tf.float32),
                            DistributeGraphInfo.from_(hmi, name='t1'))
        aop = tf.assign(t0.data, tf.constant([3.]))
        t1 = TensorNumpyNDArray([1.0], None,
                                DistributeGraphInfo.from_(hmi, name='t1_copy'))
        t1c = t1.copy_to(host2)
        t1p = Tensor(t1c.data + 1, t1c.data_info, DistributeGraphInfo.from_(t1c.graph_info, name='t1_plus'))
        # t2 = t0.copy_to(host2)
    make_distribute_session()
    if task == 0:
        # ThisSession.run(tf.global_variables_initializer())
        ptensor(t1)
        Server.join()
    if task == 1:
        ptensor(t1)
        ptensor(t1c)
        ptensor(t1p)
        # print(t2.run())
        # print(t2.data)
        # print(t0.run())
        # print(t0)
        ptensor(t0)
        print(ThisSession.run(aop))
        ptensor(t0)
Esempio n. 6
0
def main_add_one(job, task):
    cfg = {"worker": ["localhost:2222",
                      "localhost:2223"]}
    make_distribute_host(cfg, job, task, None, 'worker', 0)
    master_host = Master.master_host()
    this_host = ThisHost.host()
    host1 = Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)
    with tf.variable_scope('scope_test'):
        t0 = TensorNumpyNDArray([1.0], None,
                                hmi.update(name='v0'))
        t1 = TensorTest.from_(t0)
        t2 = t1.add_one()
    make_distribute_session()
    if task == 0:
        ptensor(t0)
        ptensor(t1)
        ptensor(t2)
        Server.join()
    if task == 1:
        ptensor(t0)
        ptensor(t1)
        ptensor(t2)
Esempio n. 7
0
    def __init__(self, job, task_index, task_info, distribute_configs):
        super().__init__(distribute_configs)
        self.job = job
        self.task_index = task_index
        self.task_info = task_info
        # initialize the cluster infomation
        self.cluster_init(job, task_index)
        # parse the task informations
        self.parse_task()

        self.pre_works()
        # do pre works needed before create the maps

        # create the master and worker graphs
        self.make_graphs()

        # binding static local data
        self.bind_local_data()

        # set the steps
        self.make_steps()
        # create the distribute session.
        make_distribute_session()