def main_basic(job, task):
    """Demo: variable, constant tensor and cross-host copy on a two-worker cluster.

    Builds a cluster spec with two localhost workers, creates a variable and
    a numpy-backed tensor described by the master host's graph info, copies
    the numpy tensor to ``Host(job, 1)`` and adds one to the copy. After the
    distributed session is created, the branch taken depends on ``task``.

    Args:
        job: Job name forwarded to ``make_distribute_host`` and used to
            build the target ``Host`` for the copy.
        task: Task index of this process; ``0`` prints ``t1`` and joins the
            server, ``1`` prints every tensor and runs the assign op.
    """
    cluster = {"worker": ["localhost:2222", "localhost:2223"]}
    make_distribute_host(cluster, job, task, None, 'worker', 0)
    master_host = Master.master_host()
    # Original bound this to an unused local; the call itself is kept.
    ThisHost.host()
    host1 = Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)

    # NOTE(review): the extent of this scope was reconstructed from
    # whitespace-collapsed source; all tensor construction is assumed inside.
    with tf.variable_scope('scope_test'):
        variable_info = VariableInfo(None, [1], tf.float32)
        t0 = TensorVariable(variable_info, hmi.update(name='v0'))
        aop = tf.assign(t0.data, tf.constant([3.]))
        t1 = TensorNumpyNDArray([1.0], None, hmi.update(name='v1'))
        t1c = t1.copy_to(host1)
        t1p = Tensor(t1c.data + 1,
                     t1c.data_info,
                     t1c.graph_info.update(name='t1_plus'))

    make_distribute_session()

    if task == 0:
        ptensor(t1)
        Server.join()
    if task == 1:
        for tensor in (t1, t1c, t1p, t0):
            ptensor(tensor)
        ThisSession.run(aop)
        # Print t0 a second time, after the assign op has been run.
        ptensor(t0)
def main_summation(job, task):
    """Demo: copy one tensor to two workers and sum the copies.

    Declares a cluster with one master and two localhost workers, creates a
    numpy-backed tensor using the master host's graph info, copies it to
    ``Host('worker', 0)`` and ``Host('worker', 1)``, and applies a
    ``Summation`` (graph info bound to worker 0) to the two copies.

    Args:
        job: Job name forwarded to ``make_distribute_host``.
        task: Task index of this process; ``0`` prints the source tensor and
            joins the server, ``1`` prints the source, both copies and the
            sum.
    """
    cluster = {
        "master": ["localhost:2221"],
        "worker": ["localhost:2222", "localhost:2223"],
    }
    make_distribute_host(cluster, job, task, None, 'master', 0)
    master_host = Master.master_host()
    # Original bound this to an unused local; the call itself is kept.
    ThisHost.host()
    host0 = Host('worker', 0)
    host1 = Host('worker', 1)

    hmi = DistributeGraphInfo(None, None, None, master_host)
    source_info = DistributeGraphInfo.from_graph_info(hmi, name='t0')
    tm = TensorNumpyNDArray([1.0], None, source_info)
    t0c = tm.copy_to(host0)
    t1c = tm.copy_to(host1)

    summation_info = DistributeGraphInfo('summation', None, None, host0)
    summation = Summation(name='summation', graph_info=summation_info)
    m_sum = summation([t0c, t1c])

    make_distribute_session()

    if task == 0:
        ptensor(tm)
        Server.join()
    if task == 1:
        for tensor, label in ((tm, 'tm'), (t0c, 't0c'), (t1c, 't1c')):
            ptensor(tensor, label)
        ptensor(m_sum)
def sino_range(self, task_index=None):
    """Return the configured sinogram range for a task, or ``None``.

    Args:
        task_index: Index of the task to look up. When ``None`` it is taken
            from ``ThisHost.host().task_index``.

    Returns:
        ``None`` when ``self._sino_range`` is ``None``; otherwise the result
        of ``self._maybe_broadcast_value(self._sino_range, task_index)``.
    """
    if task_index is None:
        task_index = ThisHost.host().task_index
    configured = self._sino_range
    if configured is None:
        return None
    return self._maybe_broadcast_value(configured, task_index)
def main_sync(job, task):
    """Demo: synchronise a master with two workers through a shared FIFO queue.

    A ``tf.FIFOQueue`` with ``shared_name='barrier'`` is used as a barrier:
    each worker enqueues one element, and the master dequeues two elements
    before continuing. Each host then runs its own ``tf.Print`` op.

    Fix over the previous version: ``hmi`` was only assigned inside a
    comment but was still used to build ``tm``, so the function raised
    ``NameError`` before any graph was built. The definition is restored.
    The queue and the list of print ops also no longer share one variable
    name, and the never-called ``sleep`` helper and unused ``tcs`` list are
    removed.

    Args:
        job: Job name forwarded to ``make_distribute_host``.
        task: Task index forwarded to ``make_distribute_host``.
    """
    cfg = {
        "master": ["localhost:2221"],
        "worker": ["localhost:2222", "localhost:2223"],
    }
    make_distribute_host(cfg, job, task, None, 'master', 0)
    master_host = Master.master_host()
    host0 = Host('worker', 0)
    host1 = Host('worker', 1)

    # `hmi` must exist before `tm` is built (it was commented out before).
    hmi = DistributeGraphInfo(None, None, None, master_host)
    # `tm` is not used below; it is kept so the constructed graph matches
    # what the original code intended to build.
    tm = TensorNumpyNDArray([1.0], None,
                            DistributeGraphInfo.from_graph_info(hmi, name='t0'))

    barrier_queue = tf.FIFOQueue(2, tf.bool, shapes=[],
                                 name='barrier', shared_name='barrier')
    if ThisHost.host() == master_host:
        # Master blocks until both workers have signalled.
        join = barrier_queue.dequeue_many(2)
    else:
        signal = barrier_queue.enqueue(False)

    no = tf.constant('tmp')
    done_ops = [
        tf.Print(no, data=[no],
                 message='Done_{}'.format(i), name='p_{}'.format(i))
        for i in range(3)
    ]

    make_distribute_session()

    if ThisHost.host() == master_host:
        ThisSession.run(join)
        print('Joined.')
        time.sleep(2)
        ThisSession.run(done_ops[0])
    elif ThisHost.host() == host0:
        ThisSession.run(signal)
        ThisSession.run(done_ops[1])
    elif ThisHost.host() == host1:
        # Delay the second worker so the master visibly waits on the barrier.
        time.sleep(3)
        ThisSession.run(signal)
        ThisSession.run(done_ops[2])
def main_sync_2(job, task):
    """Demo: per-worker add-one, write-back, barrier, then sum on the master.

    A master tensor is copied to each of two worker hosts with
    ``copy_to(host, True)``, which is unpacked here as a pair (copied
    tensor, local variable). Each copy is wrapped in ``TensorTest`` and
    incremented once, and the result is assigned back to the local
    variable. The local variables are copied to the master host and summed.
    A ``Barrier`` over the worker hosts orders the master's read after the
    workers' write-back.

    Args:
        job: Job name forwarded to ``make_distribute_host``.
        task: Task index of this process; on non-master hosts it also
            selects which per-worker tensors are printed.
    """
    cluster = {
        "master": ["localhost:2221"],
        "worker": ["localhost:2222", "localhost:2223"],
    }
    make_distribute_host(cluster, job, task, None, 'master', 0)
    master_host = Master.master_host()
    # Original bound this to an unused local; the call itself is kept.
    ThisHost.host()
    hosts = [Host('worker', i) for i in range(2)]

    hmi = DistributeGraphInfo(None, None, None, master_host)
    tm = TensorNumpyNDArray(
        [0.0], None, DistributeGraphInfo.from_graph_info(hmi, name='tm'))

    # One (copied tensor, local variable) pair per worker host.
    copied_pairs = [tm.copy_to(host, True) for host in hosts]
    t_local_copied = [copied for copied, _ in copied_pairs]
    t_local_var = [variable for _, variable in copied_pairs]

    t_local_plus = [TensorTest.from_(t) for t in t_local_copied]
    for _ in range(1):
        t_local_plus = [t.add_one() for t in t_local_plus]

    t_write_back = [
        variable.assign(plus)
        for variable, plus in zip(t_local_var, t_local_plus)
    ]
    t_global_pluses = [t.copy_to(master_host) for t in t_local_var]

    sm = Summation(name='summation', graph_info=hmi.update(name='summatin'))
    t_res = sm(t_global_pluses)
    br = Barrier('barrier', hosts)

    make_distribute_session()

    if ThisHost.host() == master_host:
        br.run()
        ptensor(t_res)
    else:
        time.sleep(5)
        ptensor(t_local_plus[task])
        ptensor(t_write_back[task])
        br.run()
    # NOTE(review): reconstructed from whitespace-collapsed source; the join
    # is assumed to run on every host, not only in the worker branch. Confirm.
    Server.join()
def _maybe_broadcast_value(self, value, task_index=None, valid_type=(list, tuple)): if task_index is None: task_index = ThisHost.host().task_index if isinstance(value, valid_type): return value[task_index] else: return value
def bind_local_sino(self, task_index=None):
    """Load this worker's sinogram data and hand it to its worker graph.

    Does nothing except log when running on the master.

    Args:
        task_index: Worker index. When ``None`` it is taken from
            ``ThisHost.host().task_index``.
    """
    if task_index is None:
        task_index = ThisHost.host().task_index

    if ThisHost.is_master():
        logger.info("On Master node, skip bind local sino.")
        return

    logger.info(
        "On Worker node, local data for worker {}.".format(task_index))
    graph = self.worker_graphs
    sino_data = self.load_local_sino(task_index)
    graph[task_index].init_sino(sino_data)
def lors_ranges(self, axis, task_index=None):
    """Return the LOR index range for ``axis`` assigned to a task.

    Explicit ranges in ``self._lors_ranges`` take precedence. Otherwise the
    range is derived from ``self._lors_steps`` as
    ``[task_index * step, (task_index + 1) * step]``.

    Args:
        axis: Key into ``self._lors_ranges`` / ``self._lors_steps``.
        task_index: Task to look up. When ``None`` it is taken from
            ``ThisHost.host().task_index``.

    Returns:
        The explicit range for the task, a two-element list computed from
        the step, or ``None`` when neither ranges nor steps are configured.
    """
    if task_index is None:
        task_index = ThisHost.host().task_index

    if self._lors_ranges is not None:
        return self._maybe_broadcast_value(self._lors_ranges[axis], task_index)

    if self._lors_steps is None:
        return None

    step = self._maybe_broadcast_value(self._lors_steps[axis], task_index)
    begin = task_index * step
    end = (task_index + 1) * step
    return [begin, end]
def bind_local_matrix(self, task_index=None):
    """Load this worker's matrix, convert it to COO, and bind it to its graph.

    Does nothing except log when running on the master.

    Args:
        task_index: Worker index. When ``None`` it is taken from
            ``ThisHost.host().task_index``.
    """
    if task_index is None:
        task_index = ThisHost.host().task_index

    if ThisHost.is_master():
        logger.info("On Master node, skip bind local matrix.")
        return

    logger.info(
        "On Worker node, local data for worker {}.".format(task_index))
    loaded = self.load_local_matrix(task_index)
    coo = sparse.coo_matrix(loaded)
    self.worker_graphs[task_index].init_matrix(coo)
def bind_local_data(self, data_info, task_index=None):
    """Load this worker's efficiency map and LORs and assign them to its graph.

    Does nothing except log when running on the master.

    Args:
        data_info: Passed through unchanged to ``self.load_local_data``.
        task_index: Worker index. When ``None`` it is taken from
            ``ThisHost.host().task_index``.
    """
    if task_index is None:
        task_index = ThisHost.host().task_index

    if ThisHost.is_master():
        logger.info("On Master node, skip bind local data.")
        return

    logger.info(
        "On Worker node, local data for worker {}.".format(task_index))
    efficiency_map, lors_data = self.load_local_data(data_info, task_index)
    self.worker_graphs[task_index].assign_efficiency_map(efficiency_map)
    self.worker_graphs[task_index].assign_lors(lors_data)
def bind_local_data(self):
    """Bind the static efficiency-map data (currently disabled).

    The method returns immediately, so it always yields ``None`` and has no
    side effects. The statements after the ``return`` are unreachable and
    are kept only as the disabled implementation.
    """
    # NOTE(review): unconditional return; everything below never executes.
    # Confirm whether disabling this binding is intentional.
    return

    effmap_path = self.work_directory + self.image_info.map_file
    task_index = ThisHost.host().task_index
    if ThisHost.is_master():
        logger.info("On Master node, skip bind local data.")
        return
    logger.info(
        "On Worker node, local data for worker {}.".format(task_index))
    emap = self.load_local_effmap(effmap_path)
    self.worker_graphs[task_index].init_efficiency_map(emap)
    self.bind_local_sino()
    self.bind_local_matrix()
def main(job, task):
    """Demo: variable, numpy tensor and cross-host copy on two workers.

    Sets TensorFlow logging verbosity to 0, declares a two-worker localhost
    cluster, and builds a variable, a numpy-backed tensor, a copy of that
    tensor on ``Host(job, 1)`` and the copy plus one. After the distributed
    session is created, the branch taken depends on ``task``.

    Args:
        job: Job name forwarded to ``make_distribute_host`` and used to
            build the target ``Host`` for the copy.
        task: Task index of this process; ``0`` prints ``t1`` and joins the
            server, ``1`` prints every tensor and prints the result of
            running the assign op.
    """
    tf.logging.set_verbosity(0)
    cluster = {"worker": ["localhost:2222", "localhost:2223"]}
    make_distribute_host(cluster, job, task, None, 'worker', 0)

    master_host = Master.master_host()
    # Original bound this to an unused local; the call itself is kept.
    ThisHost.host()
    host2 = Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)

    # NOTE(review): the extent of this scope was reconstructed from
    # whitespace-collapsed source; all tensor construction is assumed inside.
    with tf.variable_scope('scope_test'):
        variable_info = VariableInfo(None, [1], tf.float32)
        t0 = TensorVariable(variable_info,
                            DistributeGraphInfo.from_(hmi, name='t1'))
        aop = tf.assign(t0.data, tf.constant([3.]))
        t1 = TensorNumpyNDArray(
            [1.0], None, DistributeGraphInfo.from_(hmi, name='t1_copy'))
        t1c = t1.copy_to(host2)
        plus_info = DistributeGraphInfo.from_(t1c.graph_info, name='t1_plus')
        t1p = Tensor(t1c.data + 1, t1c.data_info, plus_info)

    make_distribute_session()

    if task == 0:
        ptensor(t1)
        Server.join()
    if task == 1:
        for tensor in (t1, t1c, t1p, t0):
            ptensor(tensor)
        assigned = ThisSession.run(aop)
        print(assigned)
        # Print t0 a second time, after the assign op has been run.
        ptensor(t0)
def run(self):
    """Run the INIT step once, then RECON and MERGE for each iteration.

    After each iteration, the worker result tensor is passed to
    ``run_and_print_if_not_master``, the master graph's ``'x'`` tensor is
    passed to ``run_and_print_if_is_master``, and the same tensor is passed
    to ``run_and_save_if_is_master`` with the file name
    ``<image name>_<iteration>.npy``.
    """
    steps = self.KEYS.STEPS
    step_done = 'STEP: {} done.'

    self.run_step_of_this_host(steps.INIT)
    logger.info(step_done.format(steps.INIT))

    nb_iterations = self.Reconinfo.nb_iterations
    image_name = self.image_info.name

    # NOTE(review): loop extent reconstructed from whitespace-collapsed
    # source; everything up to the save (which uses the index) is inside.
    for iteration in tqdm(range(nb_iterations), ascii=True):
        self.run_step_of_this_host(steps.RECON)
        logger.info(step_done.format(steps.RECON))
        self.run_step_of_this_host(steps.MERGE)
        logger.info(step_done.format(steps.MERGE))

        # NOTE(review): reads `.task` here while sibling code reads
        # `.task_index` -- confirm that Host exposes both.
        local_graph = self.worker_graphs[ThisHost.host().task]
        worker_result = local_graph.tensor(
            self.worker_graphs[0].KEYS.TENSOR.RESULT)
        self.run_and_print_if_not_master(worker_result)

        self.run_and_print_if_is_master(self.master_graph.tensor('x'))
        output_name = image_name + '_{}.npy'.format(iteration)
        self.run_and_save_if_is_master(
            self.master_graph.tensor('x'), output_name)

    logger.info('Recon {} steps done.'.format(nb_iterations))
def main_add_one(job, task):
    """Demo: wrap a numpy tensor in ``TensorTest`` and call ``add_one`` on it.

    Declares a two-worker localhost cluster and builds three tensors: the
    numpy-backed source, its ``TensorTest`` wrapper, and the result of
    ``add_one``. Both tasks print all three; task ``0`` then joins the
    server.

    Args:
        job: Job name forwarded to ``make_distribute_host`` and ``Host``.
        task: Task index of this process.
    """
    cluster = {"worker": ["localhost:2222", "localhost:2223"]}
    make_distribute_host(cluster, job, task, None, 'worker', 0)
    master_host = Master.master_host()
    # The original bound these two results to unused locals; the calls are
    # kept so construction order is unchanged.
    ThisHost.host()
    Host(job, 1)
    hmi = DistributeGraphInfo(None, None, None, master_host)

    # NOTE(review): the extent of this scope was reconstructed from
    # whitespace-collapsed source; all tensor construction is assumed inside.
    with tf.variable_scope('scope_test'):
        t0 = TensorNumpyNDArray([1.0], None, hmi.update(name='v0'))
        t1 = TensorTest.from_(t0)
        t2 = t1.add_one()

    make_distribute_session()

    tensors = (t0, t1, t2)
    if task == 0:
        for tensor in tensors:
            ptensor(tensor)
        Server.join()
    if task == 1:
        for tensor in tensors:
            ptensor(tensor)
def lors_steps(self, axis, task_index=None):
    """Return the configured LOR step entry for ``axis``.

    Args:
        axis: Key into ``self._lors_steps``.
        task_index: Resolved from ``ThisHost.host().task_index`` when
            ``None``, but not used to select the returned value.

    Returns:
        ``self._lors_steps[axis]`` as stored.
    """
    if task_index is None:
        # Resolved as in the sibling accessors; the value is not used below.
        task_index = ThisHost.host().task_index
    steps_by_axis = self._lors_steps
    return steps_by_axis[axis]
def matrix_steps(self, task_index=None):
    """Return the configured matrix steps.

    Args:
        task_index: Resolved from ``ThisHost.host().task_index`` when
            ``None``, but not used to select the returned value.

    Returns:
        ``self._matrix_steps`` as stored.
    """
    if task_index is None:
        # Resolved as in the sibling accessors; the value is not used below.
        task_index = ThisHost.host().task_index
    configured_steps = self._matrix_steps
    return configured_steps
def sino_steps(self, task_index=None):
    """Return the configured sinogram steps.

    Args:
        task_index: Resolved from ``ThisHost.host().task_index`` when
            ``None``, but not used to select the returned value.

    Returns:
        ``self._sino_steps`` as stored.
    """
    if task_index is None:
        # Resolved as in the sibling accessors; the value is not used below.
        task_index = ThisHost.host().task_index
    configured_steps = self._sino_steps
    return configured_steps