def __do_grad_average(self):
    how_much_nodes = GlobalSettings.get_default().node_count
    if self.__current_recv == how_much_nodes:
        # perform gradient averaging
        self.set_result(self.__global_weights / how_much_nodes)
        # reset the accumulated gradient and wait for the next batch
        self.__global_weights = np.asarray(0.0)
        self.__current_recv = 0
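The accumulate-then-average pattern above can be checked in isolation. The following is a minimal standalone sketch in plain numpy; the variable names are illustrative, not framework API:

import numpy as np

# one running sum per process, like __global_weights above
node_count = 4
acc = np.asarray(0.0)
grads = [np.ones(3) * i for i in range(node_count)]

for g in grads:                      # one submission per node
    acc = acc + g

avg = acc / node_count               # the same division __do_grad_average performs
assert np.allclose(avg, np.mean(grads, axis=0))
print(avg)                           # [1.5 1.5 1.5]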
def receive_blocks(self,
                   content: Tuple[int, ndarray]) -> Union[Iterable[netEncapsulation], netEncapsulation, None]:
    """
    PA Server receives a (node_id, weights) tuple and sends back the
    averaged weights once every node has reported.
    :param content: tuple of (sender node id, weights ndarray)
    :return: netEncapsulation broadcasting the averaged global weights,
             or None while submissions are still pending
    """
    # update global current state
    self.Bak_Weights_Node[content[0]] = content[1]
    if len(self.Bak_Weights_Node) == GlobalSettings.get_default().node_count:
        global_weight = np.mean(list(self.Bak_Weights_Node.values()), axis=0)
        self.dispose()
        return netEncapsulation(GlobalSettings.get_default().nodes,
                                (Parameter_Server, global_weight))
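The aggregation rule itself is easy to verify outside the framework. A minimal sketch, assuming one submission per node and a dictionary keyed by node id (as Bak_Weights_Node is above):

import numpy as np

node_count = 3
backup = {}                                    # mirrors Bak_Weights_Node

for node_id in range(node_count):
    backup[node_id] = np.full((2, 2), float(node_id))
    if len(backup) == node_count:              # all nodes have reported
        global_weight = np.mean(list(backup.values()), axis=0)
        print(global_weight)                   # [[1. 1.] [1. 1.]]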
def check_for_combine(self):
    if len(self.BlockWeights) < GlobalSettings.get_default().block_count:
        return
    res = 0
    for val in self.BlockWeights.values():
        res += val
    self.set_result(res / len(self.BlockWeights))
    self.BlockWeights.clear()
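Note that the threshold is block_count, not node_count: with redundancy, one node can hold several blocks. A standalone sketch of the combine check (illustrative names, plain numpy):

import numpy as np

block_count = 4
blocks = {}

def check_for_combine(blocks):
    if len(blocks) < block_count:
        return None                  # keep waiting for missing blocks
    return sum(blocks.values()) / len(blocks)

for block_id in range(block_count):
    blocks[block_id] = np.ones(2) * block_id
    result = check_for_combine(blocks)
print(result)                        # [1.5 1.5] once the fourth block arrives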
def update_blocks(self, block_weight: BlockWeight) -> netEncapsulation[Tuple[int, ndarray]]:
    """
    Try to collect all blocks, then forward this block to the nodes
    that do not hold it yet.
    """
    self.BlockWeights[block_weight.block_id] = block_weight.content
    self.check_for_combine()
    send_to = GlobalSettings.get_default().get_adversary(block_weight.block_id)
    return netEncapsulation(send_to, (block_weight.block_id, block_weight.content))
def update_blocks(self, block_weight: BlockWeight) -> netEncapsulation[Dict[str, np.ndarray]]:
    print('Weights delta received.')
    print('from block: {}'.format(block_weight.block_id))
    print('It has a content with shape: {}'.format(block_weight.content.shape))
    # get the nodes that do not hold this block of data
    send_to = GlobalSettings.get_default().get_adversary(block_weight.block_id)
    # use the 'data' key to mark our gradient content
    pkg = {
        'data': block_weight.content
    }
    # record the local gradient
    self.__global_weights += block_weight.content
    self.__current_recv += 1
    # check whether all data has been received
    self.__do_grad_average()
    # send the gradient
    return netEncapsulation(send_to, pkg)
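Putting the two halves together, the exchange amounts to every node accumulating its own gradient plus each peer's pkg['data'], then dividing by the node count. A standalone two-node simulation of that invariant (plain numpy; this bypasses the framework's netEncapsulation message path, so names are illustrative):

import numpy as np

node_count = 2
grads = [np.array([1.0, 3.0]), np.array([5.0, 7.0])]
totals = [np.asarray(0.0), np.asarray(0.0)]

for me in range(node_count):
    peer = 1 - me
    totals[me] = totals[me] + grads[me]       # record the local gradient
    totals[peer] = totals[peer] + grads[me]   # what the peer receives as pkg['data']

results = [t / node_count for t in totals]
assert np.allclose(results[0], results[1])    # both nodes agree on the mean
print(results[0])                             # [3. 5.]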
def start(self, com: ICommunication_Controller) -> None:
    state, report = self.__check()
    self.__log.log_message("Ready:{} \n\t Check List:\n\t\t--> {}".format(state, "\n\t\t--> ".join(report)))
    # get dataset
    train_x, train_y, test_x, test_y = self.__data.load()
    self.__log.log_message('Dataset is ready, type: ({})'.format(self.__data))
    # build data feeder
    block_ids = GlobalSettings.get_default().node_2_block[com.Node_Id]
    feeder = PSGDBlockDataFeeder(train_x, train_y, batch_iter=self.__batch_iter, block_ids=block_ids)
    # assemble optimizer
    self.__optimizer.assemble(transfer=self.__trans, block_mgr=feeder)
    # compile model
    self.__model.compile(self.__optimizer)
    # summary
    summary = self.__model.summary()
    self.__log.log_message(summary)
    trace_head = '{}-N({})'.format(self.__misc.mission_title, self.node_id)
    self.__log.log_message('Model set to ready.')

    log_head = self.__log.Title
    # start!
    GlobalSettings.deprecated_global_logger = self.__log
    self.__trans.start_transfer(com, group_offset=list(self.group)[0], printer=self.__log)
    # record data
    time_start = time.time()
    data_sent_start = com.Com.bytes_sent
    data_recv_start = com.Com.bytes_read

    evaluation_history = []
    title = []
    r = {}
    # train until the epoch limit or the target accuracy is reached
    for i in range(self.__misc.epoch):
        # change title
        self.__log.Title = log_head + "-Epo-{}".format(i + 1)
        history = self.__model.fit(feeder, epoch=1, printer=self.__log)
        # evaluate on the test set
        r = self.__model.evaluate(test_x, test_y)
        title = r.keys()
        row = r.values()
        self.__log.log_message('Evaluate result: {}'.format(r))
        evaluation_history.append(row)

        if self.__misc.target_acc is not None:
            # only one metric in the model metrics list:
            # index 0 refers to loss, index 1 refers to accuracy.
            if list(r.values())[1] > self.__misc.target_acc:
                break

    # record data
    time_end = time.time()
    data_sent_end = com.Com.bytes_sent
    data_recv_end = com.Com.bytes_read

    training_history = self.__model.fit_history()
    # save training history data
    training_name = "TR-" + trace_head + ".csv"
    training_trace = pd.DataFrame(training_history.history, columns=training_history.title)
    training_trace.to_csv(training_name, index=False)
    # save evaluation history data
    evaluation_name = "EV-" + trace_head + ".csv"
    evaluation_trace = pd.DataFrame(evaluation_history, columns=title)
    evaluation_trace.to_csv(evaluation_name, index=False)
    # save model
    model_name = "MODEL-" + trace_head + ".model"
    self.__model.compile(nn.gradient_descent.SGDOptimizer(learn_rate=1e-5))
    self.__model.save(model_name)
    self.__trace_filename.append(training_name)
    self.__trace_filename.append(evaluation_name)
    self.__trace_filename.append(model_name)

    self.__log.log_message('Execution complete, time: {}.'.format(time_end - time_start))
    self.__log.log_message('Execution complete, Total bytes sent: {}.'.format(data_sent_end - data_sent_start))
    self.__log.log_message('Execution complete, Total bytes read: {}.'.format(data_recv_end - data_recv_start))
    self.__log.log_message('Trace file has been saved to {}.'.format(trace_head))

    # set marker
    self.__done = True
    # dispose
    self.__model.clear()
    del train_x, train_y, test_x, test_y

    # return last evaluation result
    return r
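The trace files written above are ordinary pandas CSV dumps. A minimal standalone illustration of the evaluation trace, assuming one (loss, accuracy) row per epoch:

import pandas as pd

title = ['loss', 'accuracy']
evaluation_history = [(0.92, 0.61), (0.55, 0.78), (0.31, 0.88)]

# one row per epoch, one column per metric
evaluation_trace = pd.DataFrame(evaluation_history, columns=title)
evaluation_trace.to_csv("EV-example.csv", index=False)
print(evaluation_trace.tail(1))      # last epoch: loss 0.31, accuracy 0.88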
def record(self, message: str):
    from codec import GlobalSettings
    GlobalSettings.global_logger().log_message(
        "Codec: {}, Report: {}.".format(self.__class__.__name__, message))
# const parameters
SLAVE_CNT = 4
REDUNDANCY = 1
TEST_ROUNDS = 10
WEIGHTS_SHAPE = np.random.randint(3, 1024, size=2)
LAYER = 0
BATCHSIZE = 64
SYNCWAITTIMEOUT = 1000  # ms

# setup global parameters
GlobalSettings.deprecated_default_settings = DuplicateAssignment(SLAVE_CNT, REDUNDANCY)
# default setting
Default = GlobalSettings.get_default()
# build codec
slave_codec = [SLAVE_CODEC(node_id=i) for i in range(SLAVE_CNT)]

for i in range(TEST_ROUNDS):
    # starting consensus stage: each slave publishes its assigned blocks
    for node_id, slave in enumerate(slave_codec):
        # build each block assigned to this node
        for block_id in Default.node_2_block[node_id]:
            # get random weights
            arr = np.random.random(size=WEIGHTS_SHAPE)
            # build block weights
            block_weight = BlockWeight(block_id=block_id, content=arr)
            pkg = slave.update_blocks(block_weight)
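A round of this test succeeds when every slave's combined result equals the plain mean of all block weights. A framework-free sketch of that check (hypothetical stand-in in plain numpy, not the codec's actual message exchange):

import numpy as np

SLAVE_CNT = 4
WEIGHTS_SHAPE = (8, 16)

blocks = [np.random.random(size=WEIGHTS_SHAPE) for _ in range(SLAVE_CNT)]
expected = np.mean(blocks, axis=0)

# after consensus, every node should hold the same averaged weights
results = [sum(blocks) / len(blocks) for _ in range(SLAVE_CNT)]
for r in results:
    assert np.allclose(r, expected)
print("consensus reached on all {} nodes".format(SLAVE_CNT))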