def __do_grad_average(self):
    """Publish the averaged weights once every node has contributed.

    The number of contributions received so far is compared with the node
    count taken from the default global settings. While they differ, nothing
    happens. When they match, the accumulated weights divided by the node
    count are handed to ``set_result`` and both the accumulator and the
    receive counter are reset for the next batch.

    NOTE(review): the accumulator is reset to a 0-d array; code that adds
    larger arrays onto it should do so out of place, since an in-place add
    cannot grow a 0-d array -- confirm against the accumulating caller.
    """
    expected_nodes = GlobalSettings.get_default().node_count
    if self.__current_recv != expected_nodes:
        # Still waiting for contributions from some nodes.
        return

    # Every contribution has arrived: publish the mean.
    averaged = self.__global_weights / expected_nodes
    self.set_result(averaged)

    # Reset the accumulated value and the counter for the next batch.
    self.__global_weights = np.asarray(0.0)
    self.__current_recv = 0
Exemple #2
0
 def receive_blocks(
     self, content: Tuple[int, ndarray]
 ) -> Union[Iterable[netEncapsulation], netEncapsulation, None]:
     """
         Record one sender's weights and, once as many entries as there are
         nodes have been stored, reply with their mean.

     :param content: pair whose first item is used as the key in
         ``Bak_Weights_Node`` and whose second item is the value stored.
     :return: ``None`` while the number of stored entries differs from
         ``GlobalSettings.get_default().node_count``; otherwise a
         ``netEncapsulation`` addressed to ``GlobalSettings.get_default().nodes``
         carrying ``(Parameter_Server, mean_weights)``.
     """
     sender = content[0]
     weights = content[1]

     # Keep the most recent weights reported under this key.
     self.Bak_Weights_Node[sender] = weights

     received = len(self.Bak_Weights_Node)
     if received != GlobalSettings.get_default().node_count:
         # Not every node has reported yet, so there is nothing to send.
         return None

     # Mean over the stored entries (axis 0 runs across the entries).
     stacked = list(self.Bak_Weights_Node.values())
     averaged = np.mean(stacked, axis=0)

     # dispose() runs before the reply is built -- presumably it clears the
     # collected weights for the next round; confirm against its definition.
     self.dispose()

     recipients = GlobalSettings.get_default().nodes
     return netEncapsulation(recipients, (Parameter_Server, averaged))
Exemple #3
0
    def check_for_combine(self):
        """Publish the mean of the collected blocks once enough have arrived.

        Does nothing while ``BlockWeights`` holds fewer entries than
        ``GlobalSettings.get_default().block_count``. Otherwise the stored
        values are summed, the sum divided by the number of stored entries is
        passed to ``set_result``, and ``BlockWeights`` is emptied.
        """
        if len(self.BlockWeights) >= GlobalSettings.get_default().block_count:
            # Accumulate every stored block, starting from zero.
            total = 0
            for block in self.BlockWeights.values():
                total += block

            collected = len(self.BlockWeights)
            self.set_result(total / collected)

            # Start the next round with an empty collection.
            self.BlockWeights.clear()
Exemple #4
0
 def update_blocks(
         self, block_weight: BlockWeight
 ) -> netEncapsulation[Tuple[int, ndarray]]:
     """
         Store the incoming block, attempt a combine, then forward the block.

     :param block_weight: block whose ``block_id`` keys the entry in
         ``BlockWeights`` and whose ``content`` is the value stored.
     :return: a ``netEncapsulation`` addressed to the result of
         ``get_adversary(block_id)`` carrying ``(block_id, content)``.
     """
     block_id = block_weight.block_id
     payload = block_weight.content

     # Remember this block, then check whether everything has arrived.
     self.BlockWeights[block_id] = payload
     self.check_for_combine()

     receivers = GlobalSettings.get_default().get_adversary(block_id)
     return netEncapsulation(receivers, (block_id, payload))
    def update_blocks(self, block_weight: BlockWeight) -> netEncapsulation[Dict[str, np.ndarray]]:
        """Accumulate a locally produced block and forward it to other nodes.

        The block content is added to the running total, the receive counter
        is incremented, and ``__do_grad_average`` is given the chance to
        publish the average. The same content is then packaged for the nodes
        returned by ``get_adversary`` for this block id.

        :param block_weight: block providing ``block_id`` and ``content``.
        :return: a ``netEncapsulation`` addressed to
            ``get_adversary(block_id)`` carrying ``{'data': content}``.
        """
        print('Weights delta received.')
        print('from block: {}'.format(block_weight.block_id))
        print('It has a content with shape: {}'.format(block_weight.content.shape))

        # Look up the nodes that do not have this data.
        send_to = GlobalSettings.get_default().get_adversary(block_weight.block_id)
        # The 'data' key marks the gradient content in the package.
        pkg = {
            'data': block_weight.content
        }
        # Record the local gradient. The addition is deliberately out of
        # place: when the accumulator is a 0-d array, an in-place ``+=`` with
        # a larger array raises ValueError because the output operand cannot
        # be broadcast, and an in-place add also fails on a dtype mismatch.
        self.__global_weights = self.__global_weights + block_weight.content
        self.__current_recv += 1
        # Check whether all data has been received.
        self.__do_grad_average()
        # Send the gradient.
        return netEncapsulation(send_to, pkg)
Exemple #6
0
    def start(self, com: ICommunication_Controller) -> dict:
        """Run the whole training job on this worker and return the last evaluation.

        In order: log the readiness check, load the dataset, build the block
        data feeder for this node, assemble the optimizer and compile the
        model, start the transfer, then train one epoch at a time with an
        evaluation after each epoch. Afterwards the training trace, the
        evaluation trace and the model are written to files named after the
        mission title and node id, and timing / traffic figures are logged.

        :param com: communication controller; ``com.Node_Id`` selects this
            node's blocks and ``com.Com.bytes_sent`` / ``com.Com.bytes_read``
            are sampled before and after training for the traffic figures.
        :return: the result of the last ``evaluate`` call, or an empty dict
            when the configured epoch count is 0 and no evaluation ran.
        """
        state, report = self.__check()
        self.__log.log_message("Ready:{} \n\t Check List:\n\t\t--> {}".format(state, "\n\t\t--> ".join(report)))
        # get dataset
        train_x, train_y, test_x, test_y = self.__data.load()
        self.__log.log_message('Dataset is ready, type: ({})'.format(self.__data))
        # build data feeder
        block_ids = GlobalSettings.get_default().node_2_block[com.Node_Id]
        feeder = PSGDBlockDataFeeder(train_x, train_y, batch_iter=self.__batch_iter, block_ids=block_ids)
        # assemble optimizer
        self.__optimizer.assemble(transfer=self.__trans, block_mgr=feeder)
        # compile model
        self.__model.compile(self.__optimizer)
        # summary
        summary = self.__model.summary()
        self.__log.log_message(summary)
        trace_head = '{}-N({})'.format(self.__misc.mission_title, self.node_id)
        self.__log.log_message('Model set to ready.')

        log_head = self.__log.Title
        # start !
        GlobalSettings.deprecated_global_logger = self.__log
        self.__trans.start_transfer(com, group_offset=list(self.group)[0], printer=self.__log)
        # record data
        time_start = time.time()
        data_send_start = com.Com.bytes_sent
        data_recv_start = com.Com.bytes_read

        evaluation_history = []
        title = []
        r = {}
        # do until reach the target accuracy
        for i in range(self.__misc.epoch):
            # change title
            self.__log.Title = log_head + "-Epo-{}".format(i + 1)
            self.__model.fit(feeder, epoch=1, printer=self.__log)
            # do tests
            r = self.__model.evaluate(test_x, test_y)
            # Materialise the views so each stored row is an independent list
            # and the values can be indexed by position below.
            title = list(r.keys())
            row = list(r.values())
            self.__log.log_message('Evaluate result: {}'.format(r))
            evaluation_history.append(row)

            # Only one metric is expected in the model's metric list, so the
            # values are (loss, accuracy) in that order. The accuracy is read
            # by position: indexing the mapping itself with ``1`` is a key
            # lookup and fails when the results are keyed by metric name.
            if self.__misc.target_acc is not None and row[1] > self.__misc.target_acc:
                break

        # record data
        time_end = time.time()
        data_sent_end = com.Com.bytes_sent
        data_recv_end = com.Com.bytes_read

        training_history = self.__model.fit_history()
        # save training history data
        training_name = "TR-" + trace_head + ".csv"
        training_trace = pd.DataFrame(training_history.history, columns=training_history.title)
        training_trace.to_csv(training_name, index=False)
        # save evaluation history data
        evaluation_name = "EV-" + trace_head + ".csv"
        evaluation_trace = pd.DataFrame(evaluation_history, columns=title)
        evaluation_trace.to_csv(evaluation_name, index=False)
        # save model
        model_name = "MODEL-" + trace_head + ".model"
        # NOTE(review): the model is re-compiled with a plain SGD optimizer
        # before saving -- presumably so the saved file does not carry the
        # distributed optimizer and its transfer; confirm.
        self.__model.compile(nn.gradient_descent.SGDOptimizer(learn_rate=1e-5))
        self.__model.save(model_name)
        self.__trace_filename.append(training_name)
        self.__trace_filename.append(evaluation_name)
        self.__trace_filename.append(model_name)

        self.__log.log_message('Execution complete, time: {}.'.format(time_end - time_start))
        self.__log.log_message('Execution complete, Total bytes sent: {}.'.format(data_sent_end - data_send_start))
        self.__log.log_message('Execution complete, Total bytes read: {}.'.format(data_recv_end - data_recv_start))
        self.__log.log_message('Trace file has been saved to {}.'.format(trace_head))

        # set marker
        self.__done = True
        # dispose
        self.__model.clear()
        del train_x, train_y, test_x, test_y

        # return last evaluation result
        return r
Exemple #7
0
 def record(self, message: str):
     """
         Write a report line for this codec to the global logger.

     :param message: text placed after ``Report:`` in the logged line; the
         line also names the concrete class of ``self``.
     """
     # Imported inside the method rather than at module level -- presumably
     # to avoid a circular import; confirm.
     from codec import GlobalSettings

     emit = GlobalSettings.global_logger().log_message
     codec_name = self.__class__.__name__
     emit("Codec: {}, Report: {}.".format(codec_name, message))
# Test driver: builds SLAVE_CNT codecs and repeatedly feeds each of them
# random block weights through update_blocks().
# const parameters
SLAVE_CNT = 4  # number of codecs built below; first argument to DuplicateAssignment
REDUNDANCY = 1  # second argument to DuplicateAssignment
TEST_ROUNDS = 10  # iterations of the outer test loop
# Two random dimensions, each drawn from [3, 1024), used as the weight shape.
WEIGHTS_SHAPE = np.random.randint(3, 1024, size=2)
# LAYER, BATCHSIZE and SYNCWAITTIMEOUT are not referenced in the visible
# part of this script.
LAYER = 0
BATCHSIZE = 64
SYNCWAITTIMEOUT = 1000  # milliseconds

# setup global parameters
GlobalSettings.deprecated_default_settings = DuplicateAssignment(
    SLAVE_CNT, REDUNDANCY)

# default setting
Default = GlobalSettings.get_default()

# build codec
# One codec per node id in range(SLAVE_CNT).
slave_codec = [SLAVE_CODEC(node_id=i) for i in range(SLAVE_CNT)]

for i in range(TEST_ROUNDS):
    # starting consensus stage
    # NOTE(review): node_id is reset to 0 every round and is not advanced in
    # the visible lines, so every codec is fed node 0's blocks here --
    # confirm it is incremented further down in the loop.
    node_id = 0
    for slave in slave_codec:
        # build each block
        for block_id in Default.node_2_block[node_id]:
            # get random
            arr = np.random.random(size=WEIGHTS_SHAPE)
            # build block weights
            block_weight = BlockWeight(block_id=block_id, content=arr)
            # Hand the block to the codec; pkg is whatever update_blocks returns.
            pkg = slave.update_blocks(block_weight)