avg_loss_set.append(float(avg_loss_np)) acc_val_mean = numpy.array(acc_set).mean() avg_loss_val_mean = numpy.array(avg_loss_set).mean() return avg_loss_val_mean, acc_val_mean # for test while not trainer.stop(): epoch_id += 1 print("epoch %d start train" % (epoch_id)) for data in train_reader(): step_i += 1 trainer.step_id = step_i accuracy, = trainer.run(feed=feeder.feed(data), fetch=["accuracy_0.tmp_0"]) if step_i % 100 == 0: print("Epoch: {0}, step: {1}, accuracy: {2}".format( epoch_id, step_i, accuracy[0])) print(step_i) avg_loss_val, acc_val = train_test(train_test_program=test_program, train_test_reader=test_reader, train_test_feed=feeder) print("Test with Epoch %d, avg_cost: %s, acc: %s" % (epoch_id, avg_loss_val, acc_val)) if epoch_id > 40: break if epoch_id % 5 == 0: trainer.save_inference_program(output_folder)
def compute_privacy_budget(sample_ratio, epsilon, step, delta):
    """Print and return the E of the (E, delta)-DP budget.

    E is computed as ``2 * epsilon * sqrt(step * sample_ratio)``.

    Args:
        sample_ratio: ratio multiplied with ``step`` under the square root.
        epsilon: epsilon factor of the formula above.
        step: number of training steps taken so far.
        delta: delta value; reported unchanged in the printed message.

    Returns:
        The computed E. (Previously the value was only printed; returning
        it is additive and does not affect callers that ignore it.)
    """
    E = 2 * epsilon * math.sqrt(step * sample_ratio)
    print("({0}, {1})-DP".format(E, delta))
    return E


output_folder = "model_node%d" % trainer_id
epoch_id = 0
step = 0  # total number of trainer.run calls across all epochs
while not trainer.stop():
    epoch_id += 1
    if epoch_id > 40:
        break
    print("epoch %d start train" % (epoch_id))

    # Stays None when the reader yields nothing, so the report below is
    # skipped instead of failing on an unbound name.
    cost = None
    for data in train_reader():
        cost = trainer.run(feeder.feed(data), fetch=["mean_0.tmp_0"])
        step += 1
    if cost is not None:
        print("train cost:%.3f" % (cost[0]))

    cost_val = train_test(
        train_test_program=test_program,
        train_test_reader=test_reader,
        train_test_feed=feeder)
    print("Test with epoch %d, cost: %s" % (epoch_id, cost_val))

    compute_privacy_budget(
        sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)

    # Save into the per-epoch directory; the path used to be computed and
    # then ignored, so every epoch was written to the same folder.
    save_dir = (output_folder + "/epoch_%d") % epoch_id
    trainer.save_inference_program(save_dir)
def reader():
    """Yield 1000 randomly generated feed dicts.

    Each dict maps the keys "0", "1" and "2" to a float32 array of shape
    (1, 5) drawn with ``np.random.rand``, and "label" to an int64 array of
    shape (1, 1) holding 0 or 1.
    """
    for _ in range(1000):
        data_dict = {}
        # Distinct name for the inner index: it used to shadow the outer one.
        for slot in range(3):
            data_dict[str(slot)] = np.random.rand(1, 5).astype('float32')
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


# The trainer id is the text following "trainer" in the received message.
trainer_id = message.split("trainer")[1]
job_path = "job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, int(trainer_id))
job._scheduler_ep = scheduler_conf["ENDPOINT"]
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = endpoint
trainer.start()
print(trainer._scheduler_ep, trainer._current_ep)

output_folder = "fl_model"
epoch_id = 0
while not trainer.stop():
    # Report the round counter; step_i is not assigned yet on the first
    # pass, so printing it here raised NameError.
    print("batch %d start train" % (epoch_id))
    step_i = 0
    for data in reader():
        trainer.run(feed=data, fetch=[])
        step_i += 1
        # Compare the counter that is actually incremented; `train_step`
        # was never assigned or updated in this loop.
        if step_i == trainer._step:
            break
    epoch_id += 1
    if epoch_id % 5 == 0:
        trainer.save_inference_program(output_folder)
# Trainer id for each guest, read from the first command-line argument.
trainer_id = int(sys.argv[1])
place = fluid.CPUPlace()
train_file_dir = "mid_data/node4/%d/" % trainer_id

# Load this trainer's job description, build the trainer and start it.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.start()

r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
step_i = 0
while not trainer.stop():
    step_i += 1
    print("batch %d start train" % (step_i))

    for data in train_reader():
        # Debug aid: print(np.array(data['src_wordseq']))
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
        # exp of the fetched value, averaged; reported as "ppl"
        # (presumably perplexity -- confirm against the model definition).
        newest_ppl = np.mean(np.exp(ret_avg_cost[0]))
        print("ppl:%.3f" % (newest_ppl))

    # Only trainer 0 writes the model, into a per-round directory.
    save_dir = (output_folder + "/epoch_%d") % step_i
    if trainer_id == 0:
        print("start save")
        trainer.save_inference_program(save_dir)

    if step_i >= 40:
        break