# NOTE(review): this script was reconstructed from a whitespace-collapsed
# source; the loop nesting below is the most plausible reading -- confirm.

# Load this trainer's job description and point it at the scheduler endpoint.
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer

# Create the trainer; its own endpoint port is 9000 offset by the trainer id.
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)

r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
epoch_i = 0
while not trainer.stop():
    epoch_i += 1
    train_step = 0
    # Reset every epoch: if the reader yields no batch, the name would
    # otherwise be unbound (first epoch) or still hold the previous epoch's
    # cost (later epochs).
    ret_avg_cost = None
    for data in train_reader():
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
        train_step += 1
        # Stop once the step count configured on the trainer is reached.
        if train_step == trainer._step:
            break

    if ret_avg_cost is None:
        # No batch was run this epoch, so there is no cost to report.
        newest_ppl = float("nan")
    else:
        # Perplexity is the exponential of the last fetched cost.
        avg_ppl = np.exp(ret_avg_cost[0])
        newest_ppl = np.mean(avg_ppl)

    # The message text says "start train" but it is emitted after the epoch's
    # steps have run; the text is kept unchanged for log compatibility.
    # strftime() without a time tuple formats the current local time.
    print("{} Epoch {} start train, train_step {}, ppl {}".format(
        time.strftime('%Y-%m-%d %H:%M:%S'), epoch_i, train_step, newest_ppl))

    save_dir = (output_folder + "/epoch_%d") % epoch_i
    # Only trainer 0 writes the inference program, one folder per epoch.
    if trainer_id == 0:
        print("start save")
        trainer.save_inference_program(save_dir)
    # Hard cap of five epochs regardless of what trainer.stop() reports.
    if epoch_i >= 5:
        break
# NOTE(review): `E` and `delta` are not defined anywhere in the visible part
# of this file; this statement looks like the tail of a helper (presumably
# compute_privacy_budget) whose header lies above this chunk. It is kept
# verbatim -- restore its original indentation when merging.
print("({0}, {1})-DP".format(E, delta))

# NOTE(review): the loop nesting below was reconstructed from a
# whitespace-collapsed source -- confirm against the original layout.
output_folder = "model_node%d" % trainer_id
epoch_id = 0
step = 0  # Total number of training steps run across all epochs.
while not trainer.stop():
    epoch_id += 1
    # Hard cap of ten epochs regardless of what trainer.stop() reports.
    if epoch_id > 10:
        break

    # strftime() without a time tuple formats the current local time.
    print("{} Epoch {} start train".format(
        time.strftime('%Y-%m-%d %H:%M:%S'), epoch_id))
    for data in train_reader():
        acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
        step += 1

    # Evaluate on the test reader after each epoch.
    acc_val = train_test(
        train_test_program=test_program,
        train_test_reader=test_reader,
        train_test_feed=feeder)
    print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))

    # Report the privacy budget for the cumulative step count so far.
    compute_privacy_budget(
        sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)

    # Save into the per-epoch folder. Previously `save_dir` was computed but
    # `output_folder` was passed, so every epoch overwrote the same location.
    save_dir = (output_folder + "/epoch_%d") % epoch_id
    trainer.save_inference_program(save_dir)