Example #1
# Attach this worker to the federated job and bring the trainer online.
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Scheduler endpoint the trainer reports to.
trainer = FLTrainerFactory().create_fl_trainer(job)
# Each worker listens on its own port, offset by its trainer id.
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)

# Build the GRU4Rec batch reader over the training files.
gru4rec_reader = Gru4rec_Reader()
train_reader = gru4rec_reader.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
epoch_i = 0
while not trainer.stop():
    epoch_i += 1
    for train_step, data in enumerate(train_reader(), start=1):
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
        if train_step == trainer._step:
            # Per-epoch step budget exhausted; move on to the next epoch.
            break
        # Perplexity of the latest batch, derived from the fetched mean cost.
        newest_ppl = np.mean(np.exp(ret_avg_cost[0]))
        print("{} Epoch {} start train, train_step {}, ppl {}".format(
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
            epoch_i, train_step, newest_ppl))
    save_dir = (output_folder + "/epoch_%d") % epoch_i
    # Only worker 0 persists the model, one snapshot per epoch.
    if trainer_id == 0:
        print("start save")
        trainer.save_inference_program(save_dir)
    if epoch_i >= 5:
        break
Example #2
    E = 2 * epsilon * math.sqrt(step * sample_ratio)
    print("({0}, {1})-DP".format(E, delta))


# Per-node output directory so concurrent trainers don't clobber each other.
output_folder = "model_node%d" % trainer_id
epoch_id = 0
step = 0  # Cumulative SGD steps across all epochs; feeds the DP accounting.
while not trainer.stop():
    epoch_id += 1
    if epoch_id > 10:
        break
    print("{} Epoch {} start train".format(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
        epoch_id))
    for step_id, data in enumerate(train_reader()):
        acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
        step += 1

    # Evaluate on the held-out set after every local epoch.
    acc_val = train_test(train_test_program=test_program,
                         train_test_reader=test_reader,
                         train_test_feed=feeder)

    print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
    # Report the (epsilon, delta)-DP budget spent after `step` total steps.
    compute_privacy_budget(sample_ratio=0.001,
                           epsilon=0.1,
                           step=step,
                           delta=0.00001)

    save_dir = (output_folder + "/epoch_%d") % epoch_id
    # BUG FIX: the snapshot was previously written to `output_folder`,
    # overwriting the model every epoch while `save_dir` went unused.
    # Save each epoch into its own directory, matching the other examples.
    trainer.save_inference_program(save_dir)
Example #3
# Load this worker's slice of the federated job and point it at the scheduler.
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Scheduler endpoint the trainer reports to.

trainer = FLTrainerFactory().create_fl_trainer(job)
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer._current_ep = "127.0.0.1:8192"
trainer.start(place=place)
trainer._logger.setLevel(logging.DEBUG)

# Skip `trainer_id` samples so each worker trains on a different one.
g = reader()
for _ in range(trainer_id):
    next(g)
data = next(g)
print(data)

output_folder = "fl_model"
step_i = 0
while not trainer.stop():
    step_i += 1
    print("batch %d start train" % step_i)
    trainer.run(feed=data, fetch=[])
    # Only worker 0 persists the model, avoiding concurrent writes.
    if trainer_id == 0:
        print("start saving model")
        trainer.save_inference_program(output_folder)
    if step_i >= 10:
        break
# Summary
###########
# TensorBoard-style writer; each client logs under its own sub-directory.
data_writer = SummaryWriter(logdir=join(join(params["federated"]["logdir"], "data"), f"client_{trainer_id}"))

#  Run
#########
round_id = 0
while not trainer.stop():
    round_id += 1

    if round_id > params["federated"]["num_round"]:
        break

    # Local training: num_epoch full passes over the data per federated round.
    for _ in range(params["federated"]["num_epoch"]):
        for data in train_reader():
            trainer.run(feeder.feed(data), fetch=job._target_names)

    # Per-round metrics on train and validation splits.
    train_metrics = metrics(trainer.exe, test_program, feeder, train_reader, job._target_names)
    val_metrics = metrics(trainer.exe, test_program, feeder, val_reader, job._target_names)
    if trainer_id == 0:
        # Only client 0 evaluates on the test split.
        test_metrics = metrics(trainer.exe, test_program, feeder, test_reader, job._target_names)

    # NOTE(review): txt_log is assembled but never printed or written in this
    # view — presumably consumed by code outside this snippet; confirm.
    txt_log = "{} Round {} ".format(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), round_id)

    for metric, metric_name in enumerate(job._target_names):
        txt_log += f"Train {metric_name}: {train_metrics[metric]} Val {metric_name}: {val_metrics[metric]}"
        data_writer.add_scalar(f"train/{metric_name}", train_metrics[metric], round_id)
        data_writer.add_scalar(f"val/{metric_name}", val_metrics[metric], round_id)
Example #5
# Declare the `is_crowd` input slot (LoD-level-1 int32 column).
is_crowd = fluid.layers.data(name='is_crowd',
                             shape=[None, 1],
                             dtype='int32',
                             lod_level=1)
# Pin each worker to its own GPU, indexed by trainer id.
place = fluid.CUDAPlace(trainer_id)
feeder = fluid.DataFeeder(
    feed_list=[image, im_info, im_id, gt_bbox, gt_class, is_crowd],
    place=place)

output_folder = "5_model_node%d" % trainer_id
epoch_id = 0
step = 0  # Cumulative steps across all epochs (used only for logging).

para_dir = "faster_rcnn_program"

while not trainer.stop():
    epoch_id += 1
    if epoch_id > 120:
        break
    print("epoch %d start train" % (epoch_id))
    # Rebuild the loader each epoch so iteration restarts from the top.
    reader_factory = DataReader()
    batch_iter = reader_factory.test_loader()
    for data in batch_iter:
        fetched = trainer.run(feeder.feed(data), fetch=['sum_0.tmp_0'])
        step += 1
        print("step: {}, loss: {}".format(step, fetched))

    # Only worker 0 persists a snapshot, one directory per epoch.
    if trainer_id == 0:
        save_dir = (output_folder + "/epoch_%d") % epoch_id
        trainer.save(para_dir, save_dir)
        acc_set.append(float(acc_np[0]))
        loss_set.append(float(loss[0]))
    acc_val_mean = np.array(acc_set).mean()
    avg_loss_mean = np.array(loss_set).mean()
    return acc_val_mean, avg_loss_mean


# Per-node output directory so concurrent trainers don't clobber each other.
output_folder = "models/model_node%d" % trainer_id
epoch_id = 0
step = 0  # Cumulative training steps across all epochs.
while not trainer.stop():
    epoch_id += 1
    if epoch_id > 10:
        break
    print("{} Epoch {} start train".format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), epoch_id))
    for step_id, data in enumerate(train_reader()):
        acc, loss = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0", "reduce_mean_0.tmp_0"])
        step += 1

    # Per-epoch evaluation on the held-out set.
    acc_val, avg_loss = train_test(
        train_test_program=test_program,
        train_test_reader=test_reader,
        train_test_feed=feeder)

    print("Test with epoch %d, accuracy: %s , loss: %s" % (epoch_id, acc_val, avg_loss))

    save_dir = (output_folder + "/epoch_%d") % epoch_id
    # BUG FIX: previously saved to `output_folder`, overwriting the model each
    # epoch while `save_dir` was computed but never used. Keep one snapshot
    # per epoch, matching the other examples in this file.
    trainer.save_inference_program(save_dir)

print("{} Train is ended".format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))