Esempio n. 1
0
    def connect_server(self):
        from paddle import fluid
        from paddle_fl.paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
        from paddle_fl.paddle_fl.core.master.fl_job import FLRunTimeJob
        import numpy as np
        import sys

        import logging
        self.processLabel.setText('connecting')
        trainer_id = self.id
        job_path = self.config['path']['job_path']
        job = FLRunTimeJob()
        job.load_trainer_job(job_path, trainer_id)
        job._scheduler_ep = '{}:{}'.format(self.config['scheduler']['ip'],
                                           self.config['scheduler']['port'])
        # print(job._trainer_send_program)

        self.trainer = MFLTrainerFactory().create_fl_trainer(job)
        use_cuda = False
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        self.trainer._current_ep = '{}:{}'.format(
            self.config['client']['ip'],
            int(self.config['client']['port']) + self.id)
        print('prepared ok')
        self.trainer.start(place=place)
        self.trainer._logger.setLevel(logging.DEBUG)
        print('connected ok')
        self.processLabel.setText('connected')
        self.trainThread = threading.Thread(target=self.train)
        self.processLabel.setText('training')
        self.trainThread.start()
        self.trainThread.join()
        self.processLabel.setText('finished')
Esempio n. 2
0
import logging
import time

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
place = fluid.CPUPlace()
train_file_dir = "mid_data/node4/%d/" % trainer_id
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)

r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
epoch_i = 0
while not trainer.stop():
    epoch_i += 1
    train_step = 0
    for data in train_reader():
Esempio n. 3
0
    server._current_ep = endpoint
    server.start()
else:

    def reader():
        for i in range(1000):
            data_dict = {}
            for i in range(3):
                data_dict[str(i)] = np.random.rand(1, 5).astype('float32')
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict

    trainer_id = message.split("trainer")[1]
    job_path = "job_config"
    job = FLRunTimeJob()
    job.load_trainer_job(job_path, int(trainer_id))
    job._scheduler_ep = scheduler_conf["ENDPOINT"]
    trainer = FLTrainerFactory().create_fl_trainer(job)
    trainer._current_ep = endpoint
    place = fluid.CPUPlace()
    trainer.start(place)
    print(trainer._scheduler_ep, trainer._current_ep)
    output_folder = "fl_model"
    epoch_id = 0
    while not trainer.stop():
        print("epoch %d start train" % (epoch_id))
        step_i = 0
        for data in reader():
            trainer.run(feed=data, fetch=[])
            step_i += 1
            if step_i == trainer._step: