Code Example #1
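# Builds a fresh Program, calls get_places(device_count=4) inside a
# program_guard block, and checks that a variable is returned. Assumes Program,
# program_guard and get_places are imported from paddle.fluid (exact module
# paths vary across Fluid versions).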
def test_get_places(self):
    program = Program()
    with program_guard(program):
        x = get_places(device_count=4)
        self.assertIsNotNone(x)
    print(str(program))
Code Example #2
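# Runs the default main program on a CPU executor and checks that the variable
# produced by get_places() has type VarDesc.VarType.PLACE_LIST. Assumes
# paddle.fluid is imported as fluid and get_places is imported directly.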
def test_get_places(self):
    places = get_places()
    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    exe.run(fluid.default_main_program())
    self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST)
Code Example #3
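# N-gram language-model (word2vec-style) training on the imikolov dataset.
# With is_parallel=True, the network is replicated over the places returned by
# get_places() via ParallelDo and the per-device losses are averaged with
# fluid.layers.mean(pd()). This snippet assumes that paddle, paddle.fluid (as
# fluid), math, os and sys are imported, and that get_places and ParallelDo
# come from fluid.layers (exact import paths vary across Fluid versions).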
def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
    PASS_NUM = 100
    EMBED_SIZE = 32
    HIDDEN_SIZE = 256
    N = 5
    BATCH_SIZE = 32
    IS_SPARSE = is_sparse

    def __network__(words):
        embed_first = fluid.layers.embedding(input=words[0],
                                             size=[dict_size, EMBED_SIZE],
                                             dtype='float32',
                                             is_sparse=IS_SPARSE,
                                             param_attr='shared_w')
        embed_second = fluid.layers.embedding(input=words[1],
                                              size=[dict_size, EMBED_SIZE],
                                              dtype='float32',
                                              is_sparse=IS_SPARSE,
                                              param_attr='shared_w')
        embed_third = fluid.layers.embedding(input=words[2],
                                             size=[dict_size, EMBED_SIZE],
                                             dtype='float32',
                                             is_sparse=IS_SPARSE,
                                             param_attr='shared_w')
        embed_forth = fluid.layers.embedding(input=words[3],
                                             size=[dict_size, EMBED_SIZE],
                                             dtype='float32',
                                             is_sparse=IS_SPARSE,
                                             param_attr='shared_w')

        concat_embed = fluid.layers.concat(
            input=[embed_first, embed_second, embed_third, embed_forth],
            axis=1)
        hidden1 = fluid.layers.fc(input=concat_embed,
                                  size=HIDDEN_SIZE,
                                  act='sigmoid')
        predict_word = fluid.layers.fc(input=hidden1,
                                       size=dict_size,
                                       act='softmax')
        cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
        avg_cost = fluid.layers.mean(cost)
        return avg_cost, predict_word

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

    if not is_parallel:
        avg_cost, predict_word = __network__(
            [first_word, second_word, third_word, forth_word, next_word])
    else:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            avg_cost, predict_word = __network__(
                list(
                    map(pd.read_input, [
                        first_word, second_word, third_word, forth_word,
                        next_word
                    ])))
            pd.write_output(avg_cost)

        avg_cost = fluid.layers.mean(pd())

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    train_reader = paddle.batch(paddle.dataset.imikolov.train(word_dict, N),
                                BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_cost_np = exe.run(main_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost])
                if avg_cost_np[0] < 5.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname,
                            ['firstw', 'secondw', 'thirdw', 'forthw'],
                            [predict_word], exe)
                    return
                if math.isnan(float(avg_cost_np[0])):
                    sys.exit("got NaN loss, training failed.")

        raise AssertionError("Cost is too large {0:2.2}".format(
            avg_cost_np[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Code Example #4
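# Fit-a-line style linear regression replicated with ParallelDo over the
# places returned by get_places(device_count=0, device_type=...), followed by
# fluid.memory_optimize on the default main program so its result can be
# compared against the unoptimized run. Assumes paddle.fluid is imported as
# fluid and that get_places and ParallelDo are imported directly.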
# Fix the random seed so that the loss can be compared accurately between the
# default program and the memory-optimized version.
fluid.default_startup_program().random_seed = 111

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

device_type = 'CPU'
use_nccl = False
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
    device_type = 'CUDA'
    use_nccl = False
    place = fluid.CUDAPlace(0)

places = get_places(device_count=0, device_type=device_type)
pd = ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program(), print_log=True)
Code Example #5
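# IMDB sentiment-classification training. With parallel=True, the network
# built by net_method is replicated via ParallelDo, and its cost and accuracy
# outputs are averaged across devices. Assumes paddle, paddle.fluid (as
# fluid), math, os and sys are imported, together with get_places and
# ParallelDo.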
def train(word_dict,
          net_method,
          use_cuda,
          parallel=False,
          save_dirname=None,
          is_local=True):
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim)
    else:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            cost, acc, _ = net_method(
                pd.read_input(data),
                pd.read_input(label),
                input_dim=dict_dim,
                class_dim=class_dim)
            pd.write_output(cost)
            pd.write_output(acc)

        cost, acc = pd()
        cost = fluid.layers.mean(cost)
        acc_out = fluid.layers.mean(acc)
        prediction = None
        assert save_dirname is None

    adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ["words"],
                                                      prediction, exe)
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Cost is too large for {0}".format(
            net_method.__name__))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Code Example #6
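# MNIST digit recognition with either an MLP or a conv net (mlp, conv_net and
# BATCH_SIZE are assumed to be defined elsewhere in the same file). With
# parallel=True, the network is replicated via ParallelDo and the loss and
# accuracy are averaged across devices; the model is periodically evaluated
# and saved with fluid.io.save_inference_model. Assumes paddle, paddle.fluid
# (as fluid), numpy, math, os and sys are imported, together with get_places
# and ParallelDo.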
def train(nn_type,
          use_cuda,
          parallel,
          save_dirname=None,
          save_full_dirname=None,
          model_filename=None,
          params_filename=None,
          is_local=True):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if nn_type == 'mlp':
        net_conf = mlp
    else:
        net_conf = conv_net

    if parallel:
        places = get_places()
        pd = ParallelDo(places)
        with pd.do():
            img_ = pd.read_input(img)
            label_ = pd.read_input(label)
            prediction, avg_loss, acc = net_conf(img_, label_)
            for o in [avg_loss, acc]:
                pd.write_output(o)

        avg_loss, acc = pd()
        # average the loss and accuracy over all devices.
        avg_loss = fluid.layers.mean(avg_loss)
        acc = fluid.layers.mean(acc)
    else:
        prediction, avg_loss, acc = net_conf(img, label)

    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch, fetch nothing
                exe.run(main_program, feed=feeder.feed(data))
                if (batch_id + 1) % 10 == 0:
                    acc_set = []
                    avg_loss_set = []
                    for test_data in test_reader():
                        acc_np, avg_loss_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[acc, avg_loss])
                        acc_set.append(float(acc_np))
                        avg_loss_set.append(float(avg_loss_np))
                    # get test acc and loss
                    acc_val = numpy.array(acc_set).mean()
                    avg_loss_val = numpy.array(avg_loss_set).mean()
                    if float(acc_val) > 0.2:  # Smaller value to increase CI speed
                        if save_dirname is not None:
                            fluid.io.save_inference_model(
                                save_dirname, ["img"], [prediction],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename)
                        if save_full_dirname is not None:
                            fluid.io.save_inference_model(
                                save_full_dirname, [], [],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename,
                                export_for_deployment=False)
                        return
                    else:
                        print('PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'
                              .format(pass_id, batch_id + 1, float(avg_loss_val), float(acc_val)))
                        if math.isnan(float(avg_loss_val)):
                            sys.exit("got NaN loss, training failed.")
        raise AssertionError("Loss of recognize digits is too large")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Code Example #7
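    # Test helper: builds the network produced by a generator-style callback,
    # optionally wrapping it in ParallelDo over get_places(thread_num), appends
    # the backward pass, and runs it under the profiler. Assumes paddle.fluid
    # (as fluid), six, fluid.profiler (as profiler), get_places and ParallelDo
    # are imported.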
    def _run_test_impl_(self,
                        callback,
                        feed,
                        fetch,
                        place,
                        use_parallel=False,
                        use_nccl=False,
                        use_gpu=False):
        """
        Run a single test, returns the fetch values
        Args:
            place(Place): the computation place.
            use_parallel(bool): Whether use parallel.for or not.

        Returns:
            Fetched numpy arrays.

        """
        if isinstance(fetch, six.string_types):
            fetch = [fetch]
        main = fluid.Program()
        startup = fluid.Program()
        # Fix seed
        main.random_seed = 10
        startup.random_seed = 10

        with fluid.program_guard(main, startup):
            generator = callback()
            # Automatically insert parallel do if use_parallel = True
            if use_parallel:
                thread_num = fluid.core.get_cuda_device_count() if use_gpu else 8
                places = get_places(thread_num)
                pd = ParallelDo(places, use_nccl=use_nccl)
                data = next(generator)

                if isinstance(data, fluid.framework.Variable):
                    data = [data]

                with pd.do():
                    ins = list(map(pd.read_input, data))
                    if len(ins) == 1:
                        ins = ins[0]
                    loss = generator.send(ins)  # patch input
                    pd.write_output(loss)

                loss = pd()
            else:
                data = next(generator)
                loss = generator.send(data)
            self.assertIsNotNone(loss)
            avg_loss = fluid.layers.mean(loss)
            fluid.backward.append_backward(loss=avg_loss)

        exe = fluid.Executor(place)
        exe.run(startup)
        if use_gpu:
            profile_type = 'GPU'
        else:
            profile_type = 'CPU'
        with profiler.profiler(profile_type, 'total', '/tmp/profiler'):
            return exe.run(main, feed=feed, fetch_list=fetch)
Code Example #8
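# GPU counterpart of the CPU check in Code Example #2: requests CUDA places
# with get_places(device_type='CUDA') and verifies that the resulting variable
# has type VarDesc.VarType.PLACE_LIST after running the default main program
# on fluid.CUDAPlace(0).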
def check_get_gpu_places(self):
    places = get_places(device_type='CUDA')
    gpu = fluid.CUDAPlace(0)
    exe = fluid.Executor(gpu)
    exe.run(fluid.default_main_program())
    self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST)