Example #1
def main():
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0; needed only by the trainer
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
            # exit here so we do not fall through to the single-device train()
            exit(0)
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
    if args.gpus == 1:
        # NOTE: the parallel executor uses the profiler internally
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
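
parse_args is not shown in this listing; a hedged reconstruction covering only
the flags that this main() actually reads (names come from the code above, the
defaults are assumptions):

import argparse

def parse_args():
    # hypothetical reconstruction, limited to flags referenced above
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='mnist')
    parser.add_argument('--device', type=str, default='GPU',
                        choices=['CPU', 'GPU'])
    parser.add_argument('--gpus', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--update_method', type=str, default='local',
                        choices=['local', 'pserver', 'nccl2'])
    parser.add_argument('--memory_optimize', action='store_true')
    parser.add_argument('--use_cprof', action='store_true')
    parser.add_argument('--use_nvprof', action='store_true')
    return parser.parse_args()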
Example #2
def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    # fluid.memory_optimize(fluid.default_main_program())
    fluid.release_memory(fluid.default_main_program())

    # fix the order of training data
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    # train_data = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
    #     batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    batch_id = 0
    for pass_id in xrange(10):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
            outs = exe.run(fluid.default_main_program(),
                           feed={
                               'src_word_id': word_data,
                               'target_language_word': trg_word,
                               'target_language_next_word': trg_word_next
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1
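
This example is Python 2 code (xrange, and map returning a list). A hedged
Python 3 equivalent of the feed preparation, assuming the same to_lodtensor
helper, would be:

word_data = to_lodtensor([x[0] for x in data], place)
trg_word = to_lodtensor([x[1] for x in data], place)
trg_word_next = to_lodtensor([x[2] for x in data], place)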
Example #3
def get_model(args):
    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = cnn_model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program
    inference_program = fluid.default_main_program().clone()

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)

    # Reader
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)
    return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc
Example #4
def run_executor(exe, feed, fetch_list, program=None):
    if isinstance(exe, fluid.ParallelExecutor):
        res = exe.run(fetch_list=fetch_list, feed=feed)
    elif isinstance(exe, fluid.Executor):
        if program is None:
            program = fluid.default_main_program()
        res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
    else:
        raise ValueError('Unknown executor type')
    return res
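
A minimal usage sketch for the helper above, exercising the plain-Executor
branch on a trivial program (names and values are illustrative, not from the
original):

import numpy as np
import paddle.fluid as fluid

# trivial program: y = 1.0 * x + 1.0
x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.scale(x, scale=1.0, bias=1.0)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# falls into the fluid.Executor branch; program defaults to the main program
res = run_executor(exe, feed={'x': np.ones((2, 1), dtype='float32')},
                   fetch_list=[y])
print(res[0])  # [[2.], [2.]]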
Example #5
    def test_calc_gradient(self):
        x = layers.create_parameter(dtype="float32", shape=[5, 10])
        y = layers.create_parameter(dtype="float32", shape=[10, 8])
        mul_out = layers.mul(x=x, y=y)
        mean_out = layers.mean(mul_out)
        a = calc_gradient(mean_out, mul_out)
        b = calc_gradient(mean_out, x)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])
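
For reference, the first gradient fetched above is constant: mean_out averages
a 5x8 matrix, so every entry of d(mean_out)/d(mul_out) equals 1/40. A quick
numpy statement of that expectation (an illustration, not part of the test):

import numpy as np

# mean over 5 * 8 = 40 elements => gradient w.r.t. each element is 1/40
expected_grad_mul_out = np.full((5, 8), 1.0 / 40.0, dtype='float32')
print(expected_grad_mul_out[0, 0])  # 0.025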
Example #6
    def test_fetch_var(self):
        val = numpy.array([1, 3, 5]).astype(numpy.int32)
        x = layers.create_tensor(dtype="int32", persistable=True, name="x")
        layers.assign(input=val, output=x)
        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
        fetched_x = fluid.fetch_var("x")
        self.assertTrue(
            numpy.array_equal(fetched_x, val),
            "fetch_x=%s val=%s" % (fetched_x, val))
        self.assertEqual(fetched_x.dtype, val.dtype)
Example #7
    def test_assign(self):
        val = (
            -100 + 200 * numpy.random.random(size=(2, 5))).astype(numpy.int32)
        x = layers.create_tensor(dtype="float32")
        layers.assign(input=val, output=x)
        exe = fluid.Executor(fluid.CPUPlace())
        fetched_x = exe.run(fluid.default_main_program(),
                            feed={},
                            fetch_list=[x])[0]
        self.assertTrue(
            numpy.array_equal(fetched_x, val),
            "fetch_x=%s val=%s" % (fetched_x, val))
        self.assertEqual(fetched_x.dtype, val.dtype)
Example #8
def get_model(args):
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc
Example #9
    def run_program(self):
        outputs = []
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            output = exe.run(fluid.default_main_program(),
                             feed=self.inputs,
                             fetch_list=self.fetch_list,
                             return_numpy=False)
            outputs.append(output)
        self.actual_outputs = outputs
Example #10
    def run_program(self):
        """Run the test program.
        """
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)

            output = exe.run(fluid.default_main_program(),
                             feed=self.inputs,
                             fetch_list=self.fetch_list,
                             return_numpy=True)
            self.op_output = output
Example #11
    def test_nvprof(self):
        if not fluid.core.is_compiled_with_cuda():
            return
        epoc = 8
        dshape = [4, 3, 28, 28]
        data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
        conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        output_file = 'cuda_profiler.txt'
        with profiler.cuda_profiler(output_file, 'csv') as nvprof:
            for i in range(epoc):
                input = np.random.random(dshape).astype('float32')
                exe.run(fluid.default_main_program(), feed={'data': input})
        os.remove(output_file)
Example #12
    def check_result(self, fn, place, dtype):
        shape = [9, 10]

        x_data = np.random.random(size=shape).astype(dtype)
        y_data = np.random.random(size=shape).astype(dtype)
        python_out = fn(x_data, y_data)

        x_var = layers.create_global_var(
            name='x', shape=shape, value=0.0, dtype=dtype, persistable=True)
        y_var = layers.create_global_var(
            name='y', shape=shape, value=0.0, dtype=dtype, persistable=True)
        out = fn(x_var, y_var)

        exe = fluid.Executor(place)

        exe.run(fluid.default_startup_program())
        fluid_out = exe.run(fluid.default_main_program(),
                            feed={'x': x_data,
                                  'y': y_data},
                            fetch_list=[out])

        np.testing.assert_array_equal(python_out, fluid_out[0])
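
check_result requires the same fn to run eagerly on numpy arrays and
symbolically on fluid Variables; fluid patches the arithmetic operators onto
Variable, so plain operators satisfy both sides. A hedged example of a
qualifying fn:

import numpy as np

# works on numpy arrays directly; on fluid Variables the same `+`
# lowers to an elementwise_add op, which check_result then executes
add_fn = lambda x, y: x + y
print(add_fn(np.ones((9, 10)), np.ones((9, 10)))[0, 0])  # 2.0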
Example #13
def train(use_cuda, save_dirname=None, is_local=True):
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)
    feature_out = db_lstm(**locals())
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why the output becomes NaN
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.01,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency tracking and move this config before the optimizer
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        start_time = time.time()
        batch_id = 0
        for pass_id in xrange(PASS_NUM):
            for data in train_data():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))
                    # Set the threshold low to speed up the CI test
                    if float(cost) < 60.0:
                        if save_dirname is not None:
                            # TODO(liuyiqun): Change the target to crf_decode
                            fluid.io.save_inference_model(save_dirname, [
                                'word_data', 'verb_data', 'ctx_n2_data',
                                'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                                'ctx_p2_data', 'mark_data'
                            ], [feature_out], exe)
                        return

                batch_id = batch_id + 1

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
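
The non-local branch above is driven entirely by environment variables read
with os.getenv. A hedged single-machine illustration of the expected settings
(all values are placeholders, not from the original):

import os

os.environ['PADDLE_INIT_PORT'] = '6174'           # pserver port
os.environ['PADDLE_INIT_PSERVERS'] = '127.0.0.1'  # comma-separated pserver IPs
os.environ['TRAINERS'] = '2'                      # number of trainers
os.environ['POD_IP'] = '127.0.0.1'                # this node's IP
os.environ['PADDLE_INIT_TRAINER_ID'] = '0'        # this trainer's id
os.environ['TRAINING_ROLE'] = 'TRAINER'           # or 'PSERVER'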
Example #14
def train(net_type, use_cuda, save_dirname, is_local):
    classdim = 10
    data_shape = [3, 32, 32]

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if net_type == "vgg":
        print("train vgg net")
        net = vgg16_bn_drop(images)
    elif net_type == "resnet":
        print("train resnet")
        net = resnet_cifar10(images, 32)
    else:
        raise ValueError("%s network is not supported" % net_type)

    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program 
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = 128
    PASS_NUM = 1

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10(), buf_size=128 * 10),
        batch_size=BATCH_SIZE)

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                exe.run(main_program, feed=feeder.feed(data))

                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(program=test_program,
                                                feed=feeder.feed(test_data),
                                                fetch_list=[avg_cost, acc])
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Use 1 segment for speeding up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                        format(pass_id, batch_id + 1,
                               float(avg_loss_value), float(acc_value)))

                    if acc_value > 0.01:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                      [predict], exe)
                        return

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Example #15
def train(word_dict,
          net_method,
          use_cuda,
          parallel=False,
          save_dirname=None,
          is_local=True):
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim)
    else:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            cost, acc, _ = net_method(
                pd.read_input(data),
                pd.read_input(label),
                input_dim=dict_dim,
                class_dim=class_dim)
            pd.write_output(cost)
            pd.write_output(acc)

        cost, acc = pd()
        cost = fluid.layers.mean(cost)
        acc_out = fluid.layers.mean(acc)
        prediction = None
        assert save_dirname is None

    adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in xrange(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ["words"],
                                                      prediction, exe)
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Cost is too large for {0}".format(
            net_method.__name__))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Example #16
# (listing truncated: the code below begins mid-way through a
# fluid.layers.conv2d call)
                              padding=(0, 0),
                              dilation=(1, 1),
                              groups=1,
                              bias_attr=False,
                              name="conv2dX2")

add1 = fluid.layers.elementwise_add(conv2d1, conv2d2, name="add1")

relu2a = fluid.layers.relu(add1, name="relu2a")
relu2b = fluid.layers.relu(add1, name="relu2b")

add2 = fluid.layers.elementwise_add(relu2a, relu2b, name="add2")

relu3a = fluid.layers.relu(add2, name="relu3a")
relu3b = fluid.layers.relu(add2, name="relu3b")

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
inp_dict = {'inputX1': inp_blob1, 'inputX2': inp_blob2}
var = [relu3a, relu3b]
res_paddle = exe.run(fluid.default_main_program(),
                     fetch_list=var,
                     feed=inp_dict)

fluid.io.save_inference_model(os.path.join(sys.argv[1], "2in_2out_dynbatch"),
                              list(inp_dict.keys()),
                              var,
                              exe,
                              model_filename="2in_2out_dynbatch.pdmodel",
                              params_filename="2in_2out_dynbatch.pdiparams")
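
A hedged sketch of loading the model saved above for a sanity check, where
model_dir stands for the os.path.join(sys.argv[1], "2in_2out_dynbatch")
directory and inp_blob1/inp_blob2 are the same feeds as above:

import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
# load_inference_model returns the program, the feed names, and fetch targets
inference_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname=model_dir,
    executor=exe,
    model_filename="2in_2out_dynbatch.pdmodel",
    params_filename="2in_2out_dynbatch.pdiparams")
res = exe.run(inference_program,
              feed={feed_names[0]: inp_blob1, feed_names[1]: inp_blob2},
              fetch_list=fetch_targets)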
Example #17
def train():
    args = parse_args()
    model_type = args.model_type
    rnn_model = args.rnn_model
    logger = logging.getLogger("lm")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if args.enable_ce:
        fluid.default_startup_program().random_seed = SEED
    if args.log_path:
        file_handler = logging.FileHandler(args.log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    else:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    logger.info('Running with args : {}'.format(args))

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not support")
        return

    # Training process
    loss, last_hidden, last_cell, feed_order = lm_model.lm_model(
        hidden_size,
        vocab_size,
        batch_size,
        num_layers=num_layers,
        num_steps=num_steps,
        init_scale=init_scale,
        dropout=dropout,
        rnn_model=rnn_model)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    learning_rate = fluid.layers.create_global_var(name="learning_rate",
                                                   shape=[1],
                                                   value=1.0,
                                                   dtype='float32',
                                                   persistable=True)

    optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)

    optimizer.minimize(loss)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    data_path = args.data_path
    print("begin to load data")
    raw_data = reader.ptb_raw_data(data_path)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, init_hidden, init_cell, epoch_id=0, with_lr=True):
        x, y = batch
        new_lr = base_learning_rate * (lr_decay**max(
            epoch_id + 1 - epoch_start_decay, 0.0))
        lr = np.ones((1), dtype='float32') * new_lr
        res = {}
        x = x.reshape((-1, num_steps, 1))
        y = y.reshape((-1, 1))

        res['x'] = x
        res['y'] = y
        res['init_hidden'] = init_hidden
        res['init_cell'] = init_cell
        if with_lr:
            res['learning_rate'] = lr

        return res

    def eval(data):
        # NOTE: evaluation reuses the training batch_size here
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id,
                                            with_lr=False)
            fetch_outs = exe.run(
                inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    # get train epoch size
    batch_len = len(train_data) // batch_size
    epoch_size = (batch_len - 1) // num_steps
    log_interval = epoch_size // 10
    total_time = 0.0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0

        init_hidden = None
        init_cell = None
        #debug_para(fluid.framework.default_main_program(), parallel_executor)
        total_loss = 0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=epoch_id)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, last_hidden.name,
                                     last_cell.name, 'learning_rate'
                                 ],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            lr = np.array(fetch_outs[3])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print("ppl ", batch_id, ppl[0], lr[0])

        ppl = np.exp(total_loss / iters)
        if epoch_id == 0 and ppl[0] > 1000:
            # a bad init leaves the ppl above 1000 after the first epoch;
            # no need to continue in that case
            return
        end_time = time.time()
        total_time += end_time - start_time
        print("train ppl", ppl[0])

        if epoch_id == max_epoch - 1 and args.enable_ce:
            print("ptblm\tlstm_language_model_duration\t%s" %
                  (total_time / max_epoch))
            print("ptblm\tlstm_language_model_loss\t%s" % ppl[0])

        model_path = os.path.join("model_new/", str(epoch_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(executor=exe,
                                   dirname=model_path,
                                   main_program=main_program)
        valid_ppl = eval(valid_data)
        print("valid ppl", valid_ppl[0])
    test_ppl = eval(test_data)
    print("test ppl", test_ppl[0])
Example #18
# Reader for training
train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.cifar.train10(), buf_size=50000),
    batch_size=BATCH_SIZE)

# Reader for testing; a separate dataset is used for testing.
test_reader = paddle.batch(
    paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)

use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

feed_order = ['pixel', 'label']

main_program = fluid.default_main_program()
star_program = fluid.default_startup_program()

predict, [avg_cost, acc] = train_program()

# Test program
test_program = main_program.clone(for_test=True)

optimizer = optimizer_program()
optimizer.minimize(avg_cost)

exe = fluid.Executor(place)
EPOCH_NUM = 2

# Compute the test cost during training
def train_test(program, reader):
Example #19
    def forward(self, x):
        x = fluid.layers.reshape(x, shape=[1, 4])
        x = self.affine1(x)
        x = fluid.layers.dropout(x, self.dropout_ratio)
        x = fluid.layers.relu(x)
        action_scores = self.affine2(x)

        self._x_for_debug = x

        return fluid.layers.softmax(action_scores, axis=1)


with fluid.dygraph.guard():
    fluid.default_startup_program().random_seed = args.seed
    fluid.default_main_program().random_seed = args.seed
    np.random.seed(args.seed)

    policy = Policy()

    eps = np.finfo(np.float32).eps.item()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=1e-2, parameter_list=policy.parameters())

    def get_mean_and_std(values=[]):
        n = 0.
        s = 0.
        for val in values:
            s += val
            n += 1
        mean = s / n
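        # (listing truncated above; a hedged completion, not the original:
        # accumulate squared deviations, then return both statistics)
        std = 0.
        for val in values:
            std += (val - mean) ** 2
        std = (std / n) ** 0.5
        return mean, std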
Example #20
def train(conf_dict, data_reader, use_cuda=False):
    """
    Training of p classification model
    """
    label_dict_len = data_reader.get_dict_size('label_dict')
    # input layer
    word = fluid.layers.data(name='word_data',
                             shape=[1],
                             dtype='int64',
                             lod_level=1)
    postag = fluid.layers.data(name='token_pos',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)
    # label
    target = fluid.layers.data(name='target',
                               shape=[label_dict_len],
                               dtype='float32',
                               lod_level=0)
    # NN: embedding + lstm + pooling
    feature_out = p_model.db_lstm(data_reader, word, postag, conf_dict)
    # loss function for multi-label classification
    class_cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=feature_out, \
        label=target)
    avg_cost = fluid.layers.mean(class_cost)
    # optimization method
    sgd_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=2e-3)

    sgd_optimizer.minimize(avg_cost)

    train_batch_reader = paddle.batch(paddle.reader.shuffle(
        data_reader.get_train_reader(), buf_size=8192),
                                      batch_size=conf_dict['batch_size'])

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, postag, target], place=place)
    exe = fluid.Executor(place)

    save_dirname = conf_dict['p_model_save_dir']

    def train_loop(main_program, trainer_id=0):
        """start train"""
        exe.run(fluid.default_startup_program())

        start_time = time.time()
        batch_id = 0
        for pass_id in six.moves.xrange(conf_dict['pass_num']):
            pass_start_time = time.time()
            cost_sum, cost_counter = 0, 0
            for data in train_batch_reader():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]
                cost_sum += cost
                cost_counter += 1
                if batch_id % 10 == 0 and batch_id != 0:
                    sys.stderr.write(
                        "batch %d finished, second per batch: %02f\n" %
                        (batch_id, (time.time() - start_time) / batch_id))

                # expected cost reached; training is over
                if float(cost) < 0.01:
                    pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
                    sys.stderr.write(
                        "%d pass end, cost time: %02f, avg_cost: %f\n" %
                        (pass_id, time.time() - pass_start_time,
                         pass_avg_cost))
                    save_path = os.path.join(save_dirname, 'final')
                    fluid.io.save_inference_model(save_path,
                                                  ['word_data', 'token_pos'],
                                                  [feature_out],
                                                  exe,
                                                  params_filename='params')
                    return
                batch_id = batch_id + 1

            # save the model once each pass ends
            pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
            sys.stderr.write(
                "%d pass end, cost time: %02f, avg_cost: %f\n" %
                (pass_id, time.time() - pass_start_time, pass_avg_cost))
            save_path = os.path.join(save_dirname,
                                     'pass_%04d-%f' % (pass_id, pass_avg_cost))
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out],
                                          exe,
                                          params_filename='params')

        else:
            # all passes completed; the training is over
            save_path = os.path.join(save_dirname, 'final')
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out],
                                          exe,
                                          params_filename='params')
        return

    train_loop(fluid.default_main_program())
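
Note that the else: near the end of train_loop binds to the for loop, not to
an if: it runs only when the pass loop completes without break (the early
exits above use return, which skips it as well). A minimal illustration of
the construct:

for i in range(3):
    if i == 99:  # never true, so no break occurs
        break
else:
    print("loop finished without break")  # this line runs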
Example #21
    def get_pretraining_output(self, mask_label, mask_pos, labels):
        """Get the loss & accuracy for pretraining"""

        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

        # extract the first token feature in each sentence
        next_sent_feat = self.get_pooled_output()
        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out,
                                                shape=[-1, self._emb_size])
        # extract masked tokens' feature
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=self._emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(name=self.model_name +
                                       'mask_lm_trans_fc.w_0',
                                       initializer=self._param_initializer),
            bias_attr=fluid.ParamAttr(name=self.model_name +
                                      'mask_lm_trans_fc.b_0'))
        # transform: layer norm
        mask_trans_feat = pre_process_layer(mask_trans_feat,
                                            'n',
                                            name=self.model_name +
                                            'mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name=self.model_name + "mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))
        if self._weight_sharing:
            fc_out = fluid.layers.matmul(
                x=mask_trans_feat,
                y=fluid.default_main_program().global_block().var(
                    self._word_emb_name),
                transpose_y=True)
            fc_out += fluid.layers.create_parameter(shape=[self._voc_size],
                                                    dtype=self._dtype,
                                                    attr=mask_lm_out_bias_attr,
                                                    is_bias=True)

        else:
            fc_out = fluid.layers.fc(
                input=mask_trans_feat,
                size=self._voc_size,
                param_attr=fluid.ParamAttr(
                    name=self.model_name + "mask_lm_out_fc.w_0",
                    initializer=self._param_initializer),
                bias_attr=mask_lm_out_bias_attr)

        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=mask_label)
        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)

        next_sent_fc_out = fluid.layers.fc(
            input=next_sent_feat,
            size=2,
            param_attr=fluid.ParamAttr(name=self.model_name +
                                       "next_sent_fc.w_0",
                                       initializer=self._param_initializer),
            bias_attr=self.model_name + "next_sent_fc.b_0")

        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
            logits=next_sent_fc_out, label=labels, return_softmax=True)

        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax,
                                              label=labels)

        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)

        loss = mean_next_sent_loss + mean_mask_lm_loss
        return next_sent_acc, mean_mask_lm_loss, loss
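
In the _weight_sharing branch above, the output projection reuses the word
embedding matrix transposed instead of a separate FC weight. The shape logic
in numpy terms (sizes here are illustrative):

import numpy as np

emb_size, voc_size, n_masked = 4, 7, 3
word_emb = np.random.rand(voc_size, emb_size).astype('float32')
mask_trans_feat = np.random.rand(n_masked, emb_size).astype('float32')
bias = np.zeros(voc_size, dtype='float32')

# matmul(x, y, transpose_y=True) is x @ y.T: one logit per vocabulary entry
fc_out = mask_trans_feat @ word_emb.T + bias
print(fc_out.shape)  # (3, 7)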
Example #22
    def _check_mlp(self, place=None):
        seed = 90
        batch_size = 128

        if place is None:
            place = (fluid.CPUPlace()
                     if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (fluid.CPUPlace()
                         if not core.is_compiled_with_cuda() else
                         fluid.CUDAPlace(0))

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        if core.is_compiled_with_rocm():
            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
        else:
            self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                self.assertTrue(
                    np.allclose(value, dy_param_value[key], atol=1e-3))
            else:
                self.assertTrue(np.allclose(value, dy_param_value[key]))
Example #23
def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.
    """
    TOTAL_NUM = 500
    IMG_NAME = 'img'
    LABEL_NAME = 'label'

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = mnist_cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # choose CPU or GPU resources according to the configuration
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    BATCH_SIZE = 1

    test_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.mnist.test(), buf_size=128 * 10),
                               batch_size=BATCH_SIZE)

    fluid.io.load_params(exe,
                         "./mnist/",
                         main_program=fluid.default_main_program())

    # advbox demo
    m = PaddleModel(fluid.default_main_program(),
                    IMG_NAME,
                    LABEL_NAME,
                    logits.name,
                    avg_cost.name, (-1, 1),
                    channel_axis=1)
    # use static FGSM; epsilon is fixed
    attack = FGSM_static(m)
    attack_config = {"epsilon": 0.01}

    # use test data to generate adversarial examples
    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1
        adversary = Adversary(data[0][0], data[0][1])

        # FGSM non-targeted attack
        adversary = attack(adversary, **attack_config)

        if adversary.is_successful():
            fooling_count += 1
            #print(
            #    'attack success, original_label=%d, adversarial_label=%d, count=%d'
            #    % (data[0][1], adversary.adversarial_label, total_count))
        else:
            logger.info('attack failed, original_label=%d, count=%d' %
                        (data[0][1], total_count))

        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    print("fgsm attack done without any defence")

    # use LabelSmoothingDefence

    # advbox LabelSmoothingDefence demo

    n = PaddleLabelSmoothingDefenceModel(fluid.default_main_program(),
                                         IMG_NAME,
                                         LABEL_NAME,
                                         logits.name,
                                         avg_cost.name, (-1, 1),
                                         channel_axis=1,
                                         preprocess=None,
                                         smoothing=0.1)
    attack_new = FGSM_static(n)
    attack_config = {"epsilon": 0.01}

    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1

        # y is not set here; it will be obtained automatically
        adversary = Adversary(data[0][0], None)

        # FGSM non-targeted attack
        adversary = attack_new(adversary, **attack_config)

        if adversary.is_successful():
            fooling_count += 1
            logger.info(
                'attack success, original_label=%d, adversarial_label=%d, count=%d'
                % (data[0][1], adversary.adversarial_label, total_count))
        else:
            logger.info('attack failed, original_label=%d, count=%d' %
                        (data[0][1], total_count))

        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    print("fgsm attack done with LabelSmoothingDefence")
Example #24
    def testSetVariableBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            place = (fluid.CPUPlace()
                     if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            adam = Adam(learning_rate=0.0,
                        beta1=0.8,
                        beta2=0.6,
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            adam.set_dict(self.opti_dict)
            ptb_model.set_dict(self.state_dict)

            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
Example #25
    def setUp(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = (fluid.CPUPlace()
                     if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            self.opti_dict = adam.state_dict()
            self.base_opti = {}
            for k, v in self.opti_dict.items():
                self.base_opti[v.name] = v.numpy()
                self.assertTrue(np.sum(np.abs(v.numpy())) != 0)

            fluid.save_dygraph(self.opti_dict, "./test_dy")

            self.state_dict = ptb_model.state_dict()

            self.model_base = {}
            for k, v in self.state_dict.items():
                np_t = v.numpy()
                self.model_base[k] = np_t

            fluid.save_dygraph(self.state_dict, "./test_dy")
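Both dictionaries above are saved under the same "./test_dy" prefix; fluid.save_dygraph writes parameters to a .pdparams file and optimizer state to a .pdopt file. A minimal round-trip sketch of the matching restore path, assuming the Paddle 1.x dygraph API used above (the "./tmp_dy" prefix and the tiny Linear layer are illustrative only):

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    layer = fluid.dygraph.Linear(4, 4)
    adam = fluid.optimizer.Adam(learning_rate=0.01,
                                parameter_list=layer.parameters())
    # take one step so the optimizer accumulators exist
    loss = fluid.layers.reduce_mean(
        layer(fluid.dygraph.to_variable(np.ones([2, 4], dtype="float32"))))
    loss.backward()
    adam.minimize(loss)

    fluid.save_dygraph(layer.state_dict(), "./tmp_dy")  # tmp_dy.pdparams
    fluid.save_dygraph(adam.state_dict(), "./tmp_dy")   # tmp_dy.pdopt
    # load_dygraph returns (parameter dict, optimizer dict) for one prefix
    para_sd, opti_sd = fluid.load_dygraph("./tmp_dy")
    layer.set_dict(para_sd)
    adam.set_dict(opti_sd)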
Example No. 26
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    num_iterations = cfg.max_iter

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=False)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = [loss]

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=500,
            warmup_factor=1.0 / 3.0),
        regularization=fluid.regularizer.L2Decay(0.0001),
        momentum=0.9)
    optimizer.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name)

    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size, shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def run(iterations):
        reader_time = []
        run_time = []
        total_images = 0

        # create the generator once; calling train_reader() inside the loop
        # would rebuild it and fetch the same first batch every iteration
        train_gen = train_reader()
        for batch_id in range(iterations):
            start_time = time.time()
            data = next(train_gen)
            end_time = time.time()
            reader_time.append(end_time - start_time)
            start_time = time.time()
            if cfg.parallel:
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                     feed=feeder.feed(data))
            else:
                outs = exe.run(fluid.default_main_program(),
                               fetch_list=[v.name for v in fetch_list],
                               feed=feeder.feed(data))
            end_time = time.time()
            run_time.append(end_time - start_time)
            total_images += len(data)
            print("Batch {:d}, loss {:.6f} ".format(batch_id,
                                                    np.mean(outs[0])))
        return reader_time, run_time, total_images

    def run_pyreader(iterations):
        reader_time = [0]
        run_time = []
        total_images = 0

        py_reader.start()
        try:
            for batch_id in range(iterations):
                start_time = time.time()
                if cfg.parallel:
                    outs = train_exe.run(
                        fetch_list=[v.name for v in fetch_list])
                else:
                    outs = exe.run(fluid.default_main_program(),
                                   fetch_list=[v.name for v in fetch_list])
                end_time = time.time()
                run_time.append(end_time - start_time)
                total_images += devices_num
                print("Batch {:d}, loss {:.6f} ".format(
                    batch_id, np.mean(outs[0])))
        except fluid.core.EOFException:
            py_reader.reset()

        return reader_time, run_time, total_images

    run_func = run if not cfg.use_pyreader else run_pyreader

    # warm-up
    run_func(2)
    # profiling
    start = time.time()
    if cfg.use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
        reader_time, run_time, total_images = run_func(num_iterations)

    end = time.time()
    total_time = end - start
    print(
        "Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".
        format(total_time, np.sum(reader_time), np.sum(run_time),
               total_images / total_time))
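The optimizer above combines a linear warmup with piecewise exponential decay: for the first warmup_iter steps the rate ramps from warmup_factor * learning_rate up to learning_rate, after which it follows values[i] = learning_rate * gamma**i between the boundaries. A self-contained sketch of that rule, assuming this reading of exponential_with_warmup_decay (which is defined elsewhere in the repo):

def warmup_piecewise_lr(step, base_lr, boundaries, values,
                        warmup_iter=500, warmup_factor=1.0 / 3.0):
    # linear warmup from warmup_factor * base_lr up to base_lr ...
    if step < warmup_iter:
        alpha = step / float(warmup_iter)
        return base_lr * (warmup_factor * (1.0 - alpha) + alpha)
    # ... then piecewise-constant decay over the boundaries
    for boundary, value in zip(boundaries, values):
        if step < boundary:
            return value
    return values[-1]

# e.g. boundaries=[60000, 80000], values=[0.01, 0.001, 0.0001]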
Example No. 27
    def test_accuracy(self):
        image = fluid.layers.data(
            name='image', shape=[1, 28, 28], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = fluid.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        feeder = fluid.DataFeeder([image, label], place, program=main_prog)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=64)
        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)

        def train(program):
            iter = 0
            for data in train_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program, outputs=[avg_cost, acc_top1, acc_top5]):
            iter = 0
            result = [[], [], []]
            for data in eval_reader():  # evaluate on the test set, not the training set
                cost, top1, top5 = exe.run(program,
                                           feed=feeder.feed(data),
                                           fetch_list=outputs)
                iter += 1
                if iter % 100 == 0:
                    print(
                        'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(val_prog)
        fluid.io.save_inference_model(
            dirname='./test_quant_post',
            feeded_var_names=[image.name, label.name],
            target_vars=[avg_cost, acc_top1, acc_top5],
            main_program=val_prog,
            executor=exe,
            model_filename='model',
            params_filename='params')

        quant_post(
            exe,
            './test_quant_post',
            './test_quant_post_inference',
            paddle.dataset.mnist.test(),
            model_filename='model',
            params_filename='params',
            batch_nums=10)
        quant_post_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model(
            dirname='./test_quant_post_inference', executor=exe)
        top1_2, top5_2 = test(quant_post_prog, fetch_targets)
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
Example No. 28
def train(use_cuda, params_dirname):
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    print("Loading IMDB word dict....")
    word_dict = paddle.dataset.imdb.word_dict()

    print("Reading training data....")
    if args.enable_ce:
        train_reader = paddle.batch(paddle.dataset.imdb.train(word_dict),
                                    batch_size=BATCH_SIZE)
    else:
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
                                    batch_size=BATCH_SIZE)

    print("Reading testing data....")
    test_reader = paddle.batch(paddle.dataset.imdb.test(word_dict),
                               batch_size=BATCH_SIZE)

    feed_order = ['words', 'label']
    pass_num = args.num_epochs

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    if args.enable_ce:
        main_program.random_seed = 90
        startup_program.random_seed = 90

    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]

    test_program = main_program.clone(for_test=True)

    sgd_optimizer = optimizer_func()
    sgd_optimizer.minimize(avg_cost)
    exe = fluid.Executor(place)

    def train_test(program, reader):
        count = 0
        feed_var_list = [
            program.global_block().var(var_name) for var_name in feed_order
        ]
        feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
        test_exe = fluid.Executor(place)
        accumulated = len(train_func_outputs) * [0]
        for test_data in reader():
            avg_cost_np = test_exe.run(program=program,
                                       feed=feeder_test.feed(test_data),
                                       fetch_list=train_func_outputs)
            accumulated = [
                x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)
            ]
            count += 1
        return [x / count for x in accumulated]

    def train_loop():

        feed_var_list_loop = [
            main_program.global_block().var(var_name)
            for var_name in feed_order
        ]
        feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
        exe.run(fluid.default_startup_program())

        for epoch_id in range(pass_num):
            for step_id, data in enumerate(train_reader()):
                metrics = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[var.name for var in train_func_outputs])
                if (step_id + 1) % 10 == 0:

                    avg_cost_test, acc_test = train_test(
                        test_program, test_reader)
                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
                        step_id, avg_cost_test, acc_test))

                    print("Step {0}, Epoch {1} Metrics {2}".format(
                        step_id, epoch_id, list(map(np.array, metrics))))
                if math.isnan(float(metrics[0])):
                    sys.exit("got NaN loss, training failed.")
            if params_dirname is not None:
                fluid.io.save_inference_model(params_dirname, ["words"],
                                              prediction, exe)
            if args.enable_ce and epoch_id == pass_num - 1:
                print("kpis\trnn_train_cost\t%f" % metrics[0])
                print("kpis\trnn_train_acc\t%f" % metrics[1])
                print("kpis\trnn_test_cost\t%f" % avg_cost_test)
                print("kpis\trnn_test_acc\t%f" % acc_test)

    train_loop()
Example No. 29
    def testSetNumpyBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel("ptb_model",
                                 hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                # set lr to 0.0 so parameters are not updated
                new_lr = 0.0
                lr_arr.append(new_lr)

            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
            # parameter_list is required for minimize() in dygraph mode
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        beta1=0.8,
                        beta2=0.6,
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            np_opti_dict = {}
            np_state_dict = {}

            for k, v in self.opti_dict.items():
                np_opti_dict[v.name] = v.numpy()

            for k, v in self.state_dict.items():
                np_state_dict[v.name] = v.numpy()

            adam.set_dict(np_opti_dict)
            ptb_model.set_dict(np_state_dict)

            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            # check parameter

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[v.name]
                self.assertTrue(np.array_equal(new_t, base_t))
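The accumulator assertions above rest on Adam tracking beta1**t and beta2**t in beta1_pow_acc / beta2_pow_acc, multiplying each by its beta once per minimize() call; after restoring the saved state and taking one extra step, each accumulator is exactly the saved value times the beta. A plain-numpy check of that bookkeeping:

import numpy as np

beta1, beta2 = 0.8, 0.6          # the values passed to Adam above
beta1_pow, beta2_pow = 1.0, 1.0
for step in range(3):            # each minimize() call does one update
    beta1_pow *= beta1
    beta2_pow *= beta2

assert np.isclose(beta1_pow, beta1 ** 3)
assert np.isclose(beta2_pow, beta2 ** 3)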
Example No. 30
def main():
    env = os.environ
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check that use_gpu=True is not set with a CPU-only PaddlePaddle build
    check_gpu(cfg.use_gpu)
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    # in iterable mode, call set_sample_list_generator(train_reader, place) instead
    train_loader.set_sample_list_generator(train_reader)

    # get all student variables
    student_vars = []
    for v in fluid.default_main_program().list_vars():
        try:
            student_vars.append((v.name, v.shape))
        except Exception:
            pass
    # uncomment the following lines to print all student variables
    # print("="*50 + "student_model_vars" + "="*50)
    # print(student_vars)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    # in iterable mode, call set_sample_list_generator(eval_reader, place) instead
    eval_loader.set_sample_list_generator(eval_reader)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(var, force_persistable=False)
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    # get all teacher variables
    teacher_vars = []
    for v in teacher_program.list_vars():
        try:
            teacher_vars.append((v.name, v.shape))
        except Exception:
            pass
    # uncomment the following lines to print all teacher variables
    # print("="*50 + "teacher_model_vars" + "="*50)
    # print(teacher_vars)

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    teacher_program = teacher_program.clone(for_test=True)

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    data_name_map = {
        'target0': 'target0',
        'target1': 'target1',
        'target2': 'target2',
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    yolo_output_names = [
        'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
        'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
        'strided_slice_4.tmp_0', 'transpose_0.tmp_0', 'strided_slice_5.tmp_0',
        'strided_slice_6.tmp_0', 'strided_slice_7.tmp_0',
        'strided_slice_8.tmp_0', 'strided_slice_9.tmp_0', 'transpose_2.tmp_0',
        'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
        'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
        'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
    ]

    distill_pairs = [['teacher_conv2d_6.tmp_1', 'conv2d_20.tmp_1'],
                     ['teacher_conv2d_14.tmp_1', 'conv2d_28.tmp_1'],
                     ['teacher_conv2d_22.tmp_1', 'conv2d_36.tmp_1']]

    if not cfg.use_fine_grained_loss:
        distill_loss = l2_distill(distill_pairs, 100)
    else:
        distill_loss = split_distill(yolo_output_names, 1000)
    loss = distill_loss + loss
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())
    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
    ignore_params = cfg.finetune_exclude_pretrained_params \
                 if 'finetune_exclude_pretrained_params' in cfg else []
    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, fluid.default_main_program(),
                                   FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()
    elif cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, fluid.default_main_program(),
                                   cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(exe,
                               fluid.default_main_program(),
                               cfg.pretrain_weights,
                               ignore_params=ignore_params)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    # only enable sync_bn when training on multiple GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # number of iterations after which CompiledProgram drops local execution
    # scopes. Set it to 1 to save memory, so that unused variables in local
    # execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1

    parallel_main = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  #[map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))
        if (step_id % cfg.snapshot_iter == 0 and step_id != 0) \
                or step_id == cfg.max_iters - 1:
            save_name = str(
                step_id) if step_id != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, fluid.default_main_program(),
                            os.path.join(save_dir, save_name))
            if FLAGS.save_inference:
                feeded_var_names = ['image', 'im_size']
                targets = list(fetches.values())
                fluid.io.save_inference_model(save_dir + '/infer',
                                              feeded_var_names, targets, exe,
                                              eval_prog)
            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                               eval_values, eval_cls, cfg)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])

            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, fluid.default_main_program(),
                                os.path.join(save_dir, "best_model"))
                if FLAGS.save_inference:
                    feeded_var_names = ['image', 'im_size']
                    targets = list(fetches.values())
                    fluid.io.save_inference_model(save_dir + '/infer',
                                                  feeded_var_names, targets,
                                                  exe, eval_prog)
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
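l2_distill is defined elsewhere in the repo; given how it is called above (feature-map name pairs plus a weight), a plausible minimal form is a weighted mean-squared distance between each teacher/student pair. The sketch below is an assumption about that helper, not its actual code:

import paddle.fluid as fluid

def l2_distill_sketch(pairs, weight):
    # assumed form: mean L2 distance per teacher/student feature pair,
    # summed over pairs and scaled by `weight`
    block = fluid.default_main_program().global_block()
    losses = []
    for teacher_name, student_name in pairs:
        teacher = block.var(teacher_name)
        student = block.var(student_name)
        teacher.stop_gradient = True  # no gradients into the teacher
        losses.append(
            fluid.layers.reduce_mean(fluid.layers.square(teacher - student)))
    return fluid.layers.sum(losses) * weight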
Example No. 31
def do_train(args):
    device = set_device("gpu" if args.use_cuda else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # set seed for CE (args.random_seed may be the string "None")
    random_seed = eval(str(args.random_seed))
    if random_seed is not None:
        fluid.default_main_program().random_seed = random_seed
        fluid.default_startup_program().random_seed = random_seed

    # define inputs
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None, None], "int64", name="src_pos"),
        Input([None, args.n_head, None, None],
              "float32",
              name="src_slf_attn_bias"),
        Input([None, None], "int64", name="trg_word"),
        Input([None, None], "int64", name="trg_pos"),
        Input([None, args.n_head, None, None],
              "float32",
              name="trg_slf_attn_bias"),
        Input([None, args.n_head, None, None],
              "float32",
              name="trg_src_attn_bias"),
    ]
    labels = [
        Input([None, 1], "int64", name="label"),
        Input([None, 1], "float32", name="weight"),
    ]

    # define the data loaders
    train_loader, eval_loader = create_data_loader(args, device)

    # define model
    transformer = Transformer(args.src_vocab_size, args.trg_vocab_size,
                              args.max_length + 1, args.n_layer, args.n_head,
                              args.d_key, args.d_value, args.d_model,
                              args.d_inner_hid, args.prepostprocess_dropout,
                              args.attention_dropout, args.relu_dropout,
                              args.preprocess_cmd, args.postprocess_cmd,
                              args.weight_sharing, args.bos_idx, args.eos_idx)

    transformer.prepare(fluid.optimizer.Adam(
        learning_rate=fluid.layers.noam_decay(
            args.d_model, args.warmup_steps, learning_rate=args.learning_rate),
        beta1=args.beta1,
        beta2=args.beta2,
        epsilon=float(args.eps),
        parameter_list=transformer.parameters()),
                        CrossEntropyCriterion(args.label_smooth_eps),
                        inputs=inputs,
                        labels=labels)

    # init from a checkpoint to resume a previous training run
    if args.init_from_checkpoint:
        transformer.load(args.init_from_checkpoint)
    # init from a pretrained model to warm-start the current task
    if args.init_from_pretrain_model:
        transformer.load(args.init_from_pretrain_model, reset_optimizer=True)

    # model train
    transformer.fit(train_data=train_loader,
                    eval_data=eval_loader,
                    epochs=args.epoch,
                    eval_freq=1,
                    save_freq=1,
                    save_dir=args.save_model,
                    callbacks=[TrainCallback(args)])
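fluid.layers.noam_decay above implements the Transformer schedule lr(step) = learning_rate * d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5), i.e. linear warmup followed by inverse-square-root decay. A standalone sketch for reference:

def noam_lr(step, d_model, warmup_steps, learning_rate=1.0):
    # warms up roughly linearly for warmup_steps, then decays as 1/sqrt(step)
    step = max(step, 1)
    return (learning_rate * d_model ** -0.5 *
            min(step ** -0.5, step * warmup_steps ** -1.5))

# the rate peaks near step == warmup_steps, e.g.:
# [noam_lr(s, 512, 4000) for s in (1, 4000, 16000)]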
Example No. 32
def split_distill(split_output_names, weight):
    """
    Add fine grained distillation losses.
    Each loss is composed by distill_reg_loss, distill_cls_loss and
    distill_obj_loss
    """
    student_var = []
    for name in split_output_names:
        student_var.append(
            fluid.default_main_program().global_block().var(name))
    s_x0, s_y0, s_w0, s_h0, s_obj0, s_cls0 = student_var[0:6]
    s_x1, s_y1, s_w1, s_h1, s_obj1, s_cls1 = student_var[6:12]
    s_x2, s_y2, s_w2, s_h2, s_obj2, s_cls2 = student_var[12:18]
    teacher_var = []
    for name in split_output_names:
        teacher_var.append(
            fluid.default_main_program().global_block().var('teacher_' + name))
    t_x0, t_y0, t_w0, t_h0, t_obj0, t_cls0 = teacher_var[0:6]
    t_x1, t_y1, t_w1, t_h1, t_obj1, t_cls1 = teacher_var[6:12]
    t_x2, t_y2, t_w2, t_h2, t_obj2, t_cls2 = teacher_var[12:18]

    def obj_weighted_reg(sx, sy, sw, sh, tx, ty, tw, th, tobj):
        loss_x = fluid.layers.sigmoid_cross_entropy_with_logits(
            sx, fluid.layers.sigmoid(tx))
        loss_y = fluid.layers.sigmoid_cross_entropy_with_logits(
            sy, fluid.layers.sigmoid(ty))
        loss_w = fluid.layers.abs(sw - tw)
        loss_h = fluid.layers.abs(sh - th)
        loss = fluid.layers.sum([loss_x, loss_y, loss_w, loss_h])
        weighted_loss = fluid.layers.reduce_mean(loss *
                                                 fluid.layers.sigmoid(tobj))
        return weighted_loss

    def obj_weighted_cls(scls, tcls, tobj):
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            scls, fluid.layers.sigmoid(tcls))
        weighted_loss = fluid.layers.reduce_mean(
            fluid.layers.elementwise_mul(loss,
                                         fluid.layers.sigmoid(tobj),
                                         axis=0))
        return weighted_loss

    def obj_loss(sobj, tobj):
        obj_mask = fluid.layers.cast(tobj > 0., dtype="float32")
        obj_mask.stop_gradient = True
        loss = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(sobj, obj_mask))
        return loss

    distill_reg_loss0 = obj_weighted_reg(s_x0, s_y0, s_w0, s_h0, t_x0, t_y0,
                                         t_w0, t_h0, t_obj0)
    distill_reg_loss1 = obj_weighted_reg(s_x1, s_y1, s_w1, s_h1, t_x1, t_y1,
                                         t_w1, t_h1, t_obj1)
    distill_reg_loss2 = obj_weighted_reg(s_x2, s_y2, s_w2, s_h2, t_x2, t_y2,
                                         t_w2, t_h2, t_obj2)
    distill_reg_loss = fluid.layers.sum(
        [distill_reg_loss0, distill_reg_loss1, distill_reg_loss2])

    distill_cls_loss0 = obj_weighted_cls(s_cls0, t_cls0, t_obj0)
    distill_cls_loss1 = obj_weighted_cls(s_cls1, t_cls1, t_obj1)
    distill_cls_loss2 = obj_weighted_cls(s_cls2, t_cls2, t_obj2)
    distill_cls_loss = fluid.layers.sum(
        [distill_cls_loss0, distill_cls_loss1, distill_cls_loss2])

    distill_obj_loss0 = obj_loss(s_obj0, t_obj0)
    distill_obj_loss1 = obj_loss(s_obj1, t_obj1)
    distill_obj_loss2 = obj_loss(s_obj2, t_obj2)
    distill_obj_loss = fluid.layers.sum(
        [distill_obj_loss0, distill_obj_loss1, distill_obj_loss2])
    loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss) * weight
    return loss
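The helper losses above share one pattern: sigmoid cross-entropy of student logits against the teacher's sigmoid outputs, down-weighted by the teacher's objectness so that empty anchor cells contribute little. A numpy rendering of obj_weighted_cls for intuition (shapes are kept identical here, whereas the fluid version broadcasts the objectness over class channels via axis=0):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_xent_with_logits(logits, labels):
    # numerically stable form: max(x, 0) - x*z + log(1 + exp(-|x|))
    return (np.maximum(logits, 0) - logits * labels +
            np.log1p(np.exp(-np.abs(logits))))

def obj_weighted_cls_np(s_cls, t_cls, t_obj):
    loss = sigmoid_xent_with_logits(s_cls, sigmoid(t_cls))  # soft teacher labels
    return (loss * sigmoid(t_obj)).mean()                   # objectness weighting

s, t, o = (np.random.randn(2, 3) for _ in range(3))
print(obj_weighted_cls_np(s, t, o))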
Example No. 33
    def test_deefcf(self):
        seed = 90
        if DATA_PATH:
            (users_np, items_np, labels_np, num_users, num_items,
             matrix) = load_data(DATA_PATH)
        else:
            (users_np, items_np, labels_np, num_users, num_items,
             matrix) = get_data()
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        startup = fluid.Program()
        main = fluid.Program()

        scope = fluid.core.Scope()
        with new_program_scope(main=main, startup=startup, scope=scope):
            users = fluid.layers.data('users', [1], dtype='int32')
            items = fluid.layers.data('items', [1], dtype='int32')
            labels = fluid.layers.data('labels', [1], dtype='float32')

            deepcf = DeepCF(num_users, num_items, matrix)
            prediction = deepcf(users, items)
            loss = fluid.layers.reduce_sum(
                fluid.layers.log_loss(prediction, labels))
            adam = fluid.optimizer.AdamOptimizer(0.01)
            adam.minimize(loss)

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            exe.run(startup)
            for e in range(NUM_EPOCHES):
                sys.stderr.write('epoch %d\n' % e)
                for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                    if slice + BATCH_SIZE >= users_np.shape[0]:
                        break
                    static_loss = exe.run(
                        main,
                        feed={
                            users.name: users_np[slice:slice + BATCH_SIZE],
                            items.name: items_np[slice:slice + BATCH_SIZE],
                            labels.name: labels_np[slice:slice + BATCH_SIZE]
                        },
                        fetch_list=[loss])[0]
                    sys.stderr.write('static loss %s\n' % static_loss)

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            deepcf = DeepCF(num_users, num_items, matrix)
            adam = fluid.optimizer.AdamOptimizer(
                0.01, parameter_list=deepcf.parameters())
            for e in range(NUM_EPOCHES):
                sys.stderr.write('epoch %d\n' % e)
                for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                    if slice + BATCH_SIZE >= users_np.shape[0]:
                        break
                    prediction = deepcf(
                        to_variable(users_np[slice:slice + BATCH_SIZE]),
                        to_variable(items_np[slice:slice + BATCH_SIZE]))
                    loss = fluid.layers.reduce_sum(
                        fluid.layers.log_loss(
                            prediction,
                            to_variable(labels_np[slice:slice + BATCH_SIZE])))
                    loss.backward()
                    adam.minimize(loss)
                    deepcf.clear_gradients()
                    dy_loss = loss.numpy()
                    sys.stderr.write('dynamic loss: %s %s\n' %
                                     (slice, dy_loss))

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            deepcf2 = DeepCF(num_users, num_items, matrix)
            adam2 = fluid.optimizer.AdamOptimizer(
                0.01, parameter_list=deepcf2.parameters())
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            for e in range(NUM_EPOCHES):
                sys.stderr.write('epoch %d\n' % e)
                for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                    if slice + BATCH_SIZE >= users_np.shape[0]:
                        break
                    prediction2 = deepcf2(
                        to_variable(users_np[slice:slice + BATCH_SIZE]),
                        to_variable(items_np[slice:slice + BATCH_SIZE]))
                    loss2 = fluid.layers.reduce_sum(
                        fluid.layers.log_loss(
                            prediction2,
                            to_variable(labels_np[slice:slice + BATCH_SIZE])))
                    loss2.backward()
                    adam2.minimize(loss2)
                    deepcf2.clear_gradients()
                    dy_loss2 = loss2.numpy()
                    sys.stderr.write('dynamic loss: %s %s\n' %
                                     (slice, dy_loss2))

        with fluid.dygraph.guard():
            with _test_eager_guard():
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                fluid.default_startup_program().random_seed = seed
                fluid.default_main_program().random_seed = seed

                deepcf = DeepCF(num_users, num_items, matrix)
                adam = fluid.optimizer.AdamOptimizer(
                    0.01, parameter_list=deepcf.parameters())

                for e in range(NUM_EPOCHES):
                    sys.stderr.write('epoch %d\n' % e)
                    for slice in range(0, BATCH_SIZE * NUM_BATCHES,
                                       BATCH_SIZE):
                        if slice + BATCH_SIZE >= users_np.shape[0]:
                            break
                        prediction = deepcf(
                            to_variable(users_np[slice:slice + BATCH_SIZE]),
                            to_variable(items_np[slice:slice + BATCH_SIZE]))
                        loss = fluid.layers.reduce_sum(
                            fluid.layers.log_loss(
                                prediction,
                                to_variable(labels_np[slice:slice +
                                                      BATCH_SIZE])))
                        loss.backward()
                        adam.minimize(loss)
                        deepcf.clear_gradients()
                        eager_loss = loss.numpy()
                        sys.stderr.write('eager loss: %s %s\n' %
                                         (slice, eager_loss))

        self.assertEqual(static_loss, dy_loss)
        self.assertEqual(static_loss, dy_loss2)
        self.assertEqual(static_loss, eager_loss)
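Every run above minimizes the summed binary log loss between the DeepCF prediction and the 0/1 label. For reference, a numpy equivalent of fluid.layers.log_loss with its documented epsilon (default 1e-4), offered as an illustration:

import numpy as np

def log_loss_np(pred, label, eps=1e-4):
    # binary cross entropy on probabilities, stabilized like fluid.layers.log_loss
    return (-label * np.log(pred + eps) -
            (1.0 - label) * np.log(1.0 - pred + eps))

pred = np.array([0.9, 0.2, 0.7])
label = np.array([1.0, 0.0, 1.0])
print(log_loss_np(pred, label).sum())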
Example No. 34
def train(word_dict,
          net_method,
          use_cuda,
          seed,
          quality,
          save_dirname=None):
    BATCH_SIZE = 128
    PASS_NUM = 100
    dict_dim = len(word_dict)
    class_dim = 2
    target_val_acc = quality

    # Seed for batch producer
    random.seed(seed) 
    
    # Seed for weight initialization
    fluid.default_startup_program().random_seed = seed

    # Setup input features and label as data layers
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    cost, acc_out, prediction = net_method(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    # Initialize a test program for obtaining test accuracy and cost
    test_program = fluid.default_main_program().clone(for_test=True)

    # Setup Adam optimizer
    # a learning rate of 5e-4 works for conv models; 2e-3 for the LSTM model
    adam = fluid.optimizer.Adam(learning_rate=0.0005)

    optimize_ops, params_grads = adam.minimize(cost)

    # Create reader to iterate over training set
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=BATCH_SIZE)

    # Setup place and executor for runtime
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
    
    # Create reader to iterate over validation set
    test_reader = paddle.batch(
                    paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in xrange(PASS_NUM):
            train_loss_set = []
            train_acc_set = []  
   
            # Calculate average training loss and accuracy
            # across all mini-batches in the training set
            for batch_id, data in enumerate(train_reader()):
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                train_loss_set.append(float(cost_val))
                train_acc_set.append(float(acc_val))
            train_loss = np.array(train_loss_set).mean()
            train_acc = np.array(train_acc_set).mean() * 100

            # Calculate average validation loss and accuracy
            # across all mini-batches in the validation set
            acc_set = []
            avg_loss_set = []
            for tid, test_data in enumerate(test_reader()):
                avg_loss_np, acc_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[cost, acc_out])
                acc_set.append(float(acc_np))
                avg_loss_set.append(float(avg_loss_np))
            acc_val = np.array(acc_set).mean() * 100 
            avg_loss_val = np.array(avg_loss_set).mean()
            print("Epoch =", pass_id, ", train-accuracy =", train_acc, ", train-loss =", train_loss, ", validation-accuracy =", acc_val, ", validation-loss =", avg_loss_val)

            if acc_val > target_val_acc:
                # stop training once the target validation accuracy is reached
                break

    train_loop(fluid.default_main_program())
Example No. 35
def train(place, save_dirname):
    if args.data_set == "cifar10":
        class_dim = 10
        data_shape = [3, 32, 32]
    elif args.data_set == "imagenet":
        class_dim = 102
        data_shape = [3, 224, 224]
    else:
        raise ValueError("%s dataset is not supported" % data_set)

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if args.model == "vgg":
        print("train vgg")
        net = vgg16(images)
    elif args.model == "resnet":
        print("train resnet")
        if args.data_set == "cifar10":
            net = resnet_cifar10(images)
        elif args.data_set == "imagenet":
            net = resnet_imagenet(images)
        else:
            raise ValueError("%s dataset is not supported" % args.data_set)
    else:
        raise ValueError("%s network is not supported" % args.model)

    predict = fluid.layers.fc(input=net, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program
    test_program = fluid.default_main_program().clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = args.train_batch_size
    PASS_NUM = 100

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.flowers.train()
            if args.data_set == 'imagenet' else paddle.dataset.cifar.train10(),
            buf_size=128 * 10),
        batch_size=args.train_batch_size)

    test_reader = paddle.batch(
        paddle.dataset.flowers.test()
        if args.data_set == 'imagenet' else paddle.dataset.cifar.test10(),
        batch_size=args.inf_batch_size)

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()

    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            train_image = np.array(
                map(lambda x: x[0].reshape(data_shape), data)).astype("float32")
            train_label = np.array(map(lambda x: x[1], data)).astype("int64")
            train_label = train_label.reshape([-1, 1])

            exe.run(main_program,
                    feed={'pixel': train_image,
                          'label': train_label})

            if (batch_id % 100) == 0:
                acc_list = []
                avg_loss_list = []
                for tid, test_data in enumerate(test_reader()):
                    test_image = np.array(
                        map(lambda x: x[0].reshape(data_shape),
                            test_data)).astype("float32")
                    test_label = np.array(map(lambda x: x[1],
                                              test_data)).astype("int64")
                    test_label = test_label.reshape([-1, 1])

                    loss_t, acc_t = exe.run(
                        program=test_program,
                        feed={"pixel": test_image,
                              "label": test_label},
                        fetch_list=[avg_cost, acc])
                    if math.isnan(float(loss_t)):
                        sys.exit("got NaN loss, training failed.")
                    acc_list.append(float(acc_t))
                    avg_loss_list.append(float(loss_t))

                acc_value = np.array(acc_list).mean()
                avg_loss_value = np.array(avg_loss_list).mean()

                print(
                    'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Accuracy {3:2.2}'.
                    format(pass_id, batch_id + 1,
                           float(avg_loss_value), float(acc_value)))

                if acc_value > args.threshold:
                    print(
                        'Save inference model with test accuracy of {0} at {1}'.
                        format(float(acc_value), save_dirname))
                    fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                  [predict], exe)
                    return
Example No. 36
    def test_transformer_sort_gradient_float32(self):
        seed = 90

        with guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            transformer = TransFormer('transformer',
                                      ModelHyperParams.src_vocab_size,
                                      ModelHyperParams.trg_vocab_size,
                                      ModelHyperParams.max_length + 1,
                                      ModelHyperParams.n_layer,
                                      ModelHyperParams.n_head,
                                      ModelHyperParams.d_key,
                                      ModelHyperParams.d_value,
                                      ModelHyperParams.d_model,
                                      ModelHyperParams.d_inner_hid,
                                      ModelHyperParams.prepostprocess_dropout,
                                      ModelHyperParams.attention_dropout,
                                      ModelHyperParams.relu_dropout,
                                      ModelHyperParams.preprocess_cmd,
                                      ModelHyperParams.postprocess_cmd,
                                      ModelHyperParams.weight_sharing,
                                      TrainTaskConfig.label_smooth_eps,
                                      use_py_reader=use_py_reader,
                                      is_test=False)
            if sync:
                lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                    ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
                with fluid.default_main_program()._lr_schedule_guard():
                    learning_rate = lr_decay * TrainTaskConfig.learning_rate
                optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                                 beta1=TrainTaskConfig.beta1,
                                                 beta2=TrainTaskConfig.beta2,
                                                 epsilon=TrainTaskConfig.eps)
            else:
                optimizer = fluid.optimizer.SGD(learning_rate=0.003)
            dy_param_init = dict()
            dy_param_updated = dict()
            for i in range(batch_num):
                enc_inputs, dec_inputs, label, weights = create_data()
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                    enc_inputs, dec_inputs, label, weights)

                if i == 0:
                    for param in transformer.parameters():
                        dy_param_init[param.name] = param.numpy()

                dy_avg_cost.backward(backward_strategy)
                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()

                if i == batch_num - 1:
                    for param in transformer.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_avg_cost_value = dy_avg_cost.numpy()
            dy_sum_cost_value = dy_sum_cost.numpy()
            dy_predict_value = dy_predict.numpy()
            dy_token_num_value = dy_token_num.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            transformer = TransFormer('transformer',
                                      ModelHyperParams.src_vocab_size,
                                      ModelHyperParams.trg_vocab_size,
                                      ModelHyperParams.max_length + 1,
                                      ModelHyperParams.n_layer,
                                      ModelHyperParams.n_head,
                                      ModelHyperParams.d_key,
                                      ModelHyperParams.d_value,
                                      ModelHyperParams.d_model,
                                      ModelHyperParams.d_inner_hid,
                                      ModelHyperParams.prepostprocess_dropout,
                                      ModelHyperParams.attention_dropout,
                                      ModelHyperParams.relu_dropout,
                                      ModelHyperParams.preprocess_cmd,
                                      ModelHyperParams.postprocess_cmd,
                                      ModelHyperParams.weight_sharing,
                                      TrainTaskConfig.label_smooth_eps,
                                      use_py_reader=use_py_reader,
                                      is_test=False)
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            optimizer = fluid.optimizer.SGD(learning_rate=0.003)

            data_input_names = (encoder_data_input_fields +
                                decoder_data_input_fields[:-1] +
                                label_data_input_fields)
            all_inputs = make_all_inputs(data_input_names)
            enc_inputs_len = len(encoder_data_input_fields)
            dec_inputs_len = len(decoder_data_input_fields[:-1])
            enc_inputs = all_inputs[0:enc_inputs_len]
            dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len +
                                    dec_inputs_len]
            label = all_inputs[-2]
            weights = all_inputs[-1]
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            static_sum_cost, static_avg_cost, static_predict, static_token_num = transformer(
                enc_inputs, dec_inputs, label, weights)
            optimizer.minimize(static_avg_cost)
            for param in transformer.parameters():
                static_param_name_list.append(param.name)
            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_sum_cost_value = None
            static_avg_cost_value = None
            static_predict_value = None
            static_token_num_value = None
            for i in range(batch_num):
                feed_dict = create_feed_dict_list(create_data(True))
                fetch_list = [
                    static_sum_cost, static_avg_cost, static_predict,
                    static_token_num
                ]

                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed=feed_dict,
                              fetch_list=fetch_list)
                static_sum_cost_value = out[0]
                static_avg_cost_value = out[1]
                static_predict_value = out[2]
                static_token_num_value = out[3]
                if i == batch_num - 1:
                    for k in range(4, len(out)):
                        static_param_updated[static_param_name_list[
                            k - 4]] = out[k]

        self.assertTrue(
            np.array_equal(static_avg_cost_value, dy_avg_cost_value))
        self.assertTrue(
            np.array_equal(static_sum_cost_value, dy_sum_cost_value))
        self.assertTrue(np.array_equal(static_predict_value, dy_predict_value))
        self.assertTrue(
            np.array_equal(static_token_num_value, dy_token_num_value))

        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))
Example No. 37
def train(nn_type,
          use_cuda,
          parallel,
          save_dirname=None,
          model_filename=None,
          params_filename=None,
          is_local=True):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if nn_type == 'mlp':
        net_conf = mlp
    else:
        net_conf = conv_net

    if parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            img_ = pd.read_input(img)
            label_ = pd.read_input(label)
            prediction, avg_loss, acc = net_conf(img_, label_)
            for o in [avg_loss, acc]:
                pd.write_output(o)

        avg_loss, acc = pd()
        # get mean loss and acc across all devices
        avg_loss = fluid.layers.mean(avg_loss)
        acc = fluid.layers.mean(acc)
    else:
        prediction, avg_loss, acc = net_conf(img, label)

    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch, fetch nothing
                exe.run(main_program, feed=feeder.feed(data))
                if (batch_id + 1) % 10 == 0:
                    acc_set = []
                    avg_loss_set = []
                    for test_data in test_reader():
                        acc_np, avg_loss_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[acc, avg_loss])
                        acc_set.append(float(acc_np))
                        avg_loss_set.append(float(avg_loss_np))
                    # get test acc and loss
                    acc_val = numpy.array(acc_set).mean()
                    avg_loss_val = numpy.array(avg_loss_set).mean()
                    if float(acc_val) > 0.2:  # smaller threshold to speed up CI
                        if save_dirname is not None:
                            fluid.io.save_inference_model(
                                save_dirname, ["img"], [prediction],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename)
                        return
                    else:
                        print('PassID {0:1}, BatchID {1:04}, '
                              'Test Loss {2:2.2}, Acc {3:2.2}'.format(
                                  pass_id, batch_id + 1,
                                  float(avg_loss_val), float(acc_val)))
                        if math.isnan(float(avg_loss_val)):
                            sys.exit("got NaN loss, training failed.")
        raise AssertionError("Loss of recognize digits is too large")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
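For reference, a minimal sketch of how the train function above might be invoked for a local single-device run (the argument values are illustrative; mlp, conv_net and BATCH_SIZE are assumed to be defined elsewhere in the same file, as the body implies):

if __name__ == '__main__':
    # hypothetical local run: MLP on CPU, no ParallelDo, saving an inference model
    train(
        nn_type='mlp',
        use_cuda=False,
        parallel=False,
        save_dirname='recognize_digits_mlp.inference.model',
        is_local=True)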
Example No. 38
def compress(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):

        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    test(0, val_program)

    params = get_pruned_params(args, fluid.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruner = Pruner(args.criterion)
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            fluid.global_scope(),
                                            params=params,
                                            ratios=[args.pruned_ratio] *
                                            len(params),
                                            place=place,
                                            only_graph=True)

    pruned_program, _, _ = pruner.prune(fluid.default_main_program(),
                                        fluid.global_scope(),
                                        params=params,
                                        ratios=[args.pruned_ratio] *
                                        len(params),
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))
    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            fluid.io.save_inference_model(infer_model_path, ["image"], [out],
                                          exe, pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
Example No. 39
def get_model(args):
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
                    x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(
        input=logit,
        label=fluid.layers.data(
            name='label', shape=[1], dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit,
        label=fluid.layers.data(name='label', shape=[1], dtype='int64'),
        total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    train_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)
    test_reader = batch(
        paddle.reader.shuffle(
            crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
        batch_size=args.batch_size)

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
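A minimal sketch of how the six values returned above might be consumed by a benchmark harness (the executor setup and single-step loop are illustrative, not from the source):

loss, inference_program, adam, train_reader, test_reader, batch_acc = get_model(args)
adam.minimize(loss)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# 'words' is a variable-length (LoD) input, so a DataFeeder converts each batch
feed_vars = [fluid.default_main_program().global_block().var(name)
             for name in ('words', 'label')]
feeder = fluid.DataFeeder(feed_vars, place)
for batch in train_reader():
    loss_v, acc_v = exe.run(fluid.default_main_program(),
                            feed=feeder.feed(batch),
                            fetch_list=[loss, batch_acc])
    break  # single step shown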
Example No. 40
def train(use_cuda, save_dirname, is_local=True):
    scale_infer, avg_cost = model()

    # test program
    test_program = fluid.default_main_program().clone(for_test=True)

    sgd_optimizer = SGDOptimizer(learning_rate=0.2)
    sgd_optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    exe = Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)

    feed_order = [
        'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
        'movie_title', 'score'
    ]

    def train_loop(main_program):
        exe.run(framework.default_startup_program())

        feed_list = [
            main_program.global_block().var(var_name) for var_name in feed_order
        ]
        feeder = fluid.DataFeeder(feed_list, place)

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch
                outs = exe.run(program=main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                out = np.array(outs[0])
                if (batch_id + 1) % 10 == 0:
                    avg_cost_set = []
                    for test_data in test_reader():
                        avg_cost_np = exe.run(program=test_program,
                                              feed=feeder.feed(test_data),
                                              fetch_list=[avg_cost])
                        avg_cost_set.append(avg_cost_np[0])
                        break  # test only 1 segment for speeding up CI

                    # get test avg_cost
                    test_avg_cost = np.array(avg_cost_set).mean()
                    if test_avg_cost < 6.0:
                        # if avg_cost is below 6.0, the model is considered good enough
                        if save_dirname is not None:
                            fluid.io.save_inference_model(save_dirname, [
                                "user_id", "gender_id", "age_id", "job_id",
                                "movie_id", "category_id", "movie_title"
                            ], [scale_infer], exe)
                        return

                if math.isnan(float(out[0])):
                    sys.exit("got NaN loss, training failed.")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
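The is_local=False branch above is driven entirely by environment variables; a sketch of what a launcher might export for a one-pserver, two-trainer run (addresses and role are placeholders):

import os

os.environ['PADDLE_INIT_PORT'] = '6174'
os.environ['PADDLE_INIT_PSERVERS'] = '192.168.0.2'   # comma-separated pserver IPs
os.environ['TRAINERS'] = '2'
os.environ['POD_IP'] = '192.168.0.2'                 # this machine's IP
os.environ['PADDLE_INIT_TRAINER_ID'] = '0'
os.environ['TRAINING_ROLE'] = 'PSERVER'              # 'TRAINER' on trainer machines

train(use_cuda=False, save_dirname=None, is_local=False)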
Example No. 41
def train(learning_rate,
          batch_size,
          num_epochs,
          init_model=None,
          model_save_dir='model',
          parallel=True,
          use_nccl=True,
          lr_strategy=None,
          class_dim=10000,
          layers=50):
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    inference_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(
                learning_rate=learning_rate,
                step_each_epoch=step_each_epoch,
                epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    opts = optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    #inference_program = fluid.default_main_program().clone()
    #with fluid.program_guard(inference_program):
    #    inference_program = fluid.io.get_inference_program([avg_cost, acc_top1, acc_top5])

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    #fluid.io.save_inference_model('./debug/',
    #                  ["image"],
    #                  [out],
    #                  exe,
    #                  main_program=None,
    #                  model_filename='model',
    #                  params_filename='params')
    #exit()

    start_epoch = 0
    if init_model is not None:
        load_model = model_save_dir + "/" + str(init_model)
        fluid.io.load_persistables(exe, load_model)
        start_epoch = int(init_model) + 1

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(start_epoch, num_epochs):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(fluid.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, acc1 {3},"
                      "acc5 {4} time {5}".format(pass_id,  \
                      batch_id, loss[0], acc1[0], acc5[0], \
                      "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(inference_program,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, testbatch {1}, loss {2}, acc1 {3},"
                      "acc5 {4} time {5}".format(pass_id,  \
                      batch_id, loss[0], acc1[0], acc5[0], \
                      "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3},"
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id,  \
              train_loss, train_acc1, train_acc5, test_loss, \
              test_acc1, test_acc5))
        sys.stdout.flush()


        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
Example No. 42
    def testSetNumpy(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel("ptb_model",
                                 hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this is a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = fluid.CPUPlace() if not core.is_compiled_with_cuda() \
                else fluid.CUDAPlace(0)
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr))
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            opti_dict = adam.state_dict()
            np_opti_dict = {}
            # set to zero
            for k, v in opti_dict.items():
                np_t = v.numpy()
                np_opti_dict[v.name] = np_t
                var = v._ivar.value().get_tensor()
                var.set(np.zeros_like(np_t), place)

                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

            if isinstance(adam._learning_rate, LearningRateDecay):
                adam._learning_rate.step_num = 0

            adam.set_dict(np_opti_dict)

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                self.assertTrue(
                    np.array_equal(v.numpy(), self.base_opti[v.name]))

            # check parameter
            state_dict = ptb_model.state_dict()
            np_state_dict = {}
            for k, v in state_dict.items():
                np_t = v.numpy()
                np_state_dict[v.name] = np_t
                var = v._ivar.value().get_tensor()

                var.set(np.zeros_like(np_t), place)

            ptb_model.set_dict(np_state_dict)

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[v.name]

                self.assertTrue(np.array_equal(new_t, base_t))
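Condensed, the round-trip pattern this test exercises looks like the following sketch (assuming any dygraph Layer named model and an Adam optimizer named adam, inside a fluid.dygraph.guard() block):

# snapshot parameters and optimizer state as plain numpy arrays
np_state = {v.name: v.numpy() for v in model.state_dict().values()}
np_opti = {v.name: v.numpy() for v in adam.state_dict().values()}

# ... the live tensors may now be overwritten (the test zeroes them) ...

# restore everything from the numpy snapshots
model.set_dict(np_state)
adam.set_dict(np_opti)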
Example No. 43
def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
    PASS_NUM = 100
    EMBED_SIZE = 32
    HIDDEN_SIZE = 256
    N = 5
    BATCH_SIZE = 32
    IS_SPARSE = is_sparse

    def __network__(words):
        embed_first = fluid.layers.embedding(
            input=words[0],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_second = fluid.layers.embedding(
            input=words[1],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_third = fluid.layers.embedding(
            input=words[2],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')
        embed_forth = fluid.layers.embedding(
            input=words[3],
            size=[dict_size, EMBED_SIZE],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr='shared_w')

        concat_embed = fluid.layers.concat(
            input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
        hidden1 = fluid.layers.fc(input=concat_embed,
                                  size=HIDDEN_SIZE,
                                  act='sigmoid')
        predict_word = fluid.layers.fc(input=hidden1,
                                       size=dict_size,
                                       act='softmax')
        cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
        avg_cost = fluid.layers.mean(cost)
        return avg_cost, predict_word

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

    if not is_parallel:
        avg_cost, predict_word = __network__(
            [first_word, second_word, third_word, forth_word, next_word])
    else:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            avg_cost, predict_word = __network__(
                map(pd.read_input, [
                    first_word, second_word, third_word, forth_word, next_word
                ]))
            pd.write_output(avg_cost)

        avg_cost = fluid.layers.mean(pd())

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_cost_np = exe.run(main_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost])
                if avg_cost_np[0] < 5.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, [
                            'firstw', 'secondw', 'thirdw', 'forthw'
                        ], [predict_word], exe)
                    return
                if math.isnan(float(avg_cost_np[0])):
                    sys.exit("got NaN loss, training failed.")

        raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
Example No. 44
batch_labels1 = dic['batch_labels1']
batch_reg_target1 = dic['batch_reg_target1']
batch_centerness1 = dic['batch_centerness1']
batch_labels2 = dic['batch_labels2']
batch_reg_target2 = dic['batch_reg_target2']
batch_centerness2 = dic['batch_centerness2']
batch_labels3 = dic['batch_labels3']
batch_reg_target3 = dic['batch_reg_target3']
batch_centerness3 = dic['batch_centerness3']
batch_labels4 = dic['batch_labels4']
batch_reg_target4 = dic['batch_reg_target4']
batch_centerness4 = dic['batch_centerness4']

print()

results = exe.run(fluid.default_main_program(),
                  feed={
                      'cls_logits0': cls_logits0_ndarray,
                      'cls_logits1': cls_logits1_ndarray,
                      'cls_logits2': cls_logits2_ndarray,
                      'cls_logits3': cls_logits3_ndarray,
                      'cls_logits4': cls_logits4_ndarray,
                      'bboxes_reg0': bboxes_reg0_ndarray,
                      'bboxes_reg1': bboxes_reg1_ndarray,
                      'bboxes_reg2': bboxes_reg2_ndarray,
                      'bboxes_reg3': bboxes_reg3_ndarray,
                      'bboxes_reg4': bboxes_reg4_ndarray,
                      'centerness0': centerness0_ndarray,
                      'centerness1': centerness1_ndarray,
                      'centerness2': centerness2_ndarray,
                      'centerness3': centerness3_ndarray,
Example No. 45
def main():
    env = os.environ
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)
    if 'log_iter' not in cfg:
        cfg.log_iter = 20

    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    train_loader.set_sample_list_generator(train_reader, place)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    eval_loader.set_sample_list_generator(eval_reader, place)

    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(var, force_persistable=False)
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    teacher_program = teacher_program.clone(for_test=True)

    data_name_map = {
        'target0': 'target0',
        'target1': 'target1',
        'target2': 'target2',
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    yolo_output_names = [
        'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
        'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
        'strided_slice_4.tmp_0', 'transpose_0.tmp_0', 'strided_slice_5.tmp_0',
        'strided_slice_6.tmp_0', 'strided_slice_7.tmp_0',
        'strided_slice_8.tmp_0', 'strided_slice_9.tmp_0', 'transpose_2.tmp_0',
        'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
        'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
        'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
    ]

    assert cfg.use_fine_grained_loss, \
        "Only use_fine_grained_loss=True is supported. Please set it in the config file or via '-o use_fine_grained_loss=true'."
    distill_loss = split_distill(yolo_output_names, 1000)
    loss = distill_loss + loss
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())
    checkpoint.load_params(exe, fluid.default_main_program(),
                           cfg.pretrain_weights)


    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty! Please set it with the '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    assert all(0 < r < 1 for r in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."

    pruner = Pruner()
    distill_prog = pruner.prune(fluid.default_main_program(),
                                fluid.global_scope(),
                                params=pruned_params,
                                ratios=pruned_ratios,
                                place=place,
                                only_graph=False)[0]

    base_flops = flops(eval_prog)
    eval_prog = pruner.prune(eval_prog,
                             fluid.global_scope(),
                             params=pruned_params,
                             ratios=pruned_ratios,
                             place=place,
                             only_graph=True)[0]
    pruned_flops = flops(eval_prog)
    logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
        float(base_flops - pruned_flops) / base_flops, base_flops,
        pruned_flops))

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn when running on multiple GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # number of iterations after which CompiledProgram drops local execution scopes.
    # Set it to 1 to save memory, so that unused variables in local execution
    # scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1

    parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  #[map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))
        if step_id % cfg.snapshot_iter == 0 and step_id != 0 or step_id == cfg.max_iters - 1:
            save_name = str(
                step_id) if step_id != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, distill_prog,
                            os.path.join(save_dir, save_name))
            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                               eval_values, eval_cls)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])

            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, distill_prog,
                                os.path.join("./", "best_model"))
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
Example No. 46
# In[15]:

# get the classifier (a CNN is used for classification)
predict = resnet(images, [2, 2, 2, 2])

# In[16]:

# get the loss function and accuracy
cost = fluid.layers.cross_entropy(input=predict, label=label)  # cross entropy
avg_cost = fluid.layers.mean(cost)  # mean over all elements of cost
acc = fluid.layers.accuracy(input=predict, label=label)  # compute accuracy from predictions and labels

# In[17]:

# get the test program
test_program = fluid.default_main_program().clone(for_test=True)

# define the optimization method (Adam)
boundaries = [150, 250]
values = [0.1, 0.01, 0.001]
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
# optimizer = fluid.optimizer.SGD(learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries,values=values),
#             regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4))
optimizer.minimize(avg_cost)
print("完成")

# In[18]:

# choose CPU or GPU: use_cuda = False for CPU, use_cuda = True for GPU
use_cuda = True
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
Example No. 47
    def test_compression(self):
        """
        Model: mobilenet_v1
        data: mnist
        step1: Training one epoch
        step2: pruning flops
        step3: fine-tune one epoch
        step4: check top1_acc.
        """
        if not fluid.core.is_compiled_with_cuda():
            return
        class_dim = 10
        image_shape = [1, 28, 28]
        image = fluid.layers.data(name='image',
                                  shape=image_shape,
                                  dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = MobileNet().net(input=image, class_dim=class_dim)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        val_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

        val_feed_list = [('img', image.name), ('label', label.name)]
        val_fetch_list = [('acc_top1', acc_top1.name),
                          ('acc_top5', acc_top5.name)]

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=128)
        train_feed_list = [('img', image.name), ('label', label.name)]
        train_fetch_list = [('loss', avg_cost.name)]

        com_pass = Compressor(place,
                              fluid.global_scope(),
                              fluid.default_main_program(),
                              train_reader=train_reader,
                              train_feed_list=train_feed_list,
                              train_fetch_list=train_fetch_list,
                              eval_program=val_program,
                              eval_reader=val_reader,
                              eval_feed_list=val_feed_list,
                              eval_fetch_list=val_fetch_list,
                              train_optimizer=optimizer)
        com_pass.config('./filter_pruning/compress.yaml')
        eval_graph = com_pass.run()
        self.assertTrue(
            abs((com_pass.context.eval_results['acc_top1'][-1] - 0.969) /
                0.969) < 0.02)
Example No. 48
    def test_run(self):
        x = layers.data(
            name='x',
            shape=[-1, self.batch_size, self.hidden_size],
            dtype='float32')
        sequence_length = layers.data(
            name="sequence_length", shape=[-1], dtype='float32')

        rnn_out, last_hidden, last_cell = basic_lstm(
            x, None, None, self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            bidirectional=self.is_bidirect,
            sequence_length=sequence_length,
            forget_bias=self.forget_bias)

        last_hidden.persistable = True
        rnn_out.persistable = True

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)
        exe.run(framework.default_startup_program())

        param_list = fluid.default_main_program().block(0).all_parameters()

        # process weight and bias
        gate_weight = []
        gate_bias = []

        for i in range(self.num_layers):
            gate_w_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.w_0"
            gate_b_name = "basic_lstm_layers_" + str(i) + "/BasicLSTMUnit_0.b_0"

            gate_w = np.array(fluid.global_scope().find_var(gate_w_name)
                              .get_tensor())
            gate_w = np.random.uniform(
                -0.1, 0.1, size=gate_w.shape).astype('float32')
            fluid.global_scope().find_var(gate_w_name).get_tensor().set(gate_w,
                                                                        place)

            gate_b = np.array(fluid.global_scope().find_var(gate_b_name)
                              .get_tensor())
            gate_b = np.random.uniform(
                -0.1, 0.1, size=gate_b.shape).astype('float32')
            fluid.global_scope().find_var(gate_b_name).get_tensor().set(gate_b,
                                                                        place)

            gate_weight.append(gate_w)
            gate_bias.append(gate_b)

        if self.is_bidirect:
            for i in range(self.num_layers):
                gate_w_name = "basic_lstm_reverse_layers_" + str(
                    i) + "/BasicLSTMUnit_0.w_0"
                gate_b_name = "basic_lstm_reverse_layers_" + str(
                    i) + "/BasicLSTMUnit_0.b_0"

                gate_w = np.array(fluid.global_scope().find_var(gate_w_name)
                                  .get_tensor())
                gate_w = np.random.uniform(
                    -0.1, 0.1, size=gate_w.shape).astype('float32')
                fluid.global_scope().find_var(gate_w_name).get_tensor().set(
                    gate_w, place)

                gate_b = np.array(fluid.global_scope().find_var(gate_b_name)
                                  .get_tensor())
                gate_b = np.random.uniform(
                    -0.1, 0.1, size=gate_b.shape).astype('float32')
                fluid.global_scope().find_var(gate_b_name).get_tensor().set(
                    gate_b, place)

                gate_weight.append(gate_w)
                gate_bias.append(gate_b)

        step_input_np = np.random.uniform(-0.1, 0.1, (
            self.seq_len, self.batch_size, self.hidden_size)).astype('float32')
        sequence_length_np = np.random.randint(
            self.seq_len // 2, self.seq_len,
            size=(self.batch_size)).astype('int64')

        out = exe.run(
            feed={'x': step_input_np,
                  'sequence_length': sequence_length_np},
            fetch_list=[rnn_out, last_hidden, last_cell])

        api_rnn_out = out[0]
        api_last_hidden = out[1]
        api_last_cell = out[2]

        np_out = lstm_np(
            step_input_np,
            None,
            None,
            self.hidden_size,
            gate_weight,
            gate_bias,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            is_bidirect=self.is_bidirect,
            sequence_length=sequence_length_np)

        self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0))
        self.assertTrue(
            np.allclose(
                api_last_hidden, np_out[1], rtol=1e-4, atol=0))
        self.assertTrue(
            np.allclose(
                api_last_cell, np_out[2], rtol=1e-4, atol=0))
Example No. 49
    def test_get_places(self):
        places = fluid.layers.get_places()
        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        exe.run(fluid.default_main_program())
        self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST)
Example No. 50
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        args.enable_ce = True
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process

        if args.attention:
            model = AttentionModel(
                hidden_size,
                src_vocab_size,
                tar_vocab_size,
                batch_size,
                num_layers=num_layers,
                init_scale=init_scale,
                dropout=dropout)
        else:
            model = BaseModel(
                hidden_size,
                src_vocab_size,
                tar_vocab_size,
                batch_size,
                num_layers=num_layers,
                init_scale=init_scale,
                dropout=dropout)
        global_norm_clip = GradClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr, parameter_list=model.parameters())
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr, parameter_list=model.parameters())
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                train_data_prefix, eval_data_prefix,
                                test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # get train epoch size
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            model.train()
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(
                    train_data, batch_size, enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                # print(loss.numpy()[0])
                loss.backward()
                optimizer.minimize(loss, grad_clip=global_norm_clip)
                model.clear_gradients()
                total_loss += loss * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f" %
                        (epoch_id, batch_id, batch_time,
                        np.exp(total_loss.numpy() / word_count)))
                    total_loss = 0.0
                    word_count = 0.0

            end_time = time.time()
            epoch_time = end_time - start_time
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))

            
            dir_name = os.path.join(args.model_path,
                                    "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)
Example No. 51
def train(use_cuda, save_dirname, is_local):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')

    y_predict = fluid.layers.fc(input=x, size=1, act=None)

    y = fluid.layers.data(name='y', shape=[1], dtype='float32')

    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    BATCH_SIZE = 20

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.uci_housing.train(), buf_size=500),
        batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    def train_loop(main_program):
        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_loss_value, = exe.run(main_program,
                                          feed=feeder.feed(data),
                                          fetch_list=[avg_cost])
                print(avg_loss_value)
                if avg_loss_value[0] < 10.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ['x'],
                                                      [y_predict], exe)
                    return
                if math.isnan(float(avg_loss_value[0])):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
            avg_loss_value[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
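For reference, a minimal local invocation of this example might look like the following; the save path is illustrative, not part of the original snippet:

# Hypothetical driver: train fit-a-line on CPU in local mode and save
# the inference model once the loss drops below the threshold.
if __name__ == '__main__':
    train(use_cuda=False,
          save_dirname="./fit_a_line.inference.model",
          is_local=True)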
Example No. 52
    def __call__(self, bboxes, scores):

        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(name=name,
                                                      dtype=dtype,
                                                      shape=shape,
                                                      lod_level=lod_level)
        
        def _soft_nms_for_cls(dets, sigma, thres):
            """soft_nms_for_cls"""
            dets_final = []
            while len(dets) > 0:
                maxpos = np.argmax(dets[:, 0])
                dets_final.append(dets[maxpos].copy())
                ts, tx1, ty1, tx2, ty2 = dets[maxpos]
                scores = dets[:, 0]
                x1 = dets[:, 1]
                y1 = dets[:, 2]
                x2 = dets[:, 3]
                y2 = dets[:, 4]
                eta = 0 if self.normalized else 1
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)
                xx1 = np.maximum(tx1, x1)
                yy1 = np.maximum(ty1, y1)
                xx2 = np.minimum(tx2, x2)
                yy2 = np.minimum(ty2, y2)
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas + areas[maxpos] - inter) 
                weight = np.exp(-(ovr * ovr) / sigma)
                scores = scores * weight
                idx_keep = np.where(scores >= thres)
                dets[:, 0] = scores
                dets = dets[idx_keep]
            dets_final = np.array(dets_final).reshape(-1, 5)
            return dets_final 

        def _soft_nms(bboxes, scores):
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]
            
            softnms_thres = self.score_threshold
            softnms_sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k
            
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
                        
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= softnms_thres)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(np.float32, copy=False)
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]

                cls_boxes[j] = _soft_nms_for_cls(dets_j, sigma=softnms_sigma, thres=softnms_thres)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1, 1)
            
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes])

            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:,0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            
            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            if pred_result.shape[0] == 0:
                # keep the py_func output non-empty when no box survives
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())
            
            return res
        
        pred_result = create_tmp_var(fluid.default_main_program(),
                                     name='softnms_pred_result',
                                     dtype='float32',
                                     shape=[6],
                                     lod_level=1)
        fluid.layers.py_func(func=_soft_nms,
                             x=[bboxes, scores],
                             out=pred_result)
        return pred_result
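For intuition about the Gaussian decay in `_soft_nms_for_cls`, here is a tiny self-contained numpy sketch (the sigma value is an assumption, a common soft-NMS setting, not read from the class): a box with IoU `ovr` against the kept box has its score multiplied by exp(-ovr^2 / sigma), so heavily overlapping boxes are suppressed while distant ones are barely touched:

import numpy as np

# Gaussian soft-NMS weight at two overlap levels; sigma = 0.5 is assumed.
sigma = 0.5
for ovr in (0.1, 0.8):
    weight = np.exp(-(ovr * ovr) / sigma)
    print("IoU=%.1f -> weight=%.3f" % (ovr, weight))
# IoU=0.1 -> weight=0.980 (score nearly kept)
# IoU=0.8 -> weight=0.278 (score heavily decayed)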
Example No. 53
def FullyConnected(input, size, act=None, name=None, use_bias=True):
    """
    A Paddle counterpart of tensorpack's wrapper around `tf.layers.Dense`,
    implemented with `fluid.layers.fc`.
    Variable Names:
    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias
    """

    param_attr = ParamAttr(name='{}_W'.format(name))
    # honor use_bias: fluid.layers.fc skips the bias when bias_attr is False
    bias_attr = ParamAttr(name='{}_b'.format(name)) if use_bias else False

    ret = fluid.layers.fc(input=input,
                          size=size,
                          act=act,
                          param_attr=param_attr,
                          bias_attr=bias_attr)
    #var_W = fluid.global_scope().find_var(param_attr.name).get_tensor()
    ret.variables = VariableHolder(W=param_attr.name)
    return ret


if __name__ == '__main__':
    import paddle.fluid as fluid
    x = fluid.layers.data(name='state', shape=[3], dtype='float32')
    fc1 = FullyConnected(x, size=256, act='relu', name='policy/fc1')
    print("fc1: {}".format(fc1))

    vars = fluid.default_main_program().list_vars()
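The commented-out line inside `FullyConnected` hints at how the created weight can be fetched by its registered name; a hedged sketch, assuming the startup program has already been run on an Executor:

import numpy as np

# Fetch the fc weight tensor by the ParamAttr name used above.
# Requires exe.run(fluid.default_startup_program()) to have run first.
w_tensor = fluid.global_scope().find_var('policy/fc1_W').get_tensor()
print(np.array(w_tensor).shape)  # expected (3, 256) for the demo input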
Example No. 54
# NOTE: this fragment assumes the usual fit-a-line setup; the definitions
# below are illustrative stand-ins for code that precedes the excerpt.
import paddle
import paddle.fluid as fluid

device_type = 'CPU'  # assumed; the original run may also use 'GPU'
use_nccl = False     # assumed
place = fluid.CPUPlace()  # assumed to match device_type

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

places = fluid.layers.get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program(), print_log=True)
# fluid.release_memory(fluid.default_main_program())

BATCH_SIZE = 200

# fix the order of training data
train_reader = paddle.batch(
    paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)

# train_reader = paddle.batch(
#     paddle.reader.shuffle(
#         paddle.dataset.uci_housing.train(), buf_size=500),
#     batch_size=BATCH_SIZE)

feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
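The excerpt ends before the training loop; under the assumptions stated at the top of the fragment, a minimal continuation could look like this (pass count and prints are illustrative):

# Hypothetical continuation: run startup, then iterate the reader,
# feeding batches and fetching the averaged cost from the ParallelDo graph.
exe.run(fluid.default_startup_program())
for pass_id in range(10):
    for data in train_reader():
        avg_loss_value, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
    print("pass %d, avg_cost %f" % (pass_id, avg_loss_value[0]))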