Example #1
0
    def test_device_guard_with_id(self):
        """device_guard with an explicit device id ("gpu:1") must stamp the
        op-device attribute on every op created inside the guard."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            tensor_a = fluid.layers.fill_constant(shape=[1, 3, 8, 8],
                                                  value=0.5,
                                                  dtype='float32')
            tensor_b = fluid.layers.fill_constant(shape=[1, 3, 5, 5],
                                                  value=0.5,
                                                  dtype='float32')
            shape = fluid.layers.shape(tensor_b)
            with fluid.device_guard("cpu"):
                shape = fluid.layers.slice(shape,
                                           axes=[0],
                                           starts=[0],
                                           ends=[4])
                with fluid.device_guard("gpu:1"):
                    out = fluid.layers.crop_tensor(tensor_a, shape=shape)

        # Verify the device attribute recorded on each generated op.
        expected_devices = {'slice': "cpu", 'crop_tensor': "gpu:1"}
        attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
        for op in main_prog.global_block().ops:
            if op.type in expected_devices:
                self.assertEqual(op.desc.attr(attr_name),
                                 expected_devices[op.type])

        execute(main_prog, startup_prog)
Example #2
0
    def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None):
        """Build the MNIST CNN training network with ops pinned via device_guard.

        Args:
            batch_size (int): reader batch size; also used as the pipeline
                micro batch size when `dist_strategy` is set.
            use_dgc (bool): accepted for signature compatibility; not used
                in this body.
            dist_strategy: when truthy, fleet is initialized in collective
                mode and the optimizer is wrapped for pipeline training.

        Returns:
            (inference_program, avg_cost, train_reader, test_reader,
            batch_acc, predict), with a trailing data_loader appended when
            `dist_strategy` is set.
        """
        # Input data
        device_id = 0  # NOTE(review): unused in this body — presumably kept for parity with sibling models.
        if dist_strategy:
            fleet.init(is_collective=True)
        with fluid.device_guard("gpu:0"):
            images = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype=DTYPE)
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')

            if dist_strategy:
                # Pipeline training feeds inputs through a DataLoader.
                data_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[images, label],
                    capacity=64,
                    use_double_buffer=False,
                    iterable=False)
            # Train program
            predict = cnn_model(images)
        with fluid.device_guard("gpu:0"):
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(x=cost)

        # Evaluator
        with fluid.device_guard("gpu:0"):
            batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size_tensor)

        # Clone before minimize() appends backward/optimizer ops.
        inference_program = fluid.default_main_program().clone()
        base_lr = self.lr
        passes = [30, 60, 80, 90]
        steps_per_pass = 10
        bd = [steps_per_pass * p for p in passes]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
        lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
        opt = fluid.optimizer.Momentum(learning_rate=lr_val, momentum=0.9)

        # Reader
        # NOTE(review): both readers use mnist.test() — presumably intentional
        # to keep this test model small and fast; confirm if reused elsewhere.
        train_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)

        if dist_strategy:
            # Pipeline parallelism: F-then-B schedule, one micro batch per step.
            strategy = fleet.DistributedStrategy()
            strategy.pipeline = True
            strategy.pipeline_configs = {
                'schedule_mode': 'F-then-B',
                'micro_batch_size': batch_size
            }
            dist_opt = fleet.distributed_optimizer(
                optimizer=opt, strategy=strategy)
            dist_opt.minimize(avg_cost)
        else:
            opt.minimize(avg_cost)

        if dist_strategy:
            return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict, data_loader
        else:
            return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict
    def pp_net(self, main_prog, startup_prog, pp_degree=2):
        """Build a pipeline-parallel FC network split across pp_degree GPUs.

        Returns:
            tuple: (avg_cost, strategy) — the loss variable and a fresh
            fleet DistributedStrategy for the caller to configure.
        """

        def fc_block(input_x):
            # One pipeline stage: three stacked 64-wide tanh FC layers.
            hidden = input_x
            for _ in range(3):
                hidden = paddle.fluid.layers.fc(input=hidden,
                                                size=64,
                                                act='tanh')
            return hidden

        with fluid.program_guard(main_prog, startup_prog):
            with fluid.unique_name.guard():
                fleet.init(role_maker.PaddleCloudRoleMaker(is_collective=True))
                # Inputs live on the first pipeline stage.
                with fluid.device_guard("gpu:0"):
                    input_x = paddle.fluid.layers.data(name="x",
                                                       shape=[32],
                                                       dtype='float32')
                    input_y = paddle.fluid.layers.data(name="y",
                                                       shape=[1],
                                                       dtype='int64')

                # One fc_block per stage, each pinned to its own device.
                hidden = input_x
                for stage in range(pp_degree):
                    with fluid.device_guard("gpu:%d" % stage):
                        hidden = fc_block(hidden)

                # Softmax head and loss belong to the last stage.
                with fluid.device_guard("gpu:%d" % (pp_degree - 1)):
                    prediction = paddle.fluid.layers.fc(input=[hidden],
                                                        size=2,
                                                        act='softmax')
                    cost = paddle.fluid.layers.cross_entropy(input=prediction,
                                                             label=input_y)
                    avg_cost = paddle.fluid.layers.mean(x=cost)

        return avg_cost, paddle.distributed.fleet.DistributedStrategy()
Example #4
0
    def build_net(self, inputs):
        """DNN over sparse embeddings plus dense features, with FC layers
        alternately pinned to GPU and CPU for heterogeneous training.

        Layout implied by the indexing below: inputs[0] dense features,
        inputs[1:-1] sparse slots, inputs[-1] labels.
        """

        def embedding_layer(input):
            # Every sparse slot shares the same embedding table.
            return fluid.layers.embedding(
                input=input,
                is_sparse=True,
                size=[100001, 10],
                param_attr=fluid.ParamAttr(
                    name="SparseFeatFactors",
                    initializer=fluid.initializer.Uniform()),
            )

        sparse_embed_seq = [embedding_layer(slot) for slot in inputs[1:-1]]
        features = fluid.layers.concat(sparse_embed_seq + inputs[0:1], axis=1)

        def relu_fc(x, name):
            # 400-wide relu FC, weights drawn from N(0, 1/sqrt(fan_in)).
            return fluid.layers.fc(
                input=x,
                size=400,
                act="relu",
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Normal(
                        scale=1 / math.sqrt(x.shape[1]))),
                name=name)

        # FC stack alternates devices: gpu -> cpu -> gpu.
        with fluid.device_guard("gpu"):
            fc1 = relu_fc(features, "fc1")
        with fluid.device_guard("cpu"):
            fc2 = relu_fc(fc1, "fc2")
        with fluid.device_guard("gpu"):
            fc3 = relu_fc(fc2, "fc3")

        with fluid.device_guard("cpu"):
            predict = fluid.layers.fc(
                input=fc3,
                size=2,
                act="softmax",
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Normal(
                        scale=1 / math.sqrt(fc3.shape[1]))),
            )

        with fluid.device_guard("gpu"):
            labels = fluid.layers.cast(inputs[-1], dtype="int64")
            ce = fluid.layers.cross_entropy(input=predict, label=labels)
            avg_cost = fluid.layers.reduce_sum(ce)

        return avg_cost
Example #5
0
    def test_cpu_only_op(self):
        """An op with only a CPU kernel must still run correctly when placed
        under device_guard("gpu")."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            x = fluid.layers.fill_constant(
                dtype='float32', shape=[2, 255, 13, 13], value=0.3)
            gt_box = fluid.layers.fill_constant(
                dtype='float32', shape=[2, 6, 4], value=0.5)
            gt_label = fluid.layers.fill_constant(
                dtype='int32', shape=[2, 6], value=1.0)
            gt_score = fluid.layers.fill_constant(
                dtype='float32', shape=[2, 6], value=0.5)
            anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62,
                       45, 59, 119, 116, 90, 156, 198, 373, 326]
            anchor_mask = [0, 1, 2]
            with fluid.device_guard("gpu"):
                # yolov3_loss only has a CPU kernel, so its CPU kernel is
                # executed despite the "gpu" guard.
                loss = fluid.layers.yolov3_loss(x=x,
                                                gt_box=gt_box,
                                                gt_label=gt_label,
                                                gt_score=gt_score,
                                                anchors=anchors,
                                                anchor_mask=anchor_mask,
                                                class_num=80,
                                                ignore_thresh=0.7,
                                                downsample_ratio=32)

        execute(main_prog, startup_prog)
Example #6
0
    def test_without_kernel_op(self):
        """An op without a kernel (here: while) cannot take a device:
        device_guard must warn once and leave the op-device attr empty."""
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
            loop_len = fluid.layers.fill_constant(shape=[1],
                                                  dtype='int64',
                                                  value=10)
            cond = fluid.layers.less_than(x=i, y=loop_len)

            # Capture warnings emitted while building the while op under the guard.
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                with fluid.device_guard("cpu"):
                    while_op = fluid.layers.While(cond=cond)
                    with while_op.block():
                        i = fluid.layers.increment(x=i, value=1, in_place=True)
                        fluid.layers.less_than(x=i, y=loop_len, cond=cond)

        # Exactly one warning should report that while cannot set a device.
        warning = "The Op(while) is not support to set device."
        warning_num = get_vaild_warning_num(warning, w)
        assert warning_num == 1

        all_ops = main_program.global_block().ops
        device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
        for op in all_ops:
            if op.type == 'while':
                # No kernel means no device: the attribute stays empty.
                self.assertEqual(op.desc.attr(device_attr_name), "")

        execute(main_program, startup_program)
Example #7
0
    def test_op_descs_device_attr(self):
        """Every op desc must carry the op-device attribute, and backward ops
        appended inside device_guard("gpu") (e.g. the fill_constant that the
        mean op's backward appends) must inherit the "gpu" value."""
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            data1 = fluid.layers.data(name="data_1",
                                      shape=[2],
                                      dtype="float32")
            data2 = fluid.layers.data(name="data_2",
                                      shape=[2],
                                      dtype="float32")
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            fc1 = fluid.layers.fc(input=data1, size=10)
            fc2 = fluid.layers.fc(input=fc1, size=10)
            with fluid.device_guard("gpu"):
                out = fluid.layers.softmax_with_cross_entropy(logits=fc1 + fc2,
                                                              label=label)
                loss = fluid.layers.mean(out)
                opt = fluid.optimizer.SGDOptimizer(0.1)
                opt.minimize(loss)

        all_ops = main_program.global_block().ops
        device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
        for op in all_ops:
            self.assertEqual(True, op.desc.has_attr(device_attr_name))
            # fill_constant (a backward op) is appended for the mean op and
            # should carry the same op_device value as the mean op.
            # BUG FIX: the original compared `op.desc` (an OpDesc object)
            # to the string 'fill_constant', which is always False, so the
            # assertion below never executed. Compare `op.type` instead,
            # matching the other checks in this test class.
            if op.type == 'fill_constant':
                self.assertEqual(op.desc.attr(device_attr_name), "gpu")
Example #8
0
    def net(self, input):
        """Build the static training network from the dynamic DNNLayer
        ("Dynamic network -> Static network").

        Layout implied by the indexing below: input[0] dense features,
        input[1:-1] sparse slots, input[-1] labels — TODO confirm with caller.

        Returns:
            dict: {'cost': avg_cost, 'auc': auc}.
        """
        dnn_model = DNNLayer(self.sparse_feature_dim,
                             self.embedding_size, self.dense_feature_dim,
                             len(input[1:-1]), self.fc_sizes)

        raw_predict_2d = dnn_model(input[1:-1], input[0])

        # Metrics and loss are pinned to GPU for heterogeneous training.
        with fluid.device_guard("gpu"):
            predict_2d = paddle.nn.functional.softmax(raw_predict_2d)

            self.predict = predict_2d

            auc, batch_auc, _ = paddle.fluid.layers.auc(input=self.predict,
                                                        label=input[-1],
                                                        num_thresholds=2**12,
                                                        slide_steps=20)

            # NOTE(review): cross_entropy receives the raw logits while the
            # softmax output is used only for AUC — presumably cross_entropy
            # applies softmax internally; verify against the paddle API.
            cost = paddle.nn.functional.cross_entropy(input=raw_predict_2d,
                                                      label=input[-1])
            avg_cost = paddle.mean(x=cost)
            self.cost = avg_cost
            self.infer_target_var = auc

            sync_mode = self.config.get("static_benchmark.sync_mode")
            if sync_mode == "heter":
                fluid.layers.Print(auc, message="AUC")

        return {'cost': avg_cost, 'auc': auc}
Example #9
0
    def forward(self, sparse_inputs, dense_inputs):
        """Embed each sparse slot, concat with the dense features, then run
        the MLP stack (pinned to GPU) and return the final activations."""
        sparse_embs = [
            paddle.reshape(self.embedding(slot),
                           shape=[-1, self.sparse_feature_dim])
            for slot in sparse_inputs
        ]

        y_dnn = paddle.concat(x=sparse_embs + [dense_inputs], axis=1)

        with fluid.device_guard("gpu"):
            for layer in self._mlp_layers:
                y_dnn = layer(y_dnn)

        return y_dnn
Example #10
0
    def test_warning(self):
        """A force_cpu flag inside device_guard("gpu") must raise one warning
        per conflicting op while the device attr still records "gpu"."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                with fluid.device_guard("gpu"):
                    x = fluid.layers.fill_constant(
                        shape=[1], value=3.0, dtype='float32', force_cpu=True)
                    y = fluid.layers.fill_constant(
                        shape=[1], value=4.0, dtype='float32')
                    result = fluid.layers.less_than(x=x, y=y, force_cpu=False)

        # Two ops carry an explicit force_cpu flag -> two warnings.
        assert len(w) == 2
        attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
        for op in main_prog.global_block().ops:
            self.assertEqual(op.desc.attr(attr_name), "gpu")
Example #11
0
def cnn_model(data):
    """LeNet-style CNN whose classifier FCs are pinned to stage "gpu:1"."""

    def conv_pool(x, filters):
        # 5x5 conv + 2x2 max pool, constant-initialized weights.
        return fluid.nets.simple_img_conv_pool(
            input=x,
            filter_size=5,
            num_filters=filters,
            pool_size=2,
            pool_stride=2,
            act="relu",
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.01)))

    conv_pool_1 = conv_pool(data, 20)
    conv_pool_2 = conv_pool(conv_pool_1, 50)

    SIZE = 10
    input_shape = conv_pool_2.shape
    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5

    with fluid.device_guard("gpu:1"):

        def softmax_head(x):
            # 10-way softmax classifier, constant-initialized weights.
            return fluid.layers.fc(
                input=x,
                size=SIZE,
                act="softmax",
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.01)))

        predict = softmax_head(conv_pool_2)
        # A second head over the first conv block; summing makes the first
        # block's gradient arrive via two paths (covers @RENAMED@GRADIENT).
        predict += softmax_head(conv_pool_1)
    return predict
Example #12
0
    def tdm_net(self, input):
        """
        Main flow of the TDM (tree-based deep model) training network.
        """
        is_distributed = True if envs.get_trainer() == "CtrTrainer" else False  # NOTE(review): computed but unused in this body.

        input_emb = input[0]
        item_label = input[1]

        # Negative sampling on the given tree, driven by the positive item
        # samples in the input.
        # sample_nodes: the sampled node_ids, containing both positive and
        #               negative samples
        # sample_label: the positive/negative labels matching the sampled
        #               node_ids
        # sample_mask:  labels for the padding used to keep tensor dims
        #               consistent; 0 marks a padded (virtual) node_id

        if self.check_version():
            with fluid.device_guard("cpu"):
                sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
                    x=item_label,
                    neg_samples_num_list=self.neg_sampling_list,
                    layer_node_num_list=self.layer_node_num_list,
                    leaf_node_num=self.leaf_node_nums,
                    tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
                    tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
                    output_positive=self.output_positive,
                    output_list=True,
                    seed=0,
                    tree_dtype='int64',
                    dtype='int64')
        else:
            sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
                x=item_label,
                neg_samples_num_list=self.neg_sampling_list,
                layer_node_num_list=self.layer_node_num_list,
                leaf_node_num=self.leaf_node_nums,
                tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
                tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
                output_positive=self.output_positive,
                output_list=True,
                seed=0,
                tree_dtype='int64',
                dtype='int64')

        sample_nodes = [
            fluid.layers.reshape(sample_nodes[i], [-1, 1])
            for i in range(self.max_layers)
        ]

        # Look up the embedding of every sampled node.
        sample_nodes_emb = [
            fluid.layers.embedding(
                input=sample_nodes[i],
                is_sparse=True,
                size=[self.node_nums, self.node_emb_size],
                param_attr=fluid.ParamAttr(name="TDM_Tree_Emb"))
            for i in range(self.max_layers)
        ]

        # Reshape here for the subsequent layer-wise classifier training.
        sample_nodes_emb = [
            fluid.layers.reshape(sample_nodes_emb[i], [
                -1, self.neg_sampling_list[i] + self.output_positive,
                self.node_emb_size
            ]) for i in range(self.max_layers)
        ]

        # Transform input_emb so its dimension matches node_emb.
        input_trans_emb = self.input_trans_layer(input_emb)

        # Classifier backbone: a separate classifier is trained per tree layer.
        layer_classifier_res = self.classifier_layer(input_trans_emb,
                                                     sample_nodes_emb)

        # Final discriminative FC: node classification results of all layers
        # are judged together under a single criterion. The tree is very
        # likely unbalanced and some items are not on the last layer, so this
        # mechanism guarantees every item a chance to be recalled.
        tdm_fc = fluid.layers.fc(
            input=layer_classifier_res,
            size=2,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(name="tdm.cls_fc.weight"),
            bias_attr=fluid.ParamAttr(name="tdm.cls_fc.bias"))

        # Flatten so the loss of the whole network is computed together.
        tdm_fc_re = fluid.layers.reshape(tdm_fc, [-1, 2])

        # To weight each layer's loss differently, skip the concat here:
        # per-layer losses can be computed separately and multiplied by the
        # corresponding weights.
        sample_label = fluid.layers.concat(sample_label, axis=1)
        labels_reshape = fluid.layers.reshape(sample_label, [-1, 1])
        labels_reshape.stop_gradient = True

        # Compute the overall loss and obtain the softmax output.
        cost, softmax_prob = fluid.layers.softmax_with_cross_entropy(
            logits=tdm_fc_re, label=labels_reshape, return_softmax=True)

        # Filter out the loss of virtual (padding) nodes via the mask.
        sample_mask = fluid.layers.concat(sample_mask, axis=1)
        mask_reshape = fluid.layers.reshape(sample_mask, [-1, 1])
        mask_index = fluid.layers.where(mask_reshape != 0)
        mask_index.stop_gradient = True

        self.mask_cost = fluid.layers.gather_nd(cost, mask_index)

        softmax_prob = fluid.layers.unsqueeze(input=softmax_prob, axes=[1])
        self.mask_prob = fluid.layers.gather_nd(softmax_prob, mask_index)
        self.mask_label = fluid.layers.gather_nd(labels_reshape, mask_index)

        self._predict = self.mask_prob
Example #13
0
def build_network(input, layers=50, class_dim=1000):
    """ResNet-style network whose stages are assigned round-robin to
    pipeline devices "gpu:0", "gpu:1", ... via fluid.device_guard.

    Args:
        input: input image variable.
        layers (int): network depth, one of 18/34/50/101/152
            (>= 50 uses bottleneck blocks, otherwise basic blocks).
        class_dim (int): number of classifier outputs.

    Returns:
        tuple: (out, offset) — the classifier output and the device index
        of the final pipeline stage.
    """
    supported_layers = [18, 34, 50, 101, 152]
    assert layers in supported_layers
    # Residual-block counts per stage for each supported depth
    # (replaces the original if/elif chain; same mapping).
    depth_by_layers = {
        18: [2, 2, 2, 2],
        34: [3, 4, 6, 3],
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
    }
    depth = depth_by_layers[layers]
    num_filters = [64, 128, 256, 512]

    def classifier_tail(conv, device_offset):
        # Global average pool + uniform-initialized FC classifier.
        # This tail was previously duplicated verbatim in both branches.
        with fluid.device_guard("gpu:%d" % device_offset):
            pool = fluid.layers.pool2d(input=conv,
                                       pool_size=7,
                                       pool_type='avg',
                                       global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            return fluid.layers.fc(
                input=pool,
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))

    offset = 0
    # Stem: 7x7 conv + 3x3 max pool on the first device.
    with fluid.device_guard("gpu:%d" % (offset)):
        conv = conv_bn_layer(input=input,
                             num_filters=64,
                             filter_size=7,
                             stride=2,
                             act='relu')
        conv = fluid.layers.pool2d(input=conv,
                                   pool_size=3,
                                   pool_stride=2,
                                   pool_padding=1,
                                   pool_type='max')
    offset += 1
    # One device per residual stage; downsample (stride 2) at the start of
    # every stage except the first.
    if layers >= 50:
        for block in range(len(depth)):
            with fluid.device_guard("gpu:%d" % (offset)):
                for i in range(depth[block]):
                    conv = bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1)
            offset += 1
    else:
        for block in range(len(depth)):
            with fluid.device_guard("gpu:%d" % (offset)):
                for i in range(depth[block]):
                    conv = basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        is_first=block == i == 0)
            offset += 1
    out = classifier_tail(conv, offset)
    return out, offset
Example #14
0
def create_model(args, phase, micro_bsz, dp_sharding_rank, dp_worldsize, topo):
    """Build the ERNIE pretraining program for pipeline-parallel training.

    Input/embedding ops are pinned to "gpu:0" and the loss head to the last
    pipeline stage ("gpu:{args.num_pp-1}") via fluid.device_guard.

    Args:
        args: parsed arguments (use_sop, max_seq_len, data_dir, vocab_file,
            ernie_config_file, preln, num_pp, ...).
        phase: phase tag; not used in this body.
        micro_bsz (int): micro batch size handed to the dataset reader.
        dp_sharding_rank (int): this worker's data-parallel/sharding rank.
        dp_worldsize (int): total number of data-parallel workers.
        topo: parallel topology; .mp/.pp sizes decide weight sharing and
            persistable flags.

    Returns:
        dict: graph_vars holding the data_loader, the loss variables and
        the recompute checkpoints.
    """
    # Reader choice depends on whether SOP (sentence-order prediction) is on.
    if args.use_sop:
        from reader.pretraining_ds_ernie_full_sent import make_pretrain_dataset
    else:
        from reader.pretraining_ds_mlm import make_pretrain_dataset

    # mask_label, mask_pos for mlm, labels for sop
    if args.use_sop:
        input_fields = {
            'names':
            ['src_ids', 'sent_ids', 'mask_label', 'mask_pos', 'labels'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0],
        }

    # All input layers live on the first pipeline stage.
    with fluid.device_guard("gpu:0"):
        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]
    if args.use_sop:
        (src_ids, sent_ids, mask_label, mask_pos, labels) = inputs
    else:
        (src_ids, sent_ids, mask_label, mask_pos) = inputs
    train_file_list = glob.glob(args.data_dir + "/*")
    # Vocab file format: one "token\tid" pair per line.
    vocab = {}
    with open(args.vocab_file) as r:
        for line in r:
            lines = line.strip().split('\t')
            vocab[lines[0]] = int(lines[1])

    log.debug("========= worker: {} of {} ==========".format(
        dp_sharding_rank, dp_worldsize))

    data_reader = make_pretrain_dataset('pt', train_file_list, True, vocab,
                                        micro_bsz, len(vocab),
                                        args.max_seq_len, dp_sharding_rank,
                                        dp_worldsize)
    # The loader also sits on the first stage, next to the inputs it feeds.
    with fluid.device_guard("gpu:0"):
        data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                         capacity=70,
                                                         iterable=False)
    places = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0)))

    def data_gen():
        # Thin generator wrapper so the loader can re-iterate the reader.
        yield from data_reader

    data_loader.set_batch_generator(data_gen, places)

    ernie_config = ErnieConfig(args.ernie_config_file)._config_dict
    ernie_config["preln"] = args.preln

    weight_sharing = (topo.mp.size == 1 and topo.pp.size == 1
                      )  # pp mp should not do weight sharing
    with fluid.device_guard("gpu:0"):
        ernie = ErnieModel(src_ids,
                           sent_ids,
                           ernie_config,
                           weight_sharing=weight_sharing,
                           topo=topo)
    checkpoints = ernie._checkpoints
    # NOTE(review): the last checkpoint is dropped — presumably because the
    # loss head recomputes from it anyway; confirm against ErnieModel.
    checkpoints.pop(-1)

    # Loss head runs on the last pipeline stage.
    with fluid.device_guard(f'gpu:{args.num_pp-1}'):
        mask_lm_loss, mean_mask_lm_loss = ernie.get_lm_output(
            mask_label, mask_pos)
        total_loss = mean_mask_lm_loss

        if args.use_sop:
            sop_acc, mean_sop_loss = ernie.get_next_sentence_output(labels)
            total_loss += mean_sop_loss

        if topo.pp.size > 1:
            # NOTE(review): persistable presumably keeps these vars alive
            # across pipeline micro-batch scopes so they can be fetched.
            mask_lm_loss.persistable = True
            mean_mask_lm_loss.persistable = True
            # checkpoints.extend([mask_lm_loss.name, mean_mask_lm_loss.name])
            if args.use_sop:
                mean_sop_loss.persistable = True
                sop_acc.persistable = True
                # checkpoints.extend([mean_sop_loss.name, sop_acc.name])
            total_loss.persistable = True
            # checkpoints.append(total_loss.name)

    if args.use_sop:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'sop_loss': mean_sop_loss,
            'sop_acc': sop_acc,
            'total_loss': total_loss,
            'checkpoints': checkpoints
        }
    else:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'total_loss': total_loss,
            'checkpoints': checkpoints,
        }
    return graph_vars
Example #15
0
 def device_attr2():
     """Emit a fill_constant op under device_guard("cpu:1")."""
     with fluid.device_guard("cpu:1"):
         out = fluid.layers.fill_constant(dtype='float32',
                                          shape=[1],
                                          value=0.2)
def net(batch_size=4, lr=0.01):
    """Build a toy wide & deep CTR network split across devices.

    The data layers, embeddings and sequence pools are placed on CPU via
    ``fluid.device_guard("cpu")``; the dense FC, concat and loss ops are
    placed on GPU via ``fluid.device_guard("gpu")``.

    Args:
        batch_size(int): the size of mini-batch for training
        lr(float): learning rate of training
    Returns:
        A pair ``(inputs, avg_cost)`` where ``inputs`` is the list of data
        layers to feed and ``avg_cost`` is the mean cross-entropy loss.
    """
    deep_vocab_size, wide_vocab_size = int(2), int(2)

    with fluid.device_guard("cpu"):
        deep_input = fluid.layers.data(name="dnn_data",
                                       shape=[-1, 1],
                                       dtype="int64",
                                       lod_level=1,
                                       append_batch_size=False)
        wide_input = fluid.layers.data(name="lr_data",
                                       shape=[-1, 1],
                                       dtype="int64",
                                       lod_level=1,
                                       append_batch_size=False)
        click_label = fluid.layers.data(name="click",
                                        shape=[-1, 1],
                                        dtype="float32",
                                        lod_level=0,
                                        append_batch_size=False)

        inputs = [deep_input, wide_input, click_label]

        # Deep branch: sparse embedding followed by a sum sequence-pool.
        hidden_dims = [2, 1]
        deep_emb = fluid.layers.embedding(
            is_distributed=False,
            input=deep_input,
            size=[deep_vocab_size, hidden_dims[0]],
            param_attr=fluid.ParamAttr(
                name="deep_embedding",
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)
        deep_out = fluid.layers.sequence_pool(input=deep_emb,
                                              pool_type="sum")

        # Wide branch: sparse embedding followed by a sum sequence-pool.
        wide_emb = fluid.layers.embedding(
            is_distributed=False,
            input=wide_input,
            size=[wide_vocab_size, 1],
            param_attr=fluid.ParamAttr(
                name="wide_embedding",
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)
        wide_out = fluid.layers.sequence_pool(input=wide_emb,
                                              pool_type="sum")

    with fluid.device_guard("gpu"):
        # Dense tower over the deep branch output.
        for idx, width in enumerate(hidden_dims[1:]):
            deep_out = fluid.layers.fc(
                input=deep_out,
                size=width,
                act="relu",
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.01)),
                name='dnn-fc-%d' % idx)

        # Join both branches and classify into click / no-click.
        merged = fluid.layers.concat(input=[deep_out, wide_out], axis=1)
        click_label = fluid.layers.cast(click_label, dtype="int64")
        predict = fluid.layers.fc(input=merged, size=2, act='softmax')

        cost = fluid.layers.cross_entropy(input=predict, label=click_label)
        avg_cost = fluid.layers.mean(x=cost)
    return inputs, avg_cost
Example #17
0
    def net(self, args, batch_size=4, lr=0.01):
        """Build the wide & deep CTR network used by this test.

        The data layers, embeddings and sequence pools are created outside
        any device_guard; the dense FC, concat and loss ops are pinned to
        GPU via ``fluid.device_guard("gpu")``.

        Args:
            args: parsed options; only ``args.reader`` is consulted here,
                to decide whether to create a PyReader.
            batch_size(int): the size of mini-batch for training
            lr(float): learning rate of training
        Returns:
            avg_cost: LoDTensor of cost.
        """
        deep_vocab_size, wide_vocab_size = int(1e5), int(1e5)

        deep_input = fluid.layers.data(name="dnn_data",
                                       shape=[-1, 1],
                                       dtype="int64",
                                       lod_level=1,
                                       append_batch_size=False)
        wide_input = fluid.layers.data(name="lr_data",
                                       shape=[-1, 1],
                                       dtype="int64",
                                       lod_level=1,
                                       append_batch_size=False)
        click_label = fluid.layers.data(name="click",
                                        shape=[-1, 1],
                                        dtype="float32",
                                        lod_level=0,
                                        append_batch_size=False)

        feed_vars = [deep_input, wide_input, click_label]

        if args.reader == "pyreader":
            # Feed data through a non-iterable PyReader when requested.
            self.reader = fluid.io.PyReader(feed_list=feed_vars,
                                            capacity=64,
                                            iterable=False,
                                            use_double_buffer=False)

        # Deep branch: sparse embedding followed by a sum sequence-pool.
        hidden_dims = [128, 64, 32, 1]
        deep_emb = fluid.layers.embedding(
            is_distributed=False,
            input=deep_input,
            size=[deep_vocab_size, hidden_dims[0]],
            param_attr=fluid.ParamAttr(
                name="deep_embedding",
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)
        deep_out = fluid.layers.sequence_pool(input=deep_emb,
                                              pool_type="sum")

        # Wide branch: sparse embedding followed by a sum sequence-pool.
        wide_emb = fluid.layers.embedding(
            is_distributed=False,
            input=wide_input,
            size=[wide_vocab_size, 1],
            param_attr=fluid.ParamAttr(
                name="wide_embedding",
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)
        wide_out = fluid.layers.sequence_pool(input=wide_emb,
                                              pool_type="sum")

        with fluid.device_guard("gpu"):
            # Dense tower over the deep branch, then join and classify.
            for i, width in enumerate(hidden_dims[1:]):
                deep_out = fluid.layers.fc(
                    input=deep_out,
                    size=width,
                    act="relu",
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Constant(value=0.01)),
                    name='dnn-fc-%d' % i)

            merged = fluid.layers.concat(input=[deep_out, wide_out], axis=1)
            click_label = fluid.layers.cast(click_label, dtype="int64")
            predict = fluid.layers.fc(input=merged, size=2, act='softmax')

            cost = fluid.layers.cross_entropy(input=predict, label=click_label)
            avg_cost = fluid.layers.mean(x=cost)
            # Emit the loss value at runtime, tagged "avg_cost".
            fluid.layers.Print(avg_cost, message="avg_cost")

        self.feeds = feed_vars
        self.train_file_path = ["fake1", "fake2"]
        self.avg_cost = avg_cost
        self.predict = predict

        return avg_cost
Example #18
0
            offset += 1
        with fluid.device_guard("gpu:%d" % (offset)):
            pool = fluid.layers.pool2d(input=conv,
                                       pool_size=7,
                                       pool_type='avg',
                                       global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
    return out, offset


# First pipeline stage: input layers and the data loader are pinned to gpu:0.
with fluid.device_guard("gpu:0"):
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name="feed_image",
                              shape=image_shape,
                              dtype="float32")
    label = fluid.layers.data(name="feed_label", shape=[1], dtype="int64")
    # Non-iterable loader; double buffering overlaps host/device transfer.
    data_loader = fluid.io.DataLoader.from_generator(feed_list=[image, label],
                                                     capacity=64,
                                                     use_double_buffer=True,
                                                     iterable=False)

# build_network (defined above) returns the final FC output and the index
# of the last device it placed ops on; `offset` pins the loss stage below.
fc, offset = build_network(image)

with fluid.device_guard("gpu:%d" % (offset)):
    out, prob = fluid.layers.softmax_with_cross_entropy(logits=fc,
                                                        label=label,