def test_device_guard_with_id(self):
    """device_guard with an explicit device id ("gpu:1") must stamp the
    op_device attribute of the ops created under it."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        data1 = fluid.layers.fill_constant(shape=[1, 3, 8, 8],
                                           value=0.5,
                                           dtype='float32')
        data2 = fluid.layers.fill_constant(shape=[1, 3, 5, 5],
                                           value=0.5,
                                           dtype='float32')
        shape = fluid.layers.shape(data2)
        with fluid.device_guard("cpu"):
            shape = fluid.layers.slice(shape, axes=[0], starts=[0], ends=[4])
        with fluid.device_guard("gpu:1"):
            out = fluid.layers.crop_tensor(data1, shape=shape)

    # Verify the device attr was recorded on each guarded op.
    attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
    expected_devices = {'slice': "cpu", 'crop_tensor': "gpu:1"}
    for op in main_program.global_block().ops:
        if op.type in expected_devices:
            self.assertEqual(op.desc.attr(attr_name),
                             expected_devices[op.type])

    execute(main_program, startup_program)
def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None):
    """Build the pipeline-parallel MNIST CNN training model.

    Args:
        batch_size (int): mini-batch size for the readers; also used as the
            pipeline micro batch size in the distributed case.
        use_dgc (bool): accepted for interface compatibility; not used here.
        dist_strategy: when truthy, initialize fleet and build the
            pipeline-parallel training variant.

    Returns:
        (inference_program, avg_cost, train_reader, test_reader, batch_acc,
        predict), with data_loader appended in the distributed case.
    """
    # FIX: removed the unused local `device_id = 0`; it was never read.
    if dist_strategy:
        fleet.init(is_collective=True)

    # Input data lives on the first pipeline stage.
    with fluid.device_guard("gpu:0"):
        images = fluid.layers.data(name='pixel',
                                   shape=[1, 28, 28],
                                   dtype=DTYPE)
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        if dist_strategy:
            data_loader = fluid.io.DataLoader.from_generator(
                feed_list=[images, label],
                capacity=64,
                use_double_buffer=False,
                iterable=False)

    # Train program; cnn_model assigns its own layers to devices via
    # device_guard internally.
    predict = cnn_model(images)
    with fluid.device_guard("gpu:0"):
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    with fluid.device_guard("gpu:0"):
        batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(input=predict,
                                          label=label,
                                          total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()

    # Momentum optimizer with piecewise learning-rate decay.
    base_lr = self.lr
    passes = [30, 60, 80, 90]
    steps_per_pass = 10
    bd = [steps_per_pass * p for p in passes]
    lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
    lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
    opt = fluid.optimizer.Momentum(learning_rate=lr_val, momentum=0.9)

    # Reader: both readers use the small test split, which keeps this
    # test model fast.
    train_reader = paddle.batch(paddle.dataset.mnist.test(),
                                batch_size=batch_size)
    test_reader = paddle.batch(paddle.dataset.mnist.test(),
                               batch_size=batch_size)

    if dist_strategy:
        strategy = fleet.DistributedStrategy()
        strategy.pipeline = True
        strategy.pipeline_configs = {
            'schedule_mode': 'F-then-B',
            'micro_batch_size': batch_size
        }
        dist_opt = fleet.distributed_optimizer(optimizer=opt,
                                               strategy=strategy)
        dist_opt.minimize(avg_cost)
    else:
        opt.minimize(avg_cost)

    if dist_strategy:
        return inference_program, avg_cost, train_reader, test_reader, \
            batch_acc, predict, data_loader
    else:
        return inference_program, avg_cost, train_reader, test_reader, \
            batch_acc, predict
def pp_net(self, main_prog, startup_prog, pp_degree=2):
    """Build a pp_degree-stage pipeline of FC blocks.

    Returns:
        (avg_cost, strategy): the mean cross-entropy loss and a fresh
        DistributedStrategy for the caller to configure.
    """

    def fc_block(x):
        # Three stacked 64-wide tanh FC layers.
        for _ in range(3):
            x = paddle.fluid.layers.fc(input=x, size=64, act='tanh')
        return x

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)

            # Inputs are fed on the first stage.
            with fluid.device_guard("gpu:0"):
                input_x = paddle.fluid.layers.data(name="x",
                                                   shape=[32],
                                                   dtype='float32')
                input_y = paddle.fluid.layers.data(name="y",
                                                   shape=[1],
                                                   dtype='int64')

            # One FC block per pipeline stage.
            for stage_idx in range(pp_degree):
                with fluid.device_guard("gpu:" + str(stage_idx)):
                    input_x = fc_block(input_x)

            # Head and loss live on the last stage.
            with fluid.device_guard("gpu:" + str(pp_degree - 1)):
                prediction = paddle.fluid.layers.fc(input=[input_x],
                                                    size=2,
                                                    act='softmax')
                cost = paddle.fluid.layers.cross_entropy(input=prediction,
                                                         label=input_y)
                avg_cost = paddle.fluid.layers.mean(x=cost)

    strategy = paddle.distributed.fleet.DistributedStrategy()
    return avg_cost, strategy
def build_net(self, inputs):
    """Heterogeneous DNN: FC layers alternate between gpu and cpu guards.

    inputs layout (per the indexing below): inputs[0] is the dense feature,
    inputs[1:-1] are sparse id features, inputs[-1] is the label.
    Returns the summed cross-entropy cost.
    """

    def embedding_layer(input):
        return fluid.layers.embedding(
            input=input,
            is_sparse=True,
            size=[100001, 10],
            param_attr=fluid.ParamAttr(
                name="SparseFeatFactors",
                initializer=fluid.initializer.Uniform()),
        )

    def scaled_normal_attr(fan_in):
        # Normal init scaled by 1/sqrt(fan_in), identical for each FC.
        return fluid.ParamAttr(
            initializer=fluid.initializer.Normal(scale=1 / math.sqrt(fan_in)))

    sparse_embed_seq = [embedding_layer(inp) for inp in inputs[1:-1]]
    concated = fluid.layers.concat(sparse_embed_seq + inputs[0:1], axis=1)

    with fluid.device_guard("gpu"):
        fc1 = fluid.layers.fc(input=concated,
                              size=400,
                              act="relu",
                              param_attr=scaled_normal_attr(
                                  concated.shape[1]),
                              name="fc1")
    with fluid.device_guard("cpu"):
        fc2 = fluid.layers.fc(input=fc1,
                              size=400,
                              act="relu",
                              param_attr=scaled_normal_attr(fc1.shape[1]),
                              name="fc2")
    with fluid.device_guard("gpu"):
        fc3 = fluid.layers.fc(input=fc2,
                              size=400,
                              act="relu",
                              param_attr=scaled_normal_attr(fc2.shape[1]),
                              name="fc3")
    with fluid.device_guard("cpu"):
        predict = fluid.layers.fc(
            input=fc3,
            size=2,
            act="softmax",
            param_attr=scaled_normal_attr(fc3.shape[1]),
        )
    with fluid.device_guard("gpu"):
        labels = fluid.layers.cast(inputs[-1], dtype="int64")
        cost = fluid.layers.cross_entropy(input=predict, label=labels)
        avg_cost = fluid.layers.reduce_sum(cost)
    return avg_cost
def test_cpu_only_op(self):
    """An op with only a CPU kernel still executes under a gpu guard."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        feats = fluid.layers.fill_constant(shape=[2, 255, 13, 13],
                                           value=0.3,
                                           dtype='float32')
        boxes = fluid.layers.fill_constant(shape=[2, 6, 4],
                                           value=0.5,
                                           dtype='float32')
        box_labels = fluid.layers.fill_constant(shape=[2, 6],
                                                value=1.0,
                                                dtype='int32')
        box_scores = fluid.layers.fill_constant(shape=[2, 6],
                                                value=0.5,
                                                dtype='float32')
        anchor_sizes = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156,
            198, 373, 326
        ]
        mask = [0, 1, 2]
        with fluid.device_guard("gpu"):
            # yolov3_loss only has a cpu kernel, so its cpu kernel is
            # executed despite the gpu guard.
            loss = fluid.layers.yolov3_loss(x=feats,
                                            gt_box=boxes,
                                            gt_label=box_labels,
                                            gt_score=box_scores,
                                            anchors=anchor_sizes,
                                            anchor_mask=mask,
                                            class_num=80,
                                            ignore_thresh=0.7,
                                            downsample_ratio=32)

    execute(main_program, startup_program)
def test_without_kernel_op(self):
    """A device_guard around a kernel-less op (while) must warn once and
    leave its op_device attribute empty."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        counter = fluid.layers.fill_constant(shape=[1],
                                             dtype='int64',
                                             value=0)
        loop_len = fluid.layers.fill_constant(shape=[1],
                                              dtype='int64',
                                              value=10)
        cond = fluid.layers.less_than(x=counter, y=loop_len)

        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            with fluid.device_guard("cpu"):
                while_op = fluid.layers.While(cond=cond)
                with while_op.block():
                    counter = fluid.layers.increment(x=counter,
                                                     value=1,
                                                     in_place=True)
                    fluid.layers.less_than(x=counter, y=loop_len, cond=cond)

        warning = "The Op(while) is not support to set device."
        warning_num = get_vaild_warning_num(warning, caught)
        assert warning_num == 1

    # The while op must not carry any device attribute value.
    attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
    for op in main_program.global_block().ops:
        if op.type == 'while':
            self.assertEqual(op.desc.attr(attr_name), "")

    execute(main_program, startup_program)
def test_op_descs_device_attr(self):
    """Ops appended by minimize() must inherit op_device from the forward
    op they were generated for."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        data1 = fluid.layers.data(name="data_1", shape=[2], dtype="float32")
        data2 = fluid.layers.data(name="data_2", shape=[2], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        fc1 = fluid.layers.fc(input=data1, size=10)
        fc2 = fluid.layers.fc(input=fc1, size=10)
        with fluid.device_guard("gpu"):
            out = fluid.layers.softmax_with_cross_entropy(logits=fc1 + fc2,
                                                          label=label)
            loss = fluid.layers.mean(out)
            opt = fluid.optimizer.SGDOptimizer(0.1)
            opt.minimize(loss)

    all_ops = main_program.global_block().ops
    device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
    for op in all_ops:
        self.assertEqual(True, op.desc.has_attr(device_attr_name))
        # fill_constant (backward op) is appended for the mean op, so it
        # should carry the same op_device value as the mean op.
        # FIX: the original compared `op.desc` (an OpDesc object) to the
        # string 'fill_constant' — always False, so the assertion below
        # never ran. Compare the op type instead.
        if op.type == 'fill_constant':
            self.assertEqual(op.desc.attr(device_attr_name), "gpu")
def net(self, input):
    """Dynamic network -> Static network."""
    sparse_slots = input[1:-1]
    dense_input = input[0]
    label = input[-1]

    dnn_model = DNNLayer(self.sparse_feature_dim, self.embedding_size,
                         self.dense_feature_dim, len(sparse_slots),
                         self.fc_sizes)
    raw_predict_2d = dnn_model(sparse_slots, dense_input)

    # NOTE(review): guard extent reconstructed from a flattened source —
    # confirm which ops were originally placed under "gpu".
    with fluid.device_guard("gpu"):
        predict_2d = paddle.nn.functional.softmax(raw_predict_2d)

        self.predict = predict_2d

        auc, batch_auc, _ = paddle.fluid.layers.auc(input=self.predict,
                                                    label=label,
                                                    num_thresholds=2**12,
                                                    slide_steps=20)

        cost = paddle.nn.functional.cross_entropy(input=raw_predict_2d,
                                                  label=label)
        avg_cost = paddle.mean(x=cost)
        self.cost = avg_cost
        self.infer_target_var = auc

    sync_mode = self.config.get("static_benchmark.sync_mode")
    if sync_mode == "heter":
        fluid.layers.Print(auc, message="AUC")

    return {'cost': avg_cost, 'auc': auc}
def forward(self, sparse_inputs, dense_inputs):
    """Embed each sparse input, concat with dense features, then run the
    MLP stack under the gpu device guard."""
    sparse_embs = [
        paddle.reshape(self.embedding(s_input),
                       shape=[-1, self.sparse_feature_dim])
        for s_input in sparse_inputs
    ]

    y_dnn = paddle.concat(x=sparse_embs + [dense_inputs], axis=1)

    with fluid.device_guard("gpu"):
        for layer in self._mlp_layers:
            y_dnn = layer(y_dnn)
    return y_dnn
def test_warning(self):
    """force_cpu inside a gpu guard should raise warnings, while the
    op_device attribute still ends up "gpu" on every op."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            with fluid.device_guard("gpu"):
                x = fluid.layers.fill_constant(shape=[1],
                                               value=3.0,
                                               dtype='float32',
                                               force_cpu=True)
                y = fluid.layers.fill_constant(shape=[1],
                                               value=4.0,
                                               dtype='float32')
                result = fluid.layers.less_than(x=x, y=y, force_cpu=False)

        # One warning per force_cpu conflict.
        assert len(caught) == 2

    attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
    for op in main_program.global_block().ops:
        self.assertEqual(op.desc.attr(attr_name), "gpu")
def cnn_model(data):
    """Two conv-pool stages followed by softmax FC heads on gpu:1.

    Args:
        data: input image variable.

    Returns:
        predict: softmax output, sum of the two FC heads (the second head
        exists to cover the @RENAMED@GRADIENT code path).
    """

    def conv_pool(x, num_filters):
        # Both stages share every hyperparameter except num_filters.
        return fluid.nets.simple_img_conv_pool(
            input=x,
            filter_size=5,
            num_filters=num_filters,
            pool_size=2,
            pool_stride=2,
            act="relu",
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.01)))

    conv_pool_1 = conv_pool(data, 20)
    conv_pool_2 = conv_pool(conv_pool_1, 50)

    SIZE = 10
    # FIX: removed dead locals (input_shape, param_shape, scale) — they
    # were computed but never used; the FC weights use a Constant init.
    with fluid.device_guard("gpu:1"):
        predict = fluid.layers.fc(
            input=conv_pool_2,
            size=SIZE,
            act="softmax",
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.01)))
        # To cover @RENAMED@GRADIENT
        predict2 = fluid.layers.fc(
            input=conv_pool_1,
            size=SIZE,
            act="softmax",
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.01)))
        predict += predict2
    return predict
def tdm_net(self, input):
    """
    Main flow of the TDM training network.

    input[0] is the user/input embedding; input[1] is the positive item
    label used to drive tree-based negative sampling. Builds the per-level
    classifiers, the final discriminative FC, and the masked loss; results
    are stored on self (mask_cost, mask_prob, mask_label, _predict).
    """
    is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
    input_emb = input[0]
    item_label = input[1]

    # Negative sampling on the given tree, driven by the positive item:
    #   sample_nodes - sampled node_ids, both positive and negative
    #   sample_label - pos/neg label for each sampled node_id
    #   sample_mask  - labels for padded entries (kept so tensor dims stay
    #                  aligned); 0 marks a padded, virtual node_id
    if self.check_version():
        with fluid.device_guard("cpu"):
            sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
                x=item_label,
                neg_samples_num_list=self.neg_sampling_list,
                layer_node_num_list=self.layer_node_num_list,
                leaf_node_num=self.leaf_node_nums,
                tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
                tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
                output_positive=self.output_positive,
                output_list=True,
                seed=0,
                tree_dtype='int64',
                dtype='int64')
    else:
        sample_nodes, sample_label, sample_mask = fluid.contrib.layers.tdm_sampler(
            x=item_label,
            neg_samples_num_list=self.neg_sampling_list,
            layer_node_num_list=self.layer_node_num_list,
            leaf_node_num=self.leaf_node_nums,
            tree_travel_attr=fluid.ParamAttr(name="TDM_Tree_Travel"),
            tree_layer_attr=fluid.ParamAttr(name="TDM_Tree_Layer"),
            output_positive=self.output_positive,
            output_list=True,
            seed=0,
            tree_dtype='int64',
            dtype='int64')

    sample_nodes = [
        fluid.layers.reshape(sample_nodes[i], [-1, 1])
        for i in range(self.max_layers)
    ]

    # Look up the embedding of every sampled node.
    sample_nodes_emb = [
        fluid.layers.embedding(
            input=sample_nodes[i],
            is_sparse=True,
            size=[self.node_nums, self.node_emb_size],
            param_attr=fluid.ParamAttr(name="TDM_Tree_Emb"))
        for i in range(self.max_layers)
    ]

    # Reshape here for the subsequent per-level classifier training.
    sample_nodes_emb = [
        fluid.layers.reshape(sample_nodes_emb[i], [
            -1, self.neg_sampling_list[i] + self.output_positive,
            self.node_emb_size
        ]) for i in range(self.max_layers)
    ]

    # Transform input_emb so its dimension matches the node embeddings.
    input_trans_emb = self.input_trans_layer(input_emb)
    # Classifier backbone: a separate classifier per tree level.
    layer_classifier_res = self.classifier_layer(input_trans_emb,
                                                 sample_nodes_emb)

    # Final discriminative FC: scores node classifications from all levels
    # with one shared criterion. The tree is very likely unbalanced and
    # some items are not on the last level, so this mechanism guarantees
    # every item has a chance to be recalled.
    tdm_fc = fluid.layers.fc(
        input=layer_classifier_res,
        size=2,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(name="tdm.cls_fc.weight"),
        bias_attr=fluid.ParamAttr(name="tdm.cls_fc.bias"))

    # Flatten so the loss of the whole network is computed together.
    tdm_fc_re = fluid.layers.reshape(tdm_fc, [-1, 2])

    # To weight each level's loss differently, skip this concat:
    # per-level losses can be computed separately and then weighted.
    sample_label = fluid.layers.concat(sample_label, axis=1)
    labels_reshape = fluid.layers.reshape(sample_label, [-1, 1])
    labels_reshape.stop_gradient = True

    # Overall loss together with the softmax output.
    cost, softmax_prob = fluid.layers.softmax_with_cross_entropy(
        logits=tdm_fc_re, label=labels_reshape, return_softmax=True)

    # Mask out the loss contributed by virtual (padded) nodes.
    sample_mask = fluid.layers.concat(sample_mask, axis=1)
    mask_reshape = fluid.layers.reshape(sample_mask, [-1, 1])
    mask_index = fluid.layers.where(mask_reshape != 0)
    mask_index.stop_gradient = True

    self.mask_cost = fluid.layers.gather_nd(cost, mask_index)

    softmax_prob = fluid.layers.unsqueeze(input=softmax_prob, axes=[1])
    self.mask_prob = fluid.layers.gather_nd(softmax_prob, mask_index)
    self.mask_label = fluid.layers.gather_nd(labels_reshape, mask_index)

    self._predict = self.mask_prob
def build_network(input, layers=50, class_dim=1000):
    """ResNet sliced into pipeline stages: each residual-block group is put
    on its own device via device_guard("gpu:<offset>").

    Returns:
        (out, offset): the FC logits and the index of the last stage used.
    """
    depth_by_layers = {
        18: [2, 2, 2, 2],
        34: [3, 4, 6, 3],
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
    }
    assert layers in depth_by_layers
    depth = depth_by_layers[layers]
    num_filters = [64, 128, 256, 512]

    # Stage 0: stem conv + max pool.
    offset = 0
    with fluid.device_guard("gpu:%d" % (offset)):
        conv = conv_bn_layer(input=input,
                             num_filters=64,
                             filter_size=7,
                             stride=2,
                             act='relu')
        conv = fluid.layers.pool2d(input=conv,
                                   pool_size=3,
                                   pool_stride=2,
                                   pool_padding=1,
                                   pool_type='max')
    offset += 1

    if layers >= 50:
        # Deep variants use bottleneck blocks.
        for block in range(len(depth)):
            with fluid.device_guard("gpu:%d" % (offset)):
                for i in range(depth[block]):
                    conv = bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1)
            offset += 1

        with fluid.device_guard("gpu:%d" % (offset)):
            pool = fluid.layers.pool2d(input=conv,
                                       pool_size=7,
                                       pool_type='avg',
                                       global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
    else:
        # Shallow variants use basic blocks.
        for block in range(len(depth)):
            with fluid.device_guard("gpu:%d" % (offset)):
                for i in range(depth[block]):
                    conv = basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        is_first=block == i == 0)
            offset += 1

        with fluid.device_guard("gpu:%d" % (offset)):
            pool = fluid.layers.pool2d(input=conv,
                                       pool_size=7,
                                       pool_type='avg',
                                       global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
    return out, offset
def create_model(args, phase, micro_bsz, dp_sharding_rank, dp_worldsize, topo):
    """Build the static ERNIE pretraining graph (MLM, optionally +SOP).

    Args:
        args: run configuration (max_seq_len, data_dir, vocab_file,
            ernie_config_file, preln, use_sop, num_pp, ...).
        phase: unused here; kept for interface compatibility.
        micro_bsz: per-device micro batch size for the data reader.
        dp_sharding_rank / dp_worldsize: data-parallel shard id and count.
        topo: parallel topology (topo.mp.size, topo.pp.size used below).

    Returns:
        dict of graph variables: data_loader, losses, checkpoints, and
        (with use_sop) sop_loss/sop_acc.
    """
    if args.use_sop:
        from reader.pretraining_ds_ernie_full_sent import make_pretrain_dataset
    else:
        from reader.pretraining_ds_mlm import make_pretrain_dataset

    # mask_label, mask_pos for mlm, labels for sop
    if args.use_sop:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos', 'labels'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0],
        }

    # Feed variables live on the first pipeline stage.
    with fluid.device_guard("gpu:0"):
        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]
    if args.use_sop:
        (src_ids, sent_ids, mask_label, mask_pos, labels) = inputs
    else:
        (src_ids, sent_ids, mask_label, mask_pos) = inputs

    train_file_list = glob.glob(args.data_dir + "/*")

    # Vocab file format: one "<token>\t<id>" pair per line.
    vocab = {}
    with open(args.vocab_file) as r:
        for line in r:
            lines = line.strip().split('\t')
            vocab[lines[0]] = int(lines[1])

    log.debug("========= worker: {} of {} ==========".format(
        dp_sharding_rank, dp_worldsize))
    data_reader = make_pretrain_dataset('pt', train_file_list, True, vocab,
                                        micro_bsz, len(vocab),
                                        args.max_seq_len, dp_sharding_rank,
                                        dp_worldsize)
    with fluid.device_guard("gpu:0"):
        data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                         capacity=70,
                                                         iterable=False)
    places = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0)))

    def data_gen():
        yield from data_reader

    data_loader.set_batch_generator(data_gen, places)

    ernie_config = ErnieConfig(args.ernie_config_file)._config_dict
    ernie_config["preln"] = args.preln
    weight_sharing = (topo.mp.size == 1 and topo.pp.size == 1
                      )  # pp mp should not do weight sharing
    with fluid.device_guard("gpu:0"):
        ernie = ErnieModel(src_ids,
                           sent_ids,
                           ernie_config,
                           weight_sharing=weight_sharing,
                           topo=topo)

    checkpoints = ernie._checkpoints
    checkpoints.pop(-1)

    # Losses are computed on the last pipeline stage.
    with fluid.device_guard(f'gpu:{args.num_pp-1}'):
        mask_lm_loss, mean_mask_lm_loss = ernie.get_lm_output(
            mask_label, mask_pos)
        total_loss = mean_mask_lm_loss
        if args.use_sop:
            sop_acc, mean_sop_loss = ernie.get_next_sentence_output(labels)
            total_loss += mean_sop_loss

    # With pipeline parallelism the loss vars must survive program split.
    if topo.pp.size > 1:
        mask_lm_loss.persistable = True
        mean_mask_lm_loss.persistable = True
        # checkpoints.extend([mask_lm_loss.name, mean_mask_lm_loss.name])
        if args.use_sop:
            mean_sop_loss.persistable = True
            sop_acc.persistable = True
            # checkpoints.extend([mean_sop_loss.name, sop_acc.name])
        total_loss.persistable = True
        # checkpoints.append(total_loss.name)

    if args.use_sop:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'sop_loss': mean_sop_loss,
            'sop_acc': sop_acc,
            'total_loss': total_loss,
            'checkpoints': checkpoints
        }
    else:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'total_loss': total_loss,
            'checkpoints': checkpoints,
        }
    return graph_vars
def device_attr2():
    """Emit one fill_constant op while the "cpu:1" device guard is active."""
    with fluid.device_guard("cpu:1"):
        out = fluid.layers.fill_constant(shape=[1],
                                         value=0.2,
                                         dtype='float32')
def net(batch_size=4, lr=0.01):
    """
    Heterogeneous wide&deep network: sparse lookups under the cpu guard,
    dense FC layers under the gpu guard.

    Args:
        batch_size(int): the size of mini-batch for training
        lr(float): learning rate of training

    Returns:
        (datas, avg_cost): feed variables and the mean cross-entropy loss.
    """
    dnn_input_dim, lr_input_dim = int(2), int(2)

    def sparse_embedding(data, vocab_size, emb_dim, attr_name):
        # Shared pattern for both towers: constant-0.01 sparse lookup table.
        return fluid.layers.embedding(
            is_distributed=False,
            input=data,
            size=[vocab_size, emb_dim],
            param_attr=fluid.ParamAttr(
                name=attr_name,
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)

    with fluid.device_guard("cpu"):
        dnn_data = fluid.layers.data(name="dnn_data",
                                     shape=[-1, 1],
                                     dtype="int64",
                                     lod_level=1,
                                     append_batch_size=False)
        lr_data = fluid.layers.data(name="lr_data",
                                    shape=[-1, 1],
                                    dtype="int64",
                                    lod_level=1,
                                    append_batch_size=False)
        label = fluid.layers.data(name="click",
                                  shape=[-1, 1],
                                  dtype="float32",
                                  lod_level=0,
                                  append_batch_size=False)
        datas = [dnn_data, lr_data, label]

        # deep tower: embedding + sum pooling
        dnn_layer_dims = [2, 1]
        dnn_out = fluid.layers.sequence_pool(
            input=sparse_embedding(dnn_data, dnn_input_dim,
                                   dnn_layer_dims[0], "deep_embedding"),
            pool_type="sum")

        # wide tower: embedding + sum pooling
        lr_pool = fluid.layers.sequence_pool(
            input=sparse_embedding(lr_data, lr_input_dim, 1,
                                   "wide_embedding"),
            pool_type="sum")

    with fluid.device_guard("gpu"):
        for idx, dim in enumerate(dnn_layer_dims[1:]):
            dnn_out = fluid.layers.fc(
                input=dnn_out,
                size=dim,
                act="relu",
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.01)),
                name='dnn-fc-%d' % idx)

        merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
        label = fluid.layers.cast(label, dtype="int64")
        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(x=cost)

    return datas, avg_cost
def net(self, args, batch_size=4, lr=0.01):
    """
    Wide&deep CTR network; dense FC layers run under the gpu device guard.

    Args:
        batch_size(int): the size of mini-batch for training
        lr(float): learning rate of training

    Returns:
        avg_cost: LoDTensor of cost.
    """
    dnn_input_dim, lr_input_dim = int(1e5), int(1e5)

    def lod_input(name, dtype, lod_level):
        return fluid.layers.data(name=name,
                                 shape=[-1, 1],
                                 dtype=dtype,
                                 lod_level=lod_level,
                                 append_batch_size=False)

    dnn_data = lod_input("dnn_data", "int64", 1)
    lr_data = lod_input("lr_data", "int64", 1)
    label = lod_input("click", "float32", 0)

    datas = [dnn_data, lr_data, label]

    if args.reader == "pyreader":
        self.reader = fluid.io.PyReader(feed_list=datas,
                                        capacity=64,
                                        iterable=False,
                                        use_double_buffer=False)

    def sparse_embedding(data, vocab_size, emb_dim, attr_name):
        # Shared pattern for both towers: constant-0.01 sparse lookup table.
        return fluid.layers.embedding(
            is_distributed=False,
            input=data,
            size=[vocab_size, emb_dim],
            param_attr=fluid.ParamAttr(
                name=attr_name,
                initializer=fluid.initializer.Constant(value=0.01)),
            is_sparse=True)

    # deep tower: embedding + sum pooling
    dnn_layer_dims = [128, 64, 32, 1]
    dnn_out = fluid.layers.sequence_pool(
        input=sparse_embedding(dnn_data, dnn_input_dim, dnn_layer_dims[0],
                               "deep_embedding"),
        pool_type="sum")

    # wide tower: embedding + sum pooling
    lr_pool = fluid.layers.sequence_pool(
        input=sparse_embedding(lr_data, lr_input_dim, 1, "wide_embedding"),
        pool_type="sum")

    with fluid.device_guard("gpu"):
        for idx, dim in enumerate(dnn_layer_dims[1:]):
            dnn_out = fluid.layers.fc(
                input=dnn_out,
                size=dim,
                act="relu",
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.01)),
                name='dnn-fc-%d' % idx)

        merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
        label = fluid.layers.cast(label, dtype="int64")
        predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        fluid.layers.Print(avg_cost, message="avg_cost")

    self.feeds = datas
    self.train_file_path = ["fake1", "fake2"]
    self.avg_cost = avg_cost
    self.predict = predict

    return avg_cost
offset += 1 with fluid.device_guard("gpu:%d" % (offset)): pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) out = fluid.layers.fc( input=pool, size=class_dim, param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv))) return out, offset with fluid.device_guard("gpu:0"): image_shape = [3, 224, 224] image = fluid.layers.data(name="feed_image", shape=image_shape, dtype="float32") label = fluid.layers.data(name="feed_label", shape=[1], dtype="int64") data_loader = fluid.io.DataLoader.from_generator(feed_list=[image, label], capacity=64, use_double_buffer=True, iterable=False) fc, offset = build_network(image) with fluid.device_guard("gpu:%d" % (offset)): out, prob = fluid.layers.softmax_with_cross_entropy(logits=fc, label=label,