def elmo_encoder(word_ids, elmo_l2_coef):
    """
    Build the bidirectional ELMo encoder and return the weighted sum of its
    layer representations.

    :param word_ids: input word id tensor
    :param elmo_l2_coef: L2 regularization coefficient for the layer weights
    """
    x_emb = layers.embedding(input=word_ids,
                             size=[vocab_size, emb_size],
                             dtype='float32',
                             is_sparse=False,
                             param_attr=fluid.ParamAttr(name='embedding_para'))
    # Reverse the sequence to feed the backward LSTM.
    x_emb_r = fluid.layers.sequence_reverse(x_emb, name=None)
    fw_hiddens, fw_hiddens_ori = encoder_wrapper(x_emb,
                                                 vocab_size,
                                                 emb_size,
                                                 para_name='fw_',
                                                 args=None)
    bw_hiddens, bw_hiddens_ori = encoder_wrapper(x_emb_r,
                                                 vocab_size,
                                                 emb_size,
                                                 para_name='bw_',
                                                 args=None)
    num_layers = len(fw_hiddens_ori)
    # Duplicate the token embedding so its width matches the concatenated
    # forward/backward hidden states.
    token_embeddings = layers.concat(input=[x_emb, x_emb], axis=1)
    token_embeddings.stop_gradient = True
    concate_embeddings = [token_embeddings]
    for index in range(num_layers):
        embedding = layers.concat(
            input=[fw_hiddens_ori[index], bw_hiddens_ori[index]], axis=1)
        embedding = dropout(embedding)
        embedding.stop_gradient = True
        concate_embeddings.append(embedding)
    weighted_emb = weight_layers(concate_embeddings, l2_coef=elmo_l2_coef)
    return weighted_emb
def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
    """
    Reshape input tensors at the last dimension to split multi-heads
    and then transpose. Specifically, transform the input tensor with shape
    [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
    with shape [bs, n_head, max_sequence_length, hidden_dim].

    Note: `cache`, `static_kv` and `gather_idx` are taken from the
    enclosing scope.
    """
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    reshaped_q = layers.reshape(x=queries,
                                shape=[0, 0, n_head, d_key],
                                inplace=True)
    # Permute the dimensions into:
    # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
    q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
    # For encoder-decoder attention in inference, insert the ops and vars
    # into the global block to use as cache among beam search.
    reshape_layer = _wrap_layer_with_block(
        layers.reshape,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.reshape
    transpose_layer = _wrap_layer_with_block(
        layers.transpose,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.transpose
    reshaped_k = reshape_layer(x=keys, shape=[0, 0, n_head, d_key], inplace=True)
    k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
    reshaped_v = reshape_layer(x=values, shape=[0, 0, n_head, d_value], inplace=True)
    v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

    if cache is not None:  # only for faster inference
        if static_kv:  # For encoder-decoder attention in inference
            cache_k, cache_v = cache["static_k"], cache["static_v"]
            # To init the static_k and static_v in cache.
            # Maybe we can use condition_op(if_else) to do these at the first
            # step in the while loop to replace these; however, it might be
            # less efficient.
            static_cache_init = _wrap_layer_with_block(
                layers.assign,
                fluid.default_main_program().current_block().parent_idx)
            static_cache_init(k, cache_k)
            static_cache_init(v, cache_v)
        else:  # For decoder self-attention in inference
            cache_k, cache_v = cache["k"], cache["v"]
        # Gather cell states corresponding to the selected parent.
        select_k = layers.gather(cache_k, index=gather_idx)
        select_v = layers.gather(cache_v, index=gather_idx)
        if not static_kv:
            # For self-attention in inference, use cache and concat time steps.
            select_k = layers.concat([select_k, k], axis=2)
            select_v = layers.concat([select_v, v], axis=2)
        # Update cell states (caches) cached in the global block.
        layers.assign(select_k, cache_k)
        layers.assign(select_v, cache_v)
        return q, select_k, select_v

    return q, k, v
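# Illustrative NumPy sketch (not from the original source) of the reshape +
# transpose that __split_heads_qkv performs on a single tensor: split the
# last dimension into (n_head, d_key) heads, then bring the head axis forward.
import numpy as np

bs, seq_len, n_head, d_key = 2, 5, 4, 8
queries = np.random.rand(bs, seq_len, n_head * d_key).astype('float32')
q = queries.reshape(bs, seq_len, n_head, d_key).transpose(0, 2, 1, 3)
assert q.shape == (bs, n_head, seq_len, d_key)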
def greedy_search_infilling(model,
                            q_ids,
                            q_sids,
                            sos_id,
                            eos_id,
                            attn_id,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    model.eval()
    _, logits, info = model(q_ids, q_sids)
    gen_ids = L.argmax(logits, -1)
    d_batch, d_seqlen = q_ids.shape
    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
    has_stopped = np.zeros([d_batch], dtype=np.bool)
    gen_seq_len = np.zeros([d_batch], dtype=np.int64)
    output_ids = []

    past_cache = info['caches']

    cls_ids = L.ones([d_batch], dtype='int64') * sos_id
    attn_ids = L.ones([d_batch], dtype='int64') * attn_id
    ids = L.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(q_ids, ids, step)
        pos_ids = D.to_variable(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                L.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        gen_ids = L.argmax(logits, -1)

        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        # Append only the first generated time step to the cache.
        cached_k = [
            L.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]
        cached_v = [
            L.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        gen_ids = gen_ids[:, 1]
        ids = L.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id).astype(np.bool)
        gen_seq_len += (1 - has_stopped.astype(np.int64))
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
def bbox_ciou(self, boxes1_x0y0x1y1, boxes2_x0y0x1y1):
    '''
    Compute CIoU = IoU - p2/c2 - av.
    :param boxes1_x0y0x1y1: (batch_size, num_priors, 4) predicted boxes as (x0, y0, x1, y1)
    :param boxes2_x0y0x1y1: (batch_size, num_priors, 4) label boxes as (x0, y0, x1, y1)
    :return: ciou
    '''
    # Convert corners to center coordinates and width/height.
    boxes1 = P.concat(
        [(boxes1_x0y0x1y1[:, :, :2] + boxes1_x0y0x1y1[:, :, 2:]) * 0.5,
         boxes1_x0y0x1y1[:, :, 2:] - boxes1_x0y0x1y1[:, :, :2]],
        axis=-1)
    boxes2 = P.concat(
        [(boxes2_x0y0x1y1[:, :, :2] + boxes2_x0y0x1y1[:, :, 2:]) * 0.5,
         boxes2_x0y0x1y1[:, :, 2:] - boxes2_x0y0x1y1[:, :, :2]],
        axis=-1)

    # Areas of the two boxes.
    boxes1_area = (boxes1_x0y0x1y1[:, :, 2] - boxes1_x0y0x1y1[:, :, 0]) * (
        boxes1_x0y0x1y1[:, :, 3] - boxes1_x0y0x1y1[:, :, 1])
    boxes2_area = (boxes2_x0y0x1y1[:, :, 2] - boxes2_x0y0x1y1[:, :, 0]) * (
        boxes2_x0y0x1y1[:, :, 3] - boxes2_x0y0x1y1[:, :, 1])

    # Top-left and bottom-right corners of the intersection rectangle.
    left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :2],
                                boxes2_x0y0x1y1[:, :, :2])
    right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, 2:],
                                   boxes2_x0y0x1y1[:, :, 2:])

    # Intersection area and IoU.
    inter_section = P.relu(right_down - left_up)
    inter_area = inter_section[:, :, 0] * inter_section[:, :, 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / union_area

    # Top-left and bottom-right corners of the smallest enclosing rectangle.
    enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :2],
                                        boxes2_x0y0x1y1[:, :, :2])
    enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, 2:],
                                           boxes2_x0y0x1y1[:, :, 2:])

    # Squared diagonal length of the enclosing rectangle.
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = P.pow(enclose_wh[:, :, 0], 2) + P.pow(enclose_wh[:, :, 1], 2)

    # Squared distance between the two box centers.
    p2 = P.pow(boxes1[:, :, 0] - boxes2[:, :, 0], 2) + P.pow(
        boxes1[:, :, 1] - boxes2[:, :, 1], 2)

    # Add the av term. The denominator boxes2[:, :, 3] may be 0, so a tiny
    # constant is added to avoid NaN.
    atan1 = P.atan(boxes1[:, :, 2] / (boxes1[:, :, 3] + 1e-9))
    atan2 = P.atan(boxes2[:, :, 2] / (boxes2[:, :, 3] + 1e-9))
    v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi ** 2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou
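# NumPy reference (added for illustration, not part of the original code) of
# the CIoU terms for a single pair of (x0, y0, x1, y1) boxes, mirroring the
# tensor version above.
import math
import numpy as np

b1, b2 = np.array([0., 0., 2., 2.]), np.array([1., 1., 3., 3.])
inter_wh = np.maximum(np.minimum(b1[2:], b2[2:]) - np.maximum(b1[:2], b2[:2]), 0.)
inter = inter_wh[0] * inter_wh[1]
union = (b1[2] - b1[0]) * (b1[3] - b1[1]) + (b2[2] - b2[0]) * (b2[3] - b2[1]) - inter
iou = inter / union
c_wh = np.maximum(b1[2:], b2[2:]) - np.minimum(b1[:2], b2[:2])
c2 = (c_wh ** 2).sum()  # squared diagonal of the enclosing box
p2 = (((b1[:2] + b1[2:]) / 2 - (b2[:2] + b2[2:]) / 2) ** 2).sum()  # squared center distance
w1, h1 = b1[2] - b1[0], b1[3] - b1[1]
w2, h2 = b2[2] - b2[0], b2[3] - b2[1]
v = 4. / math.pi ** 2 * (math.atan(w1 / h1) - math.atan(w2 / h2)) ** 2
a = v / (1 - iou + v)
ciou = iou - p2 / c2 - a * v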
def std_gen_interpolate(batch_size=8,
                        seed=None,
                        out_path='data/out',
                        levels=None,
                        interpolate_mode=0):
    default_levels = ("y;z0;z11;z12;z21;z22;z31;z32;z41;z42;z51;z52;z61;z62")
    if levels is None:
        levels = default_levels
    default_levels = default_levels.split(';')
    img_save_dir = os.path.join('/tmp', out_path + '.dir')
    os.system(f'rm -rf {img_save_dir}')
    os.system(f'mkdir {img_save_dir} -p')
    with dg.no_grad():
        model_cache.train_mode = False
        model_cache.initialized = False
        if seed is not None:
            rds.rng = np.random.RandomState(seed)
        elif rds.rng is None:
            rds.rng = np.random
        G = model_cache.G
        x_np = rds.rng.randn(batch_size, 140).astype('float32')
        y_np = rds.rng.randint(0, 1000, size=[batch_size]).astype('int64')
        x = dg.to_variable(x_np)
        y_cls = dg.to_variable(y_np)
        y_hot = layers.one_hot(layers.unsqueeze(y_cls, [1]), depth=1000)
        y_embed = G.embed_y(y_hot)
        # Append the first sample so the interpolation wraps around.
        x = layers.concat([x, x[:1]], 0)
        y_embed = layers.concat([y_embed, y_embed[:1]], 0)
        levels = levels.split(';')
        # Note: these writes rely on the frame's locals() dict persisting
        # keys that are not real local variables.
        for level in default_levels:
            if len(level) == 1:
                locals()[level] = y_embed
                locals()['_' + level] = y_embed[:1]
            if len(level) >= 2:
                idx = int(level[1]) * 20
                locals()[level] = x[:, idx:idx + 20]
                locals()['_' + level] = x[:1, idx:idx + 20]
        imgs = []
        for i in range(batch_size):
            for j in range(40):
                alpha = j / 40
                if interpolate_mode == 1:
                    # Smoothstep easing instead of linear interpolation.
                    alpha = alpha ** 2 * (3 - 2 * alpha)
                for level in levels:
                    locals()['_' + level] = (1 - alpha) * locals()[level][i:i + 1] \
                        + alpha * locals()[level][i + 1:i + 2]
                inputs = []
                for level in default_levels[1:]:
                    inputs.append(locals()['_' + level])
                img_pd = G(inputs, locals()['_' + default_levels[0]], True)
                img = np.uint8(img_pd.numpy().clip(0, 1) * 255)[0].transpose([1, 2, 0])
                imgs.append(Image.fromarray(img))
                stdout.write(f'{i*40+j+1}/{40*batch_size}\r')
                stdout.flush()
    print('')
    for i, img in enumerate(imgs):
        img.save(os.path.join(img_save_dir, str(i).zfill(5) + '.png'))
    imgs[0].save(out_path + '.gif',
                 save_all=True,
                 append_images=imgs[1:],
                 duration=40,
                 loop=0)
    out_path = out_path + '.mp4'
    os.system(f'ffmpeg -r 40 -i {img_save_dir}/%05d.png -hide_banner '
              f'-loglevel warning -nostats -c:v libx264 -crf 23 -y {out_path}')
    os.system(f'rm -rf {img_save_dir}')
def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
    """
    Reshape input tensors at the last dimension to split multi-heads
    and then transpose. Specifically, transform the input tensor with shape
    [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
    with shape [bs, n_head, max_sequence_length, hidden_dim].

    Note: `cache` and `static_kv` are taken from the enclosing scope.
    """
    # The value 0 in shape attr means copying the corresponding dimension
    # size of the input as the output dimension size.
    reshaped_q = layers.reshape(x=queries,
                                shape=[0, 0, n_head, d_key],
                                inplace=True)
    # Permute the dimensions into:
    # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
    q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
    # For encoder-decoder attention in inference, insert the ops and vars
    # into the global block to use as cache among beam search.
    reshape_layer = wrap_layer_with_block(
        layers.reshape,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.reshape
    transpose_layer = wrap_layer_with_block(
        layers.transpose,
        fluid.default_main_program().current_block().parent_idx
    ) if cache is not None and static_kv else layers.transpose
    reshaped_k = reshape_layer(x=keys, shape=[0, 0, n_head, d_key], inplace=True)
    k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
    reshaped_v = reshape_layer(x=values, shape=[0, 0, n_head, d_value], inplace=True)
    v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

    if cache is not None:  # only for faster inference
        cache_, i = cache
        if static_kv:  # For encoder-decoder attention in inference
            cache_k, cache_v = cache_["static_k"], cache_["static_v"]
            # Init static_k and static_v in the global block.
            static_cache_init = wrap_layer_with_block(
                layers.assign,
                fluid.default_main_program().current_block().parent_idx)
            static_cache_init(
                k,
                fluid.default_main_program().global_block().var(
                    "static_k_%d" % i))
            static_cache_init(
                v,
                fluid.default_main_program().global_block().var(
                    "static_v_%d" % i))
            k, v = cache_k, cache_v
        else:
            # For decoder self-attention in inference:
            # use cache and concat time steps.
            cache_k, cache_v = cache_["k"], cache_["v"]
            k = layers.concat([cache_k, k], axis=2)
            v = layers.concat([cache_v, v], axis=2)
            cache_["k"], cache_["v"] = (k, v)
    return q, k, v
def bbox_iou(boxes1, boxes2):
    '''
    Predicted boxes: boxes1 (?, grid_h, grid_w, 3, 1, 4), the network output
    (tx, ty, tw, th) post-processed into (bx, by, bw, bh).
    All ground-truth boxes in the image: boxes2 (?, 1, 1, 1, 150, 4).
    Paddle does not support the ellipsis, e.g.
    boxes1_area = boxes1[..., 2] * boxes1[..., 3],
    so the colons have to be written out in full.
    '''
    # Areas of the 3 predicted boxes in every grid cell.
    boxes1_area = boxes1[:, :, :, :, :, 2] * boxes1[:, :, :, :, :, 3]
    # Areas of all ground-truth boxes.
    boxes2_area = boxes2[:, :, :, :, :, 2] * boxes2[:, :, :, :, :, 3]

    # Convert (x, y, w, h) to (x0, y0, x1, y1).
    boxes1 = P.concat([
        boxes1[:, :, :, :, :, :2] - boxes1[:, :, :, :, :, 2:] * 0.5,
        boxes1[:, :, :, :, :, :2] + boxes1[:, :, :, :, :, 2:] * 0.5
    ], axis=-1)
    boxes2 = P.concat([
        boxes2[:, :, :, :, :, :2] - boxes2[:, :, :, :, :, 2:] * 0.5,
        boxes2[:, :, :, :, :, :2] + boxes2[:, :, :, :, :, 2:] * 0.5
    ], axis=-1)

    # The 3 predicted boxes in every grid cell are matched against all 150
    # ground truths, so left_up and right_down have
    # shape (?, grid_h, grid_w, 3, 150, 2). Unlike PyTorch and TF, both
    # boxes1 and boxes2 must be expanded to the same shape.
    expand_boxes1 = P.expand(boxes1, [1, 1, 1, 1, P.shape(boxes2)[4], 1])
    expand_boxes2 = P.expand(
        boxes2,
        [1, P.shape(boxes1)[1], P.shape(boxes1)[2], P.shape(boxes1)[3], 1, 1])
    # Top-left corners of the intersection rectangles.
    left_up = P.elementwise_max(expand_boxes1[:, :, :, :, :, :2],
                                expand_boxes2[:, :, :, :, :, :2])
    # Bottom-right corners of the intersection rectangles.
    right_down = P.elementwise_min(expand_boxes1[:, :, :, :, :, 2:],
                                   expand_boxes2[:, :, :, :, :, 2:])

    # Intersection w and h, clipped at 0 when negative;
    # shape (?, grid_h, grid_w, 3, 150, 2).
    inter_section = P.relu(right_down - left_up)
    # Intersection areas, shape (?, grid_h, grid_w, 3, 150).
    inter_area = inter_section[:, :, :, :, :, 0] * inter_section[:, :, :, :, :, 1]
    expand_boxes1_area = P.expand(boxes1_area,
                                  [1, 1, 1, 1, P.shape(boxes2)[4]])
    expand_boxes2_area = P.expand(boxes2_area, [
        1,
        P.shape(expand_boxes1_area)[1],
        P.shape(expand_boxes1_area)[2],
        P.shape(expand_boxes1_area)[3], 1
    ])

    # union_area, shape (?, grid_h, grid_w, 3, 150).
    union_area = expand_boxes1_area + expand_boxes2_area - inter_area
    # iou, shape (?, grid_h, grid_w, 3, 150).
    iou = 1.0 * inter_area / union_area

    return iou
def forward(self, input_tensor, cur_state):
    h_cur = cur_state
    x_in = concat([input_tensor, h_cur], axis=1)
    # Update and reset gates of the GRU cell.
    update = sigmoid(self.update_gate(x_in))
    reset = sigmoid(self.reset_gate(x_in))
    # Candidate state computed from the input and the reset-gated state.
    x_out = tanh(
        self.out_gate(concat([input_tensor, h_cur * reset], axis=1)))
    # Blend the previous state and the candidate with the update gate.
    h_new = h_cur * (1 - update) + x_out * update
    return h_new
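# Pure-NumPy sketch (illustrative only, not part of the original code) of the
# gating math in this GRU cell, with the learned gate layers replaced by
# fixed weight matrices W_u, W_r, W_o.
import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

d_in, d_h = 4, 3
rng = np.random.RandomState(0)
W_u, W_r, W_o = (rng.randn(d_in + d_h, d_h) for _ in range(3))
x, h = rng.randn(1, d_in), rng.randn(1, d_h)
u = sigmoid(np.concatenate([x, h], 1) @ W_u)      # update gate
r = sigmoid(np.concatenate([x, h], 1) @ W_r)      # reset gate
c = np.tanh(np.concatenate([x, h * r], 1) @ W_o)  # candidate state
h_new = h * (1 - u) + c * u
assert h_new.shape == (1, d_h)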
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  # [1, V]

    probs = L.log(L.softmax(logits))  # [B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  # [B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  # [B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  # [B*W, 1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  # [B*W, V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        # The first step only considers beam 0.
        allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]
    scores, idx = L.topk(allscore, k=beam_width)  # [B, W]
    next_beam_id = idx // vocab_size  # [B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat(
        [L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    # Gather new beam state according to the new beam ids.
    next_finished = L.reshape(L.gather_nd(state.finished, gather_idx),
                              state.finished.shape)

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
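# NumPy sketch (added for clarity) of the index arithmetic after topk over
# the flattened [beam_width * vocab_size] scores: integer division recovers
# the parent beam, modulo recovers the word id.
import numpy as np

beam_width, vocab_size = 3, 10
flat_idx = np.array([25, 7, 13])  # pretend top-3 indices from topk
next_beam_id = flat_idx // vocab_size  # -> [2, 0, 1]
next_word_id = flat_idx % vocab_size   # -> [5, 7, 3]
assert (next_beam_id * vocab_size + next_word_id == flat_idx).all()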
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    beam_size, vocab_size = logits.shape
    # As batch_size == 1 in this hub module, the first dim (bsz * beam_size)
    # equals beam_size.
    logits_np = logits.numpy()
    for i in range(beam_size):
        logits_np[i][17963] = 0  # make [UNK] prob = 0
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  # [1, V]

    probs = L.log(L.softmax(logits))  # [B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  # [B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  # [B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  # [B*W, 1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  # [B*W, V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)

    if is_first_step:
        # The first step only considers beam 0.
        allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]
    scores, idx = L.topk(allscore, k=beam_width)  # [B, W]
    next_beam_id = idx // vocab_size  # [B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat(
        [L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    # Gather new beam state according to the new beam ids.
    next_finished = L.reshape(L.gather_nd(state.finished, gather_idx),
                              state.finished.shape)

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
def get_single_direction_output(rnn_input,
                                encode_hidden,
                                unit_list,
                                mask=None,
                                direc_index=0):
    rnn = StaticRNN()
    with rnn.step():
        step_input = rnn.step_input(rnn_input)

        if mask:
            step_mask = rnn.step_input(mask)

        for i in range(num_layers):
            if init_hidden:
                pre_hidden = rnn.memory(init=init_hidden[i, direc_index])
            else:
                pre_hidden = rnn.memory(batch_ref=rnn_input,
                                        shape=[-1, hidden_size],
                                        ref_batch_dim_idx=1)
            encode_h = encode_hidden[i]
            pre_encode_hidden = layers.concat([pre_hidden, encode_h], axis=1)
            new_hidden = unit_list[i](step_input, pre_encode_hidden)

            if mask:
                new_hidden = layers.elementwise_mul(
                    new_hidden, step_mask, axis=0) - layers.elementwise_mul(
                        pre_hidden, (step_mask - 1), axis=0)
            rnn.update_memory(pre_hidden, new_hidden)
            rnn.step_output(new_hidden)

            step_input = new_hidden
            if dropout_prob is not None and dropout_prob > 0.0:
                step_input = layers.dropout(step_input,
                                            dropout_prob=dropout_prob)

        rnn.step_output(step_input)
    rnn_out = rnn()

    last_hidden_array = []
    all_hidden_array = []  # Added to collect all hidden states.
    rnn_output = rnn_out[-1]
    for i in range(num_layers):
        last_hidden = rnn_out[i]
        all_hidden_array.append(last_hidden)
        last_hidden = last_hidden[-1]
        last_hidden_array.append(last_hidden)
    all_hidden_array = layers.concat(all_hidden_array, axis=0)
    all_hidden_array = layers.reshape(
        all_hidden_array,
        shape=[num_layers, input.shape[0], -1, hidden_size])
    last_hidden_output = layers.concat(last_hidden_array, axis=0)
    last_hidden_output = layers.reshape(last_hidden_output,
                                        shape=[num_layers, -1, hidden_size])

    return rnn_output, last_hidden_output, all_hidden_array
def flow_generation(self, label, ref_labels, ref_images, prev_labels,
                    prev_images, ref_idx):
    """
    Generates flows and masks for warping reference / previous images.

    Args:
        label (NxCxHxW): Target label map.
        ref_labels (NxKxCxHxW): Reference label maps.
        ref_images (NxKx3xHxW): Reference images.
        prev_labels (NxTxCxHxW): Previous label maps.
        prev_images (NxTx3xHxW): Previous images.
        ref_idx (Nx1): Index of which image to use from the reference images.
    Returns:
        - flow (list of Nx2xHxW): Optical flows.
        - occ_mask (list of Nx1xHxW): Occlusion masks.
        - img_warp (list of Nx3xHxW): Warped reference / previous images.
        - cond_inputs (list of Nx4xHxW): Conditional inputs for SPADE combination.
    """
    # Pick an image from the reference images using ref_idx.
    ref_label, ref_image = pick_image([ref_labels, ref_images], ref_idx)
    # Only start using prev frames when enough prev frames are generated.
    has_prev = prev_labels is not None and \
        prev_labels.shape[1] == self.num_frames_G - 1

    flow, occ_mask, img_warp, cond_inputs = \
        [None] * 2, [None] * 2, [None] * 2, [None] * 2

    if self.warp_ref:
        # Generate flows / masks for warping the reference image.
        flow_ref, occ_mask_ref = self.flow_network_ref(
            label, ref_label, ref_image)
        ref_image_warp = resample(ref_image, flow_ref)
        flow[0], occ_mask[0], img_warp[0] = \
            flow_ref, occ_mask_ref, ref_image_warp[:, :3]
        # Concat the warped image and the occlusion mask to form the
        # conditional input.
        cond_inputs[0] = L.concat([img_warp[0], occ_mask[0]], axis=1)

    if self.temporal_initialized and has_prev:
        # Generate flows / masks for warping the previous image.
        b, t, c, h, w = prev_labels.shape
        prev_labels_concat = L.reshape(prev_labels, (b, -1, h, w))
        prev_images_concat = L.reshape(prev_images, (b, -1, h, w))
        flow_prev, occ_mask_prev = self.flow_network_temp(
            label, prev_labels_concat, prev_images_concat)
        img_prev_warp = resample(prev_images[:, -1], flow_prev)
        flow[1], occ_mask[1], img_warp[1] = \
            flow_prev, occ_mask_prev, img_prev_warp
        cond_inputs[1] = L.concat([img_warp[1], occ_mask[1]], axis=1)

    return flow, occ_mask, img_warp, cond_inputs
def __call__(self, x):
    # Spatial pyramid pooling: the input plus three max-pooled copies are
    # concatenated along the channel axis, in ascending or descending order.
    x_1 = x
    x_2 = self.max_pool1(x)
    x_3 = self.max_pool2(x)
    x_4 = self.max_pool3(x)
    if self.seq == 'desc':
        out = L.concat([x_4, x_3, x_2, x_1], axis=1)
    else:
        out = L.concat([x_1, x_2, x_3, x_4], axis=1)
    return out
def _build_distribution(self, enc_final_state=None):
    enc_hidden = [
        layers.concat(state, axis=-1) for state in enc_final_state
    ]
    enc_hidden = layers.concat(enc_hidden, axis=-1)
    # Project the encoder state to the mean and log-variance of the latent
    # Gaussian, then split the two halves.
    z_mean_log_var = layers.fc(input=enc_hidden,
                               size=self.latent_size * 2,
                               name='fc_dist')
    z_mean, z_log_var = layers.split(z_mean_log_var, 2, -1)
    return z_mean, z_log_var
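# Hedged NumPy sketch of how the (z_mean, z_log_var) pair returned above is
# typically consumed downstream (the reparameterization trick); `eps` and the
# sampling step are assumptions for illustration, not part of this class.
import numpy as np

z_mean = np.zeros((2, 8), dtype='float32')
z_log_var = np.zeros((2, 8), dtype='float32')
eps = np.random.randn(2, 8).astype('float32')
z = z_mean + np.exp(0.5 * z_log_var) * eps  # sample from N(mean, var)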
def build_model(self):
    node_features = self.graph_wrapper.node_feat["feat"]

    output = self.gcn(gw=self.graph_wrapper,
                      feature=node_features,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_1")
    output1 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_2")
    output2 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_3")

    # Concatenate the outputs of all three GCN layers.
    output = L.concat(input=[output1, output2, output], axis=-1)

    output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                    feature=output,
                                    ratio=self.pooling_ratio,
                                    graph_id=self.graph_id,
                                    dataset=self.args.dataset_name,
                                    name="sag_pool_1")
    output = L.lod_reset(output, self.graph_wrapper.graph_lod)
    cat1 = L.sequence_pool(output, "sum")
    ratio_length = L.cast(ratio_length, dtype="float32")
    cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
    cat2 = L.sequence_pool(output, "max")
    # Graph readout: concat max pooling and ratio-normalized sum pooling.
    output = L.concat(input=[cat2, cat1], axis=-1)

    output = L.fc(output, size=self.hidden_size, act="relu")
    output = L.dropout(output, dropout_prob=self.dropout_ratio)
    output = L.fc(output, size=self.hidden_size // 2, act="relu")
    output = L.fc(output,
                  size=self.num_classes,
                  act=None,
                  param_attr=fluid.ParamAttr(name="final_fc"))

    self.labels = L.cast(self.labels, dtype="float32")
    loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
    self.loss = L.mean(loss)
    pred = L.sigmoid(output)
    self.pred = L.argmax(x=pred, axis=-1)
    correct = L.equal(self.pred, self.labels_1dim)
    correct = L.cast(correct, dtype="int32")
    self.correct = L.reduce_sum(correct)
def forward(self):
    """ forward """
    src, dst = L.read_file(self.pyreader)

    if self.is_sparse:
        # Sparse mode uses 2-D input.
        src = L.reshape(src, [-1, 1])
        dst = L.reshape(dst, [-1, 1])

    src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)
    dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    if self.is_sparse:
        src_embed = L.reshape(
            src_embed, [-1, 1, self.num_featuers, self.hidden_size])
        dst_embed = L.reshape(
            dst_embed,
            [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

    src_embed = L.reduce_mean(src_embed, 2)
    dst_embed = L.reduce_mean(dst_embed, 2)

    # [batch_size, 1, neg_num + 1]
    logits = L.matmul(src_embed, dst_embed, transpose_y=True)

    # One positive followed by neg_num negatives per example.
    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                "float32", 1)
    neg_label = L.fill_constant_batch_size_like(logits,
                                                [-1, 1, self.neg_num],
                                                "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    # Weight the single positive term so it balances the negatives.
    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(logits,
                                                 [-1, 1, self.neg_num],
                                                 "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)

    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
def forward(self, input):
    x = self.DownBlock(input)

    # Class-activation branch from global average pooling.
    gap = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
    gap_ = reshape(x=gap, shape=(x.shape[0], -1))
    gap_logit = self.gap_fc(gap_)
    gap_weight = self.gap_fc.parameters()[0]
    gap_weight = transpose(gap_weight, perm=[1, 0])
    gap_weight = unsqueeze(gap_weight, axes=2)
    gap_weight = unsqueeze(gap_weight, axes=3)
    gap = x * gap_weight

    # Class-activation branch from global max pooling.
    gmp = adaptive_pool2d(x, pool_size=[1, 1], pool_type='max')
    gmp_ = reshape(x=gmp, shape=(x.shape[0], -1))
    gmp_logit = self.gmp_fc(gmp_)
    gmp_weight = self.gmp_fc.parameters()[0]
    gmp_weight = transpose(gmp_weight, perm=[1, 0])
    gmp_weight = unsqueeze(gmp_weight, axes=2)
    gmp_weight = unsqueeze(gmp_weight, axes=3)
    gmp = x * gmp_weight

    cam_logit = concat(input=[gap_logit, gmp_logit], axis=1)
    x = concat(input=[gap, gmp], axis=1)
    x = self.relu(self.conv1x1(x))

    heatmap = reduce_sum(x, dim=1, keep_dim=True)

    if self.light:
        x_ = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
        x_ = reshape(x=x_, shape=(x_.shape[0], -1))
        x_ = self.FC(x_)
    else:
        x_ = reshape(x, shape=(x.shape[0], -1))
        x_ = self.FC(x_)
    gamma, beta = self.gamma(x_), self.beta(x_)

    for i in range(self.n_blocks):
        x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
    out = self.UpBlock2(x)

    return out, cam_logit, heatmap
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    attn_bias = L.reshape(
        L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
    # a @ (1/a)^T >= 1 yields the lower-triangular causal mask.
    decoder_bias = L.cast(
        (L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
        'float32')  # [1, decoderlen, decoderlen]
    encoder_bias = L.unsqueeze(
        L.cast(L.ones_like(encoder_inputs), 'float32'),
        [1])  # [bsz, 1, encoderlen]
    encoder_bias = L.expand(
        encoder_bias, [1, decoder_seqlen, 1])  # [bsz, decoderlen, encoderlen]
    decoder_bias = L.expand(
        decoder_bias, [decoder_bsz, 1, 1])  # [bsz, decoderlen, decoderlen]
    if step > 0:
        bias = L.concat([
            encoder_bias,
            L.ones([decoder_bsz, decoder_seqlen, step], 'float32'),
            decoder_bias
        ], -1)
    else:
        bias = L.concat([encoder_bias, decoder_bias], -1)
    return bias
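# A small NumPy check (added for illustration) of the outer-product trick in
# gen_bias: with a = [1, 2, ..., n] as a column vector, a @ (1/a)^T >= 1 is
# exactly the lower-triangular causal mask.
import numpy as np

n = 4
a = (np.arange(n, dtype='float32') + 1).reshape(-1, 1)
mask = (a @ (1. / a).T >= 1.).astype('float32')
assert (mask == np.tril(np.ones((n, n), dtype='float32'))).all()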
def rotation_z(theta):
    """
    :param theta: must be a scalar, shape [1], 'float32'
    :return: the single-qubit Rz(theta) gate as a ComplexVariable
    """
    cos_value = cos(theta / 2)
    sin_value = sin(theta / 2)
    zero_pd = pp_zeros([1], "float32")
    # Rz(theta) = [[exp(-i*theta/2), 0], [0, exp(i*theta/2)]]
    rz_re = concat([cos_value, zero_pd, zero_pd, cos_value], axis=0)
    rz_im = concat([-sin_value, zero_pd, zero_pd, sin_value], axis=0)

    return ComplexVariable(reshape(rz_re, [2, 2]), reshape(rz_im, [2, 2]))
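# NumPy cross-check (illustrative, not from the original source) of the
# layout rotation_z builds: the real part rz_re and the imaginary part rz_im
# together give Rz(theta) = diag(exp(-i*theta/2), exp(i*theta/2)).
import numpy as np

theta = 0.3
rz = np.array([[np.exp(-1j * theta / 2), 0.], [0., np.exp(1j * theta / 2)]])
rz_re = np.array([np.cos(theta / 2), 0., 0., np.cos(theta / 2)]).reshape(2, 2)
rz_im = np.array([-np.sin(theta / 2), 0., 0., np.sin(theta / 2)]).reshape(2, 2)
assert np.allclose(rz, rz_re + 1j * rz_im)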
def _ranking(self, inputs, predictions):
    """ Reranking generated responses. """
    src_token = inputs["src_token"]
    src_mask = inputs["src_mask"]
    src_pos = inputs["src_pos"]
    src_type = inputs["src_type"]
    src_turn = inputs["src_turn"]
    src_embed = self.embedder(src_token, src_pos, src_type, src_turn)

    batch_size, num_latent, tgt_seq_len = predictions.shape

    # shape: [batch_size, num_latent, seq_len, 1]
    preds_token = F.unsqueeze(predictions, [3])
    preds_mask = F.not_equal(preds_token, self.padding_idx, "int64")
    preds_pos = layers.range(0, tgt_seq_len, 1, dtype="float32")
    preds_pos = F.unsqueeze(preds_pos, [0, 0, 1])
    preds_pos = layers.expand(preds_pos, [batch_size, num_latent, 1, 1])
    preds_pos = layers.cast(preds_pos, "int64")
    preds_type = layers.zeros_like(preds_token)
    preds_turn = layers.zeros_like(preds_token)

    scores = []
    for i in range(num_latent):
        pred_token = preds_token[:, i]
        pred_mask = preds_mask[:, i]
        pred_pos = preds_pos[:, i]
        pred_type = preds_type[:, i]
        pred_turn = preds_turn[:, i]

        input_mask = layers.concat([src_mask, pred_mask], axis=1)
        input_mask.stop_gradient = True
        pred_embed = self.embedder(pred_token, pred_pos, pred_type,
                                   pred_turn)
        embed = layers.concat([src_embed, pred_embed], axis=1)
        embed = self.embed_layer_norm(embed)

        mask_embed = self.mask_embed
        mask_embed = layers.expand(mask_embed, [batch_size, 1, 1])
        mask_embed = self.embed_layer_norm(mask_embed)
        out = layers.concat([mask_embed, embed], axis=1)
        mask = self._create_mask(input_mask, append_head=True)

        for layer in self.layers:
            out = layer(out, mask, None)

        mask_embed = out[:, 0]
        score = self.discriminator(mask_embed)
        scores.append(score[:, 0])
    scores = layers.stack(scores, axis=1)
    return scores
def _attn_forward(self,
                  queries,
                  keys,
                  values,
                  attn_bias,
                  past_cache,
                  head_mask=None):
    assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3

    q = self.q(queries)
    k = self.k(keys)
    v = self.v(values)

    cache = (k, v)
    if past_cache is not None:
        cached_k, cached_v = past_cache
        k = L.concat([cached_k, k], 1)
        v = L.concat([cached_v, v], 1)

    if hasattr(self.q, 'fn') and \
            self.q.fn.cur_config['expand_ratio'] is not None:
        n_head = int(self.n_head * self.q.fn.cur_config['expand_ratio'])
    else:
        n_head = self.n_head

    q = L.transpose(L.reshape(q, [0, 0, n_head, q.shape[-1] // n_head]),
                    [0, 2, 1, 3])  # [batch, head, seq, dim]
    k = L.transpose(L.reshape(k, [0, 0, n_head, k.shape[-1] // n_head]),
                    [0, 2, 1, 3])  # [batch, head, seq, dim]
    v = L.transpose(L.reshape(v, [0, 0, n_head, v.shape[-1] // n_head]),
                    [0, 2, 1, 3])  # [batch, head, seq, dim]

    q = L.scale(q, scale=self.d_key ** -0.5)
    score = L.matmul(q, k, transpose_y=True)
    if attn_bias is not None:
        score += attn_bias
    score = L.softmax(score, use_cudnn=True)
    score = self.dropout(score)
    if head_mask is not None:
        score = score * head_mask

    out = L.matmul(score, v)
    out = L.transpose(out, [0, 2, 1, 3])
    out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]])
    out = self.o(out)
    return out, cache
def get_l2_norm_pow(params_grads, sum_dtype=None):
    sum_square_list = []
    sum_square_list_fp16 = []
    sum_square_list_fp32 = []
    for p, g in params_grads:
        if g is None:
            continue
        if getattr(p, 'need_clip', True) is False:
            continue
        merge_grad = g
        if g.type == core.VarDesc.VarType.SELECTED_ROWS:
            merge_grad = layers.merge_selected_rows(g)
            merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
        sum_square = _squared_l2_norm(merge_grad)
        if sum_square.dtype == core.VarDesc.VarType.FP16:
            sum_square_list_fp16.append(sum_square)
        elif sum_square.dtype == core.VarDesc.VarType.FP32:
            sum_square_list_fp32.append(sum_square)
        else:
            sum_square_list.append(sum_square)

    # All parameters have been filtered out.
    if len(sum_square_list) + len(sum_square_list_fp16) + len(
            sum_square_list_fp32) == 0:
        return None, None

    assert sum_dtype in ["float64", "float32", None], \
        "sum's type must be float64 / float32 / None"
    if sum_dtype != "float64":
        sum_dtype = 'float64' if len(sum_square_list) > 0 else "float32"

    # Accumulate the squared norms per dtype group, then sum the groups.
    global_norm_var = []
    if len(sum_square_list_fp16) > 0:
        global_norm_var_fp16 = layers.concat(sum_square_list_fp16)
        global_norm_var_fp16 = layers.reduce_sum(global_norm_var_fp16)
        global_norm_var.append(global_norm_var_fp16.astype(sum_dtype))
    if len(sum_square_list_fp32) > 0:
        global_norm_var_fp32 = layers.concat(sum_square_list_fp32)
        global_norm_var_fp32 = layers.reduce_sum(global_norm_var_fp32)
        if sum_dtype == 'float32':
            global_norm_var.append(global_norm_var_fp32)
        else:
            global_norm_var.append(global_norm_var_fp32.astype(sum_dtype))
    if len(sum_square_list) > 0:
        global_norm_var_fp64 = layers.concat(sum_square_list)
        global_norm_var_fp64 = layers.reduce_sum(global_norm_var_fp64)
        global_norm_var.append(global_norm_var_fp64)
    global_norm_var = layers.concat(global_norm_var)
    global_norm_var = layers.reduce_sum(global_norm_var)
    return global_norm_var, sum_dtype
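# A minimal NumPy sketch (illustrative, not Paddle code) of what
# get_l2_norm_pow accumulates: the squared L2 norm of every gradient is
# summed across groups, yielding the squared global norm.
import numpy as np

grads = [np.random.rand(3, 4), np.random.rand(10)]
global_norm_pow = sum(float(np.sum(g * g)) for g in grads)
global_norm = np.sqrt(global_norm_pow)
assert np.isclose(global_norm,
                  np.linalg.norm(np.concatenate([g.ravel() for g in grads])))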
def forward(self, x):
    # Each stage consumes the previous stage's output concatenated with the
    # shared backbone features.
    features = self.feature(x)
    x1 = self.stage1(features)
    x2 = L.concat([x1, features], 1)
    x2 = self.stage2(x2)
    x3 = L.concat([x2, features], 1)
    x3 = self.stage3(x3)
    x4 = L.concat([x3, features], 1)
    x4 = self.stage4(x4)
    x5 = L.concat([x4, features], 1)
    x5 = self.stage5(x5)
    x6 = L.concat([x5, features], 1)
    x6 = self.stage6(x6)
    return [x1, x2, x3, x4, x5, x6]
def forward(self, gw):
    x = self._atom_encoder(gw)
    patch_repr = []
    for i in range(self.num_layers):
        e = self._bond_encoder(gw, name='l%d' % i)
        x = gin_layer(gw, x, e, 'gin_%s' % i)
        x = L.batch_norm(
            x, param_attr=F.ParamAttr(name='batchnorm_%s' % i))
        patch_repr.append(x)  # $h_i^{(k)}$

    patch_summary = L.concat(patch_repr, axis=1)  # $h_{\phi}^i$
    patch_pool = [
        pgl.layers.graph_pooling(gw, x, 'sum') for x in patch_repr
    ]
    global_repr = L.concat(patch_pool, axis=1)
    return global_repr, patch_summary
def _decode(self, x, y, w, h, anchors, stride, scale_x_y, eps, is_gt=False):
    conv_shape = x.shape  # (8, 13, 13, 3)
    batch_size = conv_shape[0]
    n_grid = conv_shape[1]
    anchor_per_scale = conv_shape[3]

    _x = L.unsqueeze(x, 4)
    _y = L.unsqueeze(y, 4)
    conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
    _w = L.unsqueeze(w, 4)
    _h = L.unsqueeze(h, 4)
    conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

    # Per-cell grid offsets used to turn relative predictions into absolute
    # grid coordinates.
    rows = L.range(0, n_grid, 1, 'float32')
    cols = L.range(0, n_grid, 1, 'float32')
    rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
    cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
    offset = L.concat([rows, cols], axis=-1)
    offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    if is_gt:
        decode_xy = (conv_raw_dxdy + offset) / n_grid
    else:
        if abs(scale_x_y - 1.0) < eps:
            decode_xy = L.sigmoid(conv_raw_dxdy)
            decode_xy = (decode_xy + offset) / n_grid
        else:
            # Grid Sensitive
            decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) \
                - 0.5 * (scale_x_y - 1.0)
            decode_xy = (decode_xy + offset) / n_grid
    anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
    decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
    decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
    if is_gt:
        decode_xywh.stop_gradient = True

    return decode_xywh  # (8, 13, 13, 3, 4)
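# Hypothetical NumPy sketch of the grid-offset tensor built in _decode:
# every cell (i, j) gets its own (col, row) offset so that sigmoid outputs
# in [0, 1] can be shifted to absolute grid coordinates.
import numpy as np

n_grid = 4
rows = np.tile(np.arange(n_grid, dtype='float32').reshape(1, -1, 1), (n_grid, 1, 1))
cols = np.tile(np.arange(n_grid, dtype='float32').reshape(-1, 1, 1), (1, n_grid, 1))
offset = np.concatenate([rows, cols], axis=-1)  # (n_grid, n_grid, 2)
assert (offset[2, 3] == np.array([3., 2.])).all()  # cell at row 2, col 3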
def get_prediction(self, body_feats, im_size):
    """
    Get prediction result of the YOLOv3 network.

    Args:
        body_feats (list): list of Variables, output of backbone stages
        im_size (Variable): size ([h, w]) of each image

    Returns:
        pred (Variable): shape = [bs, keep_top_k, 6]
    """
    # outputs holds the heads for the large, medium and small
    # receptive fields.
    outputs = self._get_outputs(body_feats)

    boxes = []
    scores = []
    for i, output in enumerate(outputs):
        if self.iou_aware:
            output = get_iou_aware_score(output,
                                         len(self.anchor_masks[i]),
                                         self.num_classes,
                                         self.iou_aware_factor)
        box, score = fluid.layers.yolo_box(
            x=output,
            img_size=im_size,
            anchors=self.mask_anchors[i],
            class_num=self.num_classes,
            conf_thresh=self.nms_cfg['score_threshold'],
            downsample_ratio=self.downsample[i],
            name="yolo_box" + str(i),
            clip_bbox=self.clip_bbox,
            scale_x_y=self.scale_x_y)
        boxes.append(box)
        scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

    yolo_boxes = L.concat(boxes, axis=1)
    yolo_scores = L.concat(scores, axis=2)

    # NMS
    nms_cfg = copy.deepcopy(self.nms_cfg)
    nms_type = nms_cfg.pop('nms_type')
    batch_size = 1
    if nms_type == 'matrix_nms':
        pred = fluid.layers.matrix_nms(yolo_boxes,
                                       yolo_scores,
                                       background_label=-1,
                                       **nms_cfg)
    elif nms_type == 'multiclass_nms':
        pred = fluid.layers.multiclass_nms(yolo_boxes,
                                           yolo_scores,
                                           background_label=-1,
                                           **nms_cfg)
    return pred
def test_nce(self):
    window_size = 5
    words = []
    for i in range(window_size):
        words.append(
            layers.data(name='word_{0}'.format(i), shape=[1], dtype='int64'))

    dict_size = 10000
    label_word = int(window_size // 2) + 1

    embs = []
    for i in range(window_size):
        if i == label_word:
            continue

        emb = layers.embedding(input=words[i],
                               size=[dict_size, 32],
                               param_attr='emb.w',
                               is_sparse=True)
        embs.append(emb)

    embs = layers.concat(input=embs, axis=1)
    loss = layers.nce(input=embs,
                      label=words[label_word],
                      num_total_classes=dict_size,
                      param_attr='nce.w',
                      bias_attr='nce.b')
    avg_loss = layers.mean(loss)
    self.assertIsNotNone(avg_loss)
    print(str(default_main_program()))
def loss_boxes(self, outputs, targets, indices, num_boxes):
    """
    Compute the losses related to the bounding boxes: the L1 regression loss
    and the GIoU loss. The targets dicts must contain the key "boxes"
    holding a tensor of dim [nb_target_boxes, 4]. The target boxes are
    expected in format (center_x, center_y, w, h), normalized by the image
    size.
    """
    assert "pred_boxes" in outputs
    idx = self._get_src_permutation_idx(indices)
    src_boxes = outputs["pred_boxes"].numpy()[
        idx[0].numpy(), idx[1].numpy(), :]  # [num_objects, 4]
    src_boxes = dg.to_variable(src_boxes)
    target_boxes = [
        t["boxes"].numpy()[i.numpy()] for t, (_, i) in zip(targets, indices)
    ]
    target_boxes = [dg.to_variable(t) for t in target_boxes]
    target_boxes = L.concat(target_boxes, 0).astype("float32")  # [num_objects, 4]

    loss_bbox = F.loss.l1_loss(src_boxes, target_boxes, reduction="sum")

    losses = {}
    losses["loss_bbox"] = loss_bbox / num_boxes

    num_boxes = src_boxes.shape[0]
    # Mask out non-diagonal elements so only matched pairs contribute.
    mask = T.creation.diag(dg.to_variable(np.ones(num_boxes)))
    loss_giou = (1 - box_ops.generalied_box_iou(
        box_ops.box_cxcywh_to_xyxy(src_boxes),
        box_ops.box_cxcywh_to_xyxy(target_boxes))) * mask
    losses["loss_giou"] = L.reduce_sum(loss_giou) / num_boxes

    return losses
def distributed_embedding(input,
                          dict_size,
                          hidden_size,
                          initializer,
                          name,
                          num_part=16,
                          is_sparse=False,
                          learning_rate=1.0):
    # Split the hidden dimension into roughly equal parts, one embedding
    # table per part; the last part absorbs the remainder.
    _part_size = hidden_size // num_part
    if hidden_size % num_part != 0:
        _part_size += 1

    output_embedding = []
    p_num = 0
    while hidden_size > 0:
        _part_size = min(_part_size, hidden_size)
        hidden_size -= _part_size
        print("part", p_num, "size=", (dict_size, _part_size))
        part_embedding = L.embedding(input=input,
                                     size=(dict_size, int(_part_size)),
                                     is_sparse=is_sparse,
                                     is_distributed=False,
                                     param_attr=F.ParamAttr(
                                         name=name + '_part%s' % p_num,
                                         initializer=initializer,
                                         learning_rate=learning_rate))
        p_num += 1
        output_embedding.append(part_embedding)
    return L.concat(output_embedding, -1)
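# Plain-Python sketch (added for illustration) of how distributed_embedding
# partitions the hidden dimension: ceil(hidden_size / num_part) columns per
# slice, with the last slice taking the remainder.
hidden_size, num_part = 100, 16
part = hidden_size // num_part + (1 if hidden_size % num_part else 0)  # 7
sizes, remaining = [], hidden_size
while remaining > 0:
    cur = min(part, remaining)
    remaining -= cur
    sizes.append(cur)
assert sum(sizes) == hidden_size and sizes == [7] * 14 + [2]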
def test_nce(self):
    window_size = 5
    words = []
    for i in range(window_size):
        words.append(
            layers.data(name='word_{0}'.format(i), shape=[1], dtype='int64'))

    dict_size = 10000
    label_word = int(window_size / 2) + 1

    embs = []
    for i in range(window_size):
        if i == label_word:
            continue

        emb = layers.embedding(input=words[i],
                               size=[dict_size, 32],
                               param_attr='emb.w',
                               is_sparse=True)
        embs.append(emb)

    embs = layers.concat(input=embs, axis=1)
    loss = layers.nce(input=embs,
                      label=words[label_word],
                      num_total_classes=dict_size,
                      param_attr='nce.w',
                      bias_attr='nce.b')
    avg_loss = layers.mean(loss)
    self.assertIsNotNone(avg_loss)
    print(str(default_main_program()))
def get_activations(data_loader,
                    key_real,
                    key_fake,
                    generator=None,
                    sample_size=None,
                    preprocess=None):
    inception = build_inception()
    inception.eval()
    batch_y = []
    for it, data in enumerate(data_loader.batch_reader(2)()):
        if preprocess is not None:
            data = preprocess(data)
        if generator is None:
            images = data[key_real]
        else:
            net_G_output = generator(data)
            images = net_G_output[key_fake]
        # Clamp the image for models that do not bound the output to
        # [-1, 1]. For models that employ tanh, this has no effect.
        # images = L.clip(images, -1, 1)
        images = apply_image_net_normalization(images)
        images = nn.functional.interpolate(images,
                                           size=(299, 299),
                                           mode='bilinear',
                                           align_corners=True)
        y = inception(images)
        batch_y += [y]
    batch_y = L.concat(batch_y).numpy()
    if sample_size is not None:
        batch_y = batch_y[:sample_size]
    return batch_y
def get_usr_combined_features():
    # FIXME(dzh): the old API integer_value(10) may have a range check;
    # currently we don't have a user-configured check.
    USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1

    uid = layers.data(name='user_id', shape=[1], dtype='int64')

    usr_emb = layers.embedding(input=uid,
                               dtype='float32',
                               size=[USR_DICT_SIZE, 32],
                               param_attr='user_table',
                               is_sparse=IS_SPARSE)

    usr_fc = layers.fc(input=usr_emb, size=32)

    USR_GENDER_DICT_SIZE = 2

    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')

    usr_gender_emb = layers.embedding(input=usr_gender_id,
                                      size=[USR_GENDER_DICT_SIZE, 16],
                                      param_attr='gender_table',
                                      is_sparse=IS_SPARSE)

    usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)

    USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")

    usr_age_emb = layers.embedding(input=usr_age_id,
                                   size=[USR_AGE_DICT_SIZE, 16],
                                   is_sparse=IS_SPARSE,
                                   param_attr='age_table')

    usr_age_fc = layers.fc(input=usr_age_emb, size=16)

    USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")

    usr_job_emb = layers.embedding(input=usr_job_id,
                                   size=[USR_JOB_DICT_SIZE, 16],
                                   param_attr='job_table',
                                   is_sparse=IS_SPARSE)

    usr_job_fc = layers.fc(input=usr_job_emb, size=16)

    concat_embed = layers.concat(
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)

    usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return usr_combined_features
def get_mov_combined_features():
    MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1

    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')

    mov_emb = layers.embedding(input=mov_id,
                               dtype='float32',
                               size=[MOV_DICT_SIZE, 32],
                               param_attr='movie_table',
                               is_sparse=IS_SPARSE)

    mov_fc = layers.fc(input=mov_emb, size=32)

    CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())

    category_id = layers.data(name='category_id',
                              shape=[1],
                              dtype='int64',
                              lod_level=1)

    mov_categories_emb = layers.embedding(input=category_id,
                                          size=[CATEGORY_DICT_SIZE, 32],
                                          is_sparse=IS_SPARSE)

    mov_categories_hidden = layers.sequence_pool(input=mov_categories_emb,
                                                 pool_type="sum")

    MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())

    mov_title_id = layers.data(name='movie_title',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)

    mov_title_emb = layers.embedding(input=mov_title_id,
                                     size=[MOV_TITLE_DICT_SIZE, 32],
                                     is_sparse=IS_SPARSE)

    mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb,
                                             num_filters=32,
                                             filter_size=3,
                                             act="tanh",
                                             pool_type="sum")

    concat_embed = layers.concat(
        input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)

    # FIXME(dzh): need tanh operator
    mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")

    return mov_combined_features
def test_word_embedding(self):
    program = Program()
    with program_guard(program, startup_program=Program()):
        dict_size = 10000
        embed_size = 32
        first_word = layers.data(name='firstw', shape=[1], dtype='int64')
        second_word = layers.data(name='secondw', shape=[1], dtype='int64')
        third_word = layers.data(name='thirdw', shape=[1], dtype='int64')
        forth_word = layers.data(name='forthw', shape=[1], dtype='int64')
        next_word = layers.data(name='nextw', shape=[1], dtype='int64')

        embed_first = layers.embedding(input=first_word,
                                       size=[dict_size, embed_size],
                                       dtype='float32',
                                       param_attr='shared_w')
        embed_second = layers.embedding(input=second_word,
                                        size=[dict_size, embed_size],
                                        dtype='float32',
                                        param_attr='shared_w')
        embed_third = layers.embedding(input=third_word,
                                       size=[dict_size, embed_size],
                                       dtype='float32',
                                       param_attr='shared_w')
        embed_forth = layers.embedding(input=forth_word,
                                       size=[dict_size, embed_size],
                                       dtype='float32',
                                       param_attr='shared_w')

        concat_embed = layers.concat(
            input=[embed_first, embed_second, embed_third, embed_forth],
            axis=1)

        hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid')
        predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax')
        cost = layers.cross_entropy(input=predict_word, label=next_word)
        avg_cost = layers.mean(cost)
        self.assertIsNotNone(avg_cost)

    print(str(program))