def decode(conv_output, anchors, stride, num_class, conf_thresh):
    conv_shape = P.shape(conv_output)
    batch_size = conv_shape[0]
    n_grid = conv_shape[1]
    anchor_per_scale = len(anchors)
    conv_output = P.reshape(
        conv_output,
        (batch_size, n_grid, n_grid, anchor_per_scale, 5 + num_class))
    conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
    conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
    conv_raw_conf = conv_output[:, :, :, :, 4:5]
    conv_raw_prob = conv_output[:, :, :, :, 5:]

    rows = P.range(0, n_grid, 1, 'float32')
    cols = P.range(0, n_grid, 1, 'float32')
    rows = P.expand(P.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
    cols = P.expand(P.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
    offset = P.concat([rows, cols], axis=-1)
    offset = P.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = P.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    pred_xy = (P.sigmoid(conv_raw_dxdy) + offset) * stride
    pred_wh = (P.exp(conv_raw_dwdh) * P.assign(anchors))
    pred_xywh = P.concat([pred_xy, pred_wh], axis=-1)
    pred_conf = P.sigmoid(conv_raw_conf)
    pred_prob = P.sigmoid(conv_raw_prob)

    pred_xywh = P.reshape(pred_xywh, (batch_size, -1, 4))  # [-1, -1, 4]
    pred_conf = P.reshape(pred_conf, (batch_size, -1, 1))  # [-1, -1, 1]
    pred_prob = P.reshape(pred_prob, (batch_size, -1, num_class))  # [-1, -1, 80]
    return pred_xywh, pred_conf, pred_prob
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size, para_name, args):
    """Util function for pointer network"""

    def linear(inputs, para_name, args):
        return layers.fc(input=inputs,
                         size=size,
                         param_attr=fluid.ParamAttr(name=para_name + '_w'),
                         bias_attr=fluid.ParamAttr(name=para_name + '_b'))

    input_cat = layers.concat([hidden_t_prev, x_t], axis=1)
    forget_gate = layers.sigmoid(x=linear(input_cat, para_name + '_lstm_f', args))
    input_gate = layers.sigmoid(x=linear(input_cat, para_name + '_lstm_i', args))
    output_gate = layers.sigmoid(x=linear(input_cat, para_name + '_lstm_o', args))
    cell_tilde = layers.tanh(x=linear(input_cat, para_name + '_lstm_c', args))

    cell_t = layers.sums(input=[
        layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
        layers.elementwise_mul(x=input_gate, y=cell_tilde)
    ])

    hidden_t = layers.elementwise_mul(x=output_gate, y=layers.tanh(x=cell_t))

    return hidden_t, cell_t
def forward(self, input_tensor, cur_state):
    h_cur = cur_state
    x_in = concat([input_tensor, h_cur], axis=1)

    update = sigmoid(self.update_gate(x_in))
    reset = sigmoid(self.reset_gate(x_in))
    x_out = tanh(
        self.out_gate(concat([input_tensor, h_cur * reset], axis=1)))

    h_new = h_cur * (1 - update) + x_out * update
    return h_new
def build_graph(self, mode='train'):
    self._build_data()
    pred = self._build_net()
    if mode == 'train':
        loss, no_grad_set = self._compute_loss(pred)
        pred = layers.sigmoid(pred)
        acc = self._compute_acc(pred)
        return loss, acc, pred, no_grad_set
    else:
        pred = layers.sigmoid(pred)
        return pred
def forward(self, q, k, v, lengths, speaker_embed, start_index,
            force_monotonic=False, prev_coeffs=None, window=None):
    # add position encoding as an inductive bias
    if self.has_bias:  # multi-speaker model
        omega_q = 2 * F.sigmoid(
            F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
        omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
            self.k_pos_affine(speaker_embed), axes=[-1]))
    else:  # single-speaker case
        batch_size = q.shape[0]
        omega_q = F.ones((batch_size, ), dtype="float32")
        omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
    q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
    k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

    q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
    activations = F.matmul(q, k, transpose_y=True)
    activations /= np.sqrt(self.attention_dim)

    if self.training:
        # mask the <pad> parts from the encoder
        mask = F.sequence_mask(lengths, dtype="float32")
        attn_bias = F.scale(1. - mask, -1000)
        activations += F.unsqueeze(attn_bias, [1])
    elif force_monotonic:
        assert window is not None
        backward_step, forward_step = window
        T_enc = k.shape[1]
        batch_size, T_dec, _ = q.shape  # actually T_dec = 1 here

        alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
            if prev_coeffs is None \
            else F.argmax(prev_coeffs, axis=-1)
        backward = F.sequence_mask(alpha - backward_step,
                                   maxlen=T_enc,
                                   dtype="bool")
        forward = F.sequence_mask(alpha + forward_step,
                                  maxlen=T_enc,
                                  dtype="bool")
        mask = F.cast(F.logical_xor(backward, forward), "float32")
        # print("mask's shape:", mask.shape)
        attn_bias = F.scale(1. - mask, -1000)
        activations += attn_bias

    # softmax
    coefficients = F.softmax(activations, axis=-1)
    # context vector
    coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                             dropout_implementation='upscale_in_train')
    contexts = F.matmul(coefficients, v)
    # context normalization
    enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
    contexts *= F.sqrt(enc_lengths)
    # out affine
    contexts = self.out_affine(contexts)
    return contexts, coefficients
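# Minimal numpy sketch (an assumed, batch-free helper, not part of the module above)
# of how the forced-monotonic window is built: xor-ing two sequence masks keeps only
# encoder positions in [alpha - backward_step, alpha + forward_step).
import numpy as np

def window_mask(alpha, backward_step, forward_step, t_enc):
    idx = np.arange(t_enc)
    backward = idx < (alpha - backward_step)   # analogue of sequence_mask(alpha - backward_step)
    forward = idx < (alpha + forward_step)     # analogue of sequence_mask(alpha + forward_step)
    return np.logical_xor(backward, forward).astype(np.float32)

print(window_mask(alpha=4, backward_step=1, forward_step=3, t_enc=10))
# [0. 0. 0. 1. 1. 1. 1. 0. 0. 0.]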
def forward(self, input, pre_hidden, pre_cell):
    concat_input_hidden = layers.concat([input, pre_hidden], 1)
    gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
    gate_input = layers.elementwise_add(gate_input, self._bias)
    i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

    new_cell = layers.elementwise_add(
        layers.elementwise_mul(
            pre_cell,
            layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
        layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
    new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)

    return new_hidden, new_cell
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 5)
        tmp_0 = layers.assign(self.feed_vars[0])
        # subgraph with 9 op nodes
        tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
            self.feed_vars[2]) * layers.tanh(self.feed_vars[3])
        tmp_2 = layers.tanh(tmp_1) + layers.sigmoid(self.feed_vars[4])

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 64], dtype, 5)
        one = layers.fill_constant(shape=[1], dtype=dtype, value=1.0)
        tmp_0 = one * self.feed_vars[0]
        # subgraph with 9 op nodes
        tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
            self.feed_vars[2]) * layers.tanh(self.feed_vars[3])
        tmp_2 = layers.tanh(tmp_1) + layers.sigmoid(self.feed_vars[4])

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def _decode(self, x, y, w, h, anchors, stride, scale_x_y, eps, is_gt=False):
    conv_shape = x.shape  # (8, 13, 13, 3)
    batch_size = conv_shape[0]
    n_grid = conv_shape[1]
    anchor_per_scale = conv_shape[3]

    _x = L.unsqueeze(x, 4)
    _y = L.unsqueeze(y, 4)
    conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
    _w = L.unsqueeze(w, 4)
    _h = L.unsqueeze(h, 4)
    conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

    rows = L.range(0, n_grid, 1, 'float32')
    cols = L.range(0, n_grid, 1, 'float32')
    rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
    cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
    offset = L.concat([rows, cols], axis=-1)
    offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    if is_gt:
        decode_xy = (conv_raw_dxdy + offset) / n_grid
    else:
        if (abs(scale_x_y - 1.0) < eps):
            decode_xy = L.sigmoid(conv_raw_dxdy)
            decode_xy = (decode_xy + offset) / n_grid
        else:
            # Grid Sensitive
            decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) - 0.5 * (
                scale_x_y - 1.0)
            decode_xy = (decode_xy + offset) / n_grid
    anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
    decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
    decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
    if is_gt:
        decode_xywh.stop_gradient = True

    return decode_xywh  # (8, 13, 13, 3, 4)
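# Minimal numpy sketch (a hypothetical helper, not part of the snippet above) of the
# "Grid Sensitive" xy decode used when scale_x_y != 1.0: the sigmoid output is
# stretched by scale_x_y and re-centered so predictions can reach the cell borders.
import numpy as np

def grid_sensitive_xy(raw_dxdy, offset, n_grid, scale_x_y=1.05):
    sig = 1.0 / (1.0 + np.exp(-raw_dxdy))           # plain sigmoid in (0, 1)
    xy = scale_x_y * sig - 0.5 * (scale_x_y - 1.0)  # stretched to (-(s-1)/2, 1+(s-1)/2)
    return (xy + offset) / n_grid                   # normalized by the grid size

raw = np.zeros((1, 13, 13, 3, 2), dtype=np.float32)
off = np.zeros_like(raw)
print(grid_sensitive_xy(raw, off, 13).shape)        # (1, 13, 13, 3, 2)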
def create_rnn_op(self):
    x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim],
                    dtype='float32',
                    name='x',
                    append_batch_size=False)
    x.stop_gradient = False
    h_boot = layers.data(shape=[self.input_dim],
                         dtype='float32',
                         name='h_boot')
    h_boot.stop_gradient = False

    rnn = layers.StaticRNN()
    with rnn.step():
        h_pre = rnn.memory(init=h_boot)
        x_t = rnn.step_input(x)

        temp_l = layers.fc(input=x_t,
                           size=self.input_dim,
                           param_attr='W',
                           bias_attr=False)
        temp_r = layers.fc(input=h_pre,
                           size=self.input_dim,
                           param_attr='U',
                           bias_attr=False)

        h = layers.sigmoid(x=layers.elementwise_add(x=temp_l, y=temp_r))

        rnn.update_memory(h_pre, h)
        rnn.output(h)

    return rnn()
def add_input(self, x, condition=None):
    """Add a step input. This method works similarly to `forward` but in a
    `step-in-step-out` fashion.

    Args:
        x (Variable): shape(B, C_res, T=1), input for a step, dtype float32.
        condition (Variable, optional): shape(B, C_cond, T=1). condition for a
            step, dtype float32. Defaults to None.

    Returns:
        (residual, skip_connection)
        residual (Variable): shape(B, C_res, T=1), the residual for a step,
            which is used as the input to the next layer of ResidualBlock.
        skip_connection (Variable): shape(B, C_res, T=1), the skip connection
            for a step. This output is accumulated with that of other
            ResidualBlocks.
    """
    h = x

    # dilated conv
    h = self.conv.add_input(h)

    # condition
    if condition is not None:
        h += self.condition_proj(condition)

    # gated tanh
    content, gate = F.split(h, 2, dim=1)
    z = F.sigmoid(gate) * F.tanh(content)

    # projection
    residual = F.scale(z + x, np.sqrt(0.5))
    skip_connection = z
    return residual, skip_connection
def forward(self, x, speaker_embed=None):
    """
    Args:
        x (Variable): shape(B, C_in, T), dtype float32, the input of the
            Conv1DGLU layer, where B means batch_size, C_in means the input
            channels, and T means the input time steps.
        speaker_embed (Variable): shape(B, C_sp), dtype float32, the speaker
            embedding, where C_sp means the speaker embedding size.

    Returns:
        x (Variable): shape(B, C_out, T), the output of Conv1DGLU, where
            C_out means the `num_filters`.
    """
    residual = x
    x = F.dropout(x, self.dropout, dropout_implementation="upscale_in_train")
    x = self.conv(x)
    content, gate = F.split(x, num_or_sections=2, dim=1)

    if speaker_embed is not None:
        sp = F.softsign(self.fc(speaker_embed))
        content = F.elementwise_add(content, sp, axis=0)

    # glu
    x = F.sigmoid(gate) * content

    if self.residual:
        x = F.scale(x + residual, np.sqrt(0.5))
    return x
def add_input(self, x_t, speaker_embed=None):
    """
    Takes a step of inputs and returns a step of outputs. It works similarly
    to the `forward` method, but in a `step-in-step-out` fashion.

    Args:
        x_t (Variable): shape(B, C_in, T=1), dtype float32, the input of the
            Conv1DGLU layer, where B means batch_size, C_in means the input
            channels.
        speaker_embed (Variable): shape(B, C_sp), dtype float32, the speaker
            embedding, where C_sp means the speaker embedding size.

    Returns:
        x (Variable): shape(B, C_out), the output of Conv1DGLU, where C_out
            means the `num_filter`.
    """
    residual = x_t
    x_t = F.dropout(x_t,
                    self.dropout,
                    dropout_implementation="upscale_in_train")
    x_t = self.conv.add_input(x_t)
    content_t, gate_t = F.split(x_t, num_or_sections=2, dim=1)

    if speaker_embed is not None:
        sp = F.softsign(self.fc(speaker_embed))
        content_t = F.elementwise_add(content_t, sp, axis=0)

    # glu
    x_t = F.sigmoid(gate_t) * content_t

    if self.residual:
        x_t = F.scale(x_t + residual, np.sqrt(0.5))
    return x_t
def forward(self, input, bias=None, padding=None):
    """
    input: input feature (B, T, C)
    padding: only used with causal conv, where we pad manually
    """
    input_dropped = F.dropout(input,
                              1. - self.keep_prob,
                              dropout_implementation="upscale_in_train")
    if self.causal:
        assert padding is not None
        input_dropped = F.concat([padding, input_dropped], axis=1)
    hidden = self.conv(input_dropped)

    if self.has_bias:
        assert bias is not None
        transformed_bias = F.softsign(self.bias_affine(bias))
        hidden_embedded = hidden + F.unsqueeze(transformed_bias, [1])
    else:
        hidden_embedded = hidden

    # glu
    content, gate = F.split(hidden, num_or_sections=2, dim=-1)
    content = hidden_embedded[:, :, :self.in_channel]
    hidden = F.sigmoid(gate) * content

    # residual
    hidden = F.scale(input + hidden, math.sqrt(0.5))
    return hidden
def link_predict_model(num_nodes,
                       hidden_size=16,
                       name='link_predict_task',
                       binary_op_type="Weighted-L2"):
    pyreader = l.py_reader(capacity=70,
                           shapes=[[-1, 1], [-1, 1], [-1, 1]],
                           dtypes=['int64', 'int64', 'int64'],
                           lod_levels=[0, 0, 0],
                           name=name + '_pyreader',
                           use_double_buffer=True)
    u, v, label = l.read_file(pyreader)

    u_embed = l.embedding(input=u,
                          size=[num_nodes, hidden_size],
                          param_attr=fluid.ParamAttr(name='content'))
    v_embed = l.embedding(input=v,
                          size=[num_nodes, hidden_size],
                          param_attr=fluid.ParamAttr(name='content'))
    u_embed.stop_gradient = True
    v_embed.stop_gradient = True

    edge_embed = binary_op(u_embed, v_embed, binary_op_type)
    logit = l.fc(input=edge_embed, size=1)

    loss = l.sigmoid_cross_entropy_with_logits(logit, l.cast(label, 'float32'))
    loss = l.reduce_mean(loss)

    prob = l.sigmoid(logit)
    return pyreader, loss, prob, label
def test_sigmoid(self):
    program = Program()
    with program_guard(program):
        input = layers.data(name="input", shape=[16], dtype="float32")
        out = layers.sigmoid(input, name='sigmoid')
        self.assertIsNotNone(out)
    print(str(program))
def forward(self, x, condition=None):
    """Conv1D gated-tanh Block.

    Args:
        x (Variable): shape(B, C_res, T), the input. (B stands for batch_size,
            C_res stands for residual channels, T stands for time steps.)
            dtype float32.
        condition (Variable, optional): shape(B, C_cond, T), the condition.
            It has been upsampled in time steps, so it has the same time steps
            as the input does (C_cond stands for the condition's channels).
            Defaults to None.

    Returns:
        (residual, skip_connection)
        residual (Variable): shape(B, C_res, T), the residual, which is used
            as the input to the next layer of ResidualBlock.
        skip_connection (Variable): shape(B, C_res, T), the skip connection.
            This output is accumulated with that of other ResidualBlocks.
    """
    time_steps = x.shape[-1]
    h = x

    # dilated conv
    h = self.conv(h)
    if h.shape[-1] != time_steps:
        h = h[:, :, :time_steps]

    # condition
    if condition is not None:
        h += self.condition_proj(condition)

    # gated tanh
    content, gate = F.split(h, 2, dim=1)
    z = F.sigmoid(gate) * F.tanh(content)

    # projection
    residual = F.scale(z + x, math.sqrt(.5))
    skip_connection = z
    return residual, skip_connection
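# Minimal numpy sketch (a hypothetical helper) of the gated-tanh unit and the
# sqrt(0.5) residual scaling that the ResidualBlock variants above rely on.
import numpy as np

def gated_tanh_residual(x, h):
    content, gate = np.split(h, 2, axis=1)          # split along channels
    z = (1.0 / (1.0 + np.exp(-gate))) * np.tanh(content)
    residual = (z + x) * np.sqrt(0.5)               # keep variance roughly constant
    return residual, z                              # z doubles as the skip connection

x = np.random.randn(2, 4, 10).astype(np.float32)    # (B, C_res, T)
h = np.random.randn(2, 8, 10).astype(np.float32)    # (B, 2*C_res, T) after the dilated conv
res, skip = gated_tanh_residual(x, h)
print(res.shape, skip.shape)                        # (2, 4, 10) (2, 4, 10)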
def create_model(args, config, graph_label):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr, config['hidden_size'], act='relu', name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')

    logits = L.fc(global_repr, args.num_tasks, name="finetune_fc3")
    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=loss,
                 graph_wrapper=graph_wrapper,
                 encoder=encoder,
                 graph_emb=global_repr,
                 pred=pred)
def decoder_step(gru_unit, cue_gru_unit, step_in, hidden, input_size,
                 hidden_size, memory, memory_mask, knowledge, mask=None):
    """ decoder step """
    # get attention out
    # get hidden top layers
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, attn = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    if weight_memory.shape[0] == -1:
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)
    rnn_input = layers.squeeze(rnn_input, axes=[1])
    rnn_output, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    cue_rnn_out, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))

    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)

    real_out = layers.concat(output_list, axis=2)

    if mask:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        new_hidden = layers.elementwise_mul((new_hidden - hidden),
                                            mask_tmp,
                                            axis=0)
        new_hidden += hidden

    return real_out, new_hidden
def forward(self, features, im_info):
    # features: p6 -> p2
    pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features)

    rpn_rois = []
    rpn_roi_probs = []
    # p2 -> p6
    for lvl in range(self.k_min, self.k_max + 1):  # 2 -> 6
        lvl_anchors = generate_anchors(
            stride=2.**lvl,
            sizes=[self.anchor_size[0] * 2.**(lvl - self.k_min)],
            aspect_ratios=self.anchor_aspect_ratios)
        lvl_cls_logits = pred_objectness_logits[self.k_max - lvl]
        lvl_cls_logits = L.sigmoid(lvl_cls_logits).numpy()
        lvl_bbox_deltas = pred_anchor_deltas[self.k_max - lvl].numpy()
        lvl_rois, lvl_roi_probs, lvl_anchors = self.generate_proposal_op(
            lvl_cls_logits, lvl_bbox_deltas, lvl_anchors, 1 / 2**lvl, im_info)
        rpn_rois.append(lvl_rois)
        rpn_roi_probs.append(lvl_roi_probs)

    rois = self.collect_and_distribute_op(rpn_rois, rpn_roi_probs)
    # list of ndarray(size: (n, 5)), p2 -> p5
    return rois
def _compute_pc(self, x, mask):
    if mask is not None:
        x -= (1 - mask) * 1e10
    x = layers.reduce_max(x, dim=1, keep_dim=True)
    x = layers.relu(self.pc_fc1(x))
    x = layers.sigmoid(self.pc_fc2(x))
    return x
def appnp(gw, feature, alpha=0.2, k_hop=10, name=""):
    """Implementation of APPNP from "Predict then Propagate: Graph Neural
    Networks meet Personalized PageRank" (ICLR 2019).

    Args:
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or
            :code:`GraphWrapper`)

        feature: A tensor with shape (num_nodes, feature_size).

        alpha: Residual coefficient for the initial features (this variant
            replaces it with a learned per-node gate inside the loop).

        k_hop: K steps for propagation.

        name: Name prefix for the gate parameters.

    Return:
        A tensor with shape (num_nodes, hidden_size), and a list of
        intermediate features collected every third hop.
    """

    def send_src_copy(src_feat, dst_feat, edge_feat):
        feature = src_feat["h"]
        return feature

    def get_norm(indegree):
        float_degree = L.cast(indegree, dtype="float32")
        float_degree = L.clamp(float_degree, min=1.0)
        norm = L.pow(float_degree, factor=-0.5)
        return norm

    cks = []
    h0 = feature
    ngw = gw
    norm = get_norm(ngw.indegree())

    for i in range(k_hop):
        feature = feature * norm
        msg = gw.send(send_src_copy, nfeat_list=[("h", feature)])
        feature = gw.recv(msg, "sum")
        feature = feature * norm
        #feature = feature * (1 - alpha) + h0 * alpha

        fan_in = feature.shape[-1] * 3
        bias_bound = 1.0 / math.sqrt(fan_in)
        fc_bias_attr = F.ParamAttr(
            initializer=F.initializer.UniformInitializer(low=-bias_bound,
                                                         high=bias_bound))

        negative_slope = math.sqrt(5)
        gain = math.sqrt(2.0 / (1 + negative_slope**2))
        std = gain / math.sqrt(fan_in)
        weight_bound = math.sqrt(3.0) * std
        fc_w_attr = F.ParamAttr(initializer=F.initializer.UniformInitializer(
            low=-weight_bound, high=weight_bound))

        gate_f = L.fc([feature, h0, feature - h0],
                      1,
                      param_attr=fc_w_attr,
                      name=name + 'appnp_gate_' + str(i),
                      bias_attr=fc_bias_attr)

        alpha = L.sigmoid(gate_f)
        feature = feature * (1 - alpha) + h0 * alpha

        if (i + 1) % 3 == 0:
            cks.append(feature)
    return feature, cks
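# Minimal dense numpy sketch (assumed, not the PGL implementation above) of the APPNP
# update H_{t+1} = (1 - alpha) * A_hat @ H_t + alpha * H_0; the gated variant above
# swaps the fixed alpha for a per-node sigmoid gate.
import numpy as np

def appnp_dense(a_hat, h0, alpha=0.2, k_hop=10):
    h = h0
    for _ in range(k_hop):
        h = (1.0 - alpha) * (a_hat @ h) + alpha * h0
    return h

a = np.eye(4, dtype=np.float32)                     # toy normalized adjacency
h0 = np.random.randn(4, 16).astype(np.float32)
print(appnp_dense(a, h0).shape)                     # (4, 16)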
def gru_step(self, input, hidden, mask=None):
    """ gru step """
    hidden_array = []
    for i in range(self.num_layers):
        hidden_temp = layers.slice(hidden, axes=[0], starts=[i], ends=[i + 1])
        hidden_temp = layers.reshape(hidden_temp,
                                     shape=[-1, self.hidden_size])
        hidden_array.append(hidden_temp)

    last_hidden_array = []
    for k in range(self.num_layers):
        trans_input = layers.matmul(input, self.weight_input_array[k])
        trans_input += self.bias_input_array[k]
        trans_hidden = layers.matmul(hidden_array[k],
                                     self.weight_hidden_array[k])
        trans_hidden += self.bias_hidden_array[k]

        input_array = layers.split(trans_input, num_or_sections=3, dim=-1)
        trans_array = layers.split(trans_hidden, num_or_sections=3, dim=-1)

        reset_gate = layers.sigmoid(input_array[0] + trans_array[0])
        input_gate = layers.sigmoid(input_array[1] + trans_array[1])
        new_gate = layers.tanh(input_array[2] + reset_gate * trans_array[2])

        new_hidden = new_gate + input_gate * (hidden_array[k] - new_gate)

        if mask:
            neg_mask = layers.fill_constant_batch_size_like(
                input=mask, shape=[1], value=1.0, dtype='float32') - mask
            new_hidden = new_hidden * mask + hidden_array[k] * neg_mask

        last_hidden_array.append(new_hidden)
        input = new_hidden

        if self.dropout and self.dropout > 0.0:
            input = layers.dropout(input, dropout_prob=self.dropout)

    last_hidden = layers.concat(last_hidden_array, 0)
    last_hidden = layers.reshape(
        last_hidden, shape=[self.num_layers, -1, self.hidden_size])

    return input, last_hidden
def inference(self):
    """ Used for inference with labels. """
    graph_wrapper, logits = self.forward(is_test=True)
    pred = layers.sigmoid(logits)

    self.graph_wrapper = graph_wrapper
    self.pred = pred
def forward(self, input, state):
    #logging.info("input shape: {}".format(input.shape))
    pre_hidden, pre_cell = state
    #logging.info("pre hidden shape: {}".format(pre_hidden.shape))
    #logging.info("pre cell shape: {}".format(pre_cell.shape))

    # i, f, c, o each take the form Wx + Wh + b, i.e. W·[x, h] + b
    # Therefore:
    #   the actual computation is [x, h]·W + b
    #   x and h are concatenated horizontally, shape [batch_size, input_size + hidden_size]
    #   W has shape [input_size + hidden_size, 4 * hidden_size]
    #   b has shape [4 * hidden_size,]

    # concatenate horizontally
    # shape: [batch_size, input_size + hidden_size]
    concat_input_hidden = L.concat([input, pre_hidden], axis=1)
    #logging.info("x concat h shape: {}".format(concat_input_hidden.shape))

    # compute Wx + Wh + b
    # shape: [batch_size, 4 * hidden_size]
    gate_input = L.matmul(x=concat_input_hidden, y=self._weight)
    #logging.info("[x, h]·W shape: {}".format(gate_input.shape))

    # shape: [batch_size, 4 * hidden_size]
    gate_input = L.elementwise_add(gate_input, self._bias)
    #logging.info("[x, h]·W + b shape: {}".format(gate_input.shape))

    # split into i, f, c, o along the last dimension, so each has last dim hidden_size
    i, f, c, o = L.split(gate_input, num_or_sections=4, dim=-1)

    # new_c = pre_c · sigmoid(f + forget_bias) + sigmoid(i) · tanh(c)
    # shape: [batch_size, hidden_size]
    new_cell = L.elementwise_add(
        L.elementwise_mul(
            pre_cell,
            L.sigmoid(L.elementwise_add(f, self._forget_bias))),
        L.elementwise_mul(L.sigmoid(i), L.tanh(c))
    )
    #logging.info("new_cell shape: {}".format(new_cell.shape))

    # new_h = tanh(new_c) * sigmoid(o)
    # shape: [batch_size, hidden_size]
    new_hidden = L.tanh(new_cell) * L.sigmoid(o)
    #logging.info("new_hidden shape: {}".format(new_hidden.shape))

    return new_hidden, [new_hidden, new_cell]
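# Minimal numpy sketch (assumed toy shapes) checking the fused-gate arithmetic the
# comments above describe: one matmul of [x, h] with a (input+hidden, 4*hidden)
# weight, then a 4-way split into i, f, c, o.
import numpy as np

batch, input_size, hidden_size = 2, 3, 5
x = np.random.randn(batch, input_size).astype(np.float32)
h = np.random.randn(batch, hidden_size).astype(np.float32)
w = np.random.randn(input_size + hidden_size, 4 * hidden_size).astype(np.float32)
b = np.random.randn(4 * hidden_size).astype(np.float32)

gate_input = np.concatenate([x, h], axis=1) @ w + b
i, f, c, o = np.split(gate_input, 4, axis=1)
print(i.shape, f.shape, c.shape, o.shape)           # four (2, 5) blocks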
def act(a, act='tanh'):
    if act == 'tanh':
        return layers.tanh(a)
    elif act == 'sigmoid':
        return layers.sigmoid(a)
    elif act == 'relu':
        return layers.relu(a)
    else:
        return a
def forward(self, t, k, pn1, pn2, mask=None):
    pc = self._compute_pc(t, mask)

    k = get_k_inter(t, k)
    k = layers.expand(k, [1, t.shape[1], 1])

    h = self.multi_head_attn(t, mask=mask)
    h = layers.concat([t, h, k], axis=-1)
    h = self.conv1d(h)

    po = layers.sigmoid(self.po_fc(h))
    po1 = layers.sigmoid(self.po1_fc(h))
    po2 = layers.sigmoid(self.po2_fc(h))

    po1 = po * po1 * pc * pn1
    po2 = po * po2 * pc * pn2

    return po1, po2
def _split_ioup(self, output, an_num, num_classes):
    """
    Split the output feature map along the channel dimension into the
    predicted IoU (passed through a sigmoid) and the original output.
    """
    ioup = output[:, :an_num, :, :]
    ioup = L.sigmoid(ioup)

    oriout = output[:, an_num:, :, :]

    return (ioup, oriout)
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    def linear(inputs):
        return layers.fc(input=inputs, size=size, bias_attr=True)

    forget_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    input_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    output_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    cell_tilde = layers.tanh(x=linear([hidden_t_prev, x_t]))

    cell_t = layers.sums(input=[
        layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
        layers.elementwise_mul(x=input_gate, y=cell_tilde)
    ])

    hidden_t = layers.elementwise_mul(x=output_gate, y=layers.tanh(x=cell_t))

    return hidden_t, cell_t
def build_model(self):
    node_features = self.graph_wrapper.node_feat["feat"]

    output = self.gcn(gw=self.graph_wrapper,
                      feature=node_features,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_1")
    output1 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_2")
    output2 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_3")

    output = L.concat(input=[output1, output2, output], axis=-1)

    output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                    feature=output,
                                    ratio=self.pooling_ratio,
                                    graph_id=self.graph_id,
                                    dataset=self.args.dataset_name,
                                    name="sag_pool_1")
    output = L.lod_reset(output, self.graph_wrapper.graph_lod)
    cat1 = L.sequence_pool(output, "sum")
    ratio_length = L.cast(ratio_length, dtype="float32")
    cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
    cat2 = L.sequence_pool(output, "max")
    output = L.concat(input=[cat2, cat1], axis=-1)

    output = L.fc(output, size=self.hidden_size, act="relu")
    output = L.dropout(output, dropout_prob=self.dropout_ratio)
    output = L.fc(output, size=self.hidden_size // 2, act="relu")
    output = L.fc(output,
                  size=self.num_classes,
                  act=None,
                  param_attr=fluid.ParamAttr(name="final_fc"))

    self.labels = L.cast(self.labels, dtype="float32")
    loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
    self.loss = L.mean(loss)
    pred = L.sigmoid(output)
    self.pred = L.argmax(x=pred, axis=-1)
    correct = L.equal(self.pred, self.labels_1dim)
    correct = L.cast(correct, dtype="int32")
    self.correct = L.reduce_sum(correct)
def create_rnn_op(self):
    x = layers.data(
        shape=[self.sent_len, self.batch_size, self.input_dim],
        dtype='float32',
        name='x',
        append_batch_size=False,
        **self.p_info)
    x.stop_gradient = False
    h_boot = layers.data(
        shape=[self.input_dim],
        dtype='float32',
        name='h_boot',
        **self.p_info)
    h_boot.stop_gradient = False

    rnn = layers.StaticRNN(main_program=self.main_program)
    with rnn.step():
        h_pre = rnn.memory(init=h_boot)
        x_t = rnn.step_input(x)

        temp_l = layers.fc(input=x_t,
                           size=self.input_dim,
                           param_attr='W',
                           bias_attr=False,
                           **self.p_info)
        temp_r = layers.fc(input=h_pre,
                           size=self.input_dim,
                           param_attr='U',
                           bias_attr=False,
                           **self.p_info)

        h = layers.sigmoid(
            x=layers.elementwise_add(x=temp_l, y=temp_r, **self.p_info),
            **self.p_info)

        rnn.update_memory(h_pre, h)
        rnn.output(h)

    return rnn()