def _beta(self, h, u_expr, h_expr):
    """
    Merge h with two companion expressions into one concatenated layer.

    Args:
        h: the base layer output.
        u_expr: first layer output multiplied element-wise with h.
        h_expr: second layer output multiplied element-wise with h.

    Returns:
        The concatenation of [h, u_expr, h .* u_expr, h .* h_expr].
    """
    with layer.mixed(bias_attr=False) as hu_product:
        hu_product += layer.dotmul_operator(a=h, b=u_expr)

    with layer.mixed(bias_attr=False) as hh_product:
        hh_product += layer.dotmul_operator(a=h, b=h_expr)

    merged_parts = [h, u_expr, hu_product, hh_product]
    return layer.concat(input=merged_parts)
def test_operator(self):
    """Attach dotmul and conv operators to mixed layers and print each parsed network."""
    image = layer.data(name='data1', type=data_type.dense_vector(784))
    kernel = layer.data(name='word1', type=data_type.dense_vector(128))
    hidden_a = layer.fc(input=image, size=100, act=activation.Sigmoid())
    hidden_b = layer.fc(input=image, size=100, act=activation.Sigmoid())

    # Every operator is attached twice: once through the ``input`` argument
    # and once with ``+=`` inside a ``with`` block.
    product_op = layer.dotmul_operator(a=hidden_a, b=hidden_b)
    product_direct = layer.mixed(input=product_op)
    with layer.mixed() as product_scoped:
        product_scoped += product_op

    conv_op = layer.conv_operator(
        img=image,
        filter=kernel,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv_direct = layer.mixed(input=conv_op)
    with layer.mixed() as conv_scoped:
        conv_scoped += conv_op

    for net in (product_direct, product_scoped, conv_direct, conv_scoped):
        print(layer.parse_network(net))
def test_operator(self):
    """Check that operators can feed mixed layers both directly and via ``+=``."""
    data_in = layer.data(name='data', type=data_type.dense_vector(784))
    word_in = layer.data(name='word', type=data_type.dense_vector(128))
    left = layer.fc(input=data_in, size=100, act=activation.Sigmoid())
    right = layer.fc(input=data_in, size=100, act=activation.Sigmoid())

    # Element-wise product operator, used by two separate mixed layers.
    mul_op = layer.dotmul_operator(a=left, b=right)
    mul_by_arg = layer.mixed(input=mul_op)
    with layer.mixed() as mul_by_add:
        mul_by_add += mul_op

    # Convolution operator whose filter comes from the second data layer.
    conv_kwargs = dict(filter_size=1, num_channels=1, num_filters=128,
                       stride=1, padding=0)
    cnv_op = layer.conv_operator(img=data_in, filter=word_in, **conv_kwargs)
    cnv_by_arg = layer.mixed(input=cnv_op)
    with layer.mixed() as cnv_by_add:
        cnv_by_add += cnv_op

    print(layer.parse_network(mul_by_arg))
    print(layer.parse_network(mul_by_add))
    print(layer.parse_network(cnv_by_arg))
    print(layer.parse_network(cnv_by_add))
def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    """
    Attend over `to_apply` using the current token and the previous state.

    Args:
        direct: prefix used to name the parameters of this attention.
        cur_token: layer output of the current token.
        prev: layer output of the previous step.
        to_apply: the sequence the attention weights are applied to.
        to_apply_proj: projection of `to_apply` that is added to the
            expanded query before the tanh.

    Returns:
        The sum-pooled, attention-scaled `to_apply`.
    """
    query_bias = Attr.Param(direct + '.bp', initial_std=0.)
    with layer.mixed(size=cur_token.size,
                     bias_attr=query_bias,
                     act=Act.Linear()) as query:
        query += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        query += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))

    # Broadcast the query along the sequence, then combine with the keys.
    query_seq = layer.expand(input=query, expand_as=to_apply)
    energy = layer.addto(input=[query_seq, to_apply_proj],
                         act=Act.Tanh(),
                         bias_attr=False)

    weights = layer.fc(input=energy,
                       param_attr=Attr.Param(direct + '.w'),
                       bias_attr=Attr.Param(direct + '.b', initial_std=0.),
                       act=Act.SequenceSoftmax(),
                       size=1)

    weighted_seq = layer.scaling(input=to_apply, weight=weights)
    return layer.pooling(input=weighted_seq,
                         pooling_type=paddle.pooling.Sum())
def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    """
    Compute an attention-weighted sum of `to_apply`.

    Args:
        direct: name prefix for the attention parameters.
        cur_token: layer output of the current token.
        prev: layer output of the previous step.
        to_apply: sequence that gets scaled by the attention weights.
        to_apply_proj: projected `to_apply`, summed with the expanded
            token/state projection.

    Returns:
        A layer holding the sum over the scaled `to_apply` sequence.
    """

    def named(suffix, **kwargs):
        # All attention parameters share the `direct` prefix.
        return Attr.Param(direct + suffix, **kwargs)

    with layer.mixed(size=cur_token.size,
                     bias_attr=named('.bp', initial_std=0.),
                     act=Act.Linear()) as state_proj:
        state_proj += layer.full_matrix_projection(
            input=cur_token, param_attr=named('.wp'))
        state_proj += layer.full_matrix_projection(
            input=prev, param_attr=named('.wr'))

    state_seq = layer.expand(input=state_proj, expand_as=to_apply)
    context = layer.addto(input=[state_seq, to_apply_proj],
                          act=Act.Tanh(),
                          bias_attr=False)
    scores = layer.fc(input=context,
                      param_attr=named('.w'),
                      bias_attr=named('.b', initial_std=0.),
                      act=Act.SequenceSoftmax(),
                      size=1)
    rescaled = layer.scaling(input=to_apply, weight=scores)
    attended = layer.pooling(input=rescaled,
                             pooling_type=paddle.pooling.Sum())
    return attended
def get_loss(self, start_prob, end_prob, start_label, end_label):
    r"""
    Compute the loss: $l_{\theta} = -\log(P(start) \cdot P(end|start))$

    The start/end probabilities and the start/end labels are concatenated
    as sequences, the negative log of the probabilities is multiplied
    element-wise with the labels, and the result is summed.

    Args:
        start_prob: layer output with the start probabilities.
        end_prob: layer output with the end probabilities.
        start_label: layer output with the start labels.
        end_label: layer output with the end labels.

    Returns:
        A LayerOutput object containing loss.
    """
    # NOTE: the docstring is a raw string on purpose; in a normal string
    # "\theta" would be read as a TAB followed by "heta".
    probs = layer.seq_concat(a=start_prob, b=end_prob)
    labels = layer.seq_concat(a=start_label, b=end_label)

    log_probs = layer.mixed(
        size=probs.size,
        act=Act.Log(),
        bias_attr=False,
        input=paddle.layer.identity_projection(probs))

    # slope=-1 / intercept=0 simply negates the log probabilities.
    neg_log_probs = layer.slope_intercept(
        input=log_probs, slope=-1, intercept=0)

    loss = paddle.layer.mixed(
        size=1,
        input=paddle.layer.dotmul_operator(a=neg_log_probs, b=labels))

    sum_val = paddle.layer.pooling(input=loss,
                                   pooling_type=paddle.pooling.Sum())
    cost = paddle.layer.sum_cost(input=sum_val)
    return cost
def _u_step(self, h_cur, u):
    """
    Summarize `u` for one step of `h` with softmax attention.

    Args:
        h_cur: layer output of the current step.
        u: the sequence that is scored against `h_cur` and pooled.

    Returns:
        The sum over `u` scaled by the sequence-softmax of the scores
        produced by `_step_basic`.
    """
    scores = self._step_basic(h_cur, u)

    with layer.mixed(size=1,
                     bias_attr=False,
                     act=Act.SequenceSoftmax()) as attention:
        attention += layer.identity_projection(scores)

    weighted_u = layer.scaling(input=u, weight=attention)
    return layer.pooling(input=weighted_u,
                         pooling_type=paddle.pooling.Sum())
def inner_product_cost(input, label, weight, height, width, num_channel,
                       interp='nearest', is_angle=False):
    """Cost built from the inner product of the normalized input and label.

    If is_angle, we can not back propagate through the angle, only back
    through the inner product, the loss is not consistent with the
    evaluation.

    Args:
        input: prediction layer.
        label: ground-truth layer.
        weight: optional per-element weight layer; falsy means unweighted.
        height, width: spatial size everything is resized to.
        num_channel: number of channels of input and label.
        interp: key into `image_resize_func` used to resize the weight.
        is_angle: use acos(inner product) instead of 1 - inner product.

    Returns:
        The summed (weighted or averaged) cost layer.
    """
    pred, target, mask = input, label, weight

    # Bring prediction, label and weight to the same spatial size.
    # NOTE(review): the weight resize is assumed to sit inside the size
    # check -- confirm against the original layout.
    if height > 1 and width > 1:
        pred = pd.bilinear_interp(input=pred,
                                  out_size_x=width,
                                  out_size_y=height)
        target = pd.bilinear_interp(input=target,
                                    out_size_x=width,
                                    out_size_y=height)
        if mask:
            mask = image_resize_func[interp](input=mask,
                                             out_size_x=width,
                                             out_size_y=height)

    pixel_count = height * width
    size = pixel_count * num_channel
    pred = util_layers.norm(pred, height, width, num_channel,
                            trans_back=False)
    target = util_layers.norm(target, height, width, num_channel,
                              trans_back=False)

    product = pd.mixed(
        size=size,
        input=[pd.dotmul_operator(a=pred, b=target, scale=1.0)])
    similarity = pd.resize(input=pd.sum_cost(input=product),
                           size=height * width,
                           height=height,
                           width=width)

    if is_angle:
        distance = util_layers.math_op(input=similarity,
                                       act=pd.activation.Acos())
    else:
        # 1 - inner product
        distance = pd.slope_intercept(input=similarity,
                                      slope=-1,
                                      intercept=1.0)

    if mask:
        return sum_weighted_loss(distance, mask, size=height * width)

    # Unweighted: average over all pixels.
    scale = 1.0 / float(height * width)
    distance = pd.slope_intercept(input=distance, slope=scale, intercept=0.0)
    return pd.sum_cost(input=distance)
def _step_basic(self, h_cur, u):
    """
    Score every element of `u` against the current step of `h`.

    Args:
        h_cur: layer output of the current step; expanded to match `u`.
        u: the sequence to score.

    Returns:
        A size-1 fc layer over [h, u, h .* u], using the shared
        parameter named `self.name + '.ws'`.
    """
    h_seq = layer.expand(input=h_cur, expand_as=u)
    pair = layer.concat(input=[h_seq, u])

    with layer.mixed(bias_attr=False) as pair_product:
        pair_product += layer.dotmul_operator(a=h_seq, b=u)

    features = layer.concat(input=[pair, pair_product])
    score_param = Attr.Param(self.name + '.ws')
    return layer.fc(size=1,
                    bias_attr=False,
                    param_attr=score_param,
                    input=features)
def sum_weighted_loss(loss, weight, size=1):
    """Return the normalized weighted loss: sum(loss * weight) / sum(weight).

    Loss has input batch_size x image_size, weight has input
    batch_size x weight.

    Args:
        loss: per-element loss layer.
        weight: per-element weight layer.
        size: size of the mixed layer holding the element-wise product.

    Returns:
        The layer holding the weight-normalized sum of the loss.
    """
    weighted_loss = pd.mixed(
        size=size,
        input=[pd.dotmul_operator(a=loss, b=weight, scale=1.0)])

    # 1 / sum(weight)
    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac,
                                     act=pd.activation.Inv())

    # Scale the *weighted* loss. Scaling the raw `loss` here (as the code
    # did before) throws the element-wise weights away and only divides
    # the plain sum by sum(weight).
    weighted_loss = pd.scaling(input=weighted_loss, weight=weight_fac)
    weighted_loss = pd.sum_cost(input=weighted_loss)
    return weighted_loss
def ele_norm_cost(input, label, weight, height=None, width=None,
                  num_channel=None, cost_type='l1'):
    """Element-wise cost between input and label, optionally weighted.

    Args:
        input: prediction layer.
        label: ground-truth layer.
        weight: optional weight layer; falsy means unweighted.
        height, width: spatial size everything is resized to.
        num_channel: number of channels of input and label.
        cost_type: key into `cost_func` selecting the element cost.

    Returns:
        The summed cost, divided by sum(weight) when weighted and by
        height * width otherwise.
    """
    pred, target, mask = input, label, weight

    # NOTE(review): the weight resize is assumed to sit inside the size
    # check -- confirm against the original layout.
    if height > 1 and width > 1:
        pred = pd.bilinear_interp(input=pred,
                                  out_size_x=width,
                                  out_size_y=height)
        target = pd.bilinear_interp(input=target,
                                    out_size_x=width,
                                    out_size_y=height)
        if mask:
            mask = pd.nearest_interp(input=mask,
                                     out_size_x=width,
                                     out_size_y=height)

    size = height * width * num_channel
    compute_cost = cost_func[cost_type]

    if not mask:
        # Unweighted: average over the pixels.
        cost = compute_cost(input=pred, label=target)
        scale = 1.0 / float(height * width)
        cost = pd.slope_intercept(input=cost, slope=scale, intercept=0.0)
        return pd.sum_cost(input=cost)

    # Weighted: mask both operands, then normalize by the weight sum.
    pred = pd.mixed(
        size=size,
        input=[pd.dotmul_operator(a=pred, b=mask, scale=1.0)])
    target = pd.mixed(
        size=size,
        input=[pd.dotmul_operator(a=target, b=mask, scale=1.0)])
    cost = compute_cost(input=pred, label=target)

    inv_weight_sum = pd.sum_cost(input=mask)
    inv_weight_sum = util_layers.math_op(input=inv_weight_sum,
                                         act=pd.activation.Inv())
    cost = pd.scaling(input=cost, weight=inv_weight_sum)
    return pd.sum_cost(input=cost)
def math_op(input, act=pd.activation.Linear(), op='dot', size=0):
    """Apply a unary or binary math operation through a mixed layer.

    Args:
        input: a single layer (unary) or a list of one or two layers.
        act: activation applied by the mixed layer; for the unary case
            this is the operation itself (e.g. Inv, Sqrt, Abs).
        op: binary operation name; only 'dot' (element-wise product) is
            supported.
        size: size passed to the mixed layer in the binary case.

    Returns:
        The mixed layer holding the result.

    Raises:
        ValueError: if `input` is empty or has more than two layers, or
            if `op` is not a supported binary operation.
    """
    operands = input if isinstance(input, list) else [input]

    if not operands:
        raise ValueError('math op needs at least one input')

    if len(operands) == 1:
        # unary operation: identity projection followed by the activation
        return pd.mixed(
            input=[pd.identity_projection(input=operands[0])],
            act=act)

    if len(operands) == 2:
        # binary operation
        if op != 'dot':
            # Previously this fell through to `return result` with
            # `result` never assigned.
            raise ValueError('not supporting binary math op %r' % (op,))
        return pd.mixed(
            size=size,
            input=pd.dotmul_operator(a=operands[0], b=operands[1],
                                     scale=1.0),
            act=act)

    raise ValueError('not supporting math op with more than two input')
def iou_score(input, label, weight, height, width, class_num, is_cost=True): """ class num is semantic classes plus background, this score can also serve as iou cost for training """ # input = pd.resize(input=input, size=height * width) # label = pd.resize(input=label, size=height * width) weight = pd.nearest_interp(input=weight, out_size_x=width, out_size_y=height) if not is_cost: # if not is cost, then it is eval, we can do # one hot for label. Otherwise input = util_layers.math_op(input=[input, weight], op='dot') input_one_hot = util_layers.ele_one_hot(input, class_num, height, width) else: input_one_hot = input input_one_hot = pd.bilinear_interp(input=input_one_hot, out_size_x=width, out_size_y=height) label = pd.nearest_interp(input=label, out_size_x=width, out_size_y=height) label = util_layers.math_op(input=[label, weight], op='dot') label_one_hot = util_layers.ele_one_hot(label, class_num, height, width) inter = util_layers.math_op(input=[input_one_hot, label_one_hot], op='dot') union = pd.addto(input=[input_one_hot, label_one_hot], act=pd.activation.Linear(), bias_attr=False) inter_neg = pd.slope_intercept(input=inter, slope=-1) union = pd.addto(input=[union, inter_neg], act=pd.activation.Linear(), bias_attr=False) inter = pd.resize(input=inter, size=height * width) inter = pd.sum_cost(input=inter) union = pd.resize(input=union, size=height * width) union = pd.sum_cost(input=union) union_inv = util_layers.math_op(input=union, act=pd.activation.Inv()) iou = pd.mixed(size=1, input=[pd.dotmul_operator(a=inter, b=union_inv, scale=1.0)]) iou = pd.resize(input=iou, size=class_num) if is_cost: iou = pd.sum_cost(iou) return iou
def ns_ele_l2_cost(input, label, weight, height, width, num_channel=None,
                   interp='nearest'):
    """Weighted cost built from the square root of a per-position mse cost.

    Args:
        input: prediction layer.
        label: ground-truth layer.
        weight: weight layer.
        height, width: spatial size everything is resized to.
        num_channel: number of channels of input and label.
        interp: key into `image_resize_func` used for label and weight.

    Returns:
        The summed cost, scaled by 1 / sum(weight).
    """
    assert interp in image_resize_func.keys()
    resize_fn = image_resize_func[interp]

    # Bring input, label and weight to the same spatial size.
    pred = pd.bilinear_interp(input=input,
                              out_size_x=width,
                              out_size_y=height)
    target = resize_fn(input=label, out_size_x=width, out_size_y=height)
    mask = resize_fn(input=weight, out_size_x=width, out_size_y=height)

    # Reshape the original layers: c x h x w becomes h x w x c, then each
    # row is resized to the channel values of a single position.
    pred_hwc = pd.transpose(input=pred,
                            trans_order=[1, 2, 0],
                            height=height,
                            width=width)
    pred_rows = pd.resize(input=pred_hwc, size=num_channel,
                          height=1, width=1)

    target_hwc = pd.transpose(input=target,
                              trans_order=[1, 2, 0],
                              height=height,
                              width=width)
    target_rows = pd.resize(input=target_hwc, size=num_channel,
                            height=1, width=1)

    mask_rows = pd.resize(input=mask, size=1, height=1, width=1)

    row_cost = pd.mse_cost(input=pred_rows, label=target_rows)
    root_cost = util_layers.math_op(input=row_cost,
                                    act=pd.activation.Sqrt())
    root_cost = pd.mixed(
        size=1,
        input=[pd.dotmul_operator(a=root_cost, b=mask_rows, scale=1.0)])
    root_cost = pd.resize(input=root_cost,
                          size=height * width,
                          height=height,
                          width=width)

    # Normalize by the sum of the weights.
    inv_mask_sum = pd.sum_cost(input=mask)
    inv_mask_sum = util_layers.math_op(input=inv_mask_sum,
                                       act=pd.activation.Inv())
    root_cost = pd.scaling(input=root_cost, weight=inv_mask_sum)
    return pd.sum_cost(input=root_cost)
def _attention_flow(self, h, u):
    """
    Build the attention flow between the sequences `h` and `u`.

    Args:
        h: the sequence the recurrent groups iterate over.
        u: the sequence passed to every step as a static input.

    Returns:
        The output of `_beta` for h, the `_u_step` result and the
        expanded softmax-weighted sum of h.
    """
    # Scores from `_h_step` are turned into weights over the steps of h.
    step_scores = layer.recurrent_group(
        input=[h, layer.StaticInput(u)],
        step=self._h_step,
        reverse=False)
    step_weights = layer.mixed(
        act=Act.SequenceSoftmax(),
        bias_attr=False,
        input=layer.identity_projection(step_scores))

    weighted_h = layer.scaling(input=h, weight=step_weights)
    pooled_h = layer.pooling(input=weighted_h,
                             pooling_type=paddle.pooling.Sum())
    h_summary = layer.expand(input=pooled_h, expand_as=h)

    u_summary = layer.recurrent_group(
        input=[h, layer.StaticInput(u)],
        step=self._u_step,
        reverse=False)

    return self._beta(h, u_summary, h_summary)
def drop_out(self, input, drop_rate=0.5):
    """
    Apply drop out to a layer output.

    Args:
        input: the LayerOutput needs to apply drop out.
        drop_rate: drop out rate.

    Returns:
        The layer output after applying drop out.
    """
    dropout_attr = Attr.ExtraLayerAttribute(drop_rate=drop_rate)
    # An identity projection inside a bias-free mixed layer carries the
    # drop-out attribute without otherwise transforming the input.
    with layer.mixed(layer_attr=dropout_attr, bias_attr=False) as result:
        result += layer.identity_projection(input)
    return result
def fusion_layer(self, input1, input2):
    """
    Fuse two layer outputs into one feature vector.

    Args:
        input1: first layer output.
        input2: second layer output.

    Returns:
        concat(input1, input2, input1 - input2, input1 .* input2)
    """
    # input1 - input2, expressed as input1 + (-1 * input2)
    negated = layer.slope_intercept(input=input2,
                                    slope=-1.0,
                                    intercept=0.0)
    difference = layer.addto(input=[input1, negated],
                             act=Act.Identity(),
                             bias_attr=False)
    # element-wise product
    product = layer.mixed(bias_attr=False,
                          input=layer.dotmul_operator(a=input1, b=input2))

    return layer.concat(input=[input1, input2, difference, product])
def drop_out(self, input, drop_rate=0.5):
    """
    Wrap `input` in a mixed layer that carries a drop-out attribute.

    Args:
        input: the LayerOutput needs to apply drop out.
        drop_rate: drop out rate.

    Returns:
        The layer output after applying drop out.
    """
    mixed_kwargs = {
        'layer_attr': Attr.ExtraLayerAttribute(drop_rate=drop_rate),
        'bias_attr': False,
    }
    with layer.mixed(**mixed_kwargs) as with_dropout:
        # Pass the input through unchanged.
        with_dropout += layer.identity_projection(input)
    return with_dropout
def fusion_layer(self, input1, input2):
    """
    Combine input1 and input2 by
    concat(input1, input2, input1 - input2, input1 .* input2).

    Args:
        input1: first layer output.
        input2: second layer output.

    Returns:
        The concatenated layer.
    """
    minus_input2 = layer.slope_intercept(input=input2,
                                         slope=-1.0,
                                         intercept=0.0)
    subtracted = layer.addto(input=[input1, minus_input2],
                             act=Act.Identity(),
                             bias_attr=False)
    multiplied = layer.mixed(
        bias_attr=False,
        input=layer.dotmul_operator(a=input1, b=input2))

    pieces = [input1, input2, subtracted, multiplied]
    combined = layer.concat(input=pieces)
    return combined
def relative_l1(input, label, weight, height, width, interp='nearest',
                is_inverse=False):
    """Relative l1 loss for depth: |input - label| / label.

    Args:
        input: prediction layer.
        label: ground-truth layer.
        weight: optional weight layer; falsy means unweighted.
        height, width: spatial size everything is resized to.
        interp: key into `image_resize_func` used to resize the weight.
        is_inverse: accepted for interface compatibility; not used here.

    Returns:
        The summed relative error, normalized by sum(weight) when a
        weight is given and by height * width otherwise.
    """
    assert interp in image_resize_func.keys()

    # make sure all the input label and weight have the same size
    # NOTE(review): the weight resize is assumed to sit inside the size
    # check -- confirm against the original layout.
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input,
                                   out_size_x=width,
                                   out_size_y=height)
        label = pd.bilinear_interp(input=label,
                                   out_size_x=width,
                                   out_size_y=height)
        if weight:
            weight = image_resize_func[interp](input=weight,
                                               out_size_x=width,
                                               out_size_y=height)

    label_inv = util_layers.math_op(input=label, act=pd.activation.Inv())
    label_neg = pd.slope_intercept(input=label, slope=-1)
    # |input - label|
    diff = pd.addto(input=[input, label_neg],
                    act=pd.activation.Abs(),
                    bias_attr=False)
    rel_error = pd.mixed(
        size=1,
        input=[pd.dotmul_operator(a=diff, b=label_inv, scale=1.0)])

    if weight:
        rel_error = sum_weighted_loss(rel_error, weight,
                                      size=height * width)
    else:
        # Average over the pixels. This branch used to read the undefined
        # name `inner` and to drop its result instead of returning it.
        fac = 1.0 / float(height * width)
        rel_error = pd.slope_intercept(input=rel_error,
                                       slope=fac,
                                       intercept=0.0)
        rel_error = pd.sum_cost(input=rel_error)

    return rel_error
def _step(self, name, h_q_all, q_proj, h_p_cur):
    """
    Match-LSTM step.

    This function performs operations done in one time step.

    Args:
        name: name of the recurrent group; selects the 'left' or 'right'
            parameter set and names the step output.
        h_p_cur: Current hidden of paragraph encodings: h_i.
                 This is the `REAL` input of the group, like
                 x_t in normal rnn.
        h_q_all: Question encodings.
        q_proj: projection of the question encodings used by the
            attention.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    direct = 'left' if 'left' in name else 'right'
    hidden_size = h_q_all.size
    out_name = name + '_out_'

    # The memory shares its name with the lstm unit created below.
    prev_out = paddle.layer.memory(name=out_name,
                                   size=hidden_size,
                                   boot_layer=None)

    attended_q = self._attention(direct, h_p_cur, prev_out, h_q_all, q_proj)
    fused = self.fusion_layer(h_p_cur, attended_q)

    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as lstm_input:
        lstm_input += layer.full_matrix_projection(
            input=fused,
            param_attr=Attr.Param('match_input_%s.w0' % direct))

    return paddle.networks.lstmemory_unit(
        name=out_name,
        out_memory=prev_out,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param(
            'step_lstm_mixed_%s.bias' % direct, initial_std=0.),
        lstm_bias_attr=Attr.Param(
            'step_lstm_%s.bias' % direct, initial_std=0.),
        input=lstm_input,
        size=hidden_size)
def _step(self, name, h_q_all, q_proj, h_p_cur):
    """
    Match-LSTM step.

    This function performs operations done in one time step.

    Args:
        name: group name; contains 'left' for the left direction,
            anything else is treated as 'right'.
        h_q_all: Question encodings.
        q_proj: projected question encodings handed to the attention.
        h_p_cur: Current hidden of paragraph encodings: h_i.
                 This is the `REAL` input of the group, like
                 x_t in normal rnn.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    if 'left' in name:
        direct = 'left'
    else:
        direct = 'right'

    # Output of this same step at the previous position.
    h_r_last = paddle.layer.memory(name=name + '_out_',
                                   size=h_q_all.size,
                                   boot_layer=None)

    question_ctx = self._attention(direct, h_p_cur, h_r_last,
                                   h_q_all, q_proj)
    z = self.fusion_layer(h_p_cur, question_ctx)

    projection_attr = Attr.Param('match_input_%s.w0' % direct)
    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as gate_input:
        gate_input += layer.full_matrix_projection(
            input=z, param_attr=projection_attr)

    lstm_kwargs = dict(
        name=name + '_out_',
        out_memory=h_r_last,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                        initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=gate_input,
        size=h_q_all.size)
    h_r_cur = paddle.networks.lstmemory_unit(**lstm_kwargs)
    return h_r_cur
def network(self):
    """
    Implements the detail of the model.

    The question is matched against the answer in both directions, the
    two match sequences are concatenated, passed through drop out
    (rate 0.2), max-pooled and classified.

    Returns:
        The softmax classification layer of size `self.label_dim`.
    """
    self.check_and_create_data()
    self.create_shared_params()

    question = self.get_enc(self.q_ids, type='q')
    # NOTE(review): the answer ids are encoded with type='q' as well --
    # confirm this is intended.
    answer = self.get_enc(self.a_ids, type='q')

    proj_size = self.emb_dim * 2
    question_proj_l = layer.fc(
        size=proj_size,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_left.wq'),
        input=question)
    question_proj_r = layer.fc(
        size=proj_size,
        bias_attr=False,
        param_attr=Attr.Param(self.name + '_right.wq'),
        input=question)

    inputs_l = [layer.StaticInput(question),
                layer.StaticInput(question_proj_l),
                answer]
    forward_match = self.recurrent_group(self.name + '_left',
                                         inputs_l,
                                         reverse=False)

    inputs_r = [layer.StaticInput(question),
                layer.StaticInput(question_proj_r),
                answer]
    backward_match = self.recurrent_group(self.name + '_right',
                                          inputs_r,
                                          reverse=True)

    both_matches = layer.concat(input=[forward_match, backward_match])

    with layer.mixed(size=both_matches.size,
                     act=Act.Identity(),
                     layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                     bias_attr=False) as regularized:
        regularized += layer.identity_projection(both_matches)

    pooled = layer.pooling(input=regularized,
                           pooling_type=paddle.pooling.Max())
    return layer.fc(input=pooled, act=Act.Softmax(), size=self.label_dim)
def network(self):
    """
    Implements the detail of the model.

    Returns:
        A softmax fc layer of size `self.label_dim` computed from the
        max-pooled, dropped-out concatenation of the left and right
        match sequences.
    """
    self.check_and_create_data()
    self.create_shared_params()

    q_encoded = self.get_enc(self.q_ids, type='q')
    # NOTE(review): type='q' is also used for the answer ids -- verify.
    a_encoded = self.get_enc(self.a_ids, type='q')

    def project_question(side):
        # One bias-free projection of the question per direction.
        return layer.fc(size=self.emb_dim * 2,
                        bias_attr=False,
                        param_attr=Attr.Param(self.name + side + '.wq'),
                        input=q_encoded)

    def match(side, projection, backwards):
        # Run the recurrent group over the answer for one direction.
        return self.recurrent_group(
            self.name + side,
            [layer.StaticInput(q_encoded),
             layer.StaticInput(projection),
             a_encoded],
            reverse=backwards)

    proj_left = project_question('_left')
    proj_right = project_question('_right')
    matched_left = match('_left', proj_left, False)
    matched_right = match('_right', proj_right, True)

    matched = layer.concat(input=[matched_left, matched_right])

    dropout = Attr.ExtraLayerAttribute(drop_rate=0.2)
    with layer.mixed(size=matched.size,
                     act=Act.Identity(),
                     layer_attr=dropout,
                     bias_attr=False) as matched_dropped:
        matched_dropped += layer.identity_projection(matched)

    summary = layer.pooling(input=matched_dropped,
                            pooling_type=paddle.pooling.Max())
    prediction = layer.fc(input=summary,
                          act=Act.Softmax(),
                          size=self.label_dim)
    return prediction
def norm(input, height, width, channel, type='l2', trans_back=True):
    """Channel wise normalize each layer.

    Args:
        input: layer to normalize.
        height, width: spatial size of the input.
        channel: number of channels of the input.
        type: 'l2' (square root of the summed squares) or 'l1' (sum of
            the absolute values).
        trans_back: when True the result is resized and transposed back
            after normalization.

    Returns:
        The normalized layer.

    Raises:
        ValueError: if `type` is neither 'l1' nor 'l2'.
    """
    if type not in ('l1', 'l2'):
        # Previously an unknown type failed later with an unbound local.
        raise ValueError('unsupported norm type %r' % (type,))

    size = height * width * channel

    # Move the channel axis last so that each row holds one position.
    # NOTE(review): the source layout was lost; the resize is assumed to
    # belong to this branch together with the transpose -- confirm.
    if height > 1 or width > 1:
        input = pd.transpose(input=input,
                             trans_order=[1, 2, 0],
                             height=height,
                             width=width)
        input = pd.resize(input=input, size=channel)

    if type == 'l2':
        magnitude = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=input, b=input, scale=1.0)])
        magnitude = pd.sum_cost(input=magnitude)
        magnitude = math_op(magnitude, pd.activation.Sqrt())
    else:
        magnitude = math_op(input, pd.activation.Abs())
        magnitude = pd.sum_cost(input=magnitude)

    inv_magnitude = math_op(magnitude, pd.activation.Inv())
    # One copy of the inverse norm per channel; this was hard-coded to 3,
    # which only matches inputs with exactly three channels.
    inv_magnitude = pd.repeat(input=inv_magnitude, num_repeats=channel)
    input = math_op(input=[input, inv_magnitude], act=None, op='dot',
                    size=size)

    if trans_back:
        input = pd.resize(input=input, size=size)
        input = pd.transpose(input=input,
                             trans_order=[2, 0, 1],
                             height=width,
                             width=channel,
                             channels=height)
    return input
def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
    """
    Match-LSTM step.

    This function performs operations done in one time step.

    Args:
        name: group name; selects the 'left' or 'right' parameter set.
        h_q_all: Question encodings.
        q_proj: q_proj_(left or right), handed to the attention.
        h_p_cur: Current hidden of paragraph encodings: h_i.
                 This is the `REAL` input of the group, like
                 x_t in normal rnn.
        qe_comm: feature input embedded with the "_cw_embedding.w0"
            parameter.
        ee_comm: feature input embedded with the "_eecom_embedding.w0"
            parameter.

    Returns:
        The $h^{r}_{i}$ in the paper.
    """
    conf = mLSTM_crf_config.TrainingConfig()
    direct = 'left' if 'left' in name else 'right'
    out_name = name + '_out_'

    # Fetch the output of the previous time step.
    prev_out = paddle.layer.memory(name=out_name,
                                   size=h_q_all.size,
                                   boot_layer=None)

    # h_p_cur :: Current hidden of paragraph encodings
    # h_q_all :: q wordEmbedding
    # q_proj  :: q_proj_(left or right)
    attended_q = self._attention(direct, h_p_cur, prev_out, h_q_all, q_proj)
    fused = self.fusion_layer(h_p_cur, attended_q)

    # feature embeddings
    comm_initial_std = 1 / math.sqrt(64.0)
    qe_attr = paddle.attr.ParamAttr(name="_cw_embedding.w0",
                                    initial_std=comm_initial_std,
                                    l2_rate=conf.default_l2_rate)
    qe_embedded = paddle.layer.embedding(input=qe_comm,
                                         size=conf.com_vec_dim,
                                         param_attr=qe_attr)
    ee_attr = paddle.attr.ParamAttr(name="_eecom_embedding.w0",
                                    initial_std=comm_initial_std,
                                    l2_rate=conf.default_l2_rate)
    ee_embedded = paddle.layer.embedding(input=ee_comm,
                                         size=conf.com_vec_dim,
                                         param_attr=ee_attr)

    # layer.mixed :: project the combined inputs to the required
    # dimension, preparing the input of the lstm.
    projected_inputs = (
        (fused, 'match_input_z_%s.w0'),
        (qe_embedded, 'match_input_qe_%s.w0'),
        (ee_embedded, 'match_input_ee_%s.w0'),
    )
    with layer.mixed(size=h_q_all.size * 4,
                     act=Act.Tanh(),
                     bias_attr=False) as lstm_input:
        for source, param_name in projected_inputs:
            lstm_input += layer.full_matrix_projection(
                input=source,
                param_attr=Attr.Param(param_name % direct))

    return paddle.networks.lstmemory_unit(
        name=out_name,
        out_memory=prev_out,
        param_attr=Attr.Param('step_lstm_%s.w' % direct),
        input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                        initial_std=0.),
        lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                  initial_std=0.),
        input=lstm_input,
        size=h_q_all.size)
def test_projection(self):
    """Attach each projection type to mixed layers and print the parsed networks.

    Every projection is used twice: once through the ``input`` argument of
    ``layer.mixed`` and once with ``+=`` inside a ``with`` block.
    """
    dense_in = layer.data(name='data2', type=data_type.dense_vector(784))
    word_in = layer.data(
        name='word2', type=data_type.integer_value_sequence(10000))
    hidden100 = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())
    hidden200 = layer.fc(input=dense_in, size=200, act=activation.Sigmoid())

    # full matrix / identity projections
    mixed_direct = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=hidden100),
            layer.full_matrix_projection(input=hidden200)
        ])
    with layer.mixed(size=200) as mixed_scoped:
        mixed_scoped += layer.full_matrix_projection(input=hidden100)
        mixed_scoped += layer.identity_projection(input=hidden200)

    # table projection
    table_proj = layer.table_projection(input=word_in)
    emb_direct = layer.mixed(size=512, input=table_proj)
    with layer.mixed(size=512) as emb_scoped:
        emb_scoped += table_proj

    # scaling projection
    scaling_proj = layer.scaling_projection(input=hidden100)
    scale_direct = layer.mixed(size=100, input=scaling_proj)
    with layer.mixed(size=100) as scale_scoped:
        scale_scoped += scaling_proj

    # dotmul projection
    dotmul_proj = layer.dotmul_projection(input=hidden100)
    dotmul_direct = layer.mixed(size=100, input=dotmul_proj)
    with layer.mixed(size=100) as dotmul_scoped:
        dotmul_scoped += dotmul_proj

    # context projection (built but, as before, not printed)
    context_proj = layer.context_projection(input=hidden100, context_len=5)
    context_direct = layer.mixed(size=500, input=context_proj)
    with layer.mixed(size=500) as context_scoped:
        context_scoped += context_proj

    # conv projection
    conv_proj = layer.conv_projection(
        input=dense_in,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv_direct = layer.mixed(input=conv_proj, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv_scoped:
        conv_scoped += conv_proj

    printed = (mixed_direct, mixed_scoped,
               emb_direct, emb_scoped,
               scale_direct, scale_scoped,
               dotmul_direct, dotmul_scoped,
               conv_direct, conv_scoped)
    for net in printed:
        print(layer.parse_network(net))
def test_projection(self):
    """Attach each projection type to mixed layers and print the parsed networks.

    Every projection is used twice: once through the ``input`` argument of
    ``layer.mixed`` and once with ``+=`` inside a ``with`` block.
    """
    input = layer.data(name='data', type=data_type.dense_vector(784))
    word = layer.data(name='word',
                      type=data_type.integer_value_sequence(10000))
    fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
    fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())

    mixed0 = layer.mixed(
        size=256,
        input=[
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1)
        ])
    with layer.mixed(size=200) as mixed1:
        mixed1 += layer.full_matrix_projection(input=fc0)
        mixed1 += layer.identity_projection(input=fc1)

    table = layer.table_projection(input=word)
    emb0 = layer.mixed(size=512, input=table)
    with layer.mixed(size=512) as emb1:
        emb1 += table

    scale = layer.scaling_projection(input=fc0)
    scale0 = layer.mixed(size=100, input=scale)
    with layer.mixed(size=100) as scale1:
        scale1 += scale

    dotmul = layer.dotmul_projection(input=fc0)
    dotmul0 = layer.mixed(size=100, input=dotmul)
    with layer.mixed(size=100) as dotmul1:
        dotmul1 += dotmul

    # A context projection concatenates `context_len` neighbouring steps,
    # so a 100-wide input with context_len=5 needs a mixed layer of size
    # 500 (the size was 100 before, which does not match the projection).
    context = layer.context_projection(input=fc0, context_len=5)
    context0 = layer.mixed(size=500, input=context)
    with layer.mixed(size=500) as context1:
        context1 += context

    conv = layer.conv_projection(
        input=input,
        filter_size=1,
        num_channels=1,
        num_filters=128,
        stride=1,
        padding=0)
    conv0 = layer.mixed(input=conv, bias_attr=True)
    with layer.mixed(bias_attr=True) as conv1:
        conv1 += conv

    # The context layers are built but, as before, not printed.
    print(layer.parse_network(mixed0))
    print(layer.parse_network(mixed1))
    print(layer.parse_network(emb0))
    print(layer.parse_network(emb1))
    print(layer.parse_network(scale0))
    print(layer.parse_network(scale1))
    print(layer.parse_network(dotmul0))
    print(layer.parse_network(dotmul1))
    print(layer.parse_network(conv0))
    print(layer.parse_network(conv1))