def test_sum(self):
    program = Program()
    with program_guard(program):
        input = layers.data(name="input", shape=[13, 11], dtype='float32')
        out = layers.sum(input)
        self.assertIsNotNone(out)
        print(str(program))
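# Illustrative driver (not from the test above): a minimal, self-contained
# sketch of building and running a layers.sum program with the fluid 1.x
# Executor API. The shapes and random feed data are assumptions for the
# example; layers.data adds an implicit leading batch dimension.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    a = layers.data(name='a', shape=[13, 11], dtype='float32')
    b = layers.data(name='b', shape=[13, 11], dtype='float32')
    out = layers.sum([a, b])  # elementwise sum of a list of tensors

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
feed = {
    'a': np.random.random((2, 13, 11)).astype('float32'),
    'b': np.random.random((2, 13, 11)).astype('float32'),
}
out_val, = exe.run(main, feed=feed, fetch_list=[out])
assert out_val.shape == (2, 13, 11)  # elementwise sum keeps the shape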
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3)
        self.feed_vars.append(
            fluid.data(name="data3", shape=[128, 128], dtype=dtype))

        # subgraph with 2 op nodes
        tmp_0 = layers.sum(
            [self.feed_vars[0], self.feed_vars[1], self.feed_vars[2]])
        tmp_1 = layers.sqrt(tmp_0)
        tmp_2 = layers.mul(tmp_0, self.feed_vars[3])
        # subgraph with 2 op nodes
        tmp_3 = layers.square(layers.sum([tmp_1, tmp_2]))

    self.append_gradients(tmp_3)

    self.num_fused_ops = 4
    self.fetch_list = [tmp_3, self.grad(tmp_0)]
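# As a sanity check on what that graph computes, here is the same forward
# dataflow mirrored in plain numpy. The random inputs are illustrative, and
# layers.mul is modeled as a matrix multiplication (its fluid semantics for
# 2-D inputs).
import numpy as np

a, b, c = (np.random.random((32, 128)).astype('float32') for _ in range(3))
w = np.random.random((128, 128)).astype('float32')  # plays the role of data3

tmp_0 = a + b + c                # layers.sum over three tensors
tmp_1 = np.sqrt(tmp_0)           # layers.sqrt
tmp_2 = tmp_0 @ w                # layers.mul as a matrix multiply
tmp_3 = np.square(tmp_1 + tmp_2)
assert tmp_3.shape == (32, 128)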
def _add_average_apply_op(self, block, param):
    param = block._clone_variable(param)
    grad = block._clone_variable(self._get_accumulator('restore', param))
    sum_1 = block._clone_variable(self._get_accumulator('sum_1', param))
    sum_2 = block._clone_variable(self._get_accumulator('sum_2', param))
    sum_3 = block._clone_variable(self._get_accumulator('sum_3', param))
    num_accumulates = block._clone_variable(
        self._get_accumulator('num_accumulates', param))
    old_num_accumulates = block._clone_variable(
        self._get_accumulator('old_num_accumulates', param))
    # backup param value to grad
    layers.assign(input=param, output=grad)
    # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
    tmp = layers.sum(x=[num_accumulates, old_num_accumulates])
    sum = layers.sum(x=[sum_1, sum_2, sum_3])
    tmp = layers.cast(
        x=tmp, dtype='float32' if self._dtype is None else self._dtype)
    sum = layers.cast(
        x=sum, dtype='float32' if self._dtype is None else self._dtype)
    layers.ops._elementwise_div(x=sum, y=tmp, out=param)
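# The apply step above is plain arithmetic: it averages the accumulated
# parameter sums over the total number of accumulation steps. A small numpy
# check of the same formula, with made-up values:
import numpy as np

# param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
sum_1 = np.array([1.0, 2.0], dtype='float32')
sum_2 = np.array([3.0, 4.0], dtype='float32')
sum_3 = np.array([6.0, 4.0], dtype='float32')
num_accumulates, old_num_accumulates = 3, 2

averaged = (sum_1 + sum_2 + sum_3) / np.float32(num_accumulates +
                                                old_num_accumulates)
assert np.allclose(averaged, [2.0, 2.0])  # (1+3+6)/5 and (2+4+4)/5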
def R2Penalty(fake_img, f):
    # gradient penalty
    fakes = fake_img
    fakes.stop_gradient = False
    fake_logit = f(fakes)

    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0))

    fake_logit = apply_loss_scaling(layers.sum(fake_logit))
    #grads = dygraph.grad(fake_logit, fakes, create_graph=True)
    grads = dygraph.grad(fake_logit, fakes, create_graph=False)
    fake_grads = layers.reshape(grads[0], (fakes.shape[0], -1))
    fake_grads = undo_loss_scaling(fake_grads)
    r2_penalty = layers.reduce_sum(
        layers.elementwise_mul(fake_grads, fake_grads))
    return r2_penalty
def R1Penalty(real_img, f):
    # gradient penalty
    reals = real_img
    reals.stop_gradient = False
    real_logit = f(reals)

    apply_loss_scaling = lambda x: x * layers.exp(x * np.log(2.0, dtype='float32'))
    undo_loss_scaling = lambda x: x * layers.exp(-x * np.log(2.0, dtype='float32'))

    real_logit = apply_loss_scaling(layers.sum(real_logit))
    #grads = dygraph.grad(real_logit, reals, create_graph=True)
    grads = dygraph.grad(real_logit, reals, create_graph=False)
    real_grads = layers.reshape(grads[0], (reals.shape[0], -1))
    real_grads = undo_loss_scaling(real_grads)
    r1_penalty = layers.reduce_sum(
        layers.elementwise_mul(real_grads, real_grads))
    return r1_penalty
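# Both R1 and R2 above reduce to the squared L2 norm of the gradient of the
# logit sum with respect to the images; the apply/undo lambdas scale values
# elementwise by 2**x and 2**-x respectively, a numeric-stability trick from
# the reference StyleGAN code. A numpy check of the core quantity, using a
# hand-made gradient matrix (illustrative values, not from the source):
import numpy as np

grads = np.array([[1.0, -2.0, 0.5],
                  [0.0,  3.0, 1.0]], dtype='float32')  # batch of 2, flattened
penalty = np.sum(grads * grads)  # elementwise_mul + reduce_sum
assert np.isclose(penalty, 1.0 + 4.0 + 0.25 + 0.0 + 9.0 + 1.0)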
with open(data_dir + '/sample.data', 'w') as fout:
    for slot in slots:
        print('%s\t%s' % (slot, ' '.join(
            ['%.2f' % random.random() for i in range(emb_size)])),
            file=fout)

train_program = fluid.Program()
start_program = fluid.Program()
with fluid.program_guard(train_program, start_program):
    bows = []
    for slot in slots:
        bow = fluid.layers.data(name=slot, shape=[emb_size], dtype='float32')
        bows.append(bow)
    bow_sum = layers.sum(bows)
    data_norm = layers.data_norm(input=bow_sum)
    fc1 = layers.fc(input=data_norm, size=8, act='relu')
    #print(fc1.name)  # fc_0.tmp_2
    fc2 = layers.fc(input=fc1, size=1)
    #print(fc2.name)  # fc_1.tmp_1
    sigmoid = layers.sigmoid(fc2)

print('\nall variables:')
for var in train_program.current_block().vars:
    print(var)
print('\nall parameters:')
for param in train_program.current_block().all_parameters():
    print(param.name)
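# A hedged sketch of actually executing the program built above, assuming the
# `slots` and `emb_size` variables from the surrounding script; the random
# one-row feed is illustrative, not part of the original.
import numpy as np

exe = fluid.Executor(fluid.CPUPlace())
exe.run(start_program)
feed = {slot: np.random.random((1, emb_size)).astype('float32')
        for slot in slots}
score, = exe.run(train_program, feed=feed, fetch_list=[sigmoid])
print(score.shape)  # (1, 1): one sigmoid score per example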
def transformer_gat_pgl(gw,
                        feature,
                        hidden_size,
                        name,
                        num_heads=4,
                        attn_drop=0,
                        edge_feature=None,
                        concat=True,
                        is_test=False):
    '''transformer_gat_pgl
    '''

    def send_attention(src_feat, dst_feat, edge_feat):
        if edge_feat is None or not edge_feat:
            output = src_feat["k_h"] * dst_feat["q_h"]
            output = fluid.layers.reduce_sum(output, -1)
            # alpha: batch x h, v: batch x h x feat
            return {"alpha": output, "v": src_feat["v_h"]}
        else:
            edge_feat = edge_feat["edge"]
            edge_feat = fluid.layers.reshape(edge_feat,
                                             [-1, num_heads, hidden_size])
            output = (src_feat["k_h"] + edge_feat) * dst_feat["q_h"]
            output = fluid.layers.reduce_sum(output, -1)
            # alpha: batch x h, v: batch x h x feat
            return {"alpha": output, "v": (src_feat["v_h"] + edge_feat)}

    def reduce_attention(msg):
        alpha = msg["alpha"]  # lod-tensor (batch_size, seq_len, num_heads)
        h = msg["v"]
        alpha = paddle_helper.sequence_softmax(alpha)
        old_h = h
        if attn_drop > 1e-15:
            alpha = fluid.layers.dropout(
                alpha,
                dropout_prob=attn_drop,
                is_test=is_test,
                dropout_implementation="upscale_in_train")
        h = h * alpha
        #h = fluid.layers.lod_reset(h, old_h)
        h = fluid.layers.sequence_pool(h, "sum")
        if concat:
            h = fluid.layers.reshape(h, [-1, num_heads * hidden_size])
        else:
            h = fluid.layers.reduce_mean(h, dim=1)
        return h

    q_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    q_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    q = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_q_weight',
                        param_attr=q_w_attr,
                        bias_attr=q_bias_attr)
    q = q / (hidden_size**0.5)

    k_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    k_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    k = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_k_weight',
                        param_attr=k_w_attr,
                        bias_attr=k_bias_attr)

    v_w_attr = fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer())
    v_bias_attr = fluid.ParamAttr(
        initializer=fluid.initializer.ConstantInitializer(0.0))
    v = fluid.layers.fc(feature,
                        hidden_size * num_heads,
                        name=name + '_v_weight',
                        param_attr=v_w_attr,
                        bias_attr=v_bias_attr)

    reshape_q = fluid.layers.reshape(q, [-1, num_heads, hidden_size])
    reshape_k = fluid.layers.reshape(k, [-1, num_heads, hidden_size])
    reshape_v = fluid.layers.reshape(v, [-1, num_heads, hidden_size])

    if not isinstance(gw, list):
        msg = gw.send(
            send_attention,
            nfeat_list=[("q_h", reshape_q), ("k_h", reshape_k),
                        ("v_h", reshape_v)],
            efeat_list=edge_feature)
        output = gw.recv(msg, reduce_attention)
        return output
    else:
        checkpoints = []
        outputs = []
        for batch_no, (batch_gw, batch_edge_feat) in enumerate(
                zip(gw, edge_feature)):
            msg = batch_gw.send(
                send_attention,
                nfeat_list=[("q_h", reshape_q), ("k_h", reshape_k),
                            ("v_h", reshape_v)],
                efeat_list=batch_edge_feat)
            output = batch_gw.recv(msg, reduce_attention)
            outputs.append(output)
        outputs = L.sum(outputs)
        return outputs, checkpoints
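# The send/recv pair above implements per-head dot-product attention over
# each node's incoming edges. A numpy sketch of the per-edge score and the
# softmax-weighted pooling for a single destination node; the head count,
# feature size, and edge count are illustrative assumptions.
import numpy as np

num_heads, hidden_size, num_edges = 4, 8, 5

q = np.random.random((num_heads, hidden_size)).astype('float32')
k = np.random.random((num_edges, num_heads, hidden_size)).astype('float32')
v = np.random.random((num_edges, num_heads, hidden_size)).astype('float32')

q = q / np.sqrt(hidden_size)                  # the function scales q before send
alpha = (k * q).sum(-1)                       # send_attention: (edges, heads)
alpha = np.exp(alpha) / np.exp(alpha).sum(0)  # sequence_softmax over the edges
h = (v * alpha[..., None]).sum(0)             # reduce_attention: sequence_pool "sum"
h_concat = h.reshape(num_heads * hidden_size) # concat=True output layout
assert h_concat.shape == (num_heads * hidden_size,)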