def decode(self, z):
    """Decode a latent code into a flattened 784-value reconstruction.

    The latent goes through two ELU-activated dense layers, is reshaped to
    a ``(-1, 128, 7, 7)`` feature map, and is upsampled by two transposed
    convolutions. The final sigmoid keeps outputs in ``(0, 1)``.

    Args:
        z: latent tensor accepted by ``self.fc1``.

    Returns:
        Tensor of shape ``(-1, 784)``.
    """
    hidden = F.elu(self.fc1(z))
    hidden = F.elu(self.fc2(hidden))
    feature_map = hidden.view(-1, 128, 7, 7)
    feature_map = F.relu(self.conv_t1(feature_map))
    # Tensor.sigmoid() replaces the deprecated F.sigmoid (same values,
    # no deprecation warning on older PyTorch releases).
    pixels = self.conv_t2(feature_map).sigmoid()
    return pixels.view(-1, 784)
def encode(self, x):
    """Encode a batch of 28x28 single-channel images into (mu, logvar).

    The input is reshaped to ``(-1, 1, 28, 28)``, passed through two
    ELU-activated convolutions, flattened to ``128 * 28 * 28`` features and
    fed to two independent two-layer heads.

    Returns:
        Tuple ``(mu, logvar)``.
    """
    images = x.view(-1, 1, 28, 28)
    features = F.elu(self.conv2(F.elu(self.conv1(images))))
    flat = features.view(-1, 128 * 28 * 28)

    # Mean head.
    mu = self.fc12(F.elu(self.fc11(flat)))
    # Log-variance head.
    logvar = self.fc22(F.elu(self.fc21(flat)))
    return mu, logvar
def forward(self, agent_qs, states):
    """Mix per-agent Q-values into a joint Q-value with state-conditioned weights.

    The hypernetwork weights are passed through ``abs`` so the mixing is
    monotonic in every agent's Q-value.

    Arguments:
        agent_qs: Tensor whose trailing dimension is ``n_agents``; it is
            flattened to ``[-1, 1, n_agents]`` (e.g. ``[B, T, n_agents]``,
            one Q-value per agent).
        states: Tensor flattened to ``[-1, state_dim]``
            (e.g. ``[B, T, state_dim]``).

    Returns:
        Tensor of shape ``[B, -1, 1]`` where ``B = agent_qs.size(0)``.
    """
    bs = agent_qs.size(0)
    states = states.reshape(-1, self.state_dim)
    # reshape (not view) so non-contiguous inputs, e.g. permuted batches,
    # are accepted as well.
    agent_qs = agent_qs.reshape(-1, 1, self.n_agents)

    # First layer: non-negative weights, unconstrained bias.
    w1 = th.abs(self.hyper_w_1(states)).view(-1, self.n_agents, self.embed_dim)
    b1 = self.hyper_b_1(states).view(-1, 1, self.embed_dim)
    hidden = F.elu(th.bmm(agent_qs, w1) + b1)

    # Second layer: non-negative weights.
    w_final = th.abs(self.hyper_w_final(states)).view(-1, self.embed_dim, 1)

    # State-dependent bias.
    v = self.V(states).view(-1, 1, 1)

    y = th.bmm(hidden, w_final) + v
    return y.view(bs, -1, 1)
def forward(self, x):
    """Run the 1-D/2-D convolutional stack followed by the dense classifier.

    Two 1-D convolutions are max-pooled over windows of 160, the result is
    treated as a single-channel 2-D map for two more convolutions, and the
    flattened ``50 * 14 * 11`` features go through three dense layers.

    Dropout now follows ``self.training``. It was previously hard-coded to
    ``training=True``, which kept units being dropped (and outputs random)
    in eval mode.

    Returns:
        Output of ``self.fc7``.
    """
    x = self.conv1(x)
    x = self.elu1(self.bn1(x))
    x = self.elu2(self.bn2(self.conv2(x)))
    x = F.max_pool1d(x, 160)

    # Add a channel axis so the pooled sequence can feed 2-D convolutions.
    x = x.unsqueeze(1)
    x = self.elu3(self.bn3(self.conv3(x)))
    x = F.max_pool2d(x, (3, 3))
    x = self.elu4(self.bn4(self.conv4(x)))
    x = F.max_pool2d(x, (1, 3))

    x = x.view(-1, 50 * 14 * 11)
    x = F.elu(self.fc5(x))
    x = F.dropout(x, training=self.training)
    x = F.elu(self.fc6(x))
    x = F.dropout(x, training=self.training)
    return self.fc7(x)
def forward(self, x):
    """Apply the parent layer, then the configured in-place activation.

    ``self.activation`` selects ``"leaky_relu"`` (using ``self.slope``) or
    ``"elu"``; any other value returns the parent output unchanged.
    """
    out = super().forward(x)
    kind = self.activation
    if kind == "leaky_relu":
        return functional.leaky_relu(out, negative_slope=self.slope, inplace=True)
    if kind == "elu":
        return functional.elu(out, inplace=True)
    return out
def forward(self, x):
    """Run the deep 1-D/2-D convolutional stack and the dense classifier.

    Two 1-D convolutions are max-pooled over windows of 64. The result is
    treated as a single-channel 2-D map for four pairs of convolutions with
    interleaved pooling, and the flattened ``256 * 10 * 8`` features go
    through three dense layers.

    Dropout now follows ``self.training``. It was previously hard-coded to
    ``training=True``, which kept units being dropped (and outputs random)
    in eval mode.

    Returns:
        Output of ``self.fc13``.
    """
    x = self.elu1(self.bn1(self.conv1(x)))
    x = self.elu2(self.bn2(self.conv2(x)))
    x = F.max_pool1d(x, 64)

    # Add a channel axis so the pooled sequence can feed 2-D convolutions.
    x = x.unsqueeze(1)
    x = self.elu3(self.bn3(self.conv3(x)))
    x = self.elu4(self.bn4(self.conv4(x)))
    x = F.max_pool2d(x, (5, 3))

    x = self.elu5(self.bn5(self.conv5(x)))
    x = self.elu6(self.bn6(self.conv6(x)))
    x = F.max_pool2d(x, (1, 2))

    x = self.elu7(self.bn7(self.conv7(x)))
    x = self.elu8(self.bn8(self.conv8(x)))
    x = F.max_pool2d(x, (1, 2))

    x = self.elu9(self.bn9(self.conv9(x)))
    x = self.elu10(self.bn10(self.conv10(x)))
    x = F.max_pool2d(x, (1, 2))

    x = x.view(-1, 256 * 10 * 8)
    x = F.elu(self.fc11(x))
    x = F.dropout(x, training=self.training)
    x = F.elu(self.fc12(x))
    x = F.dropout(x, training=self.training)
    return self.fc13(x)
def forward(self, x, cond_blocks=None, sample=False):
    """Run the up pass and down pass of the two-stream network.

    A constant channel of ones is appended to ``x`` before the first
    layers (as in the reference TF implementation). Outside sampling the
    padding is cached on ``self.init_padding``. The cache is now rebuilt
    whenever the input's batch size, spatial size, dtype or device changes,
    so a smaller final batch no longer breaks the concatenation.

    Args:
        x: input with at least four dimensions; dims 0, 2 and 3 size the
            padding channel.
        cond_blocks: optional conditioning forwarded to every up/down layer.
        sample: when True, use a fresh padding tensor and leave the cache
            untouched.

    Returns:
        Output of ``self.nin_out`` applied to the ELU of the final ``ul``
        stream.
    """
    pad_shape = (x.size(0), 1, x.size(2), x.size(3))
    if sample:
        padding = torch.ones(pad_shape, dtype=x.dtype, device=x.device)
    else:
        cached = self.init_padding
        stale = (
            cached is None
            or tuple(cached.shape) != pad_shape
            or cached.dtype != x.dtype
            or cached.device != x.device
        )
        if stale:
            cached = torch.ones(pad_shape, dtype=x.dtype, device=x.device)
            self.init_padding = cached
        padding = cached
    x = torch.cat((x, padding), 1)

    ### UP PASS ###
    u_list = [self.u_init(x)]
    ul_list = [self.ul_init[0](x) + self.ul_init[1](x)]
    for i in range(3):
        # resnet block
        u_out, ul_out = self.up_layers[i](u_list[-1], ul_list[-1], cond_blocks=cond_blocks)
        u_list += u_out
        ul_list += ul_out

        if i != 2:
            # downscale (only twice)
            u_list += [self.downsize_u_stream[i](u_list[-1])]
            ul_list += [self.downsize_ul_stream[i](ul_list[-1])]

    ### DOWN PASS ###
    u = u_list.pop()
    ul = ul_list.pop()
    for i in range(3):
        # resnet block; consumes skip connections from u_list / ul_list
        u, ul = self.down_layers[i](u, ul, u_list, ul_list, cond_blocks=cond_blocks)

        # upscale (only twice)
        if i != 2:
            u = self.upsize_u_stream[i](u)
            ul = self.upsize_ul_stream[i](ul)

    x_out = self.nin_out(F.elu(ul))

    # Every skip connection must have been consumed by the down pass.
    # (The old assert used pdb.set_trace() as its message, which dropped
    # into the debugger on failure.)
    assert len(u_list) == len(ul_list) == 0, (
        "unconsumed skip connections: %d u, %d ul" % (len(u_list), len(ul_list))
    )
    return x_out
def forward(self, input, adj):
    """Single-head graph attention over a dense adjacency matrix.

    Node features are projected with ``self.W``, every ordered node pair is
    scored with ``self.a`` + LeakyReLU, scores of non-edges (``adj <= 0``)
    are replaced by a large negative value before the row-wise softmax, and
    the attention-weighted sum of projected features is returned (ELU
    activated when ``self.concat`` is truthy).
    """
    projected = torch.mm(input, self.W)
    num_nodes = projected.size()[0]

    # Row i * N + j of the pair matrix is [h_i || h_j].
    sources = projected.repeat(1, num_nodes).view(num_nodes * num_nodes, -1)
    targets = projected.repeat(num_nodes, 1)
    pairs = torch.cat([sources, targets], dim=1).view(num_nodes, -1, 2 * self.out_features)

    scores = self.leakyrelu(torch.matmul(pairs, self.a).squeeze(2))
    blocked = -9e15 * torch.ones_like(scores)
    weights = F.softmax(torch.where(adj > 0, scores, blocked), dim=1)
    weights = F.dropout(weights, self.dropout, training=self.training)

    aggregated = torch.matmul(weights, projected)
    if not self.concat:
        return aggregated
    return F.elu(aggregated)
def forward(ctx, input, weights, bias, old_h, old_cell):
    """Forward pass of the ELU-candidate recurrent cell (autograd Function).

    All three gates are computed with one linear map over ``[old_h, input]``
    and split into input gate, output gate and candidate cell. The
    intermediates needed by the backward pass are stored on ``ctx``.

    Returns:
        Tuple ``(new_h, new_cell)``.
    """
    X = torch.cat([old_h, input], dim=1)
    gate_weights = F.linear(X, weights, bias)
    gates = gate_weights.chunk(3, dim=1)

    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
    input_gate = torch.sigmoid(gates[0])
    output_gate = torch.sigmoid(gates[1])
    candidate_cell = F.elu(gates[2])

    new_cell = old_cell + candidate_cell * input_gate
    new_h = torch.tanh(new_cell) * output_gate

    ctx.save_for_backward(X, weights, input_gate, output_gate, old_cell,
                          new_cell, candidate_cell, gate_weights)
    return new_h, new_cell
def forward(self, input, state):
    """Advance the ELU-candidate recurrent cell by one step.

    Args:
        input: tensor concatenated with the old hidden state along dim 1.
        state: tuple ``(old_h, old_cell)``.

    Returns:
        Tuple ``(new_h, new_cell)``.
    """
    old_h, old_cell = state
    X = torch.cat([old_h, input], dim=1)

    # Compute the input, output and candidate cell gates with one MM.
    gate_weights = F.linear(X, self.weights, self.bias)
    # Split the combined gate weight matrix into its components.
    gates = gate_weights.chunk(3, dim=1)

    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
    input_gate = torch.sigmoid(gates[0])
    output_gate = torch.sigmoid(gates[1])
    # Here we use an ELU instead of the usual tanh.
    candidate_cell = F.elu(gates[2])

    # Compute the new cell state.
    new_cell = old_cell + candidate_cell * input_gate
    # Compute the new hidden state and output.
    new_h = torch.tanh(new_cell) * output_gate

    return new_h, new_cell
def lovasz_hinge_flat(logits, labels):
    r"""Binary Lovasz hinge loss on flattened predictions.

    Args:
        logits: [P] tensor, logits at each prediction
            (between -infinity and +infinity).
        labels: [P] tensor, binary ground truth labels (0 or 1).

    Returns:
        Scalar loss tensor. With no labels (only void pixels) the result is
        ``logits.sum() * 0`` so that gradients are zero.
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.

    signs = 2. * labels.float() - 1.
    errors = 1. - logits * signs
    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    # NOTE(review): the sorted errors pass through ELU here (the common
    # formulation uses ReLU) -- kept as in the original; confirm intent.
    return torch.dot(F.elu(errors_sorted), grad)
def forward(self, input):
    """Concatenate the conv and pool branches, normalise, apply ELU.

    Both branches see the same input and are joined along dim 1.
    """
    branches = (self.conv(input), self.pool(input))
    merged = torch.cat(list(branches), 1)
    normalised = self.bn(merged)
    return F.elu(normalised)
def forward(self, x, edge_index, up_transform):
    """Un-pool ``x`` with ``up_transform``, then run it through ``self.blocks``.

    Each block is called as ``layer(out, edge_index)`` and followed by an
    in-place ELU.
    """
    out = Pool(x, up_transform)
    # NOTE(review): the source was whitespace-collapsed; the ELU is assumed
    # to sit inside the loop (applied after every block) -- confirm.
    for layer in self.blocks:
        out = layer(out, edge_index)
        out = F.elu(out, inplace=True)
    return out
def forward(self,  # type: ignore
            words: Dict[str, torch.LongTensor],
            pos_tags: torch.LongTensor,
            head_tags: torch.LongTensor = None,
            head_indices: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
    # pylint: disable=arguments-differ
    """
    Embed and encode the sentence, score every head/child arc pair, decode
    heads and head tags, and (when gold annotations are given) compute the
    loss and update the attachment-score metric.

    Parameters
    ----------
    words : Dict[str, torch.LongTensor], required
        The output of ``TextField.as_array()``, which should typically be passed directly to a
        ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
        tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is:
        ``{"tokens": Tensor(batch_size, sequence_length)}``. This dictionary will have the same
        keys as were used for the ``TokenIndexers`` when you created the ``TextField``
        representing your sequence.  The dictionary is designed to be passed directly to a
        ``TextFieldEmbedder``, which knows how to combine different word representations into a
        single vector per token in your input.
    pos_tags : ``torch.LongTensor``, required.
        The output of a ``SequenceLabelField`` containing POS tags.
        POS tags are required regardless of whether they are used in the model,
        because they are used to filter the evaluation metric to only consider
        heads of words which are not punctuation.
    head_tags : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer gold class labels for the arcs
        in the dependency parse. Has shape ``(batch_size, sequence_length)``.
    head_indices : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer indices denoting the parent of every
        word in the dependency parse. Has shape ``(batch_size, sequence_length)``.

    Returns
    -------
    An output dictionary consisting of:
    loss : ``torch.FloatTensor``, optional
        A scalar loss to be optimised (``arc_loss + tag_loss``); ``None``
        unless both ``head_indices`` and ``head_tags`` are given.
    arc_loss : ``torch.FloatTensor``, optional
        The loss contribution from the unlabeled arcs.
    tag_loss : ``torch.FloatTensor``, optional
        The loss contribution from predicting the dependency
        tags for the gold arcs.
    heads : ``torch.FloatTensor``
        The predicted head indices for each word. A tensor
        of shape (batch_size, sequence_length).
    head_tags : ``torch.FloatTensor``
        The predicted head types for each arc. A tensor
        of shape (batch_size, sequence_length).
    mask : ``torch.LongTensor``
        A mask denoting the padded elements in the batch.
    """
    embedded_text_input = self.text_field_embedder(words)
    if pos_tags is not None and self._pos_tag_embedding is not None:
        embedded_pos_tags = self._pos_tag_embedding(pos_tags)
        embedded_text_input = torch.cat([embedded_text_input, embedded_pos_tags], -1)
    elif self._pos_tag_embedding is not None:
        raise ConfigurationError("Model uses a POS embedding, but no POS tags were passed.")

    mask = get_text_field_mask(words)
    float_mask = mask.float()
    embedded_text_input = self._input_dropout(embedded_text_input)
    encoded_text = self.encoder(embedded_text_input, mask)

    encoded_text = self._dropout(encoded_text)

    # shape (batch_size, sequence_length, arc_representation_dim)
    head_arc_representation = self._dropout(F.elu(self.head_arc_projection(encoded_text)))
    child_arc_representation = self._dropout(F.elu(self.child_arc_projection(encoded_text)))

    # shape (batch_size, sequence_length, tag_representation_dim)
    head_tag_representation = self._dropout(F.elu(self.head_tag_projection(encoded_text)))
    child_tag_representation = self._dropout(F.elu(self.child_tag_projection(encoded_text)))
    # shape (batch_size, sequence_length, sequence_length)
    attended_arcs = self.arc_attention(head_arc_representation,
                                       child_arc_representation)

    # Push the scores of arcs that touch padded positions (as head or as
    # child) towards minus infinity.
    minus_inf = -1e8
    minus_mask = (1 - float_mask) * minus_inf
    attended_arcs = attended_arcs + minus_mask.unsqueeze(2) + minus_mask.unsqueeze(1)

    # Greedy decoding is used during training, and also at validation time
    # unless MST decoding was requested.
    if self.training or not self.use_mst_decoding_for_validation:
        predicted_heads, predicted_head_tags = self._greedy_decode(head_tag_representation,
                                                                   child_tag_representation,
                                                                   attended_arcs,
                                                                   mask)
    else:
        predicted_heads, predicted_head_tags = self._mst_decode(head_tag_representation,
                                                                child_tag_representation,
                                                                attended_arcs,
                                                                mask)
    if head_indices is not None and head_tags is not None:

        arc_nll, tag_nll = self._construct_loss(head_tag_representation=head_tag_representation,
                                                child_tag_representation=child_tag_representation,
                                                attended_arcs=attended_arcs,
                                                head_indices=head_indices,
                                                head_tags=head_tags,
                                                mask=mask)
        loss = arc_nll + tag_nll

        evaluation_mask = self._get_mask_for_eval(mask, pos_tags)
        # We calculate attachment scores for the whole sentence
        # but excluding the symbolic ROOT token at the start,
        # which is why we start from the second element in the sequence.
        self._attachment_scores(predicted_heads[:, 1:],
                                predicted_head_tags[:, 1:],
                                head_indices[:, 1:],
                                head_tags[:, 1:],
                                evaluation_mask[:, 1:])
    else:
        arc_nll = None
        tag_nll = None
        loss = None

    output_dict = {
            "heads": predicted_heads,
            "head_tags": predicted_head_tags,
            "arc_loss": arc_nll,
            "tag_loss": tag_nll,
            "loss": loss,
            "mask": mask
            }

    return output_dict
def selu(x):
    """Scaled exponential linear unit.

    Computes ``scale * x`` for ``x >= 0`` and
    ``scale * alpha * (exp(x) - 1)`` otherwise. ``F.elu(x, alpha)`` already
    implements that piecewise form, so no separate ``where`` helper is
    needed.
    """
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * F.elu(x, alpha)
def forward(self, features, batch_size, size_board):
    """Dueling Q-network forward pass.

    The features are viewed as ``(batch_size, 16, size_board, size_board)``
    and fed to two first-level convolutions. Each first-level output is fed
    to both second-level convolutions. All six feature maps are flattened,
    concatenated and passed to the value and advantage heads.

    Returns:
        ``V(s) + (A(s, a) - mean_a A(s, a))``.
    """
    board = features.view(batch_size, 16, size_board, size_board)

    first_a = F.elu(self.__cnn_1(board))
    first_b = F.elu(self.__cnn_2(board))
    second_1a = F.elu(self.__cnn_1_2(first_a))
    second_1b = F.elu(self.__cnn_1_2(first_b))
    second_2a = F.elu(self.__cnn_2_2(first_a))
    second_2b = F.elu(self.__cnn_2_2(first_b))

    # Flatten everything but the batch dimension; order matches the
    # concatenation order expected by the dense heads.
    feature_maps = (first_a, first_b, second_1a, second_1b, second_2a, second_2b)
    flattened = tuple(
        fmap.view(batch_size, fmap.shape[1] * fmap.shape[2] * fmap.shape[3])
        for fmap in feature_maps
    )
    hidden = torch.cat(flattened, 1)

    state_value = self.__dense_value_2(F.elu(self.__dense_value_1(hidden)))
    advantages = self.__dense_advantage_2(F.elu(self.__dense_advantage_1(hidden)))

    # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum A(s,a'))
    mean_advantage = torch.mean(advantages, dim=1, keepdim=True)
    return state_value + (advantages - mean_advantage)
# Scratch cell: plot histograms of the stored activations in ``dmlp.h``,
# then estimate the variance of a random projection of ELU-activated
# standard-normal inputs.
# NOTE(review): the source was whitespace-collapsed; loop extents below are
# the most plausible reconstruction -- confirm.
plt.clf()
for i in range(len(dmlp.h)):
    # One subplot row per entry of dmlp.h.
    plt.subplot(len(dmlp.h),1,i+1)
    plt.hist(dmlp.h[i].data.numpy(), bins=60)
    plt.ylabel(i)
    plt.xlim(-4,4)
plt.tight_layout()

N = 1000
d = []
for _ in range(1000):
    y = np.random.randn(N)
    x = F.elu(Variable(torch.Tensor(y))).data.numpy()
    # Weights drawn with standard deviation sqrt(2 / N).
    w = np.random.randn(N) * np.sqrt(2/(N*(1+.00)))
    d.append(np.dot(x,w))
# Bare expressions: their values are only displayed in an interactive session.
np.array(d).var()
dmlp.fc[3].bias

ndist = simple_vae.ReparamNormal_MuLogvar()
ndist(torch.zeros(10,2))
ndist.condition(torch.zeros(10))
def forward(self, atom_list, bond_list, atom_degree_list, bond_degree_list, atom_mask):
    """Attention-based message passing over atoms, then a molecule readout.

    Neighbour slots whose index equals ``mol_length - 1`` are treated as
    padding: they are pushed to a large negative score before the softmax
    and zeroed afterwards.

    The masks are now built from one boolean comparison and cast to the
    dtype/device of the features. The old code rewrote an integer tensor in
    place in two steps (wrong when ``mol_length - 1`` is 0 or 1) and cast to
    ``torch.cuda.FloatTensor``, which fails on CPU-only machines.

    Args:
        atom_list: ``(batch, mol_length, num_atom_feat)`` atom features.
        bond_list: per-molecule bond features, indexed by ``bond_degree_list``.
        atom_degree_list: integer neighbour-atom indices per atom.
        bond_degree_list: integer neighbour-bond indices per atom.
        atom_mask: ``(batch, mol_length)`` mask; positions equal to 0 are
            excluded from the molecule-level attention.

    Returns:
        Tuple ``(atom_feature_viz, atom_attention_weight_viz,
        mol_feature_viz, mol_feature_unbounded_viz,
        mol_attention_weight_viz, mol_prediction)``.
    """
    atom_mask = atom_mask.unsqueeze(2)
    batch_size, mol_length, num_atom_feat = atom_list.size()

    atom_feature_preact = self.atom_fc(atom_list)
    if self.do_bn:
        # BatchNorm expects the feature axis at dim 1.
        atom_feature_preact = self.bns[0](
            atom_feature_preact.transpose(1, 2)).transpose(1, 2)
    atom_feature = F.leaky_relu(atom_feature_preact)

    atom_feature_viz = []
    atom_feature_viz.append(self.atom_fc(atom_list))

    # Gather neighbour bond / atom features per molecule and stack to 4-D.
    bond_neighbor = torch.stack(
        [bond_list[i][bond_degree_list[i]] for i in range(batch_size)], dim=0)
    atom_neighbor = torch.stack(
        [atom_list[i][atom_degree_list[i]] for i in range(batch_size)], dim=0)
    neighbor_feature = torch.cat([atom_neighbor, bond_neighbor], dim=-1)

    neighbor_feature_preact = self.neighbor_fc(neighbor_feature)
    if self.do_bn:
        neighbor_feature_preact = self.bns[1](
            neighbor_feature_preact.transpose(1, 3)).transpose(1, 3)
    neighbor_feature = F.leaky_relu(neighbor_feature_preact)

    # Masks that eliminate the influence of blank (padding) neighbours:
    # attend_mask is 1 for real neighbours and 0 for padding; softmax_mask
    # adds -9e8 to padding scores so their softmax weight is ~0.
    real_neighbor = (atom_degree_list != mol_length - 1).unsqueeze(-1)
    attend_mask = real_neighbor.to(device=neighbor_feature.device,
                                   dtype=neighbor_feature.dtype)
    softmax_mask = (1.0 - attend_mask) * -9e8

    batch_size, mol_length, max_neighbor_num, fingerprint_dim = neighbor_feature.shape
    atom_feature_expand = atom_feature.unsqueeze(-2).expand(
        batch_size, mol_length, max_neighbor_num, fingerprint_dim)
    feature_attention = torch.cat([atom_feature_expand, neighbor_feature], dim=-1)

    align_score = self.dropout(F.leaky_relu(self.align[0](feature_attention)))
    align_score = align_score + softmax_mask
    attention_weight = F.softmax(align_score, -2)
    attention_weight = attention_weight * attend_mask

    atom_attention_weight_viz = []
    atom_attention_weight_viz.append(attention_weight)

    neighbor_feature_transform = self.attend[0](self.dropout(neighbor_feature))
    context = torch.sum(torch.mul(attention_weight, neighbor_feature_transform), -2)
    context = F.elu(context)

    context_reshape = context.view(batch_size * mol_length, fingerprint_dim)
    atom_feature_reshape = atom_feature.view(batch_size * mol_length, fingerprint_dim)
    atom_feature_reshape = self.GRUCell[0](context_reshape, atom_feature_reshape)
    atom_feature = atom_feature_reshape.view(batch_size, mol_length, fingerprint_dim)

    # do nonlinearity
    activated_features = F.relu(atom_feature)
    atom_feature_viz.append(activated_features)

    for d in range(self.radius - 1):
        # neighbor_feature is a list of 3D tensors, so stack them into a
        # 4D tensor first
        neighbor_feature = torch.stack(
            [activated_features[i][atom_degree_list[i]] for i in range(batch_size)],
            dim=0)

        atom_feature_expand = activated_features.unsqueeze(-2).expand(
            batch_size, mol_length, max_neighbor_num, fingerprint_dim)
        feature_attention = torch.cat([atom_feature_expand, neighbor_feature], dim=-1)

        align_score = self.dropout(
            F.leaky_relu(self.align[d + 1](feature_attention)))
        align_score = align_score + softmax_mask
        attention_weight = F.softmax(align_score, -2)
        attention_weight = attention_weight * attend_mask
        atom_attention_weight_viz.append(attention_weight)

        neighbor_feature_transform = self.attend[d + 1](self.dropout(neighbor_feature))
        context = torch.sum(torch.mul(attention_weight, neighbor_feature_transform), -2)
        context = F.elu(context)

        context_reshape = context.view(batch_size * mol_length, fingerprint_dim)
        # The GRU state carries over from the previous radius step.
        atom_feature_reshape = self.GRUCell[d + 1](context_reshape, atom_feature_reshape)
        atom_feature = atom_feature_reshape.view(batch_size, mol_length, fingerprint_dim)

        # do nonlinearity
        activated_features = F.relu(atom_feature)
        atom_feature_viz.append(activated_features)

    # when the descriptor value are unbounded, like partial charge or LogP
    mol_feature_unbounded_viz = []
    mol_feature_unbounded_viz.append(torch.sum(atom_feature * atom_mask, dim=-2))

    mol_feature = torch.sum(activated_features * atom_mask, dim=-2)
    if self.do_bn:
        mol_feature = self.bns[2](mol_feature)
    activated_features_mol = F.relu(mol_feature)

    # when the descriptor value has lower or upper bounds
    mol_feature_viz = []
    mol_feature_viz.append(mol_feature)

    mol_attention_weight_viz = []
    mol_softmax_mask = atom_mask.clone()
    mol_softmax_mask[mol_softmax_mask == 0] = -9e8
    mol_softmax_mask[mol_softmax_mask == 1] = 0
    mol_softmax_mask = mol_softmax_mask.to(device=activated_features.device,
                                           dtype=activated_features.dtype)

    for t in range(self.T):
        mol_prediction_expand = activated_features_mol.unsqueeze(-2).expand(
            batch_size, mol_length, fingerprint_dim)
        mol_align = torch.cat([mol_prediction_expand, activated_features], dim=-1)
        mol_align_score = self.dropout(F.leaky_relu(self.mol_align(mol_align)))
        mol_align_score = mol_align_score + mol_softmax_mask
        mol_attention_weight = F.softmax(mol_align_score, -2)
        mol_attention_weight = mol_attention_weight * atom_mask
        mol_attention_weight_viz.append(mol_attention_weight)

        activated_features_transform = self.mol_attend(self.dropout(activated_features))
        mol_context = torch.sum(
            torch.mul(mol_attention_weight, activated_features_transform), -2)
        mol_context = F.elu(mol_context)
        mol_feature = self.mol_GRUCell(mol_context, mol_feature)
        mol_feature_unbounded_viz.append(mol_feature)

        # do nonlinearity
        activated_features_mol = F.relu(mol_feature)
        mol_feature_viz.append(activated_features_mol)

    mol_prediction = self.output(self.dropout(mol_feature))

    return (atom_feature_viz, atom_attention_weight_viz, mol_feature_viz,
            mol_feature_unbounded_viz, mol_attention_weight_viz, mol_prediction)
def forward(self, x):
    """Return a probability distribution over dim 1.

    One ELU-activated hidden layer followed by a softmax output layer.
    """
    hidden = F.elu(self.fc1(x))
    logits = self.fc2(hidden)
    return F.softmax(logits, dim=1)
def forward(self, x):
    """Return the value estimate with all size-1 dimensions squeezed out."""
    hidden = F.elu(self.fc1(x))
    return self.fc2(hidden).squeeze()
def forward(self, x, edge_index):
    """Two graph convolutions, each preceded by dropout and followed by ELU.

    Returns:
        Log-softmax over dim 1 of the second layer's activations.
    """
    hidden = x
    for conv in (self.conv1, self.conv2):
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        hidden = F.elu(conv(hidden, edge_index))
    return F.log_softmax(hidden, dim=1)
def concat_elu(x):
    """Like concatenated ReLU (http://arxiv.org/abs/1603.05201), but with ELU.

    ``x`` and ``-x`` are concatenated along the third-from-last axis (the
    channel axis in PyTorch ordering) before the ELU is applied.
    """
    channel_axis = len(x.size()) - 3
    mirrored = torch.cat([x, -x], dim=channel_axis)
    return F.elu(mirrored)
def forward(self, x, edge_index):
    """Three graph convolutions, each summed with a linear skip branch.

    ELU follows the first two layers; the third layer's sum is returned
    without activation.
    """
    hidden = F.elu(self.conv1(x, edge_index) + self.lin1(x))
    hidden = F.elu(self.conv2(hidden, edge_index) + self.lin2(hidden))
    return self.conv3(hidden, edge_index) + self.lin3(hidden)
def forward(self):
    """Two-layer graph network over the module-level ``data`` object.

    Dropout (p=0.6) precedes each convolution and ELU follows the first.

    Returns:
        Log-softmax over dim 1 of the second convolution's output.
    """
    edges = data.edge_index
    hidden = F.dropout(data.x, p=0.6, training=self.training)
    hidden = F.elu(self.conv1(hidden, edges))
    hidden = F.dropout(hidden, p=0.6, training=self.training)
    logits = self.conv2(hidden, edges)
    return F.log_softmax(logits, dim=1)
def _get_gcn_output(self, input_word_orig, input_word, input_char, adjs, target=None, mask=None, length=None, hx=None, leading_symbolic=0, return_edge=False, show_net=False, graph_types=['coref']):
    """Encode a document with word encoder -> RNN -> GCN -> tag-space projection.

    Sentences are unpacked from the document-level input and encoded, then
    flattened to a word sequence for the graph network and mapped back to
    sentence layout. When ``self.mask_singles`` is set, words with no edge
    in the first graph bypass the GCN and are added back afterwards.

    Returns:
        Tuple ``(output, target, sent_mask, length, adj_loss)``; ``adj_loss``
        is 0 unless ``"wonderful"`` is in ``graph_types``.

    NOTE(review): the source was whitespace-collapsed, so block nesting
    below is a best-effort reconstruction -- confirm against the repository.
    NOTE(review): ``graph_types`` has a mutable default; it is not mutated
    here.
    """
    if "wonderful" in graph_types:
        # The last adjacency slice is presumably the gold graph -- verify.
        gold_adj = adjs[:, -1, :].clone()
        gnn_adjs = adjs[:, :-1, :]
    # NOTE(review): as reconstructed, ``gnn_adjs`` is unbound when
    # "wonderful" is not in graph_types (including the default) but is used
    # below -- confirm whether an else-branch is missing.

    mask_singles = self.mask_singles

    assert len(input_word.size()) == 3, "the input is not document level"
    # input_word is the packed sents [n_sent, sent_len]
    input_word, input_char, target, sent_mask, length, doc_n_sent = self._doc2sent(
        input_word, input_char, target, show_net=show_net)

    # input: [n_sent, sent_len, enc_dim]
    input, length = self._get_word_enc(input_word_orig, input_word, input_char, mask=sent_mask, length=length, show_net=show_net)

    # output from rnn [n_sent, sent_len, enc_dim]
    sent_output, hn = self._get_rnn_enc(input, length, sent_mask, hx, show_net=show_net)

    # flatten sents to words [batch, n_word, dim]
    # mask for packed_doc [batch, n_word]
    output, doc_word_mask = self._sent2word(sent_output, sent_mask, doc_n_sent, show_net=show_net)

    # enc for non-repetitive words
    if mask_singles:
        if show_net:
            print("[Net] Block singles from here.")
        coref_ix = 0
        # single is 1, repetitive word is 0
        single_mask = gnn_adjs[:, coref_ix].sum(-1, keepdim=True).eq(0).float()
        sent_single_mask = self._word2sent(single_mask, doc_word_mask, length, sent_mask, show_net=show_net)
        singles = sent_output * sent_single_mask.expand_as(sent_output)
        if self.tag_space:
            # [batch, length, tag_space]
            singles = self.dropout_tag(
                F.elu(self.lstm_to_tag_space(singles)))
            if show_net:
                print("singles -> self.lstm_to_tag_space")
            # Re-apply the mask after the projection.
            singles = singles * sent_single_mask.expand_as(singles)

        # [batch, n_word, d_graph]
        output = output * (1 - single_mask).expand_as(output)

    # go thru gcn [batch, n_word, d_graph]
    h_gcn, *_ = self.gcn(output, gnn_adjs, doc_word_mask, return_edge=return_edge, show_net=show_net)

    output = self._word2sent(h_gcn, doc_word_mask, length, sent_mask, show_net=show_net)

    if self.post_lstm:
        # output from rnn [n_sent, sent_len, enc_dim]
        output, hn = self._get_rnn_enc2(output, length, sent_mask, hx, show_net=show_net)

    # output from rnn_out [batch, length, tag_space]
    output = self.dropout_tag(F.elu(self.to_tag_space(output)))
    if show_net:
        print("<")
        print("[Net] to_tag")
        show_var(["self.to_tag_space"])
        show_var(["F.elu"])
        show_var(["self.dropout_tag"])
        print(">")

    if mask_singles:
        output = output * (1 - sent_single_mask).expand_as(output)
        output = output + singles  # repetitive word enc + single word enc
        if show_net:
            print("[Net] output + singles")

    if length is not None:
        # Trim targets to the longest sentence length.
        max_len = length.max()
        target = target[:, :max_len]

    adj_loss = self._adj_loss(
        gnn_adjs[:, 0, :], gold_adj) if "wonderful" in graph_types else 0

    return output, target, sent_mask, length, adj_loss
def forward(self, x):
    """Two-layer network with dropout (p=0.6) before each layer.

    ELU follows the first layer.

    Returns:
        Log-softmax over dim 1 of the second layer's output.
    """
    hidden = F.dropout(x, p=0.6, training=self.training)
    hidden = F.elu(self.conv1(hidden))
    hidden = F.dropout(hidden, p=0.6, training=self.training)
    logits = self.conv2(hidden)
    return F.log_softmax(logits, dim=1)
def forward(self, x, down_transform):
    """Convolve with ELU activation, then pool with ``down_transform``."""
    activated = F.elu(self.conv(x))
    return Pool(activated, down_transform)
def decode(self, z):
    """Decode a latent code through three dense layers.

    Layer sizes per the original comments: 20 -> 200 -> 450 -> 784
    (TODO confirm against the layer definitions).

    Returns:
        Sigmoid-activated output of ``self.fc4``, in ``(0, 1)``.
    """
    h33 = F.elu(self.fc3(z))      # 20-200
    h3 = F.elu(self.fc33(h33))    # 200-450
    # Tensor.sigmoid() replaces the deprecated F.sigmoid.
    return self.fc4(h3).sigmoid()  # 450-784
def forward(self, x, up_transform):
    """Un-pool with ``up_transform``, then convolve with ELU activation."""
    upsampled = Pool(x, up_transform)
    return F.elu(self.conv(upsampled))
def selu(x):
    """Scaled exponential linear unit: ``scale * elu(x, alpha)``."""
    selu_alpha = 1.6732632423543772848170429916717
    selu_scale = 1.0507009873554804934193349852946
    activated = F.elu(x, selu_alpha)
    return selu_scale * activated
def forward(self, x):
    """Encoder/decoder pass with skip connections and a sigmoid output.

    Three pooled encoder stages feed a bottleneck. The decoder upsamples by
    2 (nearest neighbour) three times, concatenating the matching encoder
    activation along dim 1 each time. Intermediate tensors are kept on the
    instance as ``li0``..``li3``, ``lo1``, ``la1`` and ``out``.
    """
    def upsample(t):
        return F.interpolate(t, scale_factor=2, mode="nearest")

    # ---- encoder ----
    h = F.elu(self.conv0a(x))
    h = F.elu(self.bn0a(self.conv0b(h)))
    self.li0 = h
    h = self.ma1(h)

    h = F.elu(self.conv1a(h))
    h = F.elu(self.bn1a(self.conv1b(h)))
    self.li1 = h
    h = self.ma2(h)

    h = F.elu(self.conv2a(h))
    h = F.elu(self.bn2a(self.conv2b(h)))
    self.li2 = h
    h = self.ma3(h)

    # ---- bottleneck ----
    h = F.elu(self.conv3a(h))
    h = F.elu(self.bn3a(self.conv3b(h)))
    self.li3 = h

    # ---- decoder ----
    h = F.elu(self.conv2u(upsample(h)))
    h = torch.cat([h, self.li2], 1)
    h = F.elu(self.bn2u(self.conv2v(h)))
    self.lo1 = h

    h = F.elu(self.conv1u(upsample(h)))
    h = torch.cat([h, self.li1], 1)
    h = F.elu(self.bn1u(self.conv1v(h)))

    h = upsample(h)
    self.la1 = h
    h = F.elu(self.conv0u(h))
    h = torch.cat([h, self.li0], 1)
    h = F.elu(self.bn0u(self.conv0v(h)))

    h = self.conv1x(h)
    self.out = h
    return torch.sigmoid(h)
def forward(self, x):
    """Three-layer network with ELU hidden activations and a sigmoid output.

    Returns:
        Sigmoid of ``self.map3``'s output, in ``(0, 1)``.
    """
    x = F.elu(self.map1(x))
    x = F.elu(self.map2(x))
    # Tensor.sigmoid() replaces the deprecated F.sigmoid.
    return self.map3(x).sigmoid()
def forward(self, input):
    """Three dense layers; ELU after the first two, linear output."""
    hidden = input
    for layer in (self.fc1, self.fc2):
        hidden = F.elu(layer(hidden))
    return self.fc3(hidden)
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from itertools import chain  # merge generators

##################################################################
## Initialization
print(nn.init.calculate_gain('relu'))  # 1.4142135623730951
print(nn.init.calculate_gain('leaky_relu'))  # 1.4141428569978354
w = torch.empty(3, 5); print(w)  # Returns a tensor filled with uninitialized data
print(nn.init.xavier_uniform_(w))
print(nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')))

##################################################################
## Activation
x = torch.Tensor([1]); print(x)
print(F.elu(torch.Tensor([1, 0, -1])))  # tensor([ 1.0000, 0.0000, -0.6321]); ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1)); alpha=1.0
print(F.relu(torch.Tensor([1, 0, -1])))  # tensor([ 1., 0., 0.])
print(torch.sigmoid(x), F.softmax(x, dim=-1))  # tensor([0.7311]) tensor([1.]); softmax over a single element always outputs 1
print(torch.sigmoid(torch.Tensor([0, 1, 2, 3])))  # tensor([0.5000, 0.7311, 0.8808, 0.9526])
print(torch.tanh(torch.Tensor([0, 1, 2, 3])))  # tensor([0.0000, 0.7616, 0.9640, 0.9951])
print(F.softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([ 0.0900, 0.2447, 0.6652]); dim=0 should be passed explicitly
print(F.softmax(F.softmax(torch.Tensor([1, 2, 3]), dim=0), dim=0))  # tensor([0.2535, 0.2959, 0.4506]); do not apply softmax() twice!
print(np.log(F.softmax(torch.Tensor([1, 2, 3]), dim=0)))  # tensor([-2.4076, -1.4076, -0.4076])
print(F.log_softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([-2.4076, -1.4076, -0.4076]); equal to log(softmax(x))
print(F.log_softmax(torch.Tensor([[1, 2, 3], [4, 5, 6]]), dim=1))  # tensor([[-2.4076, -1.4076, -0.4076], [-2.4076, -1.4076, -0.4076]])
print(F.log_softmax(torch.Tensor([[1, 2, 3], [4, 5, 6]]), dim=-1))  # tensor([[-2.4076, -1.4076, -0.4076], [-2.4076, -1.4076, -0.4076]]); dim=-1 gives the same result
## Plot
x = torch.linspace(-5, 5, 200); print(type(x))  # <class 'torch.Tensor'>
x_np = x.data.numpy(); print(type(x_np))  # <class 'numpy.ndarray'>; matplotlib does not accept torch tensors; equivalent to ```x_np = np.linspace(-5, 5, 200); print(type(x_np))```
def forward(self, x, adj):
    """Multi-head graph attention followed by an output attention layer.

    Head outputs are concatenated along dim 1; dropout is applied before
    the heads and before the output layer.

    Returns:
        Log-softmax over dim 1 of the ELU-activated output layer.
    """
    dropped = F.dropout(x, self.dropout, training=self.training)
    head_outputs = [head(dropped, adj) for head in self.attentions]
    merged = torch.cat(head_outputs, dim=1)
    merged = F.dropout(merged, self.dropout, training=self.training)
    scores = F.elu(self.out_att(merged, adj))
    return F.log_softmax(scores, dim=1)
def f_enc(self, env, args):
    """Encode the concatenation of ``env`` and ``args`` (joined on the last dim).

    Two ELU-activated layers followed by a linear output layer.
    """
    joined = torch.cat([env, args], -1)
    hidden = F.elu(self.fenc1(joined))
    hidden = F.elu(self.fenc2(hidden))
    return self.fenc3(hidden)
def elu(x: T.Tensor, **kwargs):
    """ELU activation.

    Recognised keyword arguments: ``alpha`` (default 1.0) and ``inplace``
    (default False); any others are ignored.
    """
    alpha = kwargs.get('alpha', 1.)
    inplace = kwargs.get('inplace', False)
    return F.elu(x, alpha, inplace)
def forward(self, x):
    """Three-layer network with ELU hidden activations and a sigmoid output.

    Dim 0 of the result is squeezed out when it has size 1.
    """
    hidden = F.elu(self.map1(x))  # (1, 2*D_I) -> (1, D_H)
    hidden = F.elu(self.map2(hidden))
    probabilities = torch.sigmoid(self.map3(hidden))
    return probabilities.squeeze(0)
def forward(self, x):
    """Trilinearly upsample by ``self.scale_factor``, then conv -> BN -> ELU."""
    upsampled = F.interpolate(
        x,
        scale_factor=self.scale_factor,
        mode='trilinear',
        align_corners=False,
    )
    normalised = self.bn(self.conv3d(upsampled))
    return F.elu(normalised, inplace=False)
def forward(self, x):
    """One ELU-activated hidden layer (``fc1``) and a linear output (``fc3``)."""
    hidden = F.elu(self.fc1(x))
    return self.fc3(hidden)
def forward(self, x):
    """3-D convolution -> batch norm -> (out-of-place) ELU."""
    convolved = self.conv3d(x)
    normalised = self.bn(convolved)
    return F.elu(normalised, inplace=False)
def forward(self, g, h):
    """Graph conv stack -> average pooling -> two dense layers, with timing prints.

    Each stage's wall-clock duration is printed. The result is the
    log-softmax over dim 0 of the second dense layer's output.
    """
    # Graph convolutions, each followed by ReLU.
    for index, conv in enumerate(self.layers):
        started = time.perf_counter()
        h = conv(g, h)
        print("conv", index, "forward time: ", time.perf_counter() - started)
        h = F.relu(h)

    started = time.perf_counter()
    h = self.avgpooling(g, h)
    print("pooling forward time: ", time.perf_counter() - started)

    started = time.perf_counter()
    h = self.fc1(h)
    print("fc1 forward time: ", time.perf_counter() - started)

    h = F.elu(h)

    started = time.perf_counter()
    h = self.fc2(h)
    print("fc2 forward time: ", time.perf_counter() - started)

    return F.log_softmax(h, dim=0)
def _activate(self, x, predict): return F.elu(x)
def forward(self, x):
    """Pad, convolve, normalise and apply an in-place ELU.

    The last two dimensions are padded on every side by
    ``floor((kernel_size - 1) / 2)`` before ``self.conv_base``.
    """
    margin = int((self.kernel_size - 1) // 2)
    padded = F.pad(x, (margin,) * 4)
    normalised = self.normalize(self.conv_base(padded))
    return F.elu(normalised, inplace=True)
def concat_elu(x):
    """Concatenate ``x`` and ``-x`` along dim 1 and apply ELU."""
    mirrored = torch.cat((x, -x), 1)
    return F.elu(mirrored)
def forward(self, z):
    """Decode ``z`` through three ELU-activated dense layers and a sigmoid output.

    Returns:
        Sigmoid of ``self.fc4``'s output, in ``(0, 1)``.
    """
    h1 = F.elu(self.fc1(z))
    h2 = F.elu(self.fc2(h1))
    h3 = F.elu(self.fc3(h2))
    # Tensor.sigmoid() replaces the deprecated F.sigmoid.
    return self.fc4(h3).sigmoid()