Example #1
    def decode(self, z):
        x = F.elu(self.fc1(z))
        x = F.elu(self.fc2(x))
        x = x.view(-1, 128, 7, 7)
        x = F.relu(self.conv_t1(x))
        x = torch.sigmoid(self.conv_t2(x))

        return x.view(-1, 784)
Example #2
    def encode(self, x):
        x = x.view(-1, 1, 28, 28)
        x = F.elu(self.conv1(x))
        x = F.elu(self.conv2(x))
        x = x.view(-1, 128 * 28 * 28)

        mu = F.elu(self.fc11(x))
        mu = self.fc12(mu)

        logvar = F.elu(self.fc21(x))
        logvar = self.fc22(logvar)

        return mu, logvar
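
This encoder only produces the Gaussian parameters; in a standard VAE it is paired with a reparameterization step. A minimal sketch of that step (not part of the example above; the names are illustrative):

import torch

def reparameterize(mu, logvar):
    # logvar is log(sigma^2), so std = exp(0.5 * logvar)
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)  # eps ~ N(0, I)
    return mu + eps * std        # z ~ N(mu, diag(sigma^2))
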
Example #3
    def forward(self, agent_qs, states):
        """Forward pass for the mixer.

        Arguments:
            agent_qs: Tensor of shape [B, T, n_agents]
            states: Tensor of shape [B, T, state_dim]
        """
        bs = agent_qs.size(0)
        states = states.reshape(-1, self.state_dim)
        agent_qs = agent_qs.view(-1, 1, self.n_agents)
        # First layer
        w1 = th.abs(self.hyper_w_1(states))
        b1 = self.hyper_b_1(states)
        w1 = w1.view(-1, self.n_agents, self.embed_dim)
        b1 = b1.view(-1, 1, self.embed_dim)
        hidden = F.elu(th.bmm(agent_qs, w1) + b1)
        # Second layer
        w_final = th.abs(self.hyper_w_final(states))
        w_final = w_final.view(-1, self.embed_dim, 1)
        # State-dependent bias
        v = self.V(states).view(-1, 1, 1)
        # Compute final output
        y = th.bmm(hidden, w_final) + v
        # Reshape and return
        q_tot = y.view(bs, -1, 1)
        return q_tot
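
A standalone shape check of the mixing computation above, with hypothetical sizes B=4, T=10, n_agents=3, embed_dim=32. Because the hypernetwork outputs pass through th.abs() and ELU is monotonically increasing, q_tot is non-decreasing in every agent's Q-value:

import torch as th
import torch.nn.functional as F

B, T, n_agents, embed_dim = 4, 10, 3, 32
agent_qs = th.randn(B * T, 1, n_agents)            # per-agent Q-values
w1 = th.abs(th.randn(B * T, n_agents, embed_dim))  # non-negative mixing weights
b1 = th.randn(B * T, 1, embed_dim)
hidden = F.elu(th.bmm(agent_qs, w1) + b1)
w_final = th.abs(th.randn(B * T, embed_dim, 1))
v = th.randn(B * T, 1, 1)                          # state-dependent bias
q_tot = (th.bmm(hidden, w_final) + v).view(B, T, 1)
print(q_tot.shape)  # torch.Size([4, 10, 1])
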
Example #4
 def forward(self, x):
     x = self.conv1(x)
     x = self.elu1(self.bn1(x))
     x = self.elu2(self.bn2(self.conv2(x)))
     x = F.max_pool1d(x, 160)
     x = x.unsqueeze(1)
     x = self.elu3(self.bn3(self.conv3(x)))
     
     x = F.max_pool2d(x, (3, 3))
     x = self.elu4(self.bn4(self.conv4(x)))
     x = F.max_pool2d(x, (1, 3))
     x = x.view(-1, 50*14*11)
     x = F.elu(self.fc5(x))
     x = F.dropout(x, training=True)  # note: training=True keeps dropout active even in eval mode
     x = F.elu(self.fc6(x))
     x = F.dropout(x, training=True)
     return self.fc7(x)
Example #5
    def forward(self, x):
        x = super().forward(x)

        if self.activation == "leaky_relu":
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        elif self.activation == "elu":
            return functional.elu(x, inplace=True)
        else:
            return x
Example #6
 def forward(self, x):
     x = self.elu1(self.bn1(self.conv1(x)))
     x = self.elu2(self.bn2(self.conv2(x)))
     x = F.max_pool1d(x, 64)
     x = x.unsqueeze(1)
     x = self.elu3(self.bn3(self.conv3(x)))
     x = self.elu4(self.bn4(self.conv4(x)))
     x = F.max_pool2d(x, (5, 3))
     x = self.elu5(self.bn5(self.conv5(x)))
     x = self.elu6(self.bn6(self.conv6(x)))
     x = F.max_pool2d(x, (1, 2))
     x = self.elu7(self.bn7(self.conv7(x)))
     x = self.elu8(self.bn8(self.conv8(x)))
     x = F.max_pool2d(x, (1, 2))
     x = self.elu9(self.bn9(self.conv9(x)))
     x = self.elu10(self.bn10(self.conv10(x)))
     x = F.max_pool2d(x, (1, 2))
     x = x.view(-1, 256*10*8)
     x = F.elu(self.fc11(x))
     x = F.dropout(x, training=True)  # note: training=True keeps dropout active even in eval mode
     x = F.elu(self.fc12(x))
     x = F.dropout(x, training=True)
     return self.fc13(x)
Example #7
    def forward(self, x, cond_blocks=None, sample=False):
        # similar to what is done in the TF repo:
        if self.init_padding is None and not sample: 
            xs = [int(y) for y in x.size()]
            padding = Variable(torch.ones(xs[0], 1, xs[2], xs[3]), requires_grad=False)
            self.init_padding = padding.cuda() if x.is_cuda else padding
        
        if sample:
            xs = [int(y) for y in x.size()]
            padding = Variable(torch.ones(xs[0], 1, xs[2], xs[3]), requires_grad=False)
            padding = padding.cuda() if x.is_cuda else padding
            x = torch.cat((x, padding), 1)

        ###      UP PASS    ###
        x = x if sample else torch.cat((x, self.init_padding), 1)
        u_list  = [self.u_init(x)]
        ul_list = [self.ul_init[0](x) + self.ul_init[1](x)]
        for i in range(3):
            # resnet block
            u_out, ul_out = self.up_layers[i](u_list[-1], ul_list[-1], cond_blocks=cond_blocks)
            u_list  += u_out
            ul_list += ul_out

            if i != 2: 
                # downscale (only twice)
                u_list  += [self.downsize_u_stream[i](u_list[-1])]
                ul_list += [self.downsize_ul_stream[i](ul_list[-1])]

        ###    DOWN PASS    ###
        u  = u_list.pop()
        ul = ul_list.pop()
        
        for i in range(3):
            # resnet block
            u, ul = self.down_layers[i](u, ul, u_list, ul_list, cond_blocks=cond_blocks)

            # upscale (only twice)
            if i != 2:
                u  = self.upsize_u_stream[i](u)
                ul = self.upsize_ul_stream[i](ul)

        x_out = self.nin_out(F.elu(ul))

        assert len(u_list) == len(ul_list) == 0, pdb.set_trace()  # drops into pdb only if the assert fails (requires import pdb)

        return x_out
Example #8
    def forward(self, input, adj):
        h = torch.mm(input, self.W)
        N = h.size()[0]

        a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)],
                            dim=1).view(N, -1, 2 * self.out_features)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))

        zero_vec = -9e15*torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, h)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime
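
The repeat/view sequence above materializes every concatenated pair (h_i || h_j) in a single tensor; a small standalone illustration with hypothetical N=3 nodes and out_features=4:

import torch

N, out_features = 3, 4
h = torch.randn(N, out_features)
a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)],
                    dim=1).view(N, N, 2 * out_features)
print(a_input.shape)  # torch.Size([3, 3, 8])
print(torch.equal(a_input[1, 2], torch.cat([h[1], h[2]])))  # True
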
Example #9
    def forward(ctx, input, weights, bias, old_h, old_cell):
        X = torch.cat([old_h, input], dim=1)

        gate_weights = F.linear(X, weights, bias)
        gates = gate_weights.chunk(3, dim=1)

        input_gate = torch.sigmoid(gates[0])
        output_gate = torch.sigmoid(gates[1])
        candidate_cell = F.elu(gates[2])

        new_cell = old_cell + candidate_cell * input_gate
        new_h = torch.tanh(new_cell) * output_gate

        ctx.save_for_backward(X, weights, input_gate, output_gate, old_cell,
                              new_cell, candidate_cell, gate_weights)

        return new_h, new_cell
Example #10
    def forward(self, input, state):
        old_h, old_cell = state
        X = torch.cat([old_h, input], dim=1)

        # Compute the input, output and candidate cell gates with one MM.
        gate_weights = F.linear(X, self.weights, self.bias)
        # Split the combined gate weight matrix into its components.
        gates = gate_weights.chunk(3, dim=1)

        input_gate = torch.sigmoid(gates[0])
        output_gate = torch.sigmoid(gates[1])
        # Here we use an ELU instead of the usual tanh.
        candidate_cell = F.elu(gates[2])

        # Compute the new cell state.
        new_cell = old_cell + candidate_cell * input_gate
        # Compute the new hidden state and output.
        new_h = torch.tanh(new_cell) * output_gate

        return new_h, new_cell
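
A hedged driver for the cell above; the enclosing module is not shown, so this sketch runs the same gate math with plain tensors and hypothetical sizes (batch=2, input_features=8, state_size=16):

import torch
import torch.nn.functional as F

batch, input_features, state_size = 2, 8, 16
weights = torch.randn(3 * state_size, input_features + state_size)
bias = torch.randn(3 * state_size)
x = torch.randn(batch, input_features)
old_h = torch.zeros(batch, state_size)
old_cell = torch.zeros(batch, state_size)

X = torch.cat([old_h, x], dim=1)
gates = F.linear(X, weights, bias).chunk(3, dim=1)
new_cell = old_cell + F.elu(gates[2]) * torch.sigmoid(gates[0])
new_h = torch.tanh(new_cell) * torch.sigmoid(gates[1])
print(new_h.shape, new_cell.shape)  # torch.Size([2, 16]) torch.Size([2, 16])
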
Example #11
def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss
      logits: [P] Variable, logits at each prediction (between -\infty and +\infty)
      labels: [P] Tensor, binary ground truth labels (0 or 1)
      ignore: label to ignore
    """
    if len(labels) == 0:
        # only void pixels, the gradients should be 0
        return logits.sum() * 0.
    signs = 2. * labels.float() - 1.
    errors = 1. - logits * Variable(signs)
    errors_sorted, perm = torch.sort(errors, dim=0, descending=True)
    perm = perm.data
    gt_sorted = labels[perm]
    grad = lovasz_grad(gt_sorted)
    loss = torch.dot(F.elu(errors_sorted), Variable(grad))
    return loss
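
lovasz_grad() is called but not defined in this snippet; for completeness, the gradient-of-the-Lovász-extension helper from the reference Lovász-Softmax implementation looks like this:

def lovasz_grad(gt_sorted):
    """Gradient of the Lovasz extension w.r.t. sorted errors."""
    p = len(gt_sorted)
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.float().cumsum(0)
    union = gts + (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - intersection / union
    if p > 1:  # cover the 1-pixel case
        jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
    return jaccard
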
Example #12
 def forward(self, input):
     output = torch.cat([self.conv(input), self.pool(input)], 1)
     output = self.bn(output)
     return F.elu(output)
Example #13
 def forward(self, x, edge_index, up_transform):
     out = Pool(x, up_transform)
     for layer in self.blocks:
         out = layer(out, edge_index)
     out = F.elu(out, inplace=True)
     return out
Example #14
    def forward(self,  # type: ignore
                words: Dict[str, torch.LongTensor],
                pos_tags: torch.LongTensor,
                head_tags: torch.LongTensor = None,
                head_indices: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        words : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, sequence_length)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        pos_tags : ``torch.LongTensor``, required.
            The output of a ``SequenceLabelField`` containing POS tags.
            POS tags are required regardless of whether they are used in the model,
            because they are used to filter the evaluation metric to only consider
            heads of words which are not punctuation.
        head_tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels for the arcs
            in the dependency parse. Has shape ``(batch_size, sequence_length)``.
        head_indices : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer indices denoting the parent of every
            word in the dependency parse. Has shape ``(batch_size, sequence_length)``.

        Returns
        -------
        An output dictionary consisting of:
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised.
        arc_loss : ``torch.FloatTensor``
            The loss contribution from the unlabeled arcs.
        tag_loss : ``torch.FloatTensor``, optional
            The loss contribution from predicting the dependency
            tags for the gold arcs.
        heads : ``torch.FloatTensor``
            The predicted head indices for each word. A tensor
            of shape (batch_size, sequence_length).
        head_types : ``torch.FloatTensor``
            The predicted head types for each arc. A tensor
            of shape (batch_size, sequence_length).
        mask : ``torch.LongTensor``
            A mask denoting the padded elements in the batch.
        """
        embedded_text_input = self.text_field_embedder(words)
        if pos_tags is not None and self._pos_tag_embedding is not None:
            embedded_pos_tags = self._pos_tag_embedding(pos_tags)
            embedded_text_input = torch.cat([embedded_text_input, embedded_pos_tags], -1)
        elif self._pos_tag_embedding is not None:
            raise ConfigurationError("Model uses a POS embedding, but no POS tags were passed.")

        mask = get_text_field_mask(words)
        float_mask = mask.float()
        embedded_text_input = self._input_dropout(embedded_text_input)
        encoded_text = self.encoder(embedded_text_input, mask)
        encoded_text = self._dropout(encoded_text)

        # shape (batch_size, sequence_length, arc_representation_dim)
        head_arc_representation = self._dropout(F.elu(self.head_arc_projection(encoded_text)))
        child_arc_representation = self._dropout(F.elu(self.child_arc_projection(encoded_text)))

        # shape (batch_size, sequence_length, tag_representation_dim)
        head_tag_representation = self._dropout(F.elu(self.head_tag_projection(encoded_text)))
        child_tag_representation = self._dropout(F.elu(self.child_tag_projection(encoded_text)))
        # shape (batch_size, sequence_length, sequence_length)
        attended_arcs = self.arc_attention(head_arc_representation,
                                           child_arc_representation)

        minus_inf = -1e8
        minus_mask = (1 - float_mask) * minus_inf
        attended_arcs = attended_arcs + minus_mask.unsqueeze(2) + minus_mask.unsqueeze(1)

        if self.training or not self.use_mst_decoding_for_validation:
            predicted_heads, predicted_head_tags = self._greedy_decode(head_tag_representation,
                                                                       child_tag_representation,
                                                                       attended_arcs,
                                                                       mask)
        else:
            predicted_heads, predicted_head_tags = self._mst_decode(head_tag_representation,
                                                                    child_tag_representation,
                                                                    attended_arcs,
                                                                    mask)
        if head_indices is not None and head_tags is not None:

            arc_nll, tag_nll = self._construct_loss(head_tag_representation=head_tag_representation,
                                                    child_tag_representation=child_tag_representation,
                                                    attended_arcs=attended_arcs,
                                                    head_indices=head_indices,
                                                    head_tags=head_tags,
                                                    mask=mask)
            loss = arc_nll + tag_nll

            evaluation_mask = self._get_mask_for_eval(mask, pos_tags)
            # We calculate attachment scores for the whole sentence
            # but excluding the symbolic ROOT token at the start,
            # which is why we start from the second element in the sequence.
            self._attachment_scores(predicted_heads[:, 1:],
                                    predicted_head_tags[:, 1:],
                                    head_indices[:, 1:],
                                    head_tags[:, 1:],
                                    evaluation_mask[:, 1:])
        else:
            arc_nll = None
            tag_nll = None
            loss = None

        output_dict = {
                "heads": predicted_heads,
                "head_tags": predicted_head_tags,
                "arc_loss": arc_nll,
                "tag_loss": tag_nll,
                "loss": loss,
                "mask": mask
                }

        return output_dict
Example #15
 def selu(x):
     alpha = 1.6732632423543772848170429916717
     scale = 1.0507009873554804934193349852946
     # noinspection PyTypeChecker
     return scale * where(x >= 0, x, alpha * F.elu(x))
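
The bare where above presumably comes from a helper defined elsewhere in that project; with the built-in torch.where the same SELU can be written as (a sketch):

import torch
import torch.nn.functional as F

def selu_builtin(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    # for x < 0, F.elu(x) = exp(x) - 1, matching the SELU definition
    return scale * torch.where(x >= 0, x, alpha * F.elu(x))
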
Example #16
    def forward(self, features, batch_size, size_board):
        features_view = features.view(batch_size, 16, size_board, size_board)
        conv1_output = F.elu(self.__cnn_1(features_view))
        conv2_output = F.elu(self.__cnn_2(features_view))
        conv1_2_1_output = F.elu(self.__cnn_1_2(conv1_output))
        conv1_2_2_output = F.elu(self.__cnn_1_2(conv2_output))
        conv2_2_1_output = F.elu(self.__cnn_2_2(conv1_output))
        conv2_2_2_output = F.elu(self.__cnn_2_2(conv2_output))

        conv1_output_shape = list(conv1_output.shape)
        conv2_output_shape = list(conv2_output.shape)
        conv1_2_1_output_shape = list(conv1_2_1_output.shape)
        conv1_2_2_output_shape = list(conv1_2_2_output.shape)
        conv2_2_1_output_shape = list(conv2_2_1_output.shape)
        conv2_2_2_output_shape = list(conv2_2_2_output.shape)

        hidden1 = conv1_output.view(
            batch_size,
            (conv1_output_shape[1] * conv1_output_shape[2] *
             conv1_output_shape[3]),
        )

        hidden2 = conv2_output.view(
            batch_size,
            (conv2_output_shape[1] * conv2_output_shape[2] *
             conv2_output_shape[3]),
        )

        hidden1_2_1 = conv1_2_1_output.view(
            batch_size,
            (conv1_2_1_output_shape[1] * conv1_2_1_output_shape[2] *
             conv1_2_1_output_shape[3]),
        )

        hidden1_2_2 = conv1_2_2_output.view(
            batch_size,
            (conv1_2_2_output_shape[1] * conv1_2_2_output_shape[2] *
             conv1_2_2_output_shape[3]),
        )

        hidden2_2_1 = conv2_2_1_output.view(
            batch_size,
            (conv2_2_1_output_shape[1] * conv2_2_1_output_shape[2] *
             conv2_2_1_output_shape[3]),
        )

        hidden2_2_2 = conv2_2_2_output.view(
            batch_size,
            (conv2_2_2_output_shape[1] * conv2_2_2_output_shape[2] *
             conv2_2_2_output_shape[3]),
        )

        hidden = torch.cat((hidden1, hidden2, hidden1_2_1, hidden1_2_2,
                            hidden2_2_1, hidden2_2_2), 1)

        hidden_value_1 = F.elu(self.__dense_value_1(hidden))
        hidden_value_2 = self.__dense_value_2(hidden_value_1)

        advantage_action_1 = F.elu(self.__dense_advantage_1(hidden))
        advantage_action_2 = self.__dense_advantage_2(advantage_action_1)

        # Q(s,a) = V(s) + (A(s,a) - 1/|A| * sum A(s,a'))
        reduced_mean = torch.mean(advantage_action_2, dim=1, keepdim=True)
        output = hidden_value_2 + (advantage_action_2 - reduced_mean)
        return output
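
A tiny numeric check of the dueling aggregation in the last three lines, with hypothetical values V(s)=1 and A(s,·)=[2, 0, -2] (mean 0):

import torch

v = torch.tensor([[1.0]])
a = torch.tensor([[2.0, 0.0, -2.0]])
q = v + (a - a.mean(dim=1, keepdim=True))
print(q)  # tensor([[ 3.,  1., -1.]])
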
Example #17
plt.clf()
for i in range(len(dmlp.h)):
    plt.subplot(len(dmlp.h),1,i+1)
    plt.hist(dmlp.h[i].data.numpy(), bins=60)
    plt.ylabel(i)
    plt.xlim(-4,4)
plt.tight_layout()



N = 1000
d = []
for _ in range(1000):
    y = np.random.randn(N)
    x = F.elu(Variable(torch.Tensor(y))).data.numpy()
    w = np.random.randn(N) * np.sqrt(2/(N*(1+.00)))
    d.append(np.dot(x,w))
np.array(d).var()



dmlp.fc[3].bias



ndist = simple_vae.ReparamNormal_MuLogvar()

ndist(torch.zeros(10,2))

ndist.condition(torch.zeros(10))
Example #18
    def forward(self, atom_list, bond_list, atom_degree_list, bond_degree_list,
                atom_mask):

        atom_mask = atom_mask.unsqueeze(2)
        batch_size, mol_length, num_atom_feat = atom_list.size()
        atom_feature_preact = self.atom_fc(atom_list)
        if self.do_bn:
            atom_feature_preact = self.bns[0](atom_feature_preact.transpose(
                1, 2)).transpose(1, 2)  # transpose of the dataset
        atom_feature = F.leaky_relu(atom_feature_preact)

        atom_feature_viz = []
        atom_feature_viz.append(self.atom_fc(atom_list))

        bond_neighbor = [
            bond_list[i][bond_degree_list[i]] for i in range(batch_size)
        ]
        bond_neighbor = torch.stack(bond_neighbor, dim=0)
        atom_neighbor = [
            atom_list[i][atom_degree_list[i]] for i in range(batch_size)
        ]
        atom_neighbor = torch.stack(atom_neighbor, dim=0)

        # then concatenate them
        neighbor_feature = torch.cat([atom_neighbor, bond_neighbor], dim=-1)
        neighbor_feature_preact = self.neighbor_fc(neighbor_feature)
        if self.do_bn:
            neighbor_feature_preact = self.bns[1](
                neighbor_feature_preact.transpose(1, 3)).transpose(
                    1, 3)  # transpose of the dataset
        neighbor_feature = F.leaky_relu(neighbor_feature_preact)

        # generate mask to eliminate the influence of blank atoms
        attend_mask = atom_degree_list.clone()
        attend_mask[attend_mask != mol_length - 1] = 1
        attend_mask[attend_mask == mol_length - 1] = 0
        attend_mask = attend_mask.type(torch.cuda.FloatTensor).unsqueeze(-1)

        softmax_mask = atom_degree_list.clone()
        softmax_mask[softmax_mask != mol_length - 1] = 0
        softmax_mask[softmax_mask == mol_length - 1] = -9e8  # make the softmax value extremely small
        softmax_mask = softmax_mask.type(torch.cuda.FloatTensor).unsqueeze(-1)

        batch_size, mol_length, max_neighbor_num, fingerprint_dim = neighbor_feature.shape
        atom_feature_expand = atom_feature.unsqueeze(-2).expand(
            batch_size, mol_length, max_neighbor_num, fingerprint_dim)
        feature_attention = torch.cat([atom_feature_expand, neighbor_feature],
                                      dim=-1)

        align_score = self.dropout(
            F.leaky_relu(self.align[0](feature_attention)))
        #             print(attention_weight)
        align_score = align_score + softmax_mask
        attention_weight = F.softmax(align_score, -2)
        #             print(attention_weight)
        attention_weight = attention_weight * attend_mask
        #         print(attention_weight)
        atom_attention_weight_viz = []
        atom_attention_weight_viz.append(attention_weight)

        neighbor_feature_transform = self.attend[0](
            self.dropout(neighbor_feature))
        #             print(features_neighbor_transform.shape)
        context = torch.sum(
            torch.mul(attention_weight, neighbor_feature_transform), -2)
        #             print(context.shape)
        context = F.elu(context)
        context_reshape = context.view(batch_size * mol_length,
                                       fingerprint_dim)
        atom_feature_reshape = atom_feature.view(batch_size * mol_length,
                                                 fingerprint_dim)
        atom_feature_reshape = self.GRUCell[0](context_reshape,
                                               atom_feature_reshape)
        atom_feature = atom_feature_reshape.view(batch_size, mol_length,
                                                 fingerprint_dim)

        #do nonlinearity
        activated_features = F.relu(atom_feature)
        atom_feature_viz.append(activated_features)

        for d in range(self.radius - 1):
            # bonds_indexed = [bond_list[i][torch.cuda.LongTensor(bond_degree_list)[i]] for i in range(batch_size)]
            neighbor_feature = [
                activated_features[i][atom_degree_list[i]]
                for i in range(batch_size)
            ]

            # neighbor_feature is a list of 3D tensor, so we need to stack them into a 4D tensor first
            neighbor_feature = torch.stack(neighbor_feature, dim=0)
            atom_feature_expand = activated_features.unsqueeze(-2).expand(
                batch_size, mol_length, max_neighbor_num, fingerprint_dim)

            feature_attention = torch.cat(
                [atom_feature_expand, neighbor_feature], dim=-1)

            align_score = self.dropout(
                F.leaky_relu(self.align[d + 1](feature_attention)))
            #             print(attention_weight)
            align_score = align_score + softmax_mask
            attention_weight = F.softmax(align_score, -2)
            #             print(attention_weight)
            attention_weight = attention_weight * attend_mask
            atom_attention_weight_viz.append(attention_weight)
            #             print(attention_weight)
            neighbor_feature_transform = self.attend[d + 1](
                self.dropout(neighbor_feature))
            #             print(features_neighbor_transform.shape)
            context = torch.sum(
                torch.mul(attention_weight, neighbor_feature_transform), -2)
            #             print(context.shape)
            context = F.elu(context)
            context_reshape = context.view(batch_size * mol_length,
                                           fingerprint_dim)
            #             atom_feature_reshape = atom_feature.view(batch_size*mol_length, fingerprint_dim)
            atom_feature_reshape = self.GRUCell[d + 1](context_reshape,
                                                       atom_feature_reshape)
            atom_feature = atom_feature_reshape.view(batch_size, mol_length,
                                                     fingerprint_dim)

            # do nonlinearity
            activated_features = F.relu(atom_feature)
            atom_feature_viz.append(activated_features)

        # when the descriptor values are unbounded, like partial charge or LogP
        mol_feature_unbounded_viz = []
        mol_feature_unbounded_viz.append(
            torch.sum(atom_feature * atom_mask, dim=-2))

        mol_feature = torch.sum(activated_features * atom_mask, dim=-2)
        if self.do_bn:
            mol_feature = self.bns[2](mol_feature)  # transpose of the dataset
        activated_features_mol = F.relu(mol_feature)

        # when the descriptor value has lower or upper bounds
        mol_feature_viz = []
        mol_feature_viz.append(mol_feature)

        mol_attention_weight_viz = []
        mol_softmax_mask = atom_mask.clone()
        mol_softmax_mask[mol_softmax_mask == 0] = -9e8
        mol_softmax_mask[mol_softmax_mask == 1] = 0
        mol_softmax_mask = mol_softmax_mask.type(torch.cuda.FloatTensor)

        for t in range(self.T):

            mol_prediction_expand = activated_features_mol.unsqueeze(
                -2).expand(batch_size, mol_length, fingerprint_dim)
            mol_align = torch.cat([mol_prediction_expand, activated_features],
                                  dim=-1)
            mol_align_score = self.dropout(
                F.leaky_relu(self.mol_align(mol_align)))
            mol_align_score = mol_align_score + mol_softmax_mask
            mol_attention_weight = F.softmax(mol_align_score, -2)
            mol_attention_weight = mol_attention_weight * atom_mask
            #             print(mol_attention_weight.shape,mol_attention_weight)
            mol_attention_weight_viz.append(mol_attention_weight)

            activated_features_transform = self.mol_attend(
                self.dropout(activated_features))
            mol_context = torch.sum(
                torch.mul(mol_attention_weight, activated_features_transform),
                -2)
            #             print(mol_context.shape,mol_context)
            mol_context = F.elu(mol_context)
            mol_feature = self.mol_GRUCell(mol_context, mol_feature)
            #             print(mol_feature.shape,mol_feature)

            mol_feature_unbounded_viz.append(mol_feature)
            #do nonlinearity
            activated_features_mol = F.relu(mol_feature)
            mol_feature_viz.append(activated_features_mol)

        mol_prediction = self.output(self.dropout(mol_feature))

        return (atom_feature_viz, atom_attention_weight_viz, mol_feature_viz,
                mol_feature_unbounded_viz, mol_attention_weight_viz,
                mol_prediction)
Example #19
 def forward(self, x):
     x = F.elu(self.fc1(x))
     x = F.softmax(self.fc2(x), dim=1)
     return x
Example #20
 def forward(self, x):
     x = F.elu(self.fc1(x))
     value = self.fc2(x)
     return value.squeeze()
Example #21
 def forward(self, x, edge_index):
     x = F.dropout(x, p=self.dropout, training=self.training)
     x = F.elu(self.conv1(x, edge_index))
     x = F.dropout(x, p=self.dropout, training=self.training)
     x = F.elu(self.conv2(x, edge_index))
     return F.log_softmax(x, dim=1)
Example #22
def concat_elu(x):
    """ like concatenated ReLU (http://arxiv.org/abs/1603.05201), but then with ELU """
    # Pytorch ordering
    axis = len(x.size()) - 3
    return F.elu(torch.cat([x, -x], dim=axis))
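
A quick check of the channel doubling: for an NCHW tensor, axis = ndim - 3 = 1, so the channel dimension doubles:

import torch

x = torch.randn(2, 3, 8, 8)  # NCHW
y = concat_elu(x)
print(y.shape)  # torch.Size([2, 6, 8, 8])
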
Example #23
 def forward(self, x, edge_index):
     x = F.elu(self.conv1(x, edge_index) + self.lin1(x))
     x = F.elu(self.conv2(x, edge_index) + self.lin2(x))
     x = self.conv3(x, edge_index) + self.lin3(x)
     return x
Example #24
 def forward(self):
     # note: reads features and edges from a module-level `data` object
     x = F.dropout(data.x, p=0.6, training=self.training)
     x = F.elu(self.conv1(x, data.edge_index))
     x = F.dropout(x, p=0.6, training=self.training)
     x = self.conv2(x, data.edge_index)
     return F.log_softmax(x, dim=1)
Example #25
    def _get_gcn_output(self,
                        input_word_orig,
                        input_word,
                        input_char,
                        adjs,
                        target=None,
                        mask=None,
                        length=None,
                        hx=None,
                        leading_symbolic=0,
                        return_edge=False,
                        show_net=False,
                        graph_types=['coref']):
        if "wonderful" in graph_types:
            gold_adj = adjs[:, -1, :].clone()
            gnn_adjs = adjs[:, :-1, :]

        mask_singles = self.mask_singles

        assert len(input_word.size()) == 3, "the input is not document level"
        # input_word is the packed sents [n_sent, sent_len]
        input_word, input_char, target, sent_mask, length, doc_n_sent = self._doc2sent(
            input_word, input_char, target, show_net=show_net)

        # input: [n_sent, sent_len, enc_dim]
        input, length = self._get_word_enc(input_word_orig,
                                           input_word,
                                           input_char,
                                           mask=sent_mask,
                                           length=length,
                                           show_net=show_net)

        # output from rnn [n_sent, sent_len, enc_dim]
        sent_output, hn = self._get_rnn_enc(input,
                                            length,
                                            sent_mask,
                                            hx,
                                            show_net=show_net)

        # flatten sents to words [batch, n_word, dim]
        # mask for packed_doc [batch, n_word]
        output, doc_word_mask = self._sent2word(sent_output,
                                                sent_mask,
                                                doc_n_sent,
                                                show_net=show_net)

        # enc for non-repetitive words

        if mask_singles:
            if show_net:
                print("[Net] Block singles from here.")

            coref_ix = 0
            # single is 1, repetitive word is 0
            single_mask = gnn_adjs[:, coref_ix].sum(-1, keepdim=True).eq(0).float()
            sent_single_mask = self._word2sent(single_mask,
                                               doc_word_mask,
                                               length,
                                               sent_mask,
                                               show_net=show_net)
            singles = sent_output * sent_single_mask.expand_as(sent_output)
            if self.tag_space:
                # [batch, length, tag_space]
                singles = self.dropout_tag(
                    F.elu(self.lstm_to_tag_space(singles)))
                if show_net:
                    print("singles -> self.lstm_to_tag_space")
                singles = singles * sent_single_mask.expand_as(singles)

            # [batch, n_word, d_graph]
            output = output * (1 - single_mask).expand_as(output)

        # go thru gcn [batch, n_word, d_graph]
        h_gcn, *_ = self.gcn(output,
                             gnn_adjs,
                             doc_word_mask,
                             return_edge=return_edge,
                             show_net=show_net)

        output = self._word2sent(h_gcn,
                                 doc_word_mask,
                                 length,
                                 sent_mask,
                                 show_net=show_net)

        if self.post_lstm:
            # output from rnn [n_sent, sent_len, enc_dim]
            output, hn = self._get_rnn_enc2(output,
                                            length,
                                            sent_mask,
                                            hx,
                                            show_net=show_net)

        # output from rnn_out [batch, length, tag_space]
        output = self.dropout_tag(F.elu(self.to_tag_space(output)))
        if show_net:
            print("<")
            print("[Net] to_tag")
            show_var(["self.to_tag_space"])
            show_var(["F.elu"])
            show_var(["self.dropout_tag"])
            print(">")

        if mask_singles:
            output = output * (1 - sent_single_mask).expand_as(output)
            output = output + singles  # repetitive word enc + single word enc
            if show_net:
                print("[Net] output + singles")

        if length is not None:
            max_len = length.max()
            target = target[:, :max_len]

        adj_loss = (self._adj_loss(gnn_adjs[:, 0, :], gold_adj)
                    if "wonderful" in graph_types else 0)
        return output, target, sent_mask, length, adj_loss
Example #26
 def forward(self, x):
     x = F.dropout(x, p=0.6, training=self.training)
     x = F.elu(self.conv1(x))
     x = F.dropout(x, p=0.6, training=self.training)
     x = self.conv2(x)
     return F.log_softmax(x, dim=1)
Example #27
 def forward(self, x, down_transform):
     out = F.elu(self.conv(x))
     out = Pool(out, down_transform)
     return out
Example #28
 def decode(self, z):
     # g
     h33 = F.elu(self.fc3(z))    # 20 -> 200
     h3 = F.elu(self.fc33(h33))  # 200 -> 450
     return torch.sigmoid(self.fc4(h3))  # 450 -> 784
Example #29
 def forward(self, x, up_transform):
     out = Pool(x, up_transform)
     out = F.elu(self.conv(out))
     return out
Example #30
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * F.elu(x, alpha)
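
Since scale * F.elu(x, alpha) with these constants is exactly the SELU definition, the hand-rolled version should agree with the built-in F.selu up to floating-point error:

import torch
import torch.nn.functional as F

x = torch.randn(1000)
print(torch.allclose(selu(x), F.selu(x), atol=1e-6))  # True
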
Example #31
    def forward(self, x):
        x = F.elu(self.conv0a(x))
        self.li0 = x = F.elu(self.bn0a(self.conv0b(x)))

        x = self.ma1(x)
        x = F.elu(self.conv1a(x))
        self.li1 = x = F.elu(self.bn1a(self.conv1b(x)))

        x = self.ma2(x)
        x = F.elu(self.conv2a(x))
        self.li2 = x = F.elu(self.bn2a(self.conv2b(x)))

        x = self.ma3(x)
        x = F.elu(self.conv3a(x))
        self.li3 = x = F.elu(self.bn3a(self.conv3b(x)))

        x = F.interpolate(x, scale_factor=2, mode="nearest")

        x = F.elu(self.conv2u(x))
        x = torch.cat([x, self.li2], 1)
        x = F.elu(self.bn2u(self.conv2v(x)))

        self.lo1 = x
        x = F.interpolate(x, scale_factor=2, mode="nearest")

        x = F.elu(self.conv1u(x))
        x = torch.cat([x, self.li1], 1)
        x = F.elu(self.bn1u(self.conv1v(x)))

        x = F.interpolate(x, scale_factor=2, mode="nearest")
        self.la1 = x

        x = F.elu(self.conv0u(x))
        x = torch.cat([x, self.li0], 1)
        x = F.elu(self.bn0u(self.conv0v(x)))

        self.out = x = self.conv1x(x)
        x = torch.sigmoid(x)
        return x
Example #32
 def forward(self, x):
     x = F.elu(self.map1(x))
     x = F.elu(self.map2(x))
     return torch.sigmoid(self.map3(x))
Example #33
 def forward(self, input):
     output = F.elu(self.fc1(input))
     output = F.elu(self.fc2(output))
     output = self.fc3(output)
     return output
Example #34
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from itertools import chain  # for chaining generators
##################################################################
## Initialization
print(nn.init.calculate_gain('relu'))  # 1.4142135623730951
print(nn.init.calculate_gain('leaky_relu'))  # 1.4141428569978354

w = torch.empty(3, 5); print(w)  # Returns a tensor filled with uninitialized data
print(nn.init.xavier_uniform_(w))
print(nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')))
##################################################################
## Activation
x = torch.Tensor([1]); print(x)
print(F.elu(torch.Tensor([1, 0, -1])))  # tensor([ 1.0000,  0.0000, -0.6321]); ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1));  alpha=1.0
print(F.relu(torch.Tensor([1, 0, -1])))  # tensor([ 1.,  0.,  0.])
print(torch.sigmoid(x), F.softmax(x, dim=-1))  # tensor([0.7311]) tensor([1.]); softmax over a single element is all ones
print(torch.sigmoid(torch.Tensor([0, 1, 2, 3])))  # tensor([0.5000, 0.7311, 0.8808, 0.9526])
print(torch.tanh(torch.Tensor([0, 1, 2, 3])))  # tensor([0.0000, 0.7616, 0.9640, 0.9951])

print(F.softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([ 0.0900,  0.2447,  0.6652]); dim=0 should be added
print(F.softmax(F.softmax(torch.Tensor([1, 2, 3]), dim=0), dim=0))  # tensor([0.2535, 0.2959, 0.4506]); do not apply softmax() twice!
print(np.log(F.softmax(torch.Tensor([1, 2, 3]), dim=0)))  # tensor([-2.4076, -1.4076, -0.4076])
print(F.log_softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([-2.4076, -1.4076, -0.4076]); equal to log(softmax(x))
print(F.log_softmax(torch.Tensor([[1, 2, 3], [4, 5, 6]]), dim=1))  # tensor([[-2.4076, -1.4076, -0.4076], [-2.4076, -1.4076, -0.4076]])
print(F.log_softmax(torch.Tensor([[1, 2, 3], [4, 5, 6]]), dim=-1))  # tensor([[-2.4076, -1.4076, -0.4076], [-2.4076, -1.4076, -0.4076]]); dim=-1 gives the same result

## Plot
x = torch.linspace(-5, 5, 200); print(type(x))  # <class 'torch.Tensor'>
x_np = x.data.numpy(); print(type(x_np))   # <class 'numpy.ndarray'>; matplotlib doesn't accept torch Tensors; equivalent to x_np = np.linspace(-5, 5, 200)
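
A minimal continuation sketch of the plotting setup (assumes the imports above): evaluate ELU on x and plot it against x_np:

y_elu = F.elu(x).data.numpy()
plt.plot(x_np, y_elu, c='red', label='elu')
plt.ylim(-1.5, 5)
plt.legend(loc='best')
plt.show()
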
Example #35
 def forward(self, x, adj):
     x = F.dropout(x, self.dropout, training=self.training)
     x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
     x = F.dropout(x, self.dropout, training=self.training)
     x = F.elu(self.out_att(x, adj))
     return F.log_softmax(x, dim=1)
Example #36
 def f_enc(self, env, args):
     merge = torch.cat([env, args], -1)
     elu = F.elu(self.fenc1(merge))
     elu = F.elu(self.fenc2(elu))
     out = self.fenc3(elu)
     return out
Example #37
def elu(x: T.Tensor, **kwargs):
    """
    ELU activation.
    """

    return F.elu(x, kwargs.get('alpha', 1.), kwargs.get('inplace', False))
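
An example call of the wrapper (assuming the usual aliases import torch as T and import torch.nn.functional as F):

import torch

print(elu(torch.tensor([-1.0, 0.0, 1.0]), alpha=0.5))
# tensor([-0.3161,  0.0000,  1.0000])
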
Example #38
 def forward(self, x):
     x = F.elu(self.map1(x))  # (1, 2*D_I) -> (1, D_H)
     x = F.elu(self.map2(x))
     return torch.sigmoid(self.map3(x)).squeeze(0)
Example #39
 def forward(self, x):
     return F.elu(self.bn(self.conv3d(F.interpolate(
         x, scale_factor=self.scale_factor, mode='trilinear',
         align_corners=False))), inplace=False)
Example #40
 def forward(self, x):
     x = F.elu(self.fc1(x))
     x = self.fc3(x)
     return x
Example #41
 def forward(self, x):
     return F.elu(self.bn(self.conv3d(x)), inplace=False)
Example #42
def concat_elu(x):
    """ like concatenated ReLU (http://arxiv.org/abs/1603.05201), but then with ELU """
    # Pytorch ordering
    axis = len(x.size()) - 3
    return F.elu(torch.cat([x, -x], dim=axis))
Example #43
 def forward(self, x, adj):
     x = F.dropout(x, self.dropout, training=self.training)
     x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
     x = F.dropout(x, self.dropout, training=self.training)
     x = F.elu(self.out_att(x, adj))
     return F.log_softmax(x, dim=1)
Example #44
 def forward(self, x):
     x = F.elu(self.map1(x))
     x = F.elu(self.map2(x))
     return torch.sigmoid(self.map3(x))
Example #45
    def forward(self, g, h):
        # h = self.dropout(h)
        # (a commented-out training-mode variant instrumented with per-layer
        #  CUDA-synchronized timing and gradient hooks is omitted here)
        for l, layer in enumerate(self.layers):
            t1 = time.perf_counter()
            h = layer(g, h)
            print("conv", l, "forward time: ", time.perf_counter() - t1)
            h = F.relu(h)
            # h = self.dropout(h)

        t2 = time.perf_counter()
        h = self.avgpooling(g, h)
        print("pooling forward time: ", time.perf_counter() - t2)

        t3 = time.perf_counter()
        h = self.fc1(h)
        print("fc1 forward time: ", time.perf_counter() - t3)

        h = F.elu(h)

        t4 = time.perf_counter()
        h = self.fc2(h)
        print("fc2 forward time: ", time.perf_counter() - t4)
        # h = self.readout(h)
        h = F.log_softmax(h, dim=0)
        return h
Example #46
 def _activate(self, x, predict):
     return F.elu(x)
Example #47
 def forward(self, x):
     p = int(np.floor((self.kernel_size-1)/2))
     p2d = (p, p, p, p)
     x = self.conv_base(F.pad(x, p2d))
     x = self.normalize(x)
     return F.elu(x, inplace=True)
Example #48
def concat_elu(x):
    return F.elu(torch.cat([x, -x], 1))
Example #49
    def forward(self, z):
        h1 = F.elu(self.fc1(z))
        h2 = F.elu(self.fc2(h1))
        h3 = F.elu(self.fc3(h2))

        return torch.sigmoid(self.fc4(h3))