def dis_loss_function(fake, real, target, data_size=(256, 256)):
    y_in = dis(real, target)
    y_out = dis(fake, target)
    # softplus, not softmax: F.softplus(-y) == -log(sigmoid(y)), the standard
    # non-saturating discriminator terms (nn.softmax does not exist in PyTorch)
    L1 = torch.sum(F.softplus(-y_in)) / args.batch_size / data_size[0] / data_size[1]
    L2 = torch.sum(F.softplus(y_out)) / args.batch_size / data_size[0] / data_size[1]
    return L1 + L2
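# Sanity check for the softplus form above (an assumption: the intended loss is
# the standard non-saturating GAN objective): softplus(-y) == -log(sigmoid(y)).
import torch
import torch.nn.functional as F

y = torch.randn(4)
assert torch.allclose(F.softplus(-y), -torch.log(torch.sigmoid(y)), atol=1e-6)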
def __init__(self, states_num, actions_num, hidden1=400, hidden2=300):
    super(Actor, self).__init__()
    self.fc1 = nn.Linear(states_num, hidden1)
    self.fc2 = nn.Linear(hidden1, hidden2)
    self.fc3 = nn.Linear(hidden2, actions_num)
    self.relu = nn.ReLU()
    # nn.Softmax is the module class (nn.softmax does not exist) and needs a dim
    self.softmax = nn.Softmax(dim=-1)
def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 256)
    self.fc2 = nn.Linear(256, 10)
    self.sigmoid = nn.Sigmoid()  # note: activations declared as modules here
    self.softmax = nn.Softmax(dim=1)
def __init__(self, incoming, num_units, nonlinearity=nn.ReLU, **kwargs):
    super(NeuralNet, self).__init__()
    self.nonlinearity = nonlinearity
    self.input_shape = incoming  # assumption: `incoming` is the input shape tuple
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.num_units = num_units
    self.fc1 = nn.Linear(num_inputs, self.num_units)
    # nn.Softmax (not nn.softmax) is the module class; instantiate it with a dim
    if not EXP_SOFTMAX or self.nonlinearity is not nn.Softmax:
        self.nonlinearity = nonlinearity
    else:
        self.nonlinearity = nn.Softmax(dim=-1)
def __init__(self, state_dim, action_dim, option_dim, hidden_dim):
    super(Option_Network, self).__init__()
    # Option architecture
    self.linear1 = nn.Linear(state_dim + action_dim, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim, hidden_dim)
    self.linear3 = nn.Linear(hidden_dim, option_dim)
    # nn.Softmax takes the dimension to normalize over, not an output size
    self.out_option = nn.Softmax(dim=-1)
    self.apply(weights_init_)
def gumbel_softmax_sample(trng, logits, tau, U=None, hard=False):
    """Sample from Gumbel(0, 1) and apply a temperature-scaled softmax."""
    ylog = logits + sample_gumbel(trng, logits.shape, U=U)
    y = F.softmax(ylog / tau, dim=-1)
    if hard:
        print('Using hard gumbel')  # Still working on this
        # one_hot = tensor.cast(tensor.eq(y, y.max(axis=-1, keepdims=1)), dtype=config.floatX)
        # y = theano.gradient.disconnected_grad(one_hot - y) + y
    return y
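# The commented-out lines above are the Theano form of the straight-through
# estimator. A minimal PyTorch sketch of that trick (a sketch, not the author's
# finished code): forward a one-hot sample, backprop through the soft y.
import torch

def hard_gumbel(y):
    index = y.argmax(dim=-1, keepdim=True)
    one_hot = torch.zeros_like(y).scatter_(-1, index, 1.0)
    return (one_hot - y).detach() + y  # gradient flows through y only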
def forward(self, K, Q, V, mask):
    # TODO (ldery): mask contains -inf where needed for the encoder
    K = torch.matmul(K, self.K_w)
    Q = torch.matmul(Q, self.K_w)  # note: Q and V reuse self.K_w; separate Q/V weights were likely intended
    V = torch.matmul(V, self.K_w)
    logits = torch.matmul(Q, K.transpose(-2, -1)) / sqrt(self.out_dim)
    logits = logits + mask
    # softmax over the key dimension (the last axis)
    embeddings = torch.matmul(F.softmax(logits, dim=-1), V)
    return embeddings
def softmax(x):
    ndim = x.dim()
    if ndim == 2:
        return F.softmax(x, dim=-1)
    elif ndim == 3:
        # numerically stable softmax over the last axis
        e = torch.exp(x - x.max(dim=-1, keepdim=True).values)
        s = e.sum(dim=-1, keepdim=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor '
                         'that is not 2D or 3D. '
                         'Here, ndim=' + str(ndim))
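# Quick check on the helper above: both branches should agree with F.softmax
# over the last axis.
import torch
import torch.nn.functional as F

x2, x3 = torch.randn(4, 5), torch.randn(2, 4, 5)
assert torch.allclose(softmax(x2), F.softmax(x2, dim=-1), atol=1e-6)
assert torch.allclose(softmax(x3), F.softmax(x3, dim=-1), atol=1e-6)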
def _get_att_weight(self, hidden, encoder_hiddens):
    seq_len = len(encoder_hiddens)
    # Calculate an energy for each encoder hidden state; stacking keeps the
    # autograd graph intact (writing into a pre-allocated zeros tensor does not)
    attn_scores = torch.stack(
        [self.get_att_score(hidden, encoder_hiddens[i]) for i in range(seq_len)])
    # Normalize scores to weights in range 0 to 1, resize to 1 x 1 x seq_len
    return F.softmax(attn_scores, dim=0).view(1, 1, -1)
def forward(self, x):
    x = F.relu(self.bn1(self.conv1(x)))
    x = self.pool(F.relu(self.bn2(self.conv2(x))))
    x = self.dropoutConv(x)
    x = F.relu(self.bn3(self.conv3(x)))
    x = self.pool(F.relu(self.bn4(self.conv4(x))))
    x = self.dropoutConv(x)
    x = F.relu(self.bn5(self.conv5(x)))
    x = self.pool(F.relu(self.bn6(self.conv6(x))))
    x = self.dropoutConv(x)
    x = torch.flatten(x, 1)
    x = self.dropoutLinear(self.fc1(x))
    x = self.dropoutLinear(self.fc2(x))
    # use the functional form with an explicit dim; nn.softmax does not exist
    return F.softmax(self.final(x), dim=1)
def __init__(self, vocab, embedding_dim, hop, dropout, unk_mask):
    super(EncoderMemNN, self).__init__()
    self.num_vocab = vocab
    self.max_hops = hop
    self.embedding_dim = embedding_dim
    self.dropout = dropout
    self.unk_mask = unk_mask
    for hop in range(self.max_hops + 1):
        C = nn.Embedding(self.num_vocab, embedding_dim, padding_idx=PAD_token)
        C.weight.data.normal_(0, 0.1)
        self.add_module("C_{}".format(hop), C)
    self.C = AttrProxy(self, "C_")
    self.softmax = nn.Softmax(dim=1)
def forward(self, input, hidden, encoder_outputs):
    embedded = self.embedding(input)  # was `embedded_out`, but read as `embedded` below
    attn_weights = F.softmax(
        self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
    attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                             encoder_outputs.unsqueeze(0))
    output = torch.cat((embedded[0], attn_applied[0]), 1)
    output = self.attn_combine(output).unsqueeze(0)
    output = F.relu(output)
    output, hidden = self.gru(output, hidden)
    output = F.log_softmax(self.out(output[0]), dim=1)
    return output, hidden, attn_weights
def forward(self, inp, adj):
    """
    inp: input features [N, in_features], one feature vector per node
    adj: graph adjacency matrix [N, N], entries are 0 or 1
    """
    h = torch.mm(inp, self.W)  # [N, out_features]
    N = h.size()[0]            # N: number of nodes in the graph
    # all-pairs concatenation of node features: [N, N, 2*out_features]
    a_input = torch.cat([h.repeat(1, N).view(N * N, -1), h.repeat(N, 1)],
                        dim=1).view(N, N, 2 * self.out_features)
    # unnormalized attention coefficients: [N, N, 1] => [N, N]
    e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))
    # mask non-edges with a very negative value so softmax effectively ignores them
    zero_vec = -1e12 * torch.ones_like(e)
    attention = torch.where(adj > 0, e, zero_vec)  # [N, N]
    # normalized attention weights; softmax keeps the [N, N] shape
    attention = F.softmax(attention, dim=1)
    attention = F.dropout(attention, self.dropout, training=self.training)
    # aggregate neighbors by attention: [N, N] @ [N, out_features] => [N, out_features]
    h_prime = torch.matmul(attention, h)
    if self.concat:
        return self.elu(h_prime)
    else:
        return h_prime
def forward(self, pred, labels, index=None):
    # denominator
    denom = torch.log(F.softmax(pred, dim=1)).sum(-1)
    # numerator
    ce = self.cross_entropy(pred, labels)
    return ce / denom

# class APLoss(torch.nn.Module):
#     def __init__(self, alpha, beta, num_classes=10):
#         super(SCELoss, self).__init__()
#         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
#         self.alpha = alpha
#         self.beta = beta
#         self.num_classes = num_classes
#         self.cross_entropy = torch.nn.CrossEntropyLoss()
#         self.A = math.exp(-4)
#
#     def forward(self, pred, labels, index=None):
#         # index is redundant input for SCELoss
#         # CCE
#         ce = self.cross_entropy(pred, labels)
#         # RCE
#         pred = F.softmax(pred, dim=1)
#         pred = torch.clamp(pred, min=1e-7, max=1.0)
#         label_one_hot = torch.nn.functional.one_hot(labels, self.num_classes).float().to(self.device)
#         label_one_hot = torch.clamp(label_one_hot, min=self.A, max=1.0)
#         rce = (-1 * torch.sum(pred * torch.log(label_one_hot), dim=1))
#         # Loss
#         loss = self.alpha * ce + self.beta * rce.mean()
#         return loss
def gumbel_softmax_sample_np(logits, tau):
    """Sample from Gumbel(0, 1) and apply a tempered softmax (NumPy version)."""
    y = (logits + sample_gumbel_np(logits.shape)) / tau
    # numerically stable NumPy softmax; nn.softmax is neither NumPy nor valid PyTorch
    e = np.exp(y - y.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)
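# Usage sketch. sample_gumbel_np is not defined in this snippet; the standard
# Gumbel(0, 1) sampler below is an assumption about what it does.
import numpy as np

def sample_gumbel_np(shape, eps=1e-9):
    u = np.random.uniform(eps, 1.0 - eps, shape)
    return -np.log(-np.log(u))

logits = np.log(np.array([[0.1, 0.3, 0.6]]))
sample = gumbel_softmax_sample_np(logits, tau=0.5)
assert np.allclose(sample.sum(axis=-1), 1.0)  # each row is a valid distribution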
def __init__(self, dimension: int):
    super().__init__()  # assumption: this class subclasses nn.Module
    self.dim = dimension
    self.WK = nn.Linear(self.dim, self.dim)
    self.WV = nn.Linear(self.dim, self.dim)
    self.WQ = nn.Linear(self.dim, self.dim)
    self.softmax = nn.Softmax(dim=2)
def __init__(self):
    super(Comparison, self).__init__()
    self.fc1 = nn.Linear(20, 64)
    self.fc2 = nn.Linear(64, 2)
    self.m = nn.Softmax(dim=1)
def __init__(self, n, vocab_size, dim, h):
    super(Net, self).__init__()
    self.embedding = nn.Embedding(vocab_size, dim)
    # module classes are capitalized: Linear / Tanh / Softmax
    self.linear = nn.Linear(dim, vocab_size)
    self.tanh = nn.Tanh()
    self.softmax = nn.Softmax(dim=-1)
def loss(self, nodes, labels):
    scores = self.forward(nodes)
    # pass raw logits: assuming self.xent is nn.CrossEntropyLoss, it applies
    # log-softmax itself, so softmax-ing first silently double-normalizes
    return self.xent(scores, labels.squeeze())
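# Why the fix above holds (assuming self.xent is nn.CrossEntropyLoss):
# cross_entropy applies log-softmax internally, so it must receive raw logits.
import torch
import torch.nn.functional as F

logits, y = torch.randn(8, 3), torch.randint(0, 3, (8,))
assert torch.allclose(F.cross_entropy(logits, y),
                      F.nll_loss(F.log_softmax(logits, dim=1), y))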
net.eval()  # set model to evaluation mode
for images, label in testloader:
    # validation pass here
    pass
net.train()  # set model back to train mode (was `model.train()`, a name mismatch)

###################################### Style 2: use torch.nn.functional in forward ##############################
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = F.dropout(x, p=0.5, training=self.training)  # note: pass the training flag here
        x = F.softmax(self.fc2(x), dim=1)
        return x

###################################### Style 3: nn.Sequential ##############################
net = nn.Sequential(
    nn.Linear(784, 256),
    nn.Dropout(0.5),  # drop 50% of the neurons
    nn.Sigmoid(),
    nn.Linear(256, 10),
    nn.Softmax(dim=1))
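# The module and functional styles above are the same op numerically:
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(2, 10)
assert torch.allclose(nn.Softmax(dim=1)(x), F.softmax(x, dim=1))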
def __init__(self, dim=1):
    super(att_pool, self).__init__()
    # use the passed dim; nn.softmax() is not a valid class and took no dim
    self.attention = nn.Softmax(dim=dim)