def main():
    # Read sentences
    sentences = readFile("words2.txt")
    print(sentences)

    # Build word list and unique-word vocabulary
    words = []
    uniqWords = []
    for sentence in sentences:
        for word in sentence:
            words.append(word)
            if word not in uniqWords:
                uniqWords.append(word)
    print(uniqWords)
    uniqWordSize = len(uniqWords)

    # Make training (center, context) pairs
    trainPairs = trainGenerator(sentences, uniqWords)

    dims = 5
    W1 = Variable(torch.randn(dims, uniqWordSize).float(), requires_grad=True)
    W2 = Variable(torch.randn(uniqWordSize, dims).float(), requires_grad=True)

    epochs = 1001
    for i in range(epochs):
        avg_loss = 0
        samples = 0
        for x, y in trainPairs:
            x = Variable(torch.from_numpy(x)).float()
            y = Variable(torch.from_numpy(np.array([y])).long())
            samples += len(y)

            a1 = torch.matmul(W1, x)
            a2 = torch.matmul(W2, a1)
            logSoftmax = F.log_softmax(a2, dim=0)
            loss = F.nll_loss(logSoftmax.view(1, -1), y)
            loss.backward()
            avg_loss += loss.item()

            W1.data -= 0.002 * W1.grad.data
            W2.data -= 0.002 * W2.grad.data
            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if i > 100 and i % 100 == 0:
            print(avg_loss / samples)

    # Predict the context distribution for 'paris' from its learned vector
    parisVector = W1[:, uniqWords.index('paris')].data.numpy()
    context_to_predict = parisVector
    hidden = Variable(torch.from_numpy(context_to_predict)).float()
    a = torch.matmul(W2, hidden)
    probs = F.softmax(a, dim=0).data.numpy()
    for context, prob in zip(uniqWords, probs):
        print(f'{context}: {prob:.2f}')

def _train_embeddings(self, epochs, lr):
    for epoch in range(epochs):
        loss_val = 0
        for data, target in self.idx_pairs:
            # One-hot encode the center word over the vocabulary
            x = torch.zeros(self.n_vocab).float()
            x[data] = 1.0
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(self.W1, x)
            z2 = torch.matmul(self.W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.data.item()
            loss.backward()

            self.W1.data -= lr * self.W1.grad.data
            self.W2.data -= lr * self.W2.grad.data
            self.W1.grad.data.zero_()
            self.W2.grad.data.zero_()
        if epoch % 10 == 0:
            print('Loss at epoch {}: {}'.format(
                epoch, loss_val / len(self.idx_pairs)))

def train(num_epochs=100, lr=0.001):
    embedding_size = 10
    W1 = Variable(torch.randn(embedding_size, vocab_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocab_size, embedding_size).float(),
                  requires_grad=True)

    for epoch in range(num_epochs):
        loss_val = 0
        for data, target in dataset:
            x = Variable(input_layer(data)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.item()
            loss.backward()

            W1.data -= lr * W1.grad.data
            W2.data -= lr * W2.grad.data
            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if epoch % 10 == 0:
            print(f'Loss at epoch {epoch}: {loss_val/len(dataset)}')

def rnn_senti(X_train, y_train, X_test, y_test):
    rnn = DoubanRNN().to(device)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-4)

    print('prepare datasets for RNN...')
    train_loader = torch.utils.data.DataLoader(
        DoubanCommentsDataset(X_train, y_train),
        batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    test_loader = torch.utils.data.DataLoader(
        DoubanCommentsDataset(X_test, y_test),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # Train the model
    total_step = len(train_loader)
    rnn.train()
    for epoch in range(EPOCH):
        for i, data in enumerate(train_loader):
            ft = data['ft'].reshape(-1, BATCH_SIZE, 30).to(device)
            senti = data['senti'].to(device)

            # Forward pass
            outputs = rnn(ft)
            loss = F.nll_loss(outputs, senti)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, EPOCH, i + 1, total_step, loss.item()))

    # Test the model
    rnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data in test_loader:
            ft, senti = data['ft'].to(device), data['senti'].to(device)
            outputs = rnn(ft)
            _, predicted = torch.max(outputs.data, 1)
            total += senti.size(0)
            correct += (predicted == senti).sum().item()
        print('Test Accuracy of the model on test set: {} %'.format(
            100 * correct / total))

def evaluate(net, dataloader, num_ens=1):
    """Calculate ensemble accuracy and NLL."""
    accs = []
    nlls = []
    for i, (inputs, labels) in enumerate(dataloader):
        inputs = torch.autograd.Variable(inputs.cuda(non_blocking=True))
        labels = torch.autograd.Variable(labels.cuda(non_blocking=True))

        # Collect log-probabilities from each ensemble member
        outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).cuda()
        for j in range(num_ens):
            outputs[:, :, j] = F.log_softmax(net(inputs), dim=1).data

        accs.append(logits2acc(logmeanexp(outputs, dim=2), labels))
        nlls.append(
            F.nll_loss(torch.autograd.Variable(logmeanexp(outputs, dim=2)),
                       labels, reduction='sum').data.cpu().numpy())
    return np.mean(accs), np.sum(nlls)

def main():
    tokens = tokenize(corpus)
    vocabulary = set(sum(tokens, []))  # sum() flattens the 2-D token list
    vocab_size = len(vocabulary)

    cc_pair = generate_center_context_pair(tokens, 2)
    # pprint(cc_pair)

    word2idx = word2index(tokens)
    idx2word = {key: val for (val, key) in word2idx.items()}
    print(word2idx)
    print(idx2word)

    idx_pairs = get_idxpairs(cc_pair, word2idx)
    idx_pairs = np.array(idx_pairs)

    embedding_dims = 5
    W1 = Variable(torch.randn(embedding_dims, vocab_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocab_size, embedding_dims).float(),
                  requires_grad=True)

    max_iter = int(sys.argv[1])
    learning_rate = 0.001
    for i in range(max_iter):
        loss_val = 0
        for data, target in idx_pairs:
            x = Variable(get_input_layer(data, vocab_size)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.item()
            loss.backward()

            W1.data -= learning_rate * W1.grad.data
            W2.data -= learning_rate * W2.grad.data
            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if i % 10 == 0:
            print(f"Loss at iter {i}: {loss_val/len(idx_pairs)}")

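# Several of these snippets call a `get_input_layer` helper without defining
# it. A minimal sketch follows, assuming it simply builds a one-hot vector for
# the center-word index, i.e. the same construction `_train_embeddings` spells
# out inline with `x[data] = 1.0`. This is an illustrative reconstruction,
# not the original helper; some snippets bind vocab_size elsewhere and call
# the one-argument variant instead.
import torch


def get_input_layer(word_idx, vocab_size):
    # One-hot encode the center word over the vocabulary
    x = torch.zeros(vocab_size).float()
    x[word_idx] = 1.0
    return x
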
def MNISTtest(test_loader, model):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output, _ = model(data)
            # Sum up the batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
    test_loss /= len(test_loader.dataset)
    logging.info(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return 100.0 * correct / len(test_loader.dataset)

def MNISTtrain(train_loader, model, epochs):
    with_cuda = torch.cuda.is_available()
    lr = 0.01
    momentum = 0.5
    model.train()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            if with_cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)

            optimizer.zero_grad()
            output, _ = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

            if batch_idx % 100 == 0:
                logging.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item()))

def train():
    W1 = torch.randn(EMBEDDING_DIMENSION, VOCAB_SIZE, dtype=torch.float,
                     device=DEVICE, requires_grad=True)
    W2 = torch.randn(VOCAB_SIZE, EMBEDDING_DIMENSION, dtype=torch.float,
                     device=DEVICE, requires_grad=True)
    dataloader = DataLoader(MSMARCO('data/pairs.txt'), MB_SIZE, shuffle=True)

    epoch = 0
    for center, context in dataloader:
        if epoch > EPOCHS:
            break
        total_loss = 0
        for i in tqdm(range(0, MB_SIZE)):
            x = Variable(get_input_layer(center[i])).float().to(DEVICE)
            y = Variable(torch.from_numpy(
                np.array([context[i]])).long()).to(DEVICE)

            z1 = torch.matmul(W1, x).to(DEVICE)
            z2 = torch.matmul(W2, z1).to(DEVICE)
            log_softmax = F.log_softmax(z2, dim=0).to(DEVICE)

            loss = F.nll_loss(log_softmax.view(1, -1), y)
            total_loss += loss.item()
            loss.backward()

            W1.data -= learning_rate * W1.grad.data
            W2.data -= learning_rate * W2.grad.data
            W1.grad.data.zero_()
            W2.grad.data.zero_()

            del x, y, z1, z2, log_softmax, loss
            torch.cuda.empty_cache()
        epoch += 1
        print_message("Epoch {}: loss {}".format(epoch, total_loss / MB_SIZE))

    idx2vec = W2.data.cpu().numpy()
    pickle.dump(idx2vec, open('data/idx2vec.txt', 'wb'))
    print_message("Word2Vec Finished Training")

def run_model(vocabulary_size: int,
              documents: list,
              word2idx: dict,
              embedding_dims: int = 128,
              num_epochs: int = 101,
              learning_rate: float = 0.001):
    W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(),
                  requires_grad=True)

    for epo in range(num_epochs):
        start_time = time.time()
        loss_val = 0
        idx_pairs = create_idx_pairs(documents, word2idx)
        for data, target in idx_pairs:
            x = Variable(get_input_layer(data)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.data.item()
            loss.backward()

            with torch.no_grad():
                W1 -= learning_rate * W1.grad
                W2 -= learning_rate * W2.grad
                W1.grad.zero_()
                W2.grad.zero_()

        print('Loss at epo {0}: {1}; {2} seconds'.format(
            epo, loss_val / len(idx_pairs), int(time.time() - start_time)))
    return W1, W2

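# Usage sketch (illustrative, assumed names): reading a single word's
# embedding out of the W1 returned by `run_model`. Each column of W1
# corresponds to one vocabulary index, mirroring the
# `W1[:, uniqWords.index('paris')]` lookup in the first snippet; `word2idx`
# is the same mapping passed into `run_model`.
def word_vector(W1, word2idx, word):
    # Column `word2idx[word]` of W1 is that word's dense embedding
    return W1[:, word2idx[word]].detach().numpy()
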
loss_value = 0
for data, target in index_pairs:
    x = Variable(get_input_layer(data)).float()
    t_ini = np.array([target])
    label_Y = Variable(torch.from_numpy(t_ini).long())

    mult_1 = torch.matmul(W1, x)
    mult_2 = torch.matmul(W2, mult_1)
    log_softmax = F.log_softmax(mult_2, dim=0)

    loss = F.nll_loss(log_softmax.view(1, -1), label_Y)
    loss_value += loss.data
    loss.backward()

    W1.data -= learning_rate * W1.grad.data
    W2.data -= learning_rate * W2.grad.data
    W1.grad.data.zero_()
    W2.grad.data.zero_()

total = len(index_pairs)
t_loss = loss_value / total
loss_array.append(t_loss)
# Print loss per epoch

def forward(self, pred, target):
    '''pred should be the linear output; softmax is calculated here.'''
    batch_size = pred.data.size(0)
    pred = pred.view(batch_size, self.M + self._num_proto * self.N)
    if self._num_proto > 1:
        if self._multi_policy_proto == 'max_softmax':
            first = pred[:, :(self.N * self._num_proto)].contiguous().view(
                batch_size, self.N, self._num_proto)
            first_max, _ = torch.max(first, dim=2)
            second = pred[:, (self.N * self._num_proto):]
            pred = torch.cat((first_max, second), dim=1)
            prediction = F.softmax(pred, dim=1)
        elif self._multi_policy_proto == 'softmax_sum':
            prediction = F.softmax(pred, dim=1)
            first = prediction[:, :(self.N * self._num_proto)].contiguous().view(
                batch_size, self.N, self._num_proto)
            first_sum = torch.sum(first, dim=2)
            second = prediction[:, (self.N * self._num_proto):]
            prediction = torch.cat((first_sum, second), dim=1)
    else:
        prediction = F.softmax(pred, dim=1)

    loss = 0

    # cross entropy loss
    loss_ce = 0
    if 'cross_entropy' in self.loss_type:
        prob_N = prediction.index_select(
            1, torch.autograd.Variable(torch.arange(0, self.N).long().cuda()))
        prob_M = prediction.index_select(
            1, torch.autograd.Variable(
                torch.arange(self.N, self.N + self.M).long().cuda()))
        prob_sM = torch.sum(prob_M, 1, keepdim=True)
        prob_N1 = torch.cat((prob_N, prob_sM), dim=1)
        log_prob_N1 = torch.log(prob_N1 + self.eps)
        loss_ce = F.nll_loss(log_prob_N1, target)
        loss += loss_ce * self.loss_type.get('cross_entropy', 1)

    # entropy loss
    loss_en = 0
    if 'entropy_loss' in self.loss_type or \
            'uniform_loss' in self.loss_type:
        negative_prob_M = prob_M[(target.data == self.N).nonzero().squeeze(1), :]
        norm_neg_prob_M = negative_prob_M / (
            torch.sum(negative_prob_M, dim=1) + self.eps).view(
                -1, 1).expand_as(negative_prob_M)
        if 'entropy_loss' in self.loss_type:
            # loss_en = -torch.mean(torch.sum(
            #     norm_neg_prob_M * torch.log(norm_neg_prob_M + self.eps), dim=1))
            loss_en = -torch.mean(
                torch.sum(prediction * torch.log(prediction + self.eps), dim=1))
            loss += loss_en * self.loss_type.get('entropy_loss', 1)

    # loss to make sure all
    loss_uniform = 0
    if 'uniform_loss' in self.loss_type:
        avg_norm_neg_prob_M = torch.mean(norm_neg_prob_M, dim=0)
        loss_uniform = -torch.mean(
            torch.log(avg_norm_neg_prob_M + self.eps)) - Variable(
                torch.log(torch.FloatTensor([self.M]).cuda()))
        # loss_uniform *= Variable(torch.FloatTensor([0.001]).cuda())
        loss += loss_uniform * self.loss_type.get('uniform_loss', 1)

    if (self._iter % 100) == 0:
        logging.info('loss ce = {}; loss en = {}; loss uniform = {}'.format(
            loss_ce.data.cpu()[0], loss_en.data.cpu()[0],
            loss_uniform.data.cpu()[0]))

    if 'max_out' in self.loss_type:
        pred_N = pred.index_select(
            1, torch.autograd.Variable(torch.arange(0, self.N).long().cuda()))
        pred_M = pred.index_select(
            1, torch.autograd.Variable(
                torch.arange(self.N, self.N + self.M).long().cuda()))
        pred_maxM, _ = torch.max(pred_M, dim=1, keepdim=True)
        pred_NmaxM = torch.cat((pred_N, pred_maxM), dim=1)
        loss += self._ce(pred_NmaxM, target)

    self._iter = self._iter + 1
    return loss

W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(),
              requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(),
              requires_grad=True)

num_epochs = 101
learning_rate = 0.001
for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())

        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
        log_softmax = F.log_softmax(z2, dim=0)

        loss = F.nll_loss(log_softmax.view(1, -1), y_true)
        loss_val += loss.item()
        loss.backward()

        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data
        W1.grad.data.zero_()
        W2.grad.data.zero_()
    # if epo % 10 == 0:
    #     print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')

#%%
W1numpy = W1.detach().cpu().numpy()
W2numpy = W2.detach().cpu().numpy()
#%%

def loss(self, input, target):
    pred = self.proj(input)              # batch x seq x tags
    pred = pred.view(-1, self.tags_num)  # ... x tags
    target = target.flatten()            # batch x seq -> ...
    return F.nll_loss(pred, target)

def forward(self, input, target):
    nll_loss = F.nll_loss(input, target, weight=self.weight)
    return {'nll': nll_loss}

def test_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.nll_loss(logits, y)
    return {"test_loss": loss}

def cross_entropy(input, target):
    # Assumes `input` already holds log-probabilities (e.g. log_softmax
    # output), so their NLL is the cross-entropy.
    return F.nll_loss(input, target)

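# Context for the wrapper above: F.nll_loss expects log-probabilities, so the
# wrapper is a cross-entropy only when `input` is log_softmax output. The
# sketch below (illustrative, not from the original sources) shows the
# standard equivalence with F.cross_entropy applied to raw logits.
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)           # raw, unnormalised scores
target = torch.randint(0, 10, (4,))   # class indices

ce = F.cross_entropy(logits, target)                    # takes logits
nll = F.nll_loss(F.log_softmax(logits, dim=1), target)  # takes log-probs
assert torch.allclose(ce, nll)
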
def word2vec(self, words):
    vocabulary = []
    for token in words:
        if token not in vocabulary:
            vocabulary.append(token)
    word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
    idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}
    vocabulary_size = len(vocabulary)

    window_size = 2
    idx_pairs = []
    # for sentence in words:
    indices = [word2idx[word] for word in words]
    for center_word_pos in range(len(indices)):
        for w in range(-window_size, window_size + 1):
            context_word_pos = center_word_pos + w
            if context_word_pos < 0 or context_word_pos >= len(indices) \
                    or center_word_pos == context_word_pos:
                continue
            context_word_idx = indices[context_word_pos]
            idx_pairs.append((indices[center_word_pos], context_word_idx))
    idx_pairs = np.array(idx_pairs)

    embedding_dims = 5
    W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(),
                  requires_grad=True)
    num_epochs = 1
    learning_rate = 0.01
    for epo in range(num_epochs):
        loss_val = 0
        for data, target in idx_pairs:
            x = Variable(self.get_input_layer(data, vocabulary_size)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.item()
            loss.backward()

            W1.data -= learning_rate * W1.grad.data
            W2.data -= learning_rate * W2.grad.data
            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if epo % 10 == 0:
            print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')

    tmp = []
    for data, target in idx_pairs:
        x = Variable(self.get_input_layer(data, vocabulary_size)).float()
        z1 = torch.matmul(W1, x)
        tmp.append(z1)
    return tmp

def cross_entropy(input, target, weight=None, size_average=None,
                  ignore_index=-100, reduction='mean'):
    # Here `input` is expected to hold probabilities, so take the log
    # before computing the NLL.
    if size_average:
        reduction = 'mean'
    return F.nll_loss(torch.log(input + 1e-8), target, weight, None,
                      ignore_index, None, reduction)