Example #1
def main():

    # Read sentences
    sentences = readFile("words2.txt")
    print(sentences)

    # Make uniq words list
    words = []
    uniqWords = []
    for sentence in sentences:
        for word in sentence:
            words.append(word)
            if word not in uniqWords:
                uniqWords.append(word)
    print(uniqWords)
    uniqWordSize = len(uniqWords)

    # Make trainPairs
    trainPairs = trainGenerator(sentences, uniqWords)

    dims = 5
    W1 = Variable(torch.randn(dims, uniqWordSize).float(), requires_grad=True)
    W2 = Variable(torch.randn(uniqWordSize, dims).float(), requires_grad=True)

    epo = 1001

    for i in range(epo):
        avg_loss = 0
        samples = 0
        for x, y in trainPairs:
            x = Variable(torch.from_numpy(x)).float()
            y = Variable(torch.from_numpy(np.array([y])).long())

            samples += len(y)

            a1 = torch.matmul(W1, x)
            a2 = torch.matmul(W2, a1)

            logSoftmax = F.log_softmax(a2, dim=0)
            loss = F.nll_loss(logSoftmax.view(1, -1), y)
            loss.backward()

            avg_loss += loss.item()

            W1.data -= 0.002 * W1.grad.data
            W2.data -= 0.002 * W2.grad.data

            W1.grad.data.zero_()
            W2.grad.data.zero_()

        # Report the running average loss once per reporting epoch,
        # after the full pass over the training pairs
        if i > 100 and i % 100 == 0:
            print(avg_loss / samples)

    parisVector = W1[:, uniqWords.index('paris')].data.numpy()
    context_to_predict = parisVector
    hidden = Variable(torch.from_numpy(context_to_predict)).float()
    a = torch.matmul(W2, hidden)
    probs = F.softmax(a, dim=0).data.numpy()
    for context, prob in zip(uniqWords, probs):
        print(f'{context}: {prob:.2f}')
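The readFile and trainGenerator helpers used in Example #1 are not shown. A minimal sketch of what trainGenerator presumably produces, given that the loop consumes (one-hot centre-word vector, context-word index) pairs; the window size and implementation details are assumptions, not the original code:

import numpy as np

def trainGenerator(sentences, uniqWords, window=2):
    # Hypothetical pair generator: (one-hot centre vector, context index)
    pairs = []
    for sentence in sentences:
        indices = [uniqWords.index(word) for word in sentence]
        for pos, center in enumerate(indices):
            for offset in range(-window, window + 1):
                ctx = pos + offset
                if offset == 0 or ctx < 0 or ctx >= len(indices):
                    continue
                x = np.zeros(len(uniqWords), dtype=np.float32)
                x[center] = 1.0  # one-hot centre word
                pairs.append((x, indices[ctx]))
    return pairs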
Example #2
    def _train_embeddings(self, epochs, lr):
        for epoch in range(epochs):
            loss_val = 0
            for data, target in self.idx_pairs:
                x = torch.zeros(self.n_vocab).float()
                x[data] = 1.0
                y_true = Variable(torch.from_numpy(np.array([target])).long())
                z1 = torch.matmul(self.W1, x)
                z2 = torch.matmul(self.W2, z1)
                log_softmax = F.log_softmax(z2, dim=0)
                '''
                print('data')
                print(data)
                print('target')
                print(target)
                print('y_true')
                print(y_true)
                print('log_softmax')
                print(log_softmax)
                '''
                loss = F.nll_loss(log_softmax.view(1, -1), y_true)
                loss_val += loss.data.item()
                loss.backward()
                self.W1.data -= lr * self.W1.grad.data
                self.W2.data -= lr * self.W2.grad.data

                self.W1.grad.data.zero_()
                self.W2.grad.data.zero_()
            if epoch % 10 == 0:
                print('Loss at epoch {}: {}'.format(
                    epoch, loss_val / len(self.idx_pairs)))
Example #3
def train(num_epochs=100, lr=0.001):
    embedding_size = 10
    W1 = Variable(torch.randn(embedding_size, vocab_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocab_size, embedding_size).float(),
                  requires_grad=True)

    for epoch in range(num_epochs):
        loss_val = 0
        for data, target in dataset:
            x = Variable(input_layer(data)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)

            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.item()
            loss.backward()
            W1.data -= lr * W1.grad.data
            W2.data -= lr * W2.grad.data

            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if epoch % 10 == 0:
            print(f'Loss at epoch {epoch}: {loss_val/len(dataset)}')
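Example #3 (like several of the later examples) calls an input_layer / get_input_layer helper that is not shown; Example #2 inlines the same one-hot construction. A minimal sketch, with an assumed two-argument signature:

import torch

def get_input_layer(word_idx, vocab_size):
    # Hypothetical helper mirroring the inline version in Example #2:
    # a one-hot vector over the vocabulary.
    x = torch.zeros(vocab_size).float()
    x[word_idx] = 1.0
    return x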
Example #4
def rnn_senti(X_train, y_train, X_test, y_test):
    rnn = DoubanRNN().to(device)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-4)

    print('prepare datasets for RNN...')

    train_loader = torch.utils.data.DataLoader(DoubanCommentsDataset(
        X_train, y_train),
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=4)
    test_loader = torch.utils.data.DataLoader(DoubanCommentsDataset(
        X_test, y_test),
                                              batch_size=BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=4)

    # Train the model
    total_step = len(train_loader)
    rnn.train()
    for epoch in range(EPOCH):
        for i, data in enumerate(train_loader):
            ft, senti = data['ft'].reshape(
                -1, BATCH_SIZE, 30).to(device), data['senti'].to(device)

            # Forward pass
            outputs = rnn(ft)
            loss = F.nll_loss(outputs, senti)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, EPOCH, i + 1, total_step, loss.item()))

    # Test the model
    rnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data in test_loader:
            ft, senti = data['ft'].to(device), data['senti'].to(device)
            outputs = rnn(ft)
            _, predicted = torch.max(outputs.data, 1)
            total += senti.size(0)
            correct += (predicted == senti).sum().item()

        print('Test Accuracy of the model on test set: {} %'.format(
            100 * correct / total))
Example #5
def evaluate(net, dataloader, num_ens=1):
    """Calculate ensemble accuracy and NLL"""
    accs = []
    nlls = []
    for i, (inputs, labels) in enumerate(dataloader):
        inputs = torch.autograd.Variable(inputs.cuda(non_blocking=True))
        labels = torch.autograd.Variable(labels.cuda(non_blocking=True))
        outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).cuda()
        for j in range(num_ens):
            outputs[:, :, j] = F.log_softmax(net(inputs), dim=1).data
        accs.append(logits2acc(logmeanexp(outputs, dim=2), labels))
        nlls.append(
            F.nll_loss(torch.autograd.Variable(logmeanexp(outputs, dim=2)),
                       labels,
                       reduction='sum').data.cpu().numpy())
    return np.mean(accs), np.sum(nlls)
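The logmeanexp helper used above to average the ensemble members' log-probabilities is not shown. Assuming it computes log(mean(exp(x))) along a dimension, a numerically stable sketch is:

import math
import torch

def logmeanexp(x, dim):
    # log(mean(exp(x))) along `dim`, computed stably via logsumexp
    return torch.logsumexp(x, dim=dim) - math.log(x.size(dim))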
Example #6
def main():
    tokens = tokenize(corpus)
    vocabulary = set(sum(tokens, []))  # sum() flattens the 2d list
    vocab_size = len(vocabulary)
    cc_pair = generate_center_context_pair(tokens, 2)
    # pprint(cc_pair)

    word2idx = word2index(tokens)
    idx2word = {key: val for (val, key) in word2idx.items()}
    print(word2idx)
    print(idx2word)

    idx_pairs = get_idxpairs(cc_pair, word2idx)
    idx_pairs = np.array(idx_pairs)

    embedding_dims = 5
    W1 = Variable(torch.randn(embedding_dims, vocab_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocab_size, embedding_dims).float(),
                  requires_grad=True)
    max_iter = int(sys.argv[1])
    learning_rate = 0.001

    for i in range(max_iter):
        loss_val = 0
        for data, target in idx_pairs:
            x = Variable(get_input_layer(data, vocab_size)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)

            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.item()
            loss.backward()
            W1.data -= learning_rate * W1.grad.data
            W2.data -= learning_rate * W2.grad.data

            W1.grad.data.zero_()
            W2.grad.data.zero_()
        if i % 10 == 0:
            print(f"Loss at iter {i}: {loss_val/len(idx_pairs)}")
Example #7
def MNISTtest(test_loader, model):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output, _ = model(data)
            test_loss += F.nll_loss(
                output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).long().cpu().sum()

    test_loss /= len(test_loader.dataset)
    logging.info(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return 100.0 * correct / len(test_loader.dataset)
Example #8
def MNISTtrain(train_loader, model, epochs):
    with_cuda = torch.cuda.is_available()
    lr = 0.01
    momentum = 0.5
    model.train()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            if with_cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output, _ = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % 100 == 0:
                logging.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item()))
Example #9
def train():
    W1 = torch.randn(EMBEDDING_DIMENSION,
                     VOCAB_SIZE,
                     dtype=torch.float,
                     device=DEVICE,
                     requires_grad=True)
    W2 = torch.randn(VOCAB_SIZE,
                     EMBEDDING_DIMENSION,
                     dtype=torch.float,
                     device=DEVICE,
                     requires_grad=True)
    dataloader = DataLoader(MSMARCO('data/pairs.txt'), MB_SIZE, shuffle=True)
    epoch = 0
    for center, context in dataloader:
        if epoch > EPOCHS:
            break
        total_loss = 0
        for i in tqdm(range(0, MB_SIZE)):
            x = Variable(get_input_layer(center[i])).float().to(DEVICE)
            y = Variable(torch.from_numpy(np.array([context[i]
                                                    ])).long()).to(DEVICE)
            z1 = torch.matmul(W1, x).to(DEVICE)
            z2 = torch.matmul(W2, z1).to(DEVICE)
            log_softmax = F.log_softmax(z2, dim=0).to(DEVICE)
            loss = F.nll_loss(log_softmax.view(1, -1), y)
            total_loss += loss.item()
            loss.backward()
            W1.data -= learning_rate * W1.grad.data
            W2.data -= learning_rate * W2.grad.data
            tmp = W1.grad.data.zero_()
            tmp = W2.grad.data.zero_()
            del x, y, z1, z2, log_softmax, loss, tmp
            torch.cuda.empty_cache()
        epoch += 1
        print_message("Epoch {}: loss {}".format(epoch, total_loss / MB_SIZE))
    idx2vec = W2.data.cpu().numpy()
    pickle.dump(idx2vec, open('data/idx2vec.txt', 'wb'))
    print_message("Word2Vec Finished Training")
Example #10
def run_model(vocabulary_size: int,
              documents: list,
              word2idx: dict,
              embedding_dims: int = 128,
              num_epochs: int = 101,
              learning_rate: float = 0.001):
    W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(),
                  requires_grad=True)
    W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(),
                  requires_grad=True)

    for epo in range(num_epochs):
        start_time = time.time()
        loss_val = 0
        idx_pairs = create_idx_pairs(documents, word2idx)
        for data, target in idx_pairs:
            x = Variable(get_input_layer(data)).float()
            y_true = Variable(torch.from_numpy(np.array([target])).long())

            z1 = torch.matmul(W1, x)
            z2 = torch.matmul(W2, z1)
            log_softmax = F.log_softmax(z2, dim=0)

            loss = F.nll_loss(log_softmax.view(1, -1), y_true)
            loss_val += loss.data.item()
            loss.backward()

            with torch.no_grad():
                W1 -= learning_rate * W1.grad
                W2 -= learning_rate * W2.grad

                W1.grad.zero_()
                W2.grad.zero_()

        print('Loss at epo {0}: {1}; {2} seconds'.format(
            epo, loss_val / len(idx_pairs), int(time.time() - start_time)))
    return W1, W2
Example #11
    loss_value = 0

    for data, target in index_pairs:

        x = Variable(get_input_layer(data)).float()
        t_ini = np.array([target])

        label_Y = Variable(torch.from_numpy(t_ini).long())

        mult_1 = torch.matmul(W1, x)
        mult_2 = torch.matmul(W2, mult_1)

        log_softmax = F.log_softmax(mult_2, dim=0)

        loss = F.nll_loss(log_softmax.view(1, -1), label_Y)
        loss_value += loss.item()

        loss.backward()

        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data

        W1.grad.data.zero_()
        W2.grad.data.zero_()

    total = len(index_pairs)
    t_loss = loss_value / total
    loss_array.append(t_loss)

# Print Loss per epoch
Example #12
    def forward(self, pred, target):
        '''
        pred should be the linear output. softmax will be calculated here
        '''
        batch_size = pred.data.size(0)
        pred = pred.view(batch_size, self.M + self._num_proto * self.N)
        if self._num_proto > 1:
            if self._multi_policy_proto == 'max_softmax':
                first = pred[:, :(self.N * self._num_proto)].contiguous().view(
                    batch_size, self.N, self._num_proto)
                first_max, _ = torch.max(first, dim=2)
                second = pred[:, (self.N * self._num_proto):]
                pred = torch.cat((first_max, second), dim=1)
                prediction = F.softmax(pred, dim=1)
            elif self._multi_policy_proto == 'softmax_sum':
                prediction = F.softmax(pred, dim=1)
                first = prediction[:, :(self.N *
                                        self._num_proto)].contiguous().view(
                                            batch_size, self.N,
                                            self._num_proto)
                first_sum = torch.sum(first, dim=2)
                second = prediction[:, (self.N * self._num_proto):]
                prediction = torch.cat((first_sum, second), dim=1)
        else:
            prediction = F.softmax(pred, dim=1)

        loss = 0

        # cross entropy loss
        loss_ce = 0
        if 'cross_entropy' in self.loss_type:
            prob_N = prediction.index_select(
                1,
                torch.autograd.Variable(torch.arange(0, self.N).long().cuda()))
            prob_M = prediction.index_select(
                1,
                torch.autograd.Variable(
                    torch.arange(self.N, self.N + self.M).long().cuda()))
            prob_sM = torch.sum(prob_M, 1, keepdim=True)
            prob_N1 = torch.cat((prob_N, prob_sM), dim=1)
            log_prob_N1 = torch.log(prob_N1 + self.eps)
            loss_ce = F.nll_loss(log_prob_N1, target)
            loss += loss_ce * self.loss_type.get('cross_entropy', 1)

        # entropy loss
        loss_en = 0
        if 'entropy_loss' in self.loss_type or \
                'uniform_loss' in self.loss_type:
            negative_prob_M = prob_M[(
                target.data == self.N).nonzero().squeeze(1), :]
            norm_neg_prob_M = negative_prob_M / (
                torch.sum(negative_prob_M, dim=1) + self.eps).view(
                    -1, 1).expand_as(negative_prob_M)

        if 'entropy_loss' in self.loss_type:
            #loss_en = - torch.mean(torch.sum(norm_neg_prob_M * torch.log(norm_neg_prob_M+
            #self.eps), dim=1))
            loss_en = -torch.mean(
                torch.sum(prediction * torch.log(prediction + self.eps),
                          dim=1))
            loss += loss_en * self.loss_type.get('entropy_loss', 1)

        # uniform loss: encourage the M background classes to share
        # probability mass roughly uniformly on negative samples
        loss_uniform = 0
        if 'uniform_loss' in self.loss_type:
            avg_norm_neg_prob_M = torch.mean(norm_neg_prob_M, dim=0)
            loss_uniform = -torch.mean(
                torch.log(avg_norm_neg_prob_M + self.eps)) - Variable(
                    torch.log(torch.FloatTensor([self.M]).cuda()))
            #loss_uniform *= Variable(torch.FloatTensor([0.001]).cuda())
            loss += loss_uniform * self.loss_type.get('uniform_loss', 1)
            if (self._iter % 100) == 0:
                logging.info(
                    'loss ce = {}; loss en = {}; loss uniform = {}'.format(
                        loss_ce.data.cpu()[0],
                        loss_en.data.cpu()[0],
                        loss_uniform.data.cpu()[0]))

        if 'max_out' in self.loss_type:
            pred_N = pred.index_select(
                1,
                torch.autograd.Variable(torch.arange(0, self.N).long().cuda()))
            pred_M = pred.index_select(
                1,
                torch.autograd.Variable(
                    torch.arange(self.N, self.N + self.M).long().cuda()))
            pred_maxM, _ = torch.max(pred_M, dim=1, keepdim=True)
            pred_NmaxM = torch.cat((pred_N, pred_maxM), dim=1)
            loss += self._ce(pred_NmaxM, target)

        self._iter = self._iter + 1
        return loss
Example #13
# W1 is assumed to be defined analogously in the part of the script not shown here
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)
num_epochs = 101
learning_rate = 0.001

for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())

        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
    
        log_softmax = F.log_softmax(z2, dim=0)

        loss = F.nll_loss(log_softmax.view(1,-1), y_true)
        loss_val += loss.item()
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data

        W1.grad.data.zero_()
        W2.grad.data.zero_()
    #if epo % 10 == 0:    
    print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')


#%%
W1numpy = W1.detach().cpu().numpy()
W2numpy = W2.detach().cpu().numpy()
#%%
Example #14
    def loss(self, input, target):
        # F.nll_loss expects log-probabilities, so self.proj is presumably a
        # projection that ends in a LogSoftmax over the tag dimension.
        pred = self.proj(input)  # batch x seq x tags
        pred = pred.view(-1, self.tags_num)  # ... x tags
        target = target.flatten()  # batch x seq -> ...
        return F.nll_loss(pred, target)
Example #15
    def forward(self, input, target):
        nll_loss = F.nll_loss(input, target, weight=self.weight)

        return {'nll': nll_loss}
Example #16
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        return {"test_loss": loss}
Example #17
def cross_entropy(input, target):
    return F.nll_loss(input, target)
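This wrapper only matches the usual cross-entropy if `input` already holds log-probabilities; with raw logits, apply log_softmax first. A small check of the equivalence (the tensors below are illustrative):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)                           # raw scores
target = torch.randint(0, 10, (4,))

a = F.cross_entropy(logits, target)                   # built-in
b = F.nll_loss(F.log_softmax(logits, dim=1), target)  # log_softmax + nll_loss
assert torch.allclose(a, b)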
Example #18
    def word2vec(self, words):
        vocabulary = []
        for token in words:
            if token not in vocabulary:
                vocabulary.append(token)

        word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
        idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}

        vocabulary_size = len(vocabulary)

        window_size = 2
        idx_pairs = []

        # for sentence in words:
        indices = [word2idx[word] for word in words]

        for center_word_pos in range(len(indices)):
            for w in range(-window_size, window_size + 1):
                context_word_pos = center_word_pos + w
                if context_word_pos < 0 or context_word_pos >= len(
                        indices) or center_word_pos == context_word_pos:
                    continue
                context_word_idx = indices[context_word_pos]
                idx_pairs.append((indices[center_word_pos], context_word_idx))

        idx_pairs = np.array(idx_pairs)

        embedding_dims = 5
        W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(),
                      requires_grad=True)
        W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(),
                      requires_grad=True)
        num_epochs = 1
        learning_rate = 0.01

        for epo in range(num_epochs):
            loss_val = 0
            for data, target in idx_pairs:
                x = Variable(self.get_input_layer(data,
                                                  vocabulary_size)).float()
                y_true = Variable(torch.from_numpy(np.array([target])).long())

                z1 = torch.matmul(W1, x)
                z2 = torch.matmul(W2, z1)

                log_softmax = F.log_softmax(z2, dim=0)

                loss = F.nll_loss(log_softmax.view(1, -1), y_true)
                loss_val += loss.item()
                loss.backward()
                W1.data -= learning_rate * W1.grad.data
                W2.data -= learning_rate * W2.grad.data

                W1.grad.data.zero_()
                W2.grad.data.zero_()
            if epo % 10 == 0:
                print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')

        tmp = []
        for data, target in idx_pairs:
            x = Variable(self.get_input_layer(data, vocabulary_size)).float()
            z1 = torch.matmul(W1, x)
            tmp.append(z1)
        return tmp
Example #19
def cross_entropy(input, target, weight=None, size_average=None,
                  ignore_index=-100, reduction='mean'):
    if size_average:
        reduction = 'mean'
    # `input` holds probabilities, so take the log (with a small epsilon for
    # numerical stability) before passing it to the NLL loss.
    return F.nll_loss(torch.log(input + 1e-8), target, weight=weight,
                      ignore_index=ignore_index, reduction=reduction)
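Unlike Example #17, this variant expects `input` to hold probabilities (for example the output of a softmax) and takes the log itself, with a small epsilon for numerical stability. A usage sketch of the wrapper defined above, with illustrative tensors:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))

probs = F.softmax(logits, dim=1)       # probabilities, not log-probabilities
loss = cross_entropy(probs, target)    # the wrapper defined above
ref = F.cross_entropy(logits, target)  # agrees up to the 1e-8 epsilon
assert torch.allclose(loss, ref, atol=1e-5)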