Example #1
0
def train_skipgram(vocab, sg_loader):
    """Train a SkipGram embedding model and return it with per-epoch losses.

    Relies on module-level configuration globals: ``load_emb``,
    ``embeddings_bin``, ``embed_size``, ``simple``, ``learning_rate``,
    ``n_epoch`` and ``average`` — TODO(review): confirm they are defined
    before this is called.

    Args:
        vocab: vocabulary; only ``len(vocab)`` is used to size the model.
        sg_loader: iterable of batches; each item's first element is a
            tensor whose last column is the target and remaining columns
            are the model inputs (assumed shape — verify against caller).

    Returns:
        tuple: ``(model, losses)`` — the trained model and a list with the
        total L1 loss of each completed epoch.
    """
    losses = []
    loss_fn = nn.L1Loss()
    # Either restore previously saved embeddings or build a fresh model.
    if load_emb:
        model = SkipGram(len(vocab), file=embeddings_bin)
    else:
        model = SkipGram(len(vocab), embed_size, simple)
    print(model)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for sample_batched in sg_loader:
            sample_batched = sample_batched[0]

            model.zero_grad()
            log_probs = model(sample_batched[:, :-1], average)
            loss = loss_fn(log_probs, Variable(sample_batched[:, -1].float()))

            loss.backward()
            optimizer.step()

            # Accumulate a plain Python float: avoids the deprecated
            # `.data` attribute and keeps total_loss usable even when the
            # loader yields no batches (a bare float has no `.item()`).
            total_loss += loss.item()

        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)
        save_params(emb=model, losses=losses, vocab=vocab)
        # Early stopping: quit once the epoch loss increases.
        # NOTE(review): `> 2` means stopping can only trigger from the
        # third epoch on — possibly a deliberate warm-up; kept as-is.
        if len(losses) > 2 and losses[-1] > losses[-2]:
            break
    return model, losses
Example #2
0
def train_skipgram(vocab, sg_loader):
    """Train a SkipGram model on (input word, context word, label) triples.

    Relies on module-level configuration globals: ``embed_size``,
    ``simple``, ``load_prev``, ``model_file``, ``learning_rate`` and
    ``n_epoch`` — TODO(review): confirm they are defined before this is
    called.

    Args:
        vocab: vocabulary; only ``len(vocab)`` is used to size the model.
        sg_loader: iterable of batches; each item's first element is a
            tensor with columns [input word, context word, label]
            (assumed layout — verify against caller).

    Returns:
        tuple: ``(model, losses)`` — the trained model and a list with the
        total L1 loss of each completed epoch.
    """
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)

    if load_prev:
        # Best-effort restore of a previous checkpoint: a failure is
        # reported but training proceeds from scratch. Catch Exception
        # (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate, and surface the actual reason.
        try:
            model.load_state_dict(torch.load(model_file))
        except Exception as err:
            print('Could not load file:', err)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for sample_batched in sg_loader:
            sample_batched = sample_batched[0]
            in_w_var = Variable(sample_batched[:, 0])
            ctx_w_var = Variable(sample_batched[:, 1])

            model.zero_grad()
            log_probs = model(in_w_var, ctx_w_var)
            loss = loss_fn(log_probs, Variable(sample_batched[:, 2].float()))

            loss.backward()
            optimizer.step()

            # Accumulate a plain Python float: avoids the deprecated
            # `.data` attribute and keeps total_loss usable even when the
            # loader yields no batches (a bare float has no `.item()`).
            total_loss += loss.item()

        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)
        save_params(vocab, model, losses)
    return model, losses