Example #1
import torch

# CorpusReader, SkipGram, show_results, and the data/model paths come from
# the project this example was taken from; `device` is assumed to pick the
# GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def main():

    dataloader = CorpusReader("./data/wili-2018/x_train_sub.txt",
                              "./data/wili-2018/y_train_sub.txt")
    char_to_idx, idx_to_char, char_frequency = dataloader.get_mappings()
    # 12300 = character vocabulary size, 256 = embedding dimension.
    model = SkipGram(12300, 256, char_frequency)

    # Restore weights from a previously trained checkpoint.
    with open("./models/skipgram/5.pt", 'rb') as f:
        state_dict = torch.load(f)
        model.load_state_dict(state_dict)
        print("Model Loaded")

    save_embeddings = True
    if save_embeddings:
        # Export only the central (input-side) embedding table.
        central_embeddings = model.central_embedding.weight
        torch.save(central_embeddings, './models/character_embeddings.pt')
        print("{} Embedding Weights Saved".format(central_embeddings.shape))
    model = model.to(device)
    model.eval()

    # Similarity scores over the full character vocabulary.
    similarities = model.vocabulary_similarities()
    show_chars = [
        't', 'b', 'a', 'e', 'x', ',', '.', '@', '%', '4', '9', "բ", "Հ", "ñ",
        "名", "Θ"
    ]
    show_results(show_chars, similarities, char_to_idx, idx_to_char)
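Neither example shows what vocabulary_similarities computes. Below is a minimal sketch of one plausible implementation, written as a standalone function: it assumes the method returns pairwise cosine similarities between rows of the central embedding table, which is a guess based on the name and on how the result is passed to show_results, not something the source confirms.

import torch
import torch.nn.functional as F

def vocabulary_similarities(embeddings: torch.Tensor) -> torch.Tensor:
    # Row-normalize, then take all pairwise dot products: the cosine
    # similarity between every pair of vectors, shape (vocab, vocab).
    emb = F.normalize(embeddings, dim=1)
    return emb @ emb.t()

# e.g. similarities = vocabulary_similarities(model.central_embedding.weight)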
Example #2
import torch
import torch.nn as nn
import torch.optim as optim

# embed_size, simple, load_prev, model_file, learning_rate, n_epoch, and
# save_params are module-level names in the project this example comes from.


def train_skipgram(vocab, sg_loader):
    losses = []
    # L1 loss regresses the model's score toward the target in column 2.
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)

    # Optionally resume from a previous checkpoint.
    if load_prev:
        try:
            model.load_state_dict(torch.load(model_file))
        except (OSError, RuntimeError):
            print('Could not load file')

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            # Each batch row holds (input index, context index, target label).
            sample_batched = sample_batched[0]
            in_w_var = sample_batched[:, 0]
            ctx_w_var = sample_batched[:, 1]
            model.zero_grad()
            log_probs = model(in_w_var, ctx_w_var)
            loss = loss_fn(log_probs, sample_batched[:, 2].float())

            loss.backward()
            optimizer.step()

            # .item() extracts the scalar so the graph can be freed each step.
            total_loss += loss.item()

        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)
        save_params(vocab, model, losses)
    return model, losses
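
The SkipGram module itself appears in neither snippet. Below is a minimal sketch of a class with the same constructor and forward signature, assuming two embedding tables whose dot product is squashed to a (0, 1) score; the sigmoid scoring and the treatment of the third constructor argument are assumptions for illustration, not taken from the source.

import torch
import torch.nn as nn

class SkipGram(nn.Module):
    def __init__(self, vocab_size, embed_size, char_frequency=None):
        super().__init__()
        # Separate tables for a character as the center vs. as the context.
        self.central_embedding = nn.Embedding(vocab_size, embed_size)
        self.context_embedding = nn.Embedding(vocab_size, embed_size)
        self.char_frequency = char_frequency  # e.g. for negative sampling

    def forward(self, in_idx, ctx_idx):
        # Dot product of center and context vectors, squashed to a score
        # that can be compared against a 0/1 context label (as in Example #2).
        center = self.central_embedding(in_idx)
        context = self.context_embedding(ctx_idx)
        return torch.sigmoid((center * context).sum(dim=1))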