import torch
import torch.nn as nn
import torch.optim as optim

# CorpusReader, SkipGram, show_results and device are defined elsewhere
# in this repo.
def main():
    dataloader = CorpusReader("./data/wili-2018/x_train_sub.txt",
                              "./data/wili-2018/y_train_sub.txt")
    char_to_idx, idx_to_char, char_frequency = dataloader.get_mappings()

    # Vocabulary of 12300 characters, 256-dimensional embeddings.
    model = SkipGram(12300, 256, char_frequency)
    with open("./models/skipgram/5.pt", 'rb') as f:
        state_dict = torch.load(f)
    model.load_state_dict(state_dict)
    print("Model Loaded")

    save_embeddings = True
    if save_embeddings:
        central_embeddings = model.central_embedding.weight
        torch.save(central_embeddings, './models/character_embeddings.pt')
        print("{} Embedding Weights Saved".format(central_embeddings.shape))

    model = model.to(device)
    model.eval()
    similarities = model.vocabulary_similarities()

    # A mix of Latin letters, punctuation, digits, and Armenian, Spanish,
    # Chinese and Greek characters to inspect nearest neighbours for.
    show_chars = [
        't', 'b', 'a', 'e', 'x', ',', '.', '@', '%',
        '4', '9', "բ", "Հ", "ñ", "名", "Θ",
    ]
    show_results(show_chars, similarities, char_to_idx, idx_to_char)
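# show_results is defined elsewhere in the repo. Below is a minimal sketch of
# what it could look like, assuming `similarities` is a
# (vocab_size, vocab_size) tensor of pairwise similarities between character
# embeddings; the name `show_results_sketch`, the top-k choice, and the
# output format are illustrative assumptions, not the author's code.
def show_results_sketch(chars, similarities, char_to_idx, idx_to_char, k=5):
    for ch in chars:
        row = similarities[char_to_idx[ch]]
        # Take k + 1 and drop the first hit, which is the character itself.
        top = torch.topk(row, k + 1).indices.tolist()[1:]
        neighbours = [idx_to_char[i] for i in top]
        print("{} -> {}".format(ch, neighbours))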
# embed_size, simple, load_prev, model_file, learning_rate, n_epoch and
# save_params are module-level settings and helpers defined elsewhere.
def train_skipgram(vocab, sg_loader):
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)

    if load_prev:
        try:
            model.load_state_dict(torch.load(model_file))
        except (OSError, RuntimeError):
            print('Could not load file')

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for sample_batched in sg_loader:
            sample_batched = sample_batched[0]
            # Column 0: centre-character indices, column 1: context-character
            # indices, column 2: 0/1 labels (negative/positive pairs).
            in_w = sample_batched[:, 0]
            ctx_w = sample_batched[:, 1]
            target = sample_batched[:, 2].float()

            model.zero_grad()
            log_probs = model(in_w, ctx_w)
            loss = loss_fn(log_probs, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)

    save_params(vocab, model, losses)
    return model, losses
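# SkipGram itself lives elsewhere in the repo. The sketch below is one module
# that is consistent with how the class is used above: main() saves
# central_embedding.weight and calls vocabulary_similarities(), and
# train_skipgram() compares forward(in_w, ctx_w) against 0/1 pair labels.
# The two-table architecture, the unused char_frequency argument, and the
# sigmoid output (which the caller names log_probs) are assumptions.
class SkipGramSketch(nn.Module):
    def __init__(self, vocab_size, embed_size, char_frequency=None):
        super().__init__()
        # Separate tables for centre and context characters, as in word2vec.
        self.central_embedding = nn.Embedding(vocab_size, embed_size)
        self.context_embedding = nn.Embedding(vocab_size, embed_size)

    def forward(self, in_w, ctx_w):
        # Dot product of centre and context vectors, squashed into (0, 1)
        # so it can be compared against the 0/1 positive/negative labels.
        v = self.central_embedding(in_w)
        u = self.context_embedding(ctx_w)
        return torch.sigmoid((v * u).sum(dim=1))

    def vocabulary_similarities(self):
        # Pairwise cosine similarities between all centre embeddings.
        w = self.central_embedding.weight
        w = w / w.norm(dim=1, keepdim=True)
        return w @ w.t()


if __name__ == "__main__":
    main()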