# Assumes module-level config flags (load_emb, load_rnn, load_cnn, use_emb,
# embeddings_bin, rnn_bin, cnn_bin, embed_size, simple, average, learning_rate,
# n_epoch, batch_size) defined elsewhere in this file.
import torch
from torch import nn, optim


def train_chordRNN(vocab, data):
    cfg = Config()
    model = SkipGram(len(vocab), file=embeddings_bin) if load_emb else SkipGram(
        len(vocab), embed_size, simple)
    rnn = ChordRNN(vocab, model, cfg, file=rnn_bin) if load_rnn else ChordRNN(
        vocab, model, cfg)
    losses = []
    optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i in range(len(data) // batch_size):
            optimizer.zero_grad()  # clear gradients from the previous batch
            loss, _, _ = rnn(data[i * batch_size:(i + 1) * batch_size])
            loss.backward()
            optimizer.step()
            total_loss += loss.detach()  # detach so the graph can be freed
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        losses.append(total_loss.item())
        save_params(rnn=rnn, losses=losses)
        # Early stopping: halt once the epoch loss stops decreasing
        if len(losses) > 2 and losses[-1] > losses[-2]:
            break
    # Sanity check: greedily decode a short continuation of the first sequence
    out = rnn.decodeGreedy(data[0][0:2], 3)
    print([model.vec2chord(o, vocab) for o in out])
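# save_params is defined elsewhere and its body is not shown in this file.
# A minimal sketch of what it might look like, assuming it simply serializes
# whichever components are passed; the keyword names follow the call sites
# above, and all file paths here are hypothetical:
import pickle


def save_params(emb=None, rnn=None, cnn=None, losses=None, vocab=None):
    if emb is not None:
        torch.save(emb.state_dict(), 'embeddings.bin')  # hypothetical path
    if rnn is not None:
        torch.save(rnn.state_dict(), 'rnn.bin')  # hypothetical path
    if cnn is not None:
        torch.save(cnn.state_dict(), 'cnn.bin')  # hypothetical path
    if losses is not None:
        with open('losses.pkl', 'wb') as f:  # hypothetical path
            pickle.dump(losses, f)
    if vocab is not None:
        with open('vocab.pkl', 'wb') as f:  # hypothetical path
            pickle.dump(vocab, f)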
def train_classifier(vocab):
    embeddings = SkipGram(len(vocab), file=embeddings_bin) if load_emb else SkipGram(
        len(vocab), embed_size, simple)
    model = ChordClassifier(
        embeddings, file=cnn_bin) if load_cnn else ChordClassifier(embeddings)
    losses = []
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    X_train, Y_train, X_test, Y_test = chordsDataset(vocab)
    prev_acc = 0
    acc = 0
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, x in enumerate(X_train):
            optimizer.zero_grad()  # clear gradients from the previous sample
            _, loss, _ = model(x, Y_train[i], use_emb=use_emb)
            loss.backward()
            optimizer.step()
            total_loss += loss.detach()
        total_loss /= len(X_train)
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        losses.append(total_loss.item())
        # Evaluate on the held-out set every ten epochs
        if epoch % 10 == 9:
            acc = 0
            with torch.no_grad():
                for i, x in enumerate(X_test):
                    _, _, correct = model(x, Y_test[i])
                    acc += correct.type(torch.float)
            acc /= len(X_test)
            print('Test accuracy:', acc.item())
            prev_acc = acc  # keep the latest accuracy for comparison
    save_params(cnn=model, losses=losses)
def train_skipgram(vocab, sg_loader):
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), file=embeddings_bin) if load_emb else SkipGram(
        len(vocab), embed_size, simple)
    print(model)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]
            model.zero_grad()
            # Every column but the last holds input indices; the last column
            # is the regression target for the L1 loss
            log_probs = model(sample_batched[:, :-1], average)
            loss = loss_fn(log_probs, sample_batched[:, -1].float())
            loss.backward()
            optimizer.step()
            total_loss += loss.detach()
        losses.append(total_loss.item())
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        save_params(emb=model, losses=losses, vocab=vocab)
        # Early stopping: halt once the epoch loss stops decreasing
        if len(losses) > 2 and losses[-1] > losses[-2]:
            break
    return model, losses
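# For reference, a sketch of the batch layout the loop above expects: each
# row is [idx_1, ..., idx_k, target], with the target in the last column.
# The values and sizes below are illustrative, not taken from the real data.
from torch.utils.data import DataLoader, TensorDataset

example_rows = torch.tensor([[3, 7, 1, 9, 5],   # four input indices + target
                             [2, 4, 8, 6, 0]])
example_loader = DataLoader(TensorDataset(example_rows), batch_size=2)
# TensorDataset yields one-tuples, which is why the training loop unwraps
# each batch with sample_batched[0] before slicing.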
# Assumes torch, device, CorpusReader, SkipGram and show_results are
# imported/defined at module level.
def main():
    dataloader = CorpusReader("./data/wili-2018/x_train_sub.txt",
                              "./data/wili-2018/y_train_sub.txt")
    char_to_idx, idx_to_char, char_frequency = dataloader.get_mappings()
    model = SkipGram(12300, 256, char_frequency)
    # Restore the trained weights from checkpoint 5
    with open("./models/skipgram/5.pt", 'rb') as f:
        state_dict = torch.load(f)
    model.load_state_dict(state_dict)
    print("Model Loaded")
    save_embeddings = True
    if save_embeddings:
        # Export only the central (input-side) embedding matrix
        central_embeddings = model.central_embedding.weight
        torch.save(central_embeddings, './models/character_embeddings.pt')
        print("{} Embedding Weights Saved".format(central_embeddings.shape))
    model = model.to(device)
    model.eval()
    similarities = model.vocabulary_similarities()
    show_chars = [
        't', 'b', 'a', 'e', 'x', ',', '.', '@', '%', '4', '9',
        "բ", "Հ", "ñ", "名", "Θ"
    ]
    show_results(show_chars, similarities, char_to_idx, idx_to_char)
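# The exported matrix can be reloaded later without rebuilding the model; a
# minimal sketch using the path written by main() above:
embeddings = torch.load('./models/character_embeddings.pt')
print(embeddings.shape)  # (vocab_size, embed_dim), i.e. (12300, 256) here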
# Variant of train_skipgram that consumes (center, context, target) triplets.
# Assumes load_prev, model_file, embed_size, simple, learning_rate and n_epoch
# are defined at module level.
def train_skipgram(vocab, sg_loader):
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)
    if load_prev:
        try:
            model.load_state_dict(torch.load(model_file))
        except (OSError, RuntimeError):
            print('Could not load file')
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]
            in_w_var = sample_batched[:, 0]   # center-word indices
            ctx_w_var = sample_batched[:, 1]  # context-word indices
            model.zero_grad()
            log_probs = model(in_w_var, ctx_w_var)
            # Column 2 carries the target for each (center, context) pair
            loss = loss_fn(log_probs, sample_batched[:, 2].float())
            loss.backward()
            optimizer.step()
            total_loss += loss.detach()
        losses.append(total_loss.item())
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        save_params(vocab, model, losses)
    return model, losses
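# Sketch of the triplet rows this variant expects: column 0 is the center
# index, column 1 a context index, column 2 the target value. Whether the
# target is a 0/1 label (e.g. from negative sampling) depends on how the
# loader is built; the rows below are purely illustrative.
example_triplets = torch.tensor([[5, 2, 1],   # observed pair
                                 [5, 9, 0]])  # e.g. a negatively sampled pair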
def __init__(self, filename, window=10, size=600, decay=8.0, balanced=False,
             prior=True):
    self._window = window
    self._size = size
    self._decay = decay
    self._prior_pred = prior
    self._stopwordslist = []
    self._props = {
        "window": window,
        "size": size,
        "decay": decay,
        "prior": prior,
        "balanced": balanced
    }
    super(Ensemble, self).__init__(filename)
    # Each sub-model is constructed with its own tuned window and size
    self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                balanced, True)
    self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
    self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
    self._models = ["collaborative", "cbowsim", "skipgram"]
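# Illustrative construction; the corpus filename is hypothetical and all
# other hyperparameters fall back to the defaults above:
#     ensemble = Ensemble("corpus.txt", window=10, size=600)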
def main(args):
    data_loader = DataLoader()
    data_loader.load_graph()
    data_loader.load_sequence(args.seq_file)
    data_loader.construct_distribution()
    data_loader.generate_positive_samples()
    # data_loader.generate_negative_samples(3, 1)
    model = SkipGram()
    model.initialize(data_loader.get_node_size(), args.emb_dim)
    model.train_process(args.epochs, data_loader, args.neg_num)
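# main() expects an args namespace exposing seq_file, emb_dim, epochs and
# neg_num (the attributes read above). A minimal argparse setup along those
# lines; flag names match the attributes, defaults are illustrative:
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seq_file', type=str, required=True)
    parser.add_argument('--emb_dim', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--neg_num', type=int, default=5)
    main(parser.parse_args())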
def run_skipgram():
    vec = SkipGram()
    vec.train()
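# Entry-point sketch, assuming this module is meant to be run directly:
if __name__ == '__main__':
    run_skipgram()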