def train_chordRNN(vocab, data):
    """Train a ChordRNN sequence model on batches of chord data.

    Relies on module-level configuration: Config, SkipGram, ChordRNN,
    embeddings_bin, rnn_bin, load_emb, load_rnn, embed_size, simple,
    learning_rate, n_epoch, batch_size, save_params.

    Args:
        vocab: chord vocabulary (sized with len()).
        data: indexable training sequences; sliced into batch_size chunks.
    """
    cfg = Config()
    # Load pretrained embeddings from disk when requested, else start fresh.
    model = (SkipGram(len(vocab), file=embeddings_bin) if load_emb
             else SkipGram(len(vocab), embed_size, simple))
    rnn = (ChordRNN(vocab, model, cfg, file=rnn_bin) if load_rnn
           else ChordRNN(vocab, model, cfg))
    losses = []
    optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i in range(len(data) // batch_size):
            # BUGFIX: gradients must be cleared before each backward pass;
            # the original never called zero_grad(), so gradients accumulated
            # across every batch and epoch.
            optimizer.zero_grad()
            loss, _, _ = rnn(data[i * batch_size:(i + 1) * batch_size])
            loss.backward()
            optimizer.step()
            # BUGFIX: accumulate a Python float. Summing loss tensors kept
            # every batch's autograd graph alive (memory leak).
            total_loss += loss.item()
        print('Epoch:', epoch, 'Loss:', total_loss)
        losses.append(total_loss)
        save_params(rnn=rnn, losses=losses)
        # Early stopping: quit as soon as the epoch loss goes back up.
        if len(losses) > 2 and losses[-1] > losses[-2]:
            break
    # Sanity-check: greedily decode a short continuation of the first sample.
    out = rnn.decodeGreedy(data[0][0:2], 3)
    print([model.vec2chord(o, vocab) for o in out])
def train_classifier(vocab):
    """Train a ChordClassifier on top of (optionally pretrained) embeddings.

    Relies on module-level configuration: SkipGram, ChordClassifier,
    embeddings_bin, cnn_bin, load_emb, load_cnn, embed_size, simple, use_emb,
    learning_rate, n_epoch, chordsDataset, save_params.

    Args:
        vocab: chord vocabulary (sized with len()).
    """
    embeddings = (SkipGram(len(vocab), file=embeddings_bin) if load_emb
                  else SkipGram(len(vocab), embed_size, simple))
    model = (ChordClassifier(embeddings, file=cnn_bin) if load_cnn
             else ChordClassifier(embeddings))
    losses = []
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    X_train, Y_train, X_test, Y_test = chordsDataset(vocab)
    # NOTE: removed the unused prev_acc tracker the original carried around.
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, x in enumerate(X_train):
            # BUGFIX: clear gradients each step; the original never called
            # zero_grad(), so gradients accumulated across every sample.
            optimizer.zero_grad()
            _, loss, _ = model(x, Y_train[i], use_emb=use_emb)
            loss.backward()
            optimizer.step()
            # BUGFIX: accumulate a float; summing loss tensors retains the
            # autograd graph of every sample.
            total_loss += loss.item()
        total_loss /= len(X_train)
        print('Epoch:', epoch, 'Loss:', total_loss)
        losses.append(total_loss)
        # Evaluate on the held-out split every 10th epoch.
        if epoch % 10 == 9:
            acc = 0.0
            for i, x in enumerate(X_test):
                _, _, correct = model(x, Y_test[i])
                acc += float(correct)
            acc /= len(X_test)
            print('Test accuracy:', acc)
    save_params(cnn=model, losses=losses)
def __init__(self, filename, window=10, size=600, decay=8.0, balanced=False,
             prior=True):
    """Configure the ensemble and construct its three member predictors.

    Args:
        filename: data/model file passed to each sub-predictor and to the
            base-class initializer.
        window, size, decay, balanced, prior: ensemble-level settings; note
            that the sub-models below are built with their own fixed
            window/size values, sharing only decay and balanced.
    """
    self._window = window
    self._size = size
    self._decay = decay
    self._prior_pred = prior
    self._stopwordslist = []
    # Keep the constructor arguments together in one dict.
    self._props = {"window": window, "size": size, "decay": decay,
                   "prior": prior, "balanced": balanced}
    super(Ensemble, self).__init__(filename)
    # BUGFIX: removed the dead `self.collaborative = {}`, `self.skipgram = {}`
    # and `self.cbowsim = {}` placeholder stores that were immediately
    # overwritten by the instances below.
    self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                balanced, True)
    self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
    self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
    self._models = ["collaborative", "cbowsim", "skipgram"]
def main():
    """Load a trained character SkipGram checkpoint, optionally persist its
    central embedding weights, and print nearest-neighbour similarities for a
    sample of characters across several scripts."""
    reader = CorpusReader("./data/wili-2018/x_train_sub.txt",
                          "./data/wili-2018/y_train_sub.txt")
    char_to_idx, idx_to_char, char_frequency = reader.get_mappings()
    model = SkipGram(12300, 256, char_frequency)
    with open("./models/skipgram/5.pt", 'rb') as checkpoint:
        model.load_state_dict(torch.load(checkpoint))
    print("Model Loaded")
    save_embeddings = True
    if save_embeddings:
        weights = model.central_embedding.weight
        torch.save(weights, './models/character_embeddings.pt')
        print("{} Embedding Weights Saved".format(weights.shape))
    model = model.to(device)
    model.eval()
    similarities = model.vocabulary_similarities()
    show_chars = ['t', 'b', 'a', 'e', 'x', ',', '.', '@', '%', '4', '9',
                  "բ", "Հ", "ñ", "名", "Θ"]
    show_results(show_chars, similarities, char_to_idx, idx_to_char)
def train_skipgram(vocab, sg_loader):
    """Train a SkipGram embedding model with L1 loss over (in, ctx, label) triples.

    Relies on module-level configuration: SkipGram, embed_size, simple,
    load_prev, model_file, learning_rate, n_epoch, save_params.

    Args:
        vocab: vocabulary (sized with len()).
        sg_loader: iterable of batches; each batch[0] is a tensor whose columns
            are [input word, context word, target label].

    Returns:
        (model, losses): the trained model and the per-epoch loss history.
    """
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)
    if load_prev:
        try:
            model.load_state_dict(torch.load(model_file))
        # BUGFIX: a bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; catch only the failures a checkpoint load can raise.
        except (OSError, RuntimeError, KeyError) as err:
            print('Could not load file')
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]
            in_w_var = Variable(sample_batched[:, 0])
            ctx_w_var = Variable(sample_batched[:, 1])
            model.zero_grad()
            log_probs = model(in_w_var, ctx_w_var)
            loss = loss_fn(log_probs, Variable(sample_batched[:, 2].float()))
            loss.backward()
            optimizer.step()
            # BUGFIX: use .item() instead of the deprecated .data attribute.
            total_loss += loss.item()
        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)
    save_params(vocab, model, losses)
    return model, losses
def train_skipgram(vocab, sg_loader):
    """Train (or fine-tune) a SkipGram embedding model with L1 loss.

    Relies on module-level configuration: SkipGram, embeddings_bin, load_emb,
    embed_size, simple, average, learning_rate, n_epoch, save_params.

    Args:
        vocab: vocabulary (sized with len()).
        sg_loader: iterable of batches; each batch[0] is a tensor whose last
            column is the target label and the rest are model inputs.

    Returns:
        (model, losses): the trained model and the per-epoch loss history.
    """
    losses = []
    loss_fn = nn.L1Loss()
    # Resume from a saved embedding file when requested, else start fresh.
    model = (SkipGram(len(vocab), file=embeddings_bin) if load_emb
             else SkipGram(len(vocab), embed_size, simple))
    print(model)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]
            model.zero_grad()
            log_probs = model(sample_batched[:, :-1], average)
            loss = loss_fn(log_probs, Variable(sample_batched[:, -1].float()))
            loss.backward()
            optimizer.step()
            # BUGFIX: use .item() instead of the deprecated .data attribute.
            total_loss += loss.item()
        losses.append(total_loss)
        print('Epoch:', epoch, 'Loss:', total_loss)
        save_params(emb=model, losses=losses, vocab=vocab)
        # Early stopping: quit once the loss trends upward.
        if len(losses) > 2 and losses[-1] > losses[-2]:
            break
    return model, losses
def testSkipGram(vocabulary_file, training_dir):
    """Train a SkipGram over the corpus for NUM_ITER passes, printing the
    log-likelihood of the final sentence after each pass and total wall time."""
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)
    print("Reading sentences and training SkipGram...")
    start = timer()
    skip_gram = SkipGram(len(words), WINDOW_SIZE, HIDDEN_LAYER_SIZE)
    last_sentence = None
    word_count = 0
    for iteration in range(NUM_ITER):
        sentence_stream = tokenize_files(dictionary, training_dir)
        # Cap each pass at MAX_SENTENCES sentences from the stream.
        for sentence in itertools.islice(sentence_stream, MAX_SENTENCES):
            last_sentence = sentence
            skip_gram.train(sentence)
            word_count += len(sentence)
        # Re-train on the last sentence purely to obtain a log-likelihood.
        ll = skip_gram.train(last_sentence, compute_ll=True)
        print("Iteration " + str(iteration + 1) + "/" + str(NUM_ITER) +
              " finished (" + str(word_count) + " words)")
        print("Log-likelihood: " + str(ll))
        word_count = 0
    print("- Took %.2f sec" % (timer() - start))
def main(args):
    """Build positive training samples from graph + sequence data, then fit a
    SkipGram embedding model."""
    loader = DataLoader()
    loader.load_graph()
    loader.load_sequence(args.seq_file)
    loader.construct_distribution()
    loader.generate_positive_samples()
    skipgram = SkipGram()
    skipgram.initialize(loader.get_node_size(), args.emb_dim)
    skipgram.train_process(args.epochs, loader, args.neg_num)
def __init__(self, filename, window=10, size=600, decay=8.0, balanced=False,
             prior=True):
    """Configure the ensemble and construct its three member predictors."""
    # Ensemble-level settings.
    self._window = window
    self._size = size
    self._decay = decay
    self._prior_pred = prior
    self._stopwordslist = []
    # Keep the constructor arguments together in one dict.
    self._props = {"window": window, "size": size, "decay": decay,
                   "prior": prior, "balanced": balanced}
    super(Ensemble, self).__init__(filename)
    # NOTE(review): these dict placeholders are dead stores — they are
    # immediately replaced by the model instances just below.
    self.collaborative = {}
    self.skipgram = {}
    self.cbowsim = {}
    # Each sub-model uses its own fixed window/size; decay and balanced are
    # shared with the ensemble.
    self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                balanced, True)
    self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
    self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
    self._models = ["collaborative", "cbowsim", "skipgram"]
class Ensemble(BinaryPredictor):
    """Weighted ensemble of three diagnosis predictors.

    Members: CollaborativeFiltering, CbowSim and SkipGram. train() fits each
    member and then learns per-diagnosis mixing weights from how much
    probability mass each member placed on the correct side of each label;
    predict() returns the weight-mixed combination of member predictions.
    """

    def __init__(self, filename, window=10, size=600, decay=8.0,
                 balanced=False, prior=True):
        """Configure the ensemble and construct its three member predictors."""
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        # Keep the constructor arguments together in one dict.
        self._props = {"window": window, "size": size, "decay": decay,
                       "prior": prior, "balanced": balanced}
        super(Ensemble, self).__init__(filename)
        # BUGFIX: removed the dead `= {}` placeholder stores that were
        # immediately overwritten by the model instances below.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                    balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]

    def train(self, filename):
        """Train each member model, then learn per-diagnosis mixing weights.

        Lines of `filename` are '|'-separated: field 0 holds the
        comma-separated actual diagnoses, field 2 the space-separated feed
        events.
        """
        self.collaborative.train(filename)
        self.cbowsim.train(filename)
        self.skipgram.train(filename)
        self._prior = self.cbowsim._prior
        # defaultdict(float) replaces the equivalent defaultdict(lambda: 0).
        self._weights = {m: defaultdict(float) for m in self._models}
        with open(filename) as f:
            for line in f:
                feed_events = line.split("|")[2].split(" ")
                actual = line.split("|")[0].split(",")
                cf_preds = self.collaborative.predict(feed_events)
                cbow_preds = self.cbowsim.predict(feed_events)
                skip_preds = self.skipgram.predict(feed_events)
                for diag in self._diags:
                    # Credit each model with the probability it assigned to
                    # the correct outcome for this diagnosis.
                    if diag in actual:
                        self._weights["collaborative"][diag] += cf_preds[diag]
                        self._weights["cbowsim"][diag] += cbow_preds[diag]
                        self._weights["skipgram"][diag] += skip_preds[diag]
                    else:
                        self._weights["collaborative"][diag] += 1 - cf_preds[diag]
                        self._weights["cbowsim"][diag] += 1 - cbow_preds[diag]
                        self._weights["skipgram"][diag] += 1 - skip_preds[diag]
        # Normalize weights so the three models sum to 1 per diagnosis.
        for diag in self._diags:
            norm = (self._weights["collaborative"][diag] +
                    self._weights["cbowsim"][diag] +
                    self._weights["skipgram"][diag])
            self._weights["collaborative"][diag] /= norm
            self._weights["cbowsim"][diag] /= norm
            self._weights["skipgram"][diag] /= norm
        print(self._weights)

    def predict(self, feed_events):
        """Return {diag: score}: the weight-mixed average of member predictions."""
        cf_preds = self.collaborative.predict(feed_events)
        cbow_preds = self.cbowsim.predict(feed_events)
        skip_preds = self.skipgram.predict(feed_events)
        predictions = {}
        for diag in self._diags:
            predictions[diag] = (
                cf_preds[diag] * self._weights["collaborative"][diag]
                + cbow_preds[diag] * self._weights["cbowsim"][diag]
                + skip_preds[diag] * self._weights["skipgram"][diag])
        return predictions
def run_skipgram():
    """Build a SkipGram model and run its training loop."""
    model = SkipGram()
    model.train()
class Ensemble(BinaryPredictor):
    """Weighted ensemble of three diagnosis predictors.

    Members: CollaborativeFiltering, CbowSim and SkipGram. train() fits each
    member and then learns per-diagnosis mixing weights; predict() returns
    the weight-mixed combination of member predictions.
    """

    def __init__(self, filename, window=10, size=600, decay=8.0,
                 balanced=False, prior=True):
        """Configure the ensemble and construct its three member predictors."""
        # Ensemble-level settings.
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        # Keep the constructor arguments together in one dict.
        self._props = {"window": window, "size": size, "decay": decay,
                       "prior": prior, "balanced": balanced}
        super(Ensemble, self).__init__(filename)
        # NOTE(review): these dict placeholders are dead stores — immediately
        # replaced by the model instances just below.
        self.collaborative = {}
        self.skipgram = {}
        self.cbowsim = {}
        # Each sub-model uses its own fixed window/size; decay and balanced
        # are shared with the ensemble.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                    balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]

    def train(self, filename):
        """Train each member model, then learn per-diagnosis mixing weights.

        Lines of `filename` are '|'-separated: field 0 holds the
        comma-separated actual diagnoses, field 2 the space-separated feed
        events.
        """
        self.collaborative.train(filename)
        self.cbowsim.train(filename)
        self.skipgram.train(filename)
        self._prior = self.cbowsim._prior
        self._weights = {m: defaultdict(lambda: 0) for m in self._models}
        with open(filename) as f:
            for line in f:
                feed_events = line.split("|")[2].split(" ")
                actual = line.split("|")[0].split(",")
                cf_preds = self.collaborative.predict(feed_events)
                cbow_preds = self.cbowsim.predict(feed_events)
                skip_preds = self.skipgram.predict(feed_events)
                for diag in self._diags:
                    # Credit each model with the probability it assigned to
                    # the correct outcome for this diagnosis.
                    if diag in actual:
                        self._weights["collaborative"][diag] += cf_preds[diag]
                        self._weights["cbowsim"][diag] += cbow_preds[diag]
                        self._weights["skipgram"][diag] += skip_preds[diag]
                    else:
                        self._weights["collaborative"][diag] += 1 - cf_preds[diag]
                        self._weights["cbowsim"][diag] += 1 - cbow_preds[diag]
                        self._weights["skipgram"][diag] += 1 - skip_preds[diag]
        # Normalize weights
        for diag in self._diags:
            norm = (self._weights["collaborative"][diag] +
                    self._weights["cbowsim"][diag] +
                    self._weights["skipgram"][diag])
            self._weights["collaborative"][diag] /= norm
            self._weights["cbowsim"][diag] /= norm
            self._weights["skipgram"][diag] /= norm
        print(self._weights)

    def predict(self, feed_events):
        """Return {diag: score}: the weight-mixed average of member predictions."""
        cf_preds = self.collaborative.predict(feed_events)
        cbow_preds = self.cbowsim.predict(feed_events)
        skip_preds = self.skipgram.predict(feed_events)
        predictions = {}
        for diag in self._diags:
            predictions[diag] = cf_preds[diag] * self._weights["collaborative"][diag]
            predictions[diag] += cbow_preds[diag] * self._weights["cbowsim"][diag]
            predictions[diag] += skip_preds[diag] * self._weights["skipgram"][diag]
        return predictions