Example #1
0
def train_chordRNN(vocab, data):
    """Train a ChordRNN language model on fixed-size batches of chord data.

    Relies on module-level globals: Config, SkipGram, ChordRNN, optim,
    save_params, learning_rate, n_epoch, batch_size, load_emb, load_rnn,
    embeddings_bin, rnn_bin, embed_size, simple.
    """
    cfg = Config()
    # Load pretrained embeddings from disk, or start a fresh SkipGram.
    model = SkipGram(len(vocab),
                     file=embeddings_bin) if load_emb else SkipGram(
                         len(vocab), embed_size, simple)
    rnn = ChordRNN(vocab, model, cfg, file=rnn_bin) if load_rnn else ChordRNN(
        vocab, model, cfg)

    losses = []
    optimizer = optim.SGD(rnn.parameters(), lr=learning_rate)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for i in range(len(data) // batch_size):
            # BUG FIX: gradients must be cleared each step; without this they
            # accumulate across batches and corrupt every SGD update.
            optimizer.zero_grad()
            loss, _, _ = rnn(data[i * batch_size:(i + 1) * batch_size])
            loss.backward()
            optimizer.step()
            # BUG FIX: accumulate a plain float — summing the loss tensor
            # keeps the whole epoch's autograd graph alive (memory leak).
            total_loss += loss.item()

        print('Epoch:', epoch, 'Loss:', total_loss)
        losses.append(total_loss)
        save_params(rnn=rnn, losses=losses)
        # Early stopping: stop as soon as the epoch loss increases.
        # BUG FIX: `> 2` required three losses before the first comparison
        # could fire; two entries are enough to compare the last pair.
        if len(losses) >= 2 and losses[-1] > losses[-2]:
            break

    # Sanity check: greedily decode a short continuation of the first sample.
    out = rnn.decodeGreedy(data[0][0:2], 3)
    print([model.vec2chord(o, vocab) for o in out])
Example #2
0
def train_classifier(vocab):
    """Train a ChordClassifier on top of (optionally pretrained) embeddings.

    Evaluates test accuracy and checkpoints every 10 epochs. Relies on
    module-level globals: SkipGram, ChordClassifier, chordsDataset, optim,
    torch, save_params, learning_rate, n_epoch, load_emb, load_cnn,
    embeddings_bin, cnn_bin, embed_size, simple, use_emb.
    """
    # Load pretrained embeddings from disk, or start a fresh SkipGram.
    embeddings = SkipGram(len(vocab),
                          file=embeddings_bin) if load_emb else SkipGram(
                              len(vocab), embed_size, simple)
    model = ChordClassifier(
        embeddings, file=cnn_bin) if load_cnn else ChordClassifier(embeddings)

    losses = []
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    X_train, Y_train, X_test, Y_test = chordsDataset(vocab)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, x in enumerate(X_train):
            # BUG FIX: gradients must be cleared every step; without this the
            # gradients of all previous samples accumulate into each update.
            optimizer.zero_grad()
            _, loss, _ = model(x, Y_train[i], use_emb=use_emb)
            loss.backward()
            optimizer.step()
            # BUG FIX: accumulate a plain float so the epoch does not retain
            # the autograd graph of every training sample.
            total_loss += loss.item()
        total_loss /= len(X_train)
        print('Epoch:', epoch, 'Loss:', total_loss)
        losses.append(total_loss)

        # Evaluate on the held-out split and checkpoint every 10 epochs.
        # (Removed dead variable `prev_acc`: it was assigned but never read.)
        if epoch % 10 == 9:
            acc = 0
            for i, x in enumerate(X_test):
                _, _, correct = model(x, Y_test[i])
                acc += correct.type(torch.float)
            acc /= len(X_test)
            print('Test accuracy:', acc.item())

            save_params(cnn=model, losses=losses)
Example #3
0
    def __init__(self,
                 filename,
                 window=10,
                 size=600,
                 decay=8.0,
                 balanced=False,
                 prior=True):
        """Record hyperparameters and build the three sub-models.

        Args:
            filename: training corpus path, forwarded to each sub-model and
                to the BinaryPredictor base class.
            window, size, decay, balanced, prior: ensemble-level
                hyperparameters, recorded in ``self._props``.
        """
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        self._props = {
            "window": window,
            "size": size,
            "decay": decay,
            "prior": prior,
            "balanced": balanced
        }
        super(Ensemble, self).__init__(filename)

        # Dead code removed: collaborative/skipgram/cbowsim were first bound
        # to empty dicts and then immediately rebound to the real models.
        # Sub-model (window, size) values are hard-coded per model,
        # presumably tuned independently — TODO confirm.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                    balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]
Example #4
0
def main():
    """Load a trained character SkipGram checkpoint, optionally dump its
    central embedding weights to disk, and print nearest-neighbour
    similarities for a small sample of characters."""
    reader = CorpusReader("./data/wili-2018/x_train_sub.txt",
                          "./data/wili-2018/y_train_sub.txt")
    char_to_idx, idx_to_char, char_frequency = reader.get_mappings()
    model = SkipGram(12300, 256, char_frequency)

    # Restore the trained weights from the saved checkpoint.
    with open("./models/skipgram/5.pt", 'rb') as checkpoint:
        model.load_state_dict(torch.load(checkpoint))
        print("Model Loaded")

    save_embeddings = True
    if save_embeddings:
        weights = model.central_embedding.weight
        torch.save(weights, './models/character_embeddings.pt')
        print("{} Embedding Weights Saved".format(weights.shape))

    model = model.to(device)
    model.eval()

    sims = model.vocabulary_similarities()
    sample_chars = [
        't', 'b', 'a', 'e', 'x', ',', '.', '@', '%', '4', '9', "բ", "Հ", "ñ",
        "名", "Θ"
    ]
    show_results(sample_chars, sims, char_to_idx, idx_to_char)
Example #5
0
def train_skipgram(vocab, sg_loader):
    """Train a SkipGram model on (input word, context word, target) triples.

    Args:
        vocab: vocabulary; only its length is used to size the model.
        sg_loader: loader yielding batches whose first element is a (B, 3)
            tensor — input word id, context word id, float target.

    Returns:
        (model, losses): the trained model and per-epoch total losses.
    """
    losses = []
    loss_fn = nn.L1Loss()
    model = SkipGram(len(vocab), embed_size, simple)
    print(model)

    if load_prev:
        try:
            model.load_state_dict(torch.load(model_file))
        # BUG FIX: a bare `except:` also swallows KeyboardInterrupt and
        # SystemExit; catch Exception so only real load failures are eaten.
        except Exception:
            print('Could not load file')

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]
            in_w_var = Variable(sample_batched[:, 0])
            ctx_w_var = Variable(sample_batched[:, 1])
            # Clear gradients before every backward pass.
            model.zero_grad()
            log_probs = model(in_w_var, ctx_w_var)
            loss = loss_fn(log_probs, Variable(sample_batched[:, 2].float()))

            loss.backward()
            optimizer.step()

            total_loss += loss.data

        losses.append(total_loss.item())
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        save_params(vocab, model, losses)
    return model, losses
Example #6
0
def train_skipgram(vocab, sg_loader):
    """Train a SkipGram model on batches whose last column is a float target.

    Args:
        vocab: vocabulary; only its length is used to size the model.
        sg_loader: loader yielding batches whose first element is a 2-D
            tensor; all but the last column are model inputs.

    Returns:
        (model, losses): the trained model and per-epoch total losses.
    """
    losses = []
    loss_fn = nn.L1Loss()
    # Load pretrained embeddings from disk, or start a fresh SkipGram.
    model = SkipGram(len(vocab),
                     file=embeddings_bin) if load_emb else SkipGram(
                         len(vocab), embed_size, simple)
    print(model)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    for epoch in range(n_epoch):
        total_loss = 0.0
        for i, sample_batched in enumerate(sg_loader):
            sample_batched = sample_batched[0]

            # Clear gradients before every backward pass.
            model.zero_grad()
            log_probs = model(sample_batched[:, :-1], average)
            loss = loss_fn(log_probs, Variable(sample_batched[:, -1].float()))

            loss.backward()
            optimizer.step()

            total_loss += loss.data

        losses.append(total_loss.item())
        print('Epoch:', epoch, 'Loss:', total_loss.item())
        save_params(emb=model, losses=losses, vocab=vocab)
        # Early stopping: stop as soon as the epoch loss increases.
        # BUG FIX: `> 2` required three recorded losses before the first
        # comparison could fire; two entries suffice to compare the last pair.
        if len(losses) >= 2 and losses[-1] > losses[-2]:
            break
    return model, losses
Example #7
0
def testSkipGram(vocabulary_file, training_dir):
    """Train a SkipGram over the corpus for NUM_ITER passes, reporting the
    log-likelihood of the final sentence after each pass and total time."""
    print("Reading vocabulary " + vocabulary_file + "...")
    words, dictionary = read_vocabulary(vocabulary_file, MAX_VOCAB_SIZE)
    print("Reading sentences and training SkipGram...")
    start = timer()
    skip_gram = SkipGram(len(words), WINDOW_SIZE, HIDDEN_LAYER_SIZE)
    last_sentence = None
    for iteration in range(NUM_ITER):
        # Per-pass word counter (reset each iteration).
        word_count = 0
        sentences = tokenize_files(dictionary, training_dir)
        for sentence in itertools.islice(sentences, MAX_SENTENCES):
            last_sentence = sentence
            skip_gram.train(sentence)
            word_count += len(sentence)

        # Re-run the final sentence with compute_ll=True to get a progress
        # metric for this pass.
        ll = skip_gram.train(last_sentence, compute_ll=True)
        print("Iteration " + str(iteration + 1) + "/" + str(NUM_ITER) + " finished (" + str(word_count) + " words)")
        print("Log-likelihood: " + str(ll))

    print("- Took %.2f sec" % (timer() - start))
Example #8
0
def main(args):
    """Pipeline driver: load graph + walk sequences, then train a SkipGram.

    Args:
        args: parsed CLI namespace; this function reads args.seq_file,
            args.emb_dim, args.epochs and args.neg_num.
    """

    data_loader = DataLoader()
    data_loader.load_graph()
    data_loader.load_sequence(args.seq_file)
    # Sampling distribution for negative sampling during training.
    data_loader.construct_distribution()
    data_loader.generate_positive_samples()
    # data_loader.generate_negative_samples(3,1)

    model = SkipGram()
    # Embedding table sized to the graph's node count.
    model.initialize(data_loader.get_node_size(), args.emb_dim)
    model.train_process(args.epochs, data_loader, args.neg_num)
    def __init__(self, filename, window=10, size=600, decay=8.0, balanced=False, prior=True):
        """Record hyperparameters and build the three sub-models.

        Args:
            filename: training corpus path, forwarded to each sub-model and
                to the BinaryPredictor base class.
            window, size, decay, balanced, prior: ensemble-level
                hyperparameters, recorded in ``self._props``.
        """
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        self._props = {"window": window, "size": size, "decay": decay,
                       "prior": prior, "balanced": balanced}
        super(Ensemble, self).__init__(filename)

        # Dead code removed: collaborative/skipgram/cbowsim were first bound
        # to empty dicts and then immediately rebound to the real models.
        # Sub-model (window, size) values are hard-coded per model,
        # presumably tuned independently — TODO confirm.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay, balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]
Example #10
0
class Ensemble(BinaryPredictor):
    """Weighted ensemble of CollaborativeFiltering, CbowSim and SkipGram.

    ``train`` accumulates, per diagnosis, each sub-model's agreement with the
    ground truth and normalizes the three weights to sum to one;
    ``predict`` blends the sub-model probabilities with those weights.
    """

    def __init__(self,
                 filename,
                 window=10,
                 size=600,
                 decay=8.0,
                 balanced=False,
                 prior=True):
        """Record hyperparameters and build the three sub-models.

        Args:
            filename: training corpus path, forwarded to each sub-model and
                to the BinaryPredictor base class.
        """
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        self._props = {
            "window": window,
            "size": size,
            "decay": decay,
            "prior": prior,
            "balanced": balanced
        }
        super(Ensemble, self).__init__(filename)

        # Dead code removed: collaborative/skipgram/cbowsim were first bound
        # to empty dicts and then immediately rebound to the real models.
        # Sub-model (window, size) values are hard-coded per model,
        # presumably tuned independently — TODO confirm.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay,
                                                    balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]

    def train(self, filename):
        """Train each sub-model, then learn per-diagnosis blend weights.

        Each line of `filename` appears to be pipe-separated with field 0
        holding comma-separated true diagnoses and field 2 space-separated
        feed events — inferred from the parsing below; verify against the
        actual data format.
        """
        self.collaborative.train(filename)
        self.cbowsim.train(filename)
        self.skipgram.train(filename)
        self._prior = self.cbowsim._prior
        self._weights = {m: defaultdict(lambda: 0) for m in self._models}

        with open(filename) as f:
            for line in f:
                feed_events = line.split("|")[2].split(" ")
                actual = line.split("|")[0].split(",")

                cf_preds = self.collaborative.predict(feed_events)
                cbow_preds = self.cbowsim.predict(feed_events)
                skip_preds = self.skipgram.predict(feed_events)

                # Credit each model with its predicted probability when the
                # diagnosis occurred, and with (1 - p) when it did not.
                for diag in self._diags:
                    if diag in actual:
                        self._weights["collaborative"][diag] += cf_preds[diag]
                        self._weights["cbowsim"][diag] += cbow_preds[diag]
                        self._weights["skipgram"][diag] += skip_preds[diag]
                    else:
                        self._weights["collaborative"][
                            diag] += 1 - cf_preds[diag]
                        self._weights["cbowsim"][diag] += 1 - cbow_preds[diag]
                        self._weights["skipgram"][diag] += 1 - skip_preds[diag]

            # Normalize so the three weights for each diagnosis sum to one.
            for diag in self._diags:
                norm = (self._weights["collaborative"][diag] +
                        self._weights["cbowsim"][diag] +
                        self._weights["skipgram"][diag])
                self._weights["collaborative"][diag] /= norm
                self._weights["cbowsim"][diag] /= norm
                self._weights["skipgram"][diag] /= norm

            print(self._weights)

    def predict(self, feed_events):
        """Return {diag: probability} as the weight-blended sub-model scores."""
        cf_preds = self.collaborative.predict(feed_events)
        cbow_preds = self.cbowsim.predict(feed_events)
        skip_preds = self.skipgram.predict(feed_events)
        predictions = {}
        for diag in self._diags:
            predictions[
                diag] = cf_preds[diag] * self._weights["collaborative"][diag]
            predictions[
                diag] += cbow_preds[diag] * self._weights["cbowsim"][diag]
            predictions[
                diag] += skip_preds[diag] * self._weights["skipgram"][diag]
        return predictions
Example #11
0
def run_skipgram():
    """Construct a SkipGram vectorizer and run its training routine."""
    skipgram_model = SkipGram()
    skipgram_model.train()
class Ensemble(BinaryPredictor):
    """Weighted ensemble of CollaborativeFiltering, CbowSim and SkipGram.

    ``train`` accumulates, per diagnosis, each sub-model's agreement with the
    ground truth and normalizes the three weights to sum to one;
    ``predict`` blends the sub-model probabilities with those weights.
    """
    def __init__(self, filename, window=10, size=600, decay=8.0, balanced=False, prior=True):
        """Record hyperparameters and build the three sub-models over `filename`."""
        self._window = window
        self._size = size
        self._decay = decay
        self._prior_pred = prior
        self._stopwordslist = []
        self._props = {"window": window, "size": size, "decay": decay,
                       "prior": prior, "balanced": balanced}
        super(Ensemble, self).__init__(filename)

        # NOTE(review): the three dict initializers below are dead code —
        # each attribute is immediately rebound to a real model object.
        self.collaborative = {}
        self.skipgram = {}
        self.cbowsim = {}
        # Sub-model (window, size) values are hard-coded per model,
        # presumably tuned independently — TODO confirm.
        self.collaborative = CollaborativeFiltering(filename, 27, 300, decay, balanced, True)
        self.cbowsim = CbowSim(filename, 45, 275, decay, balanced, True)
        self.skipgram = SkipGram(filename, 23, 350, decay, balanced, False)
        self._models = ["collaborative", "cbowsim", "skipgram"]

    def train(self, filename):
        """Train each sub-model, then learn per-diagnosis blend weights.

        Each line of `filename` appears to be pipe-separated with field 0
        holding comma-separated true diagnoses and field 2 space-separated
        feed events — inferred from the parsing below; verify against data.
        """
        self.collaborative.train(filename)
        self.cbowsim.train(filename)
        self.skipgram.train(filename)
        self._prior = self.cbowsim._prior
        self._weights = {m: defaultdict(lambda: 0) for m in self._models}

        with open(filename) as f:
            for line in f:
                feed_events = line.split("|")[2].split(" ")
                actual = line.split("|")[0].split(",")

                cf_preds = self.collaborative.predict(feed_events)
                cbow_preds = self.cbowsim.predict(feed_events)
                skip_preds = self.skipgram.predict(feed_events)

                # Credit each model with its predicted probability when the
                # diagnosis occurred, and with (1 - p) when it did not.
                for diag in self._diags:
                    if diag in actual:
                        self._weights["collaborative"][diag] += cf_preds[diag]
                        self._weights["cbowsim"][diag] += cbow_preds[diag]
                        self._weights["skipgram"][diag] += skip_preds[diag]
                    else:
                        self._weights["collaborative"][diag] += 1 - cf_preds[diag]
                        self._weights["cbowsim"][diag] += 1 - cbow_preds[diag]
                        self._weights["skipgram"][diag] += 1 - skip_preds[diag]

            # Normalize weights
            for diag in self._diags:
                norm = (self._weights["collaborative"][diag] + self._weights["cbowsim"][diag] +
                        self._weights["skipgram"][diag])
                self._weights["collaborative"][diag] /= norm
                self._weights["cbowsim"][diag] /= norm
                self._weights["skipgram"][diag] /= norm

            print(self._weights)

    def predict(self, feed_events):
        """Return {diag: probability} as the weight-blended sub-model scores."""
        cf_preds = self.collaborative.predict(feed_events)
        cbow_preds = self.cbowsim.predict(feed_events)
        skip_preds = self.skipgram.predict(feed_events)
        predictions = {}
        for diag in self._diags:
            predictions[diag] = cf_preds[diag] * self._weights["collaborative"][diag]
            predictions[diag] += cbow_preds[diag] * self._weights["cbowsim"][diag]
            predictions[diag] += skip_preds[diag] * self._weights["skipgram"][diag]
        return predictions