def vectorize(sent):
    """Embed `sent` as the sum of its tokens' GloVe vectors.

    Tokenizes the sentence, maps each token to its vocab index, looks up
    the corresponding GloVe rows, and returns their element-wise sum
    (a single 1-D vector).
    """
    tokens = text.tokenize(sent)
    vocab = data.vocab()
    token_ixs = [vocab[tok] for tok in tokens]
    glove = data.glove()
    # Each row reshaped to (1, dim) so they stack cleanly before summing.
    rows = [glove[ix].reshape(1, -1) for ix in token_ixs]
    return np.concatenate(rows, axis=0).sum(axis=0)
def __init__(self):
    """Initialise with the shared ARCT vocabulary."""
    # vocab: token -> index mapping loaded from the project's data module.
    self.vocab = data.vocab()
def __init__(self, args=None):
    """Initialise vocab and training-size bookkeeping.

    `args` is accepted for interface compatibility; this initializer does
    not read it.
    """
    self.vocab = data.vocab()
    # Unknown until the dataset is loaded — presumably set later by the
    # training pipeline; verify against callers.
    self.n_training_points = None
def __init__(self):
    """Initialise with a vocab and an RNN sentence-collation helper."""
    # Statements are independent of one another; order is cosmetic.
    self.collate_for_rnn = CollateSentsForRNN()
    self.vocab = data.vocab()
def __init__(self):
    """Initialise tokenization state: vocab, tokenizer, and pad index."""
    # Index 0 is used for padding — assumes the vocab reserves it; confirm.
    self.pad_ix = 0
    self.tokenizer = text.tokenize
    self.vocab = data.vocab()
"""Build GloVe vectors for ARCT data.""" import os import numpy as np from arct import data import glovar from util import text if __name__ == '__main__': print('Creating GloVe embeddings...') vocab = data.vocab() embeddings = text.create_glove_embeddings(vocab) save_path = os.path.join(glovar.ARCT_DIR, 'glove.npy') np.save(save_path, embeddings)