Example 1
def load_embedding_model(args, vocab):
    embedding_model = Embedding(vocab.size(), args.input_dim)

    if args.cuda:
        embedding_model = embedding_model.cuda()
    # path of the cached embedding tensor, derived from the GloVe file name
    emb_file = os.path.join(args.data, args.emb_dir.split("/")[-1] + "_" + args.emb_file + '_emb.pth')
    # reuse the cached embeddings if they exist and match the requested dimensionality
    if os.path.isfile(emb_file) and torch.load(emb_file).size()[1] == args.input_dim:
        emb = torch.load(emb_file)
    else:
        glove_vocab, glove_emb = load_word_vectors(os.path.join(args.emb_dir,args.emb_file))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())

        # build the embedding matrix for the task vocabulary from the GloVe vectors
        emb = torch.zeros(vocab.size(), glove_emb.size(1))
        not_known = []
        for word in vocab.token_to_idx.keys():
            if glove_vocab.get_index(word):
                # copy the pretrained vector for words covered by GloVe
                emb[vocab.get_index(word)] = glove_emb[glove_vocab.get_index(word)]
            else:
                # randomly initialise vectors for words missing from GloVe
                not_known.append(word)
                emb[vocab.get_index(word)] = torch.Tensor(emb[vocab.get_index(word)].size()).normal_(-0.05, 0.05)
        if args.calculate_new_words:
            emb = apply_not_known_words(emb, args, not_known, vocab)

        # cache the assembled embedding matrix for later runs
        torch.save(emb, emb_file)

    if args.cuda:
        emb = emb.cuda()
    # plug these into embedding matrix inside model
    embedding_model.state_dict()['weight'].copy_(emb)
    return embedding_model
Example 2
def prepare_embeddings(vectors, word2index):
    embedding_model = Embedding(len(word2index), config.tree_config["input_dim"])

    if config.tree_config["cuda"]:
        embedding_model = embedding_model.cuda()

    torch_vectors = torch.tensor(vectors)

    if config.tree_config["cuda"]:
        torch_vectors = torch_vectors.cuda()
    embedding_model.state_dict()['weight'].copy_(torch_vectors)
    return embedding_model
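
A rough usage sketch for prepare_embeddings; the vocabulary and vectors below are made up, and it assumes the Embedding and config imports from the snippet are available with config.tree_config["input_dim"] set to 300.

import numpy as np

# Toy stand-ins for a real pretrained-vector matrix and its word index.
word2index = {"<unk>": 0, "good": 1, "bad": 2}
vectors = np.random.uniform(-0.05, 0.05, (len(word2index), 300)).astype("float32")

embedding_model = prepare_embeddings(vectors, word2index)
# Look up a single token by its index:
# embedding_model(torch.tensor([word2index["good"]]))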
Example 3
def load_embedding_model(data: str,
                         emb_dir: str,
                         emb_file: str,
                         input_dim: int,
                         vocab: Vocab,
                         cuda: bool = False):

    new_emb_file = os.path.join(
        data,
        emb_dir.split("/")[-1] + "_" + emb_file + '_emb.pth')
    if os.path.isfile(new_emb_file) and torch.load(
            new_emb_file).size()[1] == input_dim:
        emb: Tensor = torch.load(new_emb_file)

    else:
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(emb_dir, emb_file))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))
        not_known = []
        for word in vocab.token_to_idx.keys():
            if glove_vocab.get_index(word):
                emb[vocab.get_index(word)] = glove_emb[glove_vocab.get_index(
                    word)]
            else:
                not_known.append(word)
                emb[vocab.get_index(word)] = torch.Tensor(
                    emb[vocab.get_index(word)].size()).normal_(-0.05, 0.05)

        # cache the assembled embedding matrix under the derived cache path
        torch.save(emb, new_emb_file)

    embedding_model = Embedding(emb.shape[0], input_dim)
    if cuda:
        emb = emb.cuda()
        embedding_model = embedding_model.cuda()

    # plug these into embedding matrix inside model
    embedding_model.state_dict()['weight'].copy_(emb)
    return embedding_model
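
A hypothetical call to the function above; the ToyVocab class, dataset paths, and GloVe file stem are illustrative stand-ins for the project's real Vocab object and data layout, and the call only succeeds if the referenced GloVe file actually exists.

# Stand-in vocabulary exposing the minimal interface load_embedding_model uses.
class ToyVocab:
    def __init__(self, tokens):
        self.token_to_idx = {t: i for i, t in enumerate(tokens)}

    def size(self):
        return len(self.token_to_idx)

    def get_index(self, token):
        return self.token_to_idx.get(token)


embedding_model = load_embedding_model(
    data='data',                 # assumed cache directory
    emb_dir='data/glove',        # assumed directory holding the GloVe vectors
    emb_file='glove.840B.300d',  # assumed GloVe file stem
    input_dim=300,
    vocab=ToyVocab(["<unk>", "good", "bad"]),
    cuda=False,
)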
Example 4
class StatusEmb(Module):
    def __init__(self, dim, dyad_lut, cuda=False):

        super(StatusEmb, self).__init__()
        self.dyad_lut = dyad_lut
        self.dim = dim
        self.use_cuda = cuda  # plain bool; naming it "cuda" would shadow nn.Module.cuda()

        num_entities = len(dyad_lut)

        self.emb = Embedding(num_entities, self.dim)
        if self.use_cuda:
            self.emb = self.emb.cuda()

    def forward(self, dyad):

        dyad = self.dyad_lut[dyad]
        dyad = torch.LongTensor([dyad])
        dyad = Variable(dyad)
        if self.use_cuda:
            dyad = dyad.cuda()
        out = self.emb(dyad)
        return out
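
A minimal usage sketch for StatusEmb; the dyad lookup table is invented for illustration, and the keys can be any hashable identifiers the caller uses for dyads.

dyad_lut = {("alice", "bob"): 0, ("alice", "carol"): 1, ("bob", "carol"): 2}
status_emb = StatusEmb(dim=50, dyad_lut=dyad_lut, cuda=False)
out = status_emb(("alice", "bob"))  # tensor of shape (1, 50)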
Example 5
lstm = torch.nn.LSTM(100, 100, bidirectional=True)
model = torch.nn.Sequential(
    #torch.nn.Linear(10, 10),
    #torch.nn.LeakyReLU(),
    torch.nn.Dropout(),
    torch.nn.Linear(100, 30),
    torch.nn.LeakyReLU(),
    #torch.nn.Linear(30, 10),
    #torch.nn.LeakyReLU(),
    torch.nn.Linear(30, 2), 
    # torch.nn.Softplus(),
)
# `emb` is defined elsewhere in the original script (not shown in this excerpt)
if CUDA:
    lstm = lstm.cuda()
    model = model.cuda()
    emb = emb.cuda()

optim = torch.optim.Adam(lstm.parameters(), lr=10e-4)
model_optim = torch.optim.Adam(model.parameters(), lr=10e-4)
emb_optim = torch.optim.Adam(emb.parameters(), lr=10e-4)


class DyadLSTMDataset(Dataset):
    def __init__(self):
        self.labels = []
        self.words = []
        with open('dyad_dataset_for_lstm.txt') as f:
            for line in f.readlines():
                user1, user2, acts, pred1, num_days1, pred2, num_days2 = line.strip().split('\t')
                acts = json.loads(acts)