Example #1
import torch

import config  # project-level settings module (assumed importable as in the source repo)
from data_utils import Vocab


class Trainer:
    def __init__(self,
                 model,
                 optimizer,
                 train_dataset,
                 test_dataset,
                 num_folds=config.num_folds,
                 loss_function=None):

        assert num_folds >= 1, 'num_folds must be at least 1'
        self.num_folds = num_folds

        self.use_crf = config.use_crf

        # Build the shared vocabulary from the train and test files and
        # persist the token-to-index mapping for reuse.
        vocab = Vocab.from_files(
            [config.dataset_path, config.test_dataset_path],
            store=config.mapping_file)

        # Superseded in-class construction, kept for reference:
        # self.train_dataset = ReviewDataset(config.dataset_path, preprocessed=False, vocab=vocab)
        # self.test_dataset = ReviewDataset(config.test_dataset_path, preprocessed=False, vocab=vocab)
        # self.model = model(vocab, embedding_path=config.word_embedding_path, use_crf=config.use_crf).to(config.device)

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.model = model
        # `optimizer` is an optimizer class (or factory): bind it to the
        # model's parameters here.
        self.optimizer = optimizer(self.model.parameters())

        if not self.use_crf and loss_function is None:
            raise ValueError(
                'A loss function must be specified when the CRF is not used')

        self.device = torch.device(
            config.device if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

        print('using device:', self.device)
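
Since the constructor insists on an explicit loss whenever the CRF head is disabled, a typical instantiation looks like this sketch (assuming config.use_crf is False, that network and the datasets are built as in Example #2, and that -100 is the padding-label index, which is an assumption):

loss = torch.nn.CrossEntropyLoss(ignore_index=-100)  # assumed padding-label index
trainer = Trainer(network,
                  torch.optim.Adam,
                  train_dataset,
                  test_dataset,
                  loss_function=loss)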
Example #2
    optimizers = {
        'adam': torch.optim.Adam,  # default lr=0.001
        'adamax': torch.optim.Adamax,  # default lr=0.002
        'asgd': torch.optim.ASGD,  # default lr=0.01
        'rmsprop': torch.optim.RMSprop,  # default lr=0.01
        'sgd': torch.optim.SGD,
    }

    models = {
        'lstm': LSTM,
        'attention_lstm': AttentionAspectExtraction,
        'global_attention_lstm': GlobalAttentionAspectExtraction,
        'hsan': HSAN,
        'decnn': DECNN
    }

    vocab = Vocab.from_files([config.dataset_path, config.test_dataset_path],
                             store=config.mapping_file)
    train_dataset = ReviewDataset(config.dataset_path,
                                  preprocessed=False,
                                  vocab=vocab)
    test_dataset = ReviewDataset(config.test_dataset_path,
                                 preprocessed=False,
                                 vocab=vocab)

    network = models[config.model](vocab,
                                   embedding_path=config.word_embedding_path,
                                   lambda1=config.lambda1,
                                   use_crf=config.use_crf).to(config.device)
    trainer = Trainer(network,
                      optimizers[config.optimizer],
                      train_dataset,
                      test_dataset)
    # Note: the original snippet is truncated at this call; any remaining
    # keyword arguments (e.g. num_folds, loss_function) are omitted.
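
The two dictionaries act as string-keyed registries resolved from config. Because Trainer binds the optimizer itself via optimizer(self.model.parameters()), a non-default learning rate can still be injected with functools.partial (a sketch; the lr value is illustrative):

from functools import partial

adam = partial(torch.optim.Adam, lr=3e-4)  # overrides the default lr=0.001
trainer = Trainer(network, adam, train_dataset, test_dataset)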
Example #3

import numpy as np
import json

from data_utils import Vocab

train_dataset = './datasets/Restaurants_Train.xml'
test_dataset = './datasets/Restaurants_Test.xml'
mapping_file = './embeddings/restaurant_mapping.json'
vocab = Vocab.from_files([train_dataset, test_dataset],
                         store=mapping_file).get_vocab()

# 200 columns: the first 100 hold GloVe vectors, the last 100 the domain vectors
embedding = np.zeros((len(vocab), 200))

with open('embeddings/glove/glove.6B.100d.txt', 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        if word in vocab:
            vector = np.asarray(values[1:], dtype='float32')
            embedding[vocab[word], :100] = vector

print('glove done')

with open('embeddings/domain_embedding/restaurant_emb.vec',
          'r',
          encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        if word in vocab:
            # Mirrors the GloVe pass; completes the loop that is truncated in
            # the source, assuming 100-dimensional domain vectors.
            vector = np.asarray(values[1:], dtype='float32')
            embedding[vocab[word], 100:] = vector

print('domain embedding done')
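
The truncated source presumably ends by persisting the combined matrix so the models can load it through config.word_embedding_path; a minimal sketch, with the output path as an assumption:

np.save('./embeddings/restaurant_embedding.npy', embedding)  # hypothetical filename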