Ejemplo n.º 1
0
def get_data():
    """Build the train, dev and test datasets.

    Loads the vocab dict via ``load_vocab_dict()`` and the
    'annotated_dep_trees.pkl' pickle from ``glovar.PKL_DIR``, then wraps
    the 'train', 'dev' and 'test' entries of that pickle in ``SSTDataset``.

    Returns:
        A ``(train, dev, test)`` tuple of ``SSTDataset`` objects.
    """
    vocab_dict = load_vocab_dict()
    trees = pickling.load(glovar.PKL_DIR, 'annotated_dep_trees.pkl')
    # Splits are built in the fixed order train, dev, test.
    train, dev, test = (
        SSTDataset(trees[split], vocab_dict)
        for split in ('train', 'dev', 'test'))
    return train, dev, test
Ejemplo n.º 2
0
 def train(self, config):
     """Return the training data selected by ``config``.

     When ``config['target']`` is 'negs', the 'train_negs' pickle is
     loaded from ``glovar.DATA_DIR`` and returned as-is (no subsampling).
     Otherwise ``self.data('train-full')`` is used, and if
     ``config['train_subsample']`` is positive a random sample of that
     size is returned instead of the full data.
     """
     if config['target'] == 'negs':
         return pickling.load(glovar.DATA_DIR, 'train_negs', ['arct'])
     examples = self.data('train-full')
     n_keep = config['train_subsample']
     if n_keep > 0:
         return random.sample(examples, n_keep)
     return examples
Ejemplo n.º 3
0
def get(pkl_dir, name, override, arg_config):
    """Load an existing History pickle or create a fresh History.

    Args:
        pkl_dir: Directory in which 'history_<name>.pkl' is looked up.
        name: History name; used to build the pickle file name.
        override: If truthy, an existing pickle is ignored and a new
            History is created.
        arg_config: Mapping expanded as keyword arguments into
            ``models.Config`` when a new History is created.

    Returns:
        The object loaded by ``pickling.load`` when the pickle exists and
        ``override`` is falsy; otherwise a new ``History``.
    """
    print('Getting history with name %s; override=%s...' % (name, override))
    pkl_name = 'history_%s.pkl' % name
    pkl_path = os.path.join(pkl_dir, pkl_name)
    exists = os.path.exists(pkl_path)
    print('Exists: %s' % exists)
    if exists and not override:
        print('Loading...')
        return pickling.load(pkl_dir, pkl_name)
    # Either the pickle exists but is being overridden, or it is missing.
    print('Overriding...' if exists else 'Creating...')
    return History(name, models.Config(**arg_config))
Ejemplo n.º 4
0
def load_vocab_dict():
    """Load and return the 'vocab_dict.pkl' pickle from ``glovar.PKL_DIR``."""
    vocab_dict = pickling.load(glovar.PKL_DIR, 'vocab_dict.pkl')
    return vocab_dict
Ejemplo n.º 5
0
# Parse the run parameters and the config arguments.
params, arg_config = parameters.parse_arguments()


# Get or create History: histories.get receives the pickle directory, the
# run name, the override flag and the parsed config arguments.
history = histories.get(
    glovar.PKL_DIR, params.name, params.override, arg_config)


# Report config to be used. The config is read from the History object,
# not taken directly from arg_config.
config = history.config
print(config)


# Get vocab dict and embeddings. Only element [0] of the object stored in
# 'glove_embeddings.pkl' is used as the embedding matrix.
print('Load vocab dict and embedding matrix...')
vocab_dict = pickling.load(glovar.PKL_DIR, 'vocab_dict.pkl')
embedding_matrix = pickling.load(glovar.PKL_DIR, 'glove_embeddings.pkl')[0]


# Load the MNLI and SNLI training sets and the MNLI matched dev set, then
# build one loader for training and one for tuning; both use
# config.batch_size.
# NOTE(review): presumably NYUDataSet combines the MNLI and SNLI training
# data - confirm against the nli module.
print('Loading data...')
mnli_train = nli.load_json('mnli', 'train')
snli_train = nli.load_json('snli', 'train')
mnli_dev_matched = nli.load_json('mnli', 'dev_matched')
train_data = nli.NYUDataSet(mnli_train, snli_train, vocab_dict)
tune_data = nli.NLIDataSet(mnli_dev_matched, vocab_dict)
train_loader = nli.get_data_loader(train_data, config.batch_size)
dev_loader = nli.get_data_loader(tune_data, config.batch_size)


# Build the inference model from the run name, the config and the
# embedding matrix.
print('Loading model...')
model = inference.InferenceModel(params.name, config, embedding_matrix)
Ejemplo n.º 6
0
 def load(name):
     """Load the pickle 'history_<name>.pkl' from ``glovar.PKL_DIR``."""
     return pickling.load(glovar.PKL_DIR, 'history_%s.pkl' % name)
Ejemplo n.º 7
0
def encoder_state_dict(transfer_name):
    """Load a dict of numpy arrays and convert its values to torch tensors.

    Args:
        transfer_name: Prefix of the pickle name; the pickle loaded from
            ``glovar.DATA_DIR`` is '<transfer_name>_dict_np'.

    Returns:
        A dict with the same keys as the loaded dict, where every value
        has been passed through ``torch.from_numpy``.
    """
    pkl_name = '%s_dict_np' % transfer_name
    np_dict = pickling.load(glovar.DATA_DIR, pkl_name)
    return {key: torch.from_numpy(array) for key, array in np_dict.items()}
Ejemplo n.º 8
0
 def vocab(self, config):
     """Load the 'vocab' pickle from ``glovar.DATA_DIR`` with ['arct'].

     ``config`` is accepted but not used here.
     """
     vocab_pkl = pickling.load(glovar.DATA_DIR, 'vocab', ['arct'])
     return vocab_pkl
Ejemplo n.º 9
0
 def embeddings(self, config):
     """Load the embeddings pickle named by ``config['embed_type']``.

     The pickle is loaded from ``glovar.DATA_DIR`` with ['arct'] as the
     third argument to ``pickling.load``.
     """
     embed_name = config['embed_type']
     return pickling.load(glovar.DATA_DIR, embed_name, ['arct'])
Ejemplo n.º 10
0
 def vocab(self, config):
     """Load the 'vocab' pickle for the target named in ``config``.

     ``[config['target']]`` is passed as the third argument to
     ``pickling.load``, alongside ``glovar.DATA_DIR`` and 'vocab'.
     """
     target_parts = [config['target']]
     return pickling.load(glovar.DATA_DIR, 'vocab', target_parts)
Ejemplo n.º 11
0
 def embeddings(self, config):
     """Print ``config`` and load the embeddings pickle it selects.

     The pickle name is ``config['embed_type']`` and ``[config['target']]``
     is passed as the third argument to ``pickling.load``.
     """
     print(config)
     embed_name = config['embed_type']
     target_parts = [config['target']]
     return pickling.load(glovar.DATA_DIR, embed_name, target_parts)
Ejemplo n.º 12
0
from ext import parameters, pickling, training, histories
from models import sentiment

# Parse configuration settings from command line
# NOTE(review): glovar and sst are used below but are not imported in the
# visible lines - confirm they are imported elsewhere in the file.
params, arg_config = parameters.parse_arguments()

# Get or create History: histories.get receives the pickle directory, the
# run name, the override flag and the parsed config arguments.
history = histories.get(glovar.PKL_DIR, params.name, params.override,
                        arg_config)

# Report config to be used. The config is read from the History object,
# not taken directly from arg_config.
config = history.config
print(config)

# Only element [0] of the object stored in 'glove_embeddings.pkl' is used
# as the embedding matrix.
print('Load embedding matrix...')
embedding_matrix = pickling.load(glovar.PKL_DIR, 'glove_embeddings.pkl')[0]

# The third value returned by sst.get_data() is discarded; only the first
# two are wrapped in loaders, both using config.batch_size.
print('Loading data...')
train_data, dev_data, _ = sst.get_data()
train_loader = sst.get_data_loader(train_data, config.batch_size)
dev_loader = sst.get_data_loader(dev_data, config.batch_size)

# Build the sentiment model from the run name, the config and the
# embedding matrix.
print('Loading model...')
model = sentiment.SentimentModel(params.name, config, embedding_matrix)

# The trainer is given the model, the History, both loaders and the
# checkpoint directory.
print('Loading trainer...')
trainer = training.PyTorchTrainer(model, history, train_loader, dev_loader,
                                  glovar.CKPT_DIR)

print('Training...')
trainer.train()