def get_data():
    """Build the SST train/dev/test datasets from the pickled dep-tree splits.

    Returns:
        Tuple of three SSTDataset objects: (train, dev, test).
    """
    vocab = load_vocab_dict()
    splits = pickling.load(glovar.PKL_DIR, 'annotated_dep_trees.pkl')
    # The pickle is keyed by split name; wrap each split in an SSTDataset.
    return tuple(
        SSTDataset(splits[key], vocab) for key in ('train', 'dev', 'test'))
def train(self, config):
    """Return the training examples selected by *config*.

    For the 'negs' target the pre-pickled negatives are loaded; otherwise
    the full training split is used, optionally subsampled.
    """
    # Negative-sampling target has its own dedicated pickle.
    if config['target'] == 'negs':
        return pickling.load(glovar.DATA_DIR, 'train_negs', ['arct'])
    data = self.data('train-full')
    sample_size = config['train_subsample']
    # A positive subsample size means draw that many examples at random.
    if sample_size > 0:
        return random.sample(data, sample_size)
    return data
def get(pkl_dir, name, override, arg_config):
    """Load a saved History pickle, or create a fresh one.

    Args:
        pkl_dir: Directory where history pickles live.
        name: History name; the file is 'history_<name>.pkl'.
        override: If True, ignore any existing pickle and start fresh.
        arg_config: Dict of config kwargs used when creating a new History.

    Returns:
        A History object — loaded from disk only when a pickle exists and
        override is False; otherwise newly constructed.
    """
    print('Getting history with name %s; override=%s...' % (name, override))
    pkl_name = 'history_%s.pkl' % name
    exists = os.path.exists(os.path.join(pkl_dir, pkl_name))
    print('Exists: %s' % exists)
    # Only one branch actually loads; both "override" and "missing" end in
    # constructing a new History, so state that construction once.
    if exists and not override:
        print('Loading...')
        return pickling.load(pkl_dir, pkl_name)
    print('Overriding...' if exists else 'Creating...')
    return History(name, models.Config(**arg_config))
def load_vocab_dict():
    """Load and return the pickled vocabulary dict from the pickle dir."""
    pkl_file = 'vocab_dict.pkl'
    return pickling.load(glovar.PKL_DIR, pkl_file)
params, arg_config = parameters.parse_arguments() # Get or create History history = histories.get( glovar.PKL_DIR, params.name, params.override, arg_config) # Report config to be used config = history.config print(config) # Get vocab dict and embeddings print('Load vocab dict and embedding matrix...') vocab_dict = pickling.load(glovar.PKL_DIR, 'vocab_dict.pkl') embedding_matrix = pickling.load(glovar.PKL_DIR, 'glove_embeddings.pkl')[0] print('Loading data...') mnli_train = nli.load_json('mnli', 'train') snli_train = nli.load_json('snli', 'train') mnli_dev_matched = nli.load_json('mnli', 'dev_matched') train_data = nli.NYUDataSet(mnli_train, snli_train, vocab_dict) tune_data = nli.NLIDataSet(mnli_dev_matched, vocab_dict) train_loader = nli.get_data_loader(train_data, config.batch_size) dev_loader = nli.get_data_loader(tune_data, config.batch_size) print('Loading model...') model = inference.InferenceModel(params.name, config, embedding_matrix)
def load(name):
    """Load the pickled History called *name* from the pickle dir."""
    return pickling.load(glovar.PKL_DIR, 'history_%s.pkl' % name)
def encoder_state_dict(transfer_name):
    """Load a pickled numpy state dict and convert its arrays to tensors.

    Args:
        transfer_name: Prefix of the '<name>_dict_np' pickle to load.

    Returns:
        Dict mapping the original keys to torch tensors (same order).
    """
    np_dict = pickling.load(glovar.DATA_DIR, '%s_dict_np' % transfer_name)
    return {key: torch.from_numpy(array) for key, array in np_dict.items()}
def vocab(self, config):
    """Load the pickled ARCT vocab (the *config* argument is unused)."""
    subdirs = ['arct']
    return pickling.load(glovar.DATA_DIR, 'vocab', subdirs)
def embeddings(self, config):
    """Load the ARCT embeddings pickle named by config['embed_type']."""
    embed_name = config['embed_type']
    return pickling.load(glovar.DATA_DIR, embed_name, ['arct'])
def vocab(self, config):
    """Load the vocab pickle for the target named in *config*."""
    target_dirs = [config['target']]
    return pickling.load(glovar.DATA_DIR, 'vocab', target_dirs)
def embeddings(self, config):
    """Load the embeddings pickle for config's target, echoing the config."""
    print(config)
    embed_name = config['embed_type']
    target_dirs = [config['target']]
    return pickling.load(glovar.DATA_DIR, embed_name, target_dirs)
# Script: train the SST sentiment model end to end.
from ext import parameters, pickling, training, histories
from models import sentiment

# NOTE(review): `glovar` and `sst` are used below but not imported here —
# confirm they are imported elsewhere in this file or add the imports.

# Parse configuration settings from command line
params, arg_config = parameters.parse_arguments()
# Get or create History (resumes a previous run unless --override was given)
history = histories.get(glovar.PKL_DIR, params.name, params.override,
                        arg_config)
# Report config to be used
config = history.config
print(config)
print('Load embedding matrix...')
# NOTE(review): the embeddings pickle appears to be a tuple/list whose first
# element is the matrix — confirm against the pickling code that wrote it.
embedding_matrix = pickling.load(glovar.PKL_DIR, 'glove_embeddings.pkl')[0]
print('Loading data...')
train_data, dev_data, _ = sst.get_data()  # test split unused during training
train_loader = sst.get_data_loader(train_data, config.batch_size)
dev_loader = sst.get_data_loader(dev_data, config.batch_size)
print('Loading model...')
model = sentiment.SentimentModel(params.name, config, embedding_matrix)
print('Loading trainer...')
trainer = training.PyTorchTrainer(model, history, train_loader, dev_loader,
                                  glovar.CKPT_DIR)
print('Training...')
trainer.train()