def test_saving_data(self):
    """Round-trip test: cache fresh data, save it, reload it, and re-save.

    Verifies that load_data populates the test cache, that
    save_processed_data / load_processed_data preserve the output, and
    that a second save reuses the existing cache.
    """
    test_dir = os.path.dirname(__file__)
    cache_dir = os.path.join(test_dir, '_test_cache')
    save_dir = os.path.join(test_dir, '_test_save')

    # Start from a clean cache directory.
    self._clean_testing_cache(cache_dir)

    out = imdb_data.load_data(
        positive=MOCK_POSITIVE,
        negative=MOCK_NEGATIVE,
        _cache_dir=cache_dir,
        pad_to=4)
    self._check_testing_cache(cache_dir)

    # Validate output
    self._test_output(out)

    # Test saving
    imdb_data.save_processed_data(out, save_dir, _cache_dir=cache_dir)
    self._check_testing_cache(cache_dir)
    self._is_valid_cache(save_dir)
    self._rm_cache_path(save_dir)

    # Test loading
    load = imdb_data.load_processed_data(
        imdb_data.get_latest_cache_path(_cache_dir=cache_dir))
    self._test_output(load)

    # Make sure saving uses old cache
    imdb_data.save_processed_data(load, save_dir, _cache_dir=cache_dir)
    self._check_testing_cache(cache_dir)
    self._is_valid_cache(save_dir)
    self._rm_cache_path(save_dir)

    self._clean_testing_cache(cache_dir)
def objective(self, space):
    """Hyperopt objective: train a model for one point in *space*.

    Loads data according to the sampled hyperparameters, trains with
    early stopping and checkpointing, then evaluates the best saved
    checkpoint on the validation set.

    Returns a dict with 'loss', 'acc' and 'status' as hyperopt expects.
    """
    data = load_data(int(space['top_words']),
                     int(space['max_word_length']),
                     fraction=space['data_fraction'])

    model = self.create_model(space)

    # Stop when the monitored metric stalls; keep only the best weights on disk.
    early_stopping = EarlyStopping(monitor=space['monitor'], patience=1)
    model_checkpoint = ModelCheckpoint(filepath=space['model_file'],
                                       monitor=space['monitor'],
                                       save_best_only=True,
                                       verbose=1)

    model.fit(data['train_data'], data['train_labels'],
              validation_data=(data['valid_data'], data['valid_labels']),
              batch_size=space['batch_size'],
              epochs=space['epochs'],
              callbacks=[model_checkpoint, early_stopping])

    # Evaluate the best checkpoint, not the (possibly overfit) final weights.
    best_model = load_model(space['model_file'])
    print(best_model.metrics_names)
    results = best_model.evaluate(data['valid_data'], data['valid_labels'],
                                  batch_size=2500)

    return {'loss': results[0], 'acc': results[1], 'status': STATUS_OK}
def test_load_data_from_input(self):
    """load_data should accept explicit review lists without writing the cache."""
    result = imdb_data.load_data(
        positive=MOCK_POSITIVE,
        negative=MOCK_NEGATIVE,
        write_to_cache=False,
        pad_to=4)
    self._test_output(result)
def main(_):
    """Train the sentiment model and plot train/test accuracy per epoch."""
    if not FLAGS.data_path:
        # raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    train_accuracies = []
    test_accuracies = []

    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1  # evaluate one sequence at a time

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        # Three views of one shared parameter set: the eval models reuse
        # the training model's variables.
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        print("Starting")

        for i in range(config.max_max_epoch):
            # Exponential learning-rate decay after max_epoch epochs.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

            train_perplexity, train_acc = run_epoch(
                session, m, train_data, m.train_op, id2word, verbose=True)
            train_accuracies.append(train_acc)
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

            valid_perplexity, _ = run_epoch(
                session, mvalid, valid_data, tf.no_op(), id2word)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

            test_perplexity, test_acc = run_epoch(
                session, mtest, test_data, tf.no_op(), id2word)
            test_accuracies.append(test_acc)
            print("Test Perplexity: %.3f" % test_perplexity)

        plt.figure()
        plt.plot(train_accuracies, label="train")
        plt.plot(test_accuracies, label="test")
        plt.show()
        plt.close()
def main(_):
    """Train the sentiment model, reporting perplexity and accuracy per epoch."""
    if not FLAGS.data_path:
        # raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1  # evaluate one sequence at a time

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        # The eval models share the training model's variables via reuse.
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        print("Starting")

        for i in range(config.max_max_epoch):
            # Decay the learning rate once past max_epoch epochs.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

            train_perplexity, train_accuracy = run_epoch(
                session, m, train_data, m.train_op, id2word, verbose=True)
            print("Epoch: %d Train Perplexity: %.3f Train Accuracy: %.3f" % \
                (i + 1, train_perplexity, train_accuracy))

            valid_perplexity, valid_accuracy = run_epoch(
                session, mvalid, valid_data, tf.no_op(), id2word)
            print("Epoch: %d Valid Perplexity: %.3f Valid Accuracy: %.3f" % \
                (i + 1, valid_perplexity, valid_accuracy))

            test_perplexity, test_accuracy = run_epoch(
                session, mtest, test_data, tf.no_op(), id2word)
            print("Test Perplexity: %.3f Test Accuracy: %.3f" %
                  (test_perplexity, test_accuracy))
def make(config):
    """Build the LSTM model plus data loaders, loss and optimizer.

    Returns (model, data, train_loader, test_loader, valid_loader,
    criterion, optimizer), all configured from *config*.
    """
    data = load_data(pad_to=config['seq_len'], confirmation=False)

    # One loader per split, all with the same batch size.
    batch = config['batch_size']
    train_loader = make_loader((data['train_x'], data['train_y']), batch_size=batch)
    valid_loader = make_loader((data['valid_x'], data['valid_y']), batch_size=batch)
    test_loader = make_loader((data['test_x'], data['test_y']), batch_size=batch)

    model = lstm.make_model(config, data)
    model.to(device)

    # Binary cross-entropy: model presumably emits sigmoid probabilities.
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    return model, data, train_loader, test_loader, valid_loader, criterion, optimizer
def make(config, *, regenerate_pairs=False, verbose=False):
    """Build skip-gram training artifacts from the IMDB data.

    Generates (center word, context word) pairs from the training split —
    or, by default, loads previously generated pairs from disk — then splits
    them into train/valid/test sets and wraps them in data loaders.

    Args:
        config: dict with 'context_size', 'batch_size' and 'lr' keys.
        regenerate_pairs: keyword-only; when True, rebuild the pair arrays
            from data['train_x'] and save them to DIRNAME. Default False
            (load the saved .npy files), matching the original hard-coded
            `if False:` behavior.
        verbose: keyword-only; when True, print split sizes. Default False,
            matching the original hard-coded `if False:` behavior.

    Returns:
        (model, data, train_loader, test_loader, valid_loader,
         criterion, optimizer)
    """
    data = imdb_data.load_data(pad_to=PADDING, confirmation=False)

    # Center words and their context targets, parallel lists.
    x = []
    y = []
    if regenerate_pairs:
        for entry in tqdm(data['train_x'], total=len(data['train_x'])):
            entry_length = len(entry)
            for i, word in enumerate(entry):
                # Do not process padding chars (token id 0).
                if word == 0:
                    continue
                for delta in range(-config['context_size'],
                                   config['context_size'] + 1):
                    context_i = i + delta
                    # Keep the context index inside the entry and off center.
                    if context_i < 0 or context_i >= entry_length or context_i == i:
                        continue
                    x.append(word)
                    y.append(entry[context_i])
        # Save for future use:
        print('Saving word context pairs...')
        np.save(os.path.join(DIRNAME, 'center_words.npy'), x)
        np.save(os.path.join(DIRNAME, 'context_targets.npy'), y)
    else:
        print('Loading pairs from file...')
        x = np.load(os.path.join(DIRNAME, 'center_words.npy'))
        y = np.load(os.path.join(DIRNAME, 'context_targets.npy'))

    # Split into sets: first SPLIT_FRAC for train, remainder halved
    # between valid and test. x and y are parallel, so one cut index works.
    total_x = len(x)
    train_end = int(SPLIT_FRAC * total_x)
    data['train_x'] = x[0:train_end]
    data['train_y'] = y[0:train_end]

    # Get the remaining x and y after train
    x = x[train_end:]
    y = y[train_end:]
    half = int(len(x) * 0.5)
    data['valid_x'] = x[0:half]
    data['valid_y'] = y[0:half]
    data['test_x'] = x[half:]
    data['test_y'] = y[half:]

    if verbose:
        print('Train: ')
        print('\twords:', len(data['train_x']))
        print('\tcontext:', len(data['train_y']))
        print('Test: ')
        print('\twords:', len(data['test_x']))
        print('\tcontext:', len(data['test_y']))

    # Make loaders
    train_loader = make_loader((data['train_x'], data['train_y']),
                               batch_size=config['batch_size'])
    valid_loader = make_loader((data['valid_x'], data['valid_y']),
                               batch_size=config['batch_size'])
    test_loader = make_loader((data['test_x'], data['test_y']),
                              batch_size=config['batch_size'])

    # Make model
    model = skip_gram.make_model(config, data['vocab_to_int'])
    model.to(device)

    # NLLLoss pairs with a log-softmax output layer — TODO confirm the
    # skip_gram model ends in log-softmax.
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    # The original source had this return statement broken across two
    # physical lines (lost continuation); reassembled here.
    return (model, data, train_loader, test_loader, valid_loader,
            criterion, optimizer)
print('\n-\t Creating data loaders')

from torch.utils.data import DataLoader, TensorDataset

# Wrap the numpy splits in tensor datasets for the loaders below.
train_data = None
valid_data = None
test_data = None

# NOTE(review): hard-coded `if True:` — the else branch (reloading via
# imdb_data.load_data) is currently dead. Preserved as-is.
if True:
    train_data = TensorDataset(torch.from_numpy(train_x),
                               torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(valid_x),
                               torch.from_numpy(valid_y))
    test_data = TensorDataset(torch.from_numpy(test_x),
                              torch.from_numpy(test_y))
else:
    from imdb_data import load_data
    data = load_data(pad_to=400)
    train_data = TensorDataset(torch.from_numpy(data['train_x']),
                               torch.from_numpy(data['train_y']))
    test_data = TensorDataset(torch.from_numpy(data['test_x']),
                              torch.from_numpy(data['test_y']))
    valid_data = TensorDataset(torch.from_numpy(data['valid_x']),
                               torch.from_numpy(data['valid_y']))
def test_load_data_basic(self):
    """Smoke test: load_data accepts a minimal positive/negative pair."""
    imdb_data.load_data(
        positive=['hello'],
        negative=['bye'],
        write_to_cache=False)