def test_saving_data(self):
        current_dir = os.path.dirname(__file__)
        fake_cache_dir = os.path.join(current_dir, '_test_cache')
        fake_save_dir = os.path.join(current_dir, '_test_save')

        self._clean_testing_cache(fake_cache_dir)
        out = imdb_data.load_data(
            positive=MOCK_POSITIVE,
            negative=MOCK_NEGATIVE,
            _cache_dir=fake_cache_dir,
            pad_to=4
        )

        self._check_testing_cache(fake_cache_dir)
        # Validate output
        self._test_output(out)

        # Test saving
        imdb_data.save_processed_data(out, fake_save_dir, _cache_dir=fake_cache_dir)
        self._check_testing_cache(fake_cache_dir)
        self._is_valid_cache(fake_save_dir)
        self._rm_cache_path(fake_save_dir)

        # Test loading
        load = imdb_data.load_processed_data(imdb_data.get_latest_cache_path(_cache_dir=fake_cache_dir))
        self._test_output(load)

        # Make sure saving uses old cache
        imdb_data.save_processed_data(load, fake_save_dir, _cache_dir=fake_cache_dir)
        self._check_testing_cache(fake_cache_dir)
        self._is_valid_cache(fake_save_dir)
        self._rm_cache_path(fake_save_dir)

        self._clean_testing_cache(fake_cache_dir)
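
The _test_output helper used above is defined elsewhere in the test module. A minimal sketch of what it might verify, assuming load_data returns the dict of splits the later examples read ('train_x', 'train_y', ..., 'vocab_to_int') with every row padded to pad_to:

# Hypothetical helper; the real implementation lives elsewhere in the test class.
def _test_output(self, out, pad_to=4):
    for split in ('train', 'valid', 'test'):
        x, y = out[split + '_x'], out[split + '_y']
        self.assertEqual(len(x), len(y))        # one label per review
        for row in x:
            self.assertEqual(len(row), pad_to)  # rows padded/truncated to pad_to
    self.assertIn('vocab_to_int', out)          # word -> id mapping used downstream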
Example n. 2
    def objective(self, space):
        data = load_data(int(space['top_words']),
                         int(space['max_word_length']),
                         fraction=space['data_fraction'])
        model = self.create_model(space)

        early_stopping = EarlyStopping(monitor=space['monitor'], patience=1)
        model_checkpoint = ModelCheckpoint(filepath=space['model_file'],
                                           monitor=space['monitor'],
                                           save_best_only=True,
                                           verbose=1)
        model.fit(data['train_data'],
                  data['train_labels'],
                  validation_data=(data['valid_data'], data['valid_labels']),
                  batch_size=space['batch_size'],
                  epochs=space['epochs'],
                  callbacks=[model_checkpoint, early_stopping])

        best_model = load_model(space['model_file'])
        print(best_model.metrics_names)
        results = best_model.evaluate(data['valid_data'],
                                      data['valid_labels'],
                                      batch_size=2500)
        loss = results[0]
        acc = results[1]
        return {'loss': loss, 'acc': acc, 'status': STATUS_OK}
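
STATUS_OK comes from hyperopt, so this objective is presumably handed to hyperopt's fmin. A hedged sketch of the wiring; the space keys match what objective() reads (create_model may read more), but every range, the model filename, and the tuner instance are assumptions:

from hyperopt import Trials, fmin, hp, tpe

# Hypothetical search space; quniform values arrive as floats, which is why
# objective() casts them with int().
space = {
    'top_words': hp.quniform('top_words', 1000, 20000, 1000),
    'max_word_length': hp.quniform('max_word_length', 100, 500, 50),
    'data_fraction': hp.uniform('data_fraction', 0.25, 1.0),
    'batch_size': hp.choice('batch_size', [32, 64, 128]),
    'epochs': 5,
    'monitor': 'val_loss',
    'model_file': 'best_model.h5',
}

trials = Trials()
best = fmin(fn=tuner.objective,  # tuner: hypothetical instance of the class above
            space=space, algo=tpe.suggest, max_evals=25, trials=trials)

fmin minimizes the 'loss' key of the dict the objective returns; the extra 'acc' key is simply stored alongside each trial.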
Example n. 3
def test_load_data_from_input(self):
    out = imdb_data.load_data(
        positive=MOCK_POSITIVE,
        negative=MOCK_NEGATIVE,
        write_to_cache=False,
        pad_to=4)

    self._test_output(out)
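
MOCK_POSITIVE and MOCK_NEGATIVE are module-level fixtures not shown on this page. Judging from test_load_data_basic at the bottom, which passes plain lists of strings, they are presumably along these lines (values made up for illustration):

MOCK_POSITIVE = ['a great movie', 'loved every minute']    # hypothetical
MOCK_NEGATIVE = ['a terrible movie', 'hated every minute']  # hypothetical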
Example n. 4
def main(_):
    if not FLAGS.data_path:
        # raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    accsTrain = []
    accsTest = []


    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1
    
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.global_variables_initializer().run()  # initialize_all_variables() is deprecated

        print("Starting")
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, accTrain = run_epoch(session, m, train_data, m.train_op, id2word,
                                       verbose=True)
            accsTrain.append(accTrain)
            
            print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
            valid_perplexity, _ = run_epoch(session, mvalid, valid_data,
                                            tf.no_op(), id2word)
            print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

            test_perplexity, accTest = run_epoch(session, mtest, test_data,
                                                 tf.no_op(), id2word)
            accsTest.append(accTest)
            print("Test Perplexity: %.3f" % test_perplexity)
        
    plt.figure()
    plt.plot(accsTrain, label="train")
    plt.plot(accsTest, label="test")
    plt.legend()  # required for the train/test labels to actually render
    plt.show()
    plt.close()
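
The lr_decay expression above holds the learning rate flat for the first max_epoch epochs, then decays it geometrically. A standalone illustration with assumed config values:

# Assumed values for illustration; the real ones live in Config.
learning_rate, lr_decay, max_epoch = 1.0, 0.5, 4

for i in range(8):
    lr = learning_rate * lr_decay ** max(i - max_epoch, 0.0)
    print('Epoch %d: lr = %.3f' % (i + 1, lr))  # 1.000 for epochs 1-5, then 0.5, 0.25, 0.125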
Example n. 5
def main(_):
    if not FLAGS.data_path:
        # raise ValueError("Must set --data_path to PTB data directory")
        pass

    train_data, valid_data, test_data = imdb_data.load_data()
    word2id, id2word = imdb_data.load_dict_imdb()

    config = Config()
    eval_config = Config()
    eval_config.batch_size = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = SentimentModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = SentimentModel(is_training=False, config=config)
            mtest = SentimentModel(is_training=False, config=eval_config)

        tf.global_variables_initializer().run()  # initialize_all_variables() is deprecated

        print("Starting")
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay**max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, train_accuracy = run_epoch(session,
                                                         m,
                                                         train_data,
                                                         m.train_op,
                                                         id2word,
                                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f Train Accuracy: %.3f" % \
                  (i + 1, train_perplexity, train_accuracy))
            valid_perplexity, valid_accuracy = run_epoch(
                session, mvalid, valid_data, tf.no_op(), id2word)
            print("Epoch: %d Valid Perplexity: %.3f Valid Accuracy: %.3f" % \
                  (i + 1, valid_perplexity, valid_accuracy))

        test_perplexity, test_accuracy = run_epoch(session, mtest, test_data,
                                                   tf.no_op(), id2word)
        print("Test Perplexity: %.3f Test Accuracy: %.3f" %
              (test_perplexity, test_accuracy))
Example n. 6
def make(config):
    data = load_data(pad_to=config['seq_len'], confirmation=False)

    train_loader = make_loader((data['train_x'], data['train_y']),
                               batch_size=config['batch_size'])
    valid_loader = make_loader((data['valid_x'], data['valid_y']),
                               batch_size=config['batch_size'])
    test_loader = make_loader((data['test_x'], data['test_y']),
                              batch_size=config['batch_size'])

    model = lstm.make_model(config, data)
    model.to(device)

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    return model, data, train_loader, test_loader, valid_loader, criterion, optimizer
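
make_loader is not defined in these examples, but Example n. 8 below builds TensorDataset objects from the same numpy splits, so a minimal implementation consistent with that usage could look like this (shuffling and the exact signature are assumptions):

import torch
from torch.utils.data import DataLoader, TensorDataset

def make_loader(arrays, batch_size):
    # arrays is the (x, y) pair of numpy arrays passed in above.
    x, y = arrays
    dataset = TensorDataset(torch.from_numpy(x), torch.from_numpy(y))
    return DataLoader(dataset, shuffle=True, batch_size=batch_size)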
Example n. 7
def make(config):
    data = imdb_data.load_data(
        pad_to=PADDING,
        confirmation=False
    )

    # Create (center word, context word) skip-gram pairs from train_x.
    # Flip this flag to regenerate the pairs; otherwise the arrays cached
    # on a previous run are loaded from disk below.
    regenerate_pairs = False

    x = []
    y = []
    if regenerate_pairs:
        for entry in tqdm(data['train_x'], total=len(data['train_x'])):
            entry_length = len(entry)
            for i, word in enumerate(entry):
                # Do not process padding chars
                if word == 0:
                    continue

                for delta in range(-config['context_size'], config['context_size']+1):
                    context_i = i + delta

                    # Validate the position
                    if context_i < 0 or context_i >= entry_length or context_i == i:
                        continue
                    
                    x.append(word)
                    y.append(entry[context_i])
        # Save for future use:
        print('Saving word context pairs...')
        np.save(os.path.join(DIRNAME, 'center_words.npy'), x)
        np.save(os.path.join(DIRNAME, 'context_targets.npy'), y)
    else:
        print('Loading pairs from file...')
        x = np.load(os.path.join(DIRNAME, 'center_words.npy'))
        y = np.load(os.path.join(DIRNAME, 'context_targets.npy'))

    # Split into sets
    total_x = len(x)
    
    data['train_x'] = x[0:int(SPLIT_FRAC*total_x)]
    data['train_y'] = y[0:int(SPLIT_FRAC*total_x)]

    # Get the remaining x and y after train
    x = x[int(SPLIT_FRAC*total_x):]
    y = y[int(SPLIT_FRAC*total_x):]

    data['valid_x'] = x[0:int(len(x)*0.5)]
    data['valid_y'] = y[0:int(len(y)*0.5)]

    data['test_x'] = x[int(len(x)*0.5):]
    data['test_y'] = y[int(len(y)*0.5):]

    # Optional sanity print of the split sizes.
    debug_sizes = False
    if debug_sizes:
        print('Train: ')
        print('\twords:', len(data['train_x']))
        print('\tcontext:', len(data['train_y']))
        print('Test: ')
        print('\twords:', len(data['test_x']))
        print('\tcontext:', len(data['test_y']))


    # Make loaders
    train_loader = make_loader((data['train_x'], data['train_y']), batch_size=config['batch_size'])
    valid_loader = make_loader((data['valid_x'], data['valid_y']), batch_size=config['batch_size'])
    test_loader = make_loader((data['test_x'], data['test_y']), batch_size=config['batch_size'])

    # Make model
    model = skip_gram.make_model(config, data['vocab_to_int'])
    model.to(device)

    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    return model, data, train_loader, test_loader, valid_loader, criterion, optimizer
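
skip_gram.make_model is defined in another module. Since training pairs nn.NLLLoss with integer context targets, the model presumably ends in log-probabilities over the vocabulary; a hedged sketch (class name and embedding size are assumptions):

import torch.nn as nn
import torch.nn.functional as F

class SkipGram(nn.Module):
    def __init__(self, vocab_size, embed_dim=100):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)  # center-word embeddings
        self.out = nn.Linear(embed_dim, vocab_size)       # scores over context words

    def forward(self, center):
        # Log-probabilities, matching the nn.NLLLoss criterion above.
        return F.log_softmax(self.out(self.embed(center)), dim=-1)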
Example n. 8
print('\n-\t Creating data loaders')
from torch.utils.data import DataLoader, TensorDataset

# Create tensor datasets. Flip this flag to rebuild the splits with
# imdb_data.load_data instead of using the arrays already in memory.
use_in_memory_arrays = True

train_data = None
valid_data = None
test_data = None
if use_in_memory_arrays:
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
else:
    from imdb_data import load_data

    data = load_data(pad_to=400)
    
    train_data = TensorDataset(
        torch.from_numpy(data['train_x']),
        torch.from_numpy(data['train_y'])
    )
    test_data = TensorDataset(
        torch.from_numpy(data['test_x']),
        torch.from_numpy(data['test_y'])
    )

    valid_data = TensorDataset(
        torch.from_numpy(data['valid_x']),
        torch.from_numpy(data['valid_y'])
    )
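
DataLoader is imported above but the snippet ends before using it; the natural next step, with an assumed batch size, would be:

# Assumed batch size; wrap each TensorDataset in a shuffling loader.
batch_size = 50
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)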
Example n. 9
def test_load_data_basic(self):
    out = imdb_data.load_data(
        positive=['hello'],
        negative=['bye'],
        write_to_cache=False)