Esempio n. 1
0
def load_train_valid_data(config, verbose):
    """
    Build the training and validation batch sets for ``config``.

    The data file location is resolved with ``data_utils.get_data_path``
    from ``config.data_dir`` and ``config.datafile``. A single
    ``BatchGenerator`` is constructed over that path and is then asked for
    its training batches first and its validation batches second.

    Args:
        config: configuration object; ``data_dir`` and ``datafile`` are read
            here and the whole object is handed to ``BatchGenerator``.
        verbose: forwarded to ``BatchGenerator`` as its ``verbose`` keyword.

    Returns:
        A ``(train_data, valid_data)`` tuple holding the results of
        ``train_batches()`` and ``valid_batches()`` respectively.
    """
    generator = BatchGenerator(
        data_utils.get_data_path(config.data_dir, config.datafile),
        config,
        verbose=verbose)
    return generator.train_batches(), generator.valid_batches()
Esempio n. 2
0
def train_model(config):
    """
    Train a model according to ``config`` and checkpoint it while training.

    Steps performed:
      1. Report the configured start/end dates (each only when it is set).
      2. Build a ``BatchGenerator`` over ``config.data_dir/config.datafile``
         and obtain its training and validation batches.
      3. Open a TensorFlow graph/session, seed it when ``config.seed`` is
         set, construct the model and install the scaling parameters
         derived from the training data.
      4. Run up to ``config.max_epoch`` epochs via ``run_epoch``, printing
         the train/validation MSE and learning rate after each one.
      5. After each epoch: adjust the learning rate for optimizers whose
         name starts with "Gradient" or "Momentum", make sure
         ``config.model_dir`` exists, then either terminate or save.

    Args:
        config: configuration object. Attributes read here: ``start_date``,
            ``end_date``, ``datafile``, ``data_dir``, ``seed``,
            ``early_stop``, ``learning_rate``, ``max_epoch``, ``keep_prob``,
            ``passes``, ``optimizer``, ``lr_decay`` and ``model_dir``.

    Returns:
        None.

    Raises:
        SystemExit: when the validation MSE becomes NaN, or when
            ``stop_training`` reports that training should stop. The exit
            status is the default one in both cases.
    """
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # Each date is reported only when that particular date is configured.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)

    data_path = os.path.join(config.data_dir, config.datafile)
    batches = BatchGenerator(data_path, config, is_training_only=True)

    train_data = batches.train_batches(verbose=True)
    valid_data = batches.valid_batches(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        params = model_utils.get_scaling_params(config,
                                                train_data,
                                                verbose=True)
        model.set_scaling_params(session, **params)

        # No noise model is used here; None is passed through to run_epoch.
        noise_model = None

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)
        sys.stdout.flush()

        # Per-epoch MSE histories; they drive learning-rate adjustment,
        # the stop check and the decision to save a checkpoint.
        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):

            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               noise_model=noise_model,
                                               verbose=True)
            print((
                'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f'
            ) % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # re.match anchors at the start of the string, so this selects
            # optimizers whose name begins with "Gradient" or "Momentum".
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                os.mkdir(config.model_dir)

            # sys.exit() raises the same SystemExit as the interactive
            # quit() helper, but does not depend on the site module.
            if math.isnan(valid_mse):
                print("Training failed due to nan.")
                sys.exit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                sys.exit()
            else:
                # Without early stopping every epoch is saved; otherwise
                # only epochs that match the best validation MSE so far.
                if ((config.early_stop is None)
                        or (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
Esempio n. 3
0
############################################################################
#   Regenerate the tabular data when the cache directory is missing or
#   caching has been explicitly switched off (config.use_cache is False)
############################################################################
if not os.path.exists(cache_path) or config.use_cache is False:
    print("Generating Data from Scratch")

    # NOTE(review): 999901 looks like a far-future date sentinel so that no
    # rows are cut off by the end date -- confirm the expected date format.
    config.end_date = 999901

    data_bg = BatchGenerator(
        train_path, config, config.batch_size, config.num_unrollings,
        validation_size=config.validation_size, randomly_sample=False)

    train_bg = data_bg.train_batches()
    valid_bg = data_bg.valid_batches()

    print("Grabbing tabular data from batch generator")
    X_train_full, Y_train_full, dates_train = get_tabular_data(train_bg)
    X_valid_full, Y_valid_full, dates_valid = get_tabular_data(valid_bg)

    print("Saving tabular data to cache")
    # JDA 10/27/16: Save these objects to cache here
    if not os.path.exists(cache_path):
        os.mkdir(cache_path)
    # Files are written in this fixed order; dates_valid is not written
    # within this span.
    for _cache_name, _cache_array in (
            ('X_train_full.npy', X_train_full),
            ('Y_train_full.npy', Y_train_full),
            ('X_valid_full.npy', X_valid_full),
            ('Y_valid_full.npy', Y_valid_full),
            ('dates_train.npy', dates_train)):
        np.save(os.path.join(cache_path, _cache_name), _cache_array)
Esempio n. 4
0
# Keep the configured end date; config.end_date is overwritten below when
# the data has to be regenerated.
end_date = config.end_date

############################################################################
#   Regenerate the tabular data when the cache directory is missing or
#   caching has been explicitly switched off (config.use_cache is False)
############################################################################
if not os.path.exists(cache_path) or config.use_cache is False:
    print("Generating Data from Scratch")

    # NOTE(review): 999901 looks like a far-future date sentinel so that no
    # rows are cut off by the end date -- confirm the expected date format.
    config.end_date = 999901

    data_bg = BatchGenerator(
        train_path, config, config.batch_size, config.num_unrollings,
        validation_size=config.validation_size, randomly_sample=False)

    train_bg = data_bg.train_batches()
    valid_bg = data_bg.valid_batches()

    print("Grabbing tabular data from batch generator")
    X_train_full, Y_train_full, dates_train = get_tabular_data(train_bg)
    X_valid_full, Y_valid_full, dates_valid = get_tabular_data(valid_bg)

    print("Saving tabular data to cache")
    # JDA 10/27/16: Save these objects to cache here
    if not os.path.exists(cache_path):
        os.mkdir(cache_path)
    # Files are written in this fixed order; dates_valid is not written
    # within this span.
    for _cache_name, _cache_array in (
            ('X_train_full.npy', X_train_full),
            ('Y_train_full.npy', Y_train_full),
            ('X_valid_full.npy', X_valid_full),
            ('Y_valid_full.npy', Y_valid_full),
            ('dates_train.npy', dates_train)):
        np.save(os.path.join(cache_path, _cache_name), _cache_array)