def R_score(f_pred, prepare_data, data, iterator, model_options,
            verbose=False):
    """
    Compute the R^2 score (1 - SS_res / SS_tot) of a bidirectional model.

    f_pred: function computing the prediction, called as ``f_pred(x, rx)``
        with the forward batch and the time-reversed batch.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: pair ``(sequences, targets)``.
    iterator: iterable of ``(batch_number, sample_indices)`` minibatches.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns the score as a NumPy scalar.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    # Convert the targets once instead of once per use inside the loop.
    all_targets = numpy.array(data[1])

    # The total sum of squares describes the whole target set, so it is
    # computed a single time and never accumulated per minibatch.
    ss_tot = ((all_targets - all_targets.mean()) ** 2).sum()

    ss_res = 0.
    for _, valid_index in iterator:
        x, _ = prepare_data([data[0][t] for t in valid_index],
                            all_targets[valid_index], n_iter, n_input)
        # Same samples, each sequence reversed along its own time axis.
        rx, _ = prepare_data([data[0][t][::-1] for t in valid_index],
                             all_targets[valid_index], n_iter, n_input)
        preds = f_pred(x, rx)
        targets = all_targets[valid_index]
        # The predictions are already concrete arrays: reduce them with
        # NumPy rather than building and compiling a symbolic graph.
        ss_res += ((targets - preds.T) ** 2).sum()

    return 1. - (ss_res / ss_tot)
def pred_probs(f_pred, prepare_data, data, model_options, verbose=False):
    """
    Run a trained bidirectional model on new examples.

    f_pred: function computing the prediction, called as ``f_pred(x, rx)``.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: the input sequences (no targets are needed here).
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns whatever ``f_pred`` returns for the prepared batch.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    x, _ = prepare_data(data, numpy.array([]), n_iter, n_input)
    # Reverse every sequence along its own time axis, as R_score does.
    # Reversing the container itself would only reorder the samples and
    # pair each forward sequence with a different sample's data.
    rx, _ = prepare_data([seq[::-1] for seq in data], numpy.array([]),
                         n_iter, n_input)
    return f_pred(x, rx)
def pred_error(f_pred, prepare_data, data, iterator, model_options,
               verbose=False):
    """
    Compute the mean squared prediction error over the given minibatches.

    f_pred: function computing the prediction, called as ``f_pred(x)``.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: pair ``(sequences, targets)``.
    iterator: iterable of ``(batch_number, sample_indices)`` minibatches.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns the summed squared error, converted with ``numpy_floatX`` and
    divided by ``len(data[0])``.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    # Convert the targets once instead of twice per minibatch.
    all_targets = numpy.array(data[1])

    valid_err = 0
    for _, valid_index in iterator:
        x, _ = prepare_data([data[0][t] for t in valid_index],
                            all_targets[valid_index], n_iter, n_input)
        preds = f_pred(x)
        targets = all_targets[valid_index]
        valid_err += ((targets - preds.T) ** 2).sum()

    return numpy_floatX(valid_err) / len(data[0])
def pred_probs(f_pred, prepare_data, data, model_options, verbose=False):
    """
    Run a trained model on new examples.

    f_pred: function computing the prediction, called as ``f_pred(x)``.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: the input sequences (no targets are needed here).
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns whatever ``f_pred`` returns for the prepared batch.
    """
    # An empty target array is passed because only the inputs are needed.
    x, _ = prepare_data(data, numpy.array([]),
                        model_options['n_iter'], model_options['n_input'])
    return f_pred(x)
def backforecast(f_pred, data, model_options):
    """
    Compute how often the bidirectional RNN gets the up/down trend wrong.

    f_pred: function computing the prediction, called as ``f_pred(x, rx)``.
    data: pair ``(sequences, targets)``.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.

    A sample counts as "up" when its value exceeds ``x[-1, :, 0]``.
    Returns the fraction of samples (as a float) whose predicted direction
    differs from the actual one, i.e. an error rate.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    # TODO: Use the prepare data
    x, y = prepare_data(data[0], data[1], n_iter, n_input)
    # Reverse every sequence along its own time axis; reversing the
    # container would only reorder the samples.
    rx, _ = prepare_data([seq[::-1] for seq in data[0]], data[1],
                         n_iter, n_input)

    # NOTE(review): presumably the last time step of the first input
    # feature for every sample -- confirm against prepare_data.
    last_values = x[-1, :, 0]
    targets = (y > last_values)

    # TODO: not need for this asarray
    preds = f_pred(numpy.asarray(x, dtype='float32'), rx)
    preds_up = (preds[:, 0] > last_values)

    # "!=" replaces the deprecated "<>" spelling of the same comparison.
    err = (targets != preds_up).sum()
    return float(err) / float(len(data[0]))
def backforecast(f_pred, data, model_options):
    """
    Compute how often the RNN gets the up/down trend wrong.

    f_pred: function computing the prediction, called as ``f_pred(x)``.
    data: pair ``(sequences, targets)``.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.

    A sample counts as "up" when its value exceeds ``x[-1, :, 0]``.
    Returns the fraction of samples (as a float) whose predicted direction
    differs from the actual one, i.e. an error rate.
    """
    # TODO: Use the prepare data
    x, y = prepare_data(data[0], data[1],
                        model_options['n_iter'], model_options['n_input'])

    # NOTE(review): presumably the last time step of the first input
    # feature for every sample -- confirm against prepare_data.
    last_values = x[-1, :, 0]
    targets = (y > last_values)

    preds = f_pred(numpy.asarray(x, dtype='float32'))
    preds_up = (preds[:, 0] > last_values)

    # "!=" replaces the deprecated "<>" spelling of the same comparison.
    err = (targets != preds_up).sum()
    return float(err) / float(len(data[0]))
def pred_error(f_pred, prepare_data, data, iterator, model_options,
               verbose=False):
    """
    Compute the mean squared prediction error over the given minibatches.

    f_pred: function computing the prediction, called as ``f_pred(x)``.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: pair ``(sequences, targets)``.
    iterator: iterable of ``(batch_number, sample_indices)`` minibatches.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns the summed squared error divided by ``len(data[0])``.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    # Convert the targets once instead of twice per minibatch.
    all_targets = numpy.array(data[1])

    squared_err = 0.
    for _, valid_index in iterator:
        x, _ = prepare_data([data[0][t] for t in valid_index],
                            all_targets[valid_index], n_iter, n_input)
        preds = f_pred(x)
        targets = all_targets[valid_index]
        # The predictions are already concrete arrays: reduce them with
        # NumPy rather than building and compiling a symbolic graph.
        squared_err += ((targets - preds.T) ** 2).sum()

    return squared_err / len(data[0])
def R_score(f_pred, prepare_data, data, iterator, model_options,
            verbose=False):
    """
    Compute the R^2 score (1 - SS_res / SS_tot) of the model.

    f_pred: function computing the prediction, called as ``f_pred(x)``.
    prepare_data: usual prepare_data for that dataset, called as
        ``prepare_data(sequences, targets, n_iter, n_input)``.
    data: pair ``(sequences, targets)``.
    iterator: iterable of ``(batch_number, sample_indices)`` minibatches.
    model_options: dict providing the ``'n_iter'`` and ``'n_input'`` entries.
    verbose: unused, kept so existing callers keep working.

    Returns the score as a NumPy scalar.
    """
    n_iter = model_options['n_iter']
    n_input = model_options['n_input']

    # Convert the targets once instead of once per use inside the loop.
    all_targets = numpy.array(data[1])

    # The total sum of squares describes the whole target set, so it is
    # computed a single time and never accumulated per minibatch.
    ss_tot = ((all_targets - all_targets.mean()) ** 2).sum()

    ss_res = 0.
    for _, valid_index in iterator:
        x, _ = prepare_data([data[0][t] for t in valid_index],
                            all_targets[valid_index], n_iter, n_input)
        preds = f_pred(x)
        targets = all_targets[valid_index]
        # The predictions are already concrete arrays: reduce them with
        # NumPy rather than building and compiling a symbolic graph.
        ss_res += ((targets - preds.T) ** 2).sum()

    return 1. - (ss_res / ss_tot)
def train_lstm(
    dim_proj=32,  # word embedding dimension and LSTM number of hidden units.
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=150,  # The maximum number of epochs to run
    dispFreq=40,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.1,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_input=4,  # Forwarded to prepare_data as its n_input argument
    optimizer=mom_sgd,  # sgd, mom_sgd, adadelta and rmsprop available; sgd
                        # is very hard to use, not recommended (probably
                        # needs momentum and a decaying learning rate).
    encoder='lstm',  # TODO: can be removed must be lstm.
    validFreq=20,  # Compute the validation error every validFreq epochs.
    saveFreq=20,  # Save the parameters every saveFreq epochs.
    maxlen=100,  # Sequences longer than this get ignored
    batch_size=50,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    exchange='AUDJPY',  # Prefix of the dataset, model and params file names
    # Parameter for extra option
    noise_std=0.,
    use_dropout=False,  # if False slightly faster, but worse test error
                        # This frequently needs a bigger model.
    reload_model="",  # Any true value reloads the parameters from saveto.
    sum_pool=False,
    mom_start=0.5,  # Momentum used for the first epoch
    mom_end=0.99,  # Momentum used from epoch mom_epoch_interval onwards
    mom_epoch_interval=300,  # Number of epochs of the momentum ramp
    learning_rate_decay=0.99995,  # Factor applied to lrate after each epoch
    nlayers=3,
    #learning_rate_decay=0.98,
    predict=False,  # When True, skip training and call predict_lstm
    input_pred=None):  # Input forwarded to predict_lstm when predict is True
    """
    Train the (deep) LSTM regression model for one exchange.

    Every argument, plus the local names defined before the snapshot is
    taken (paths, ``ydim``, ``n_iter``), is collected into ``model_options``
    and handed to ``init_params``, ``build_model``, ``prepare_data`` and the
    evaluation helpers.

    Returns ``(train_err, valid_err, test_err)`` measured with the best
    parameters found.  Returns ``(1., 1., 1.)`` as soon as a minibatch cost
    is NaN or infinite, and returns ``predict_lstm(input_pred,
    model_options)`` without training when ``predict`` equals True.

    NOTE(review): the indentation of this function was lost in the source;
    the learning-rate decay, validation and checkpoint steps are placed at
    the end of each epoch because their conditions test the epoch index.
    Confirm against the intended schedule.
    """
    model_path = "/user/j/jgpavez/rnn_trading/models/"
    data_path = "/user/j/jgpavez/rnn_trading/data/"
    saveto = exchange + '_model_deep.npz'
    params_file = exchange + '_params_deep.npz'
    dataset = exchange + '_hour.csv'
    saveto = os.path.join(model_path, saveto)
    params_file = os.path.join(data_path, params_file)
    ydim = 1
    n_iter = 50

    # Model options: snapshot of every local defined so far.  Do not add
    # new local names above this line, they would leak into the options.
    model_options = locals().copy()

    if predict == True:
        return predict_lstm(input_pred, model_options)

    print("model options %s" % (model_options,))

    print('Loading data')
    train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset,
                                              params_file=params_file)

    #YDIM??
    #number of labels (output)
    theano.config.optimizer = 'None'

    print('Building model')
    # This creates the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params(saveto, params)

    # This creates Theano Shared Variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copies of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, y, f_pred_prob, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U']**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    f_grad = theano.function([x, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, y, cost)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size,
                                   shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size,
                                  shuffle=True)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    best_p = None
    # Number of validations without improvement (the counter used below).
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size

    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    mom = 0

    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            # Linear momentum ramp from mom_start to mom_end.  The ratio is
            # computed in floating point: with integer operands it would
            # truncate to 0 and the momentum would never ramp.
            if eidx < mom_epoch_interval:
                ramp = float(eidx) / mom_epoch_interval
                mom = mom_start * (1.0 - ramp) + mom_end * ramp
            else:
                mom = mom_end

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y_batch = [train[1][t] for t in train_index]
                x_batch = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format.
                x_batch, y_batch = prepare_data(x_batch, y_batch,
                                                model_options['n_iter'],
                                                model_options['n_input'])

                if x_batch is None:
                    print('Minibatch with zero sample under length  %s'
                          % maxlen)
                    continue
                n_samples += x_batch.shape[1]

                batch_cost = f_grad_shared(x_batch, y_batch)
                f_update(lrate, mom)

                if numpy.isnan(batch_cost) or numpy.isinf(batch_cost):
                    print('NaN detected')
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print('Epoch  %s Update  %s Cost  %s'
                          % (eidx, uidx, batch_cost))

            #decay
            #TODO: CHECK THIS LEARNING RATE
            lrate = learning_rate_decay * lrate

            if numpy.mod(eidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_err = pred_error(f_pred_prob, prepare_data, valid,
                                       kf_valid, model_options)
                test_err = pred_error(f_pred_prob, prepare_data, test,
                                      kf_test, model_options)
                bckfr_err = backforecast(f_pred_prob, test, model_options)

                history_errs.append([valid_err, test_err])

                # Keep the parameters with the lowest test error so far.
                if (eidx == 0 or
                        test_err <= numpy.array(history_errs)[:, 1].min()):
                    best_p = unzip(tparams)
                    bad_counter = 0

                # One formatted string so the message is not shown as a
                # tuple by the Python 2 print statement.
                print('Valid  %s Test  %s Backfore  %s'
                      % (valid_err, test_err, bckfr_err))

                if (len(history_errs) > patience and
                        valid_err >= numpy.array(history_errs)[:-patience,
                                                               0].min()):
                    bad_counter += 1
                    if bad_counter > patience:
                        print('Early Stop!')
                        estop = True

            if numpy.mod(eidx, saveFreq) == 0:
                sys.stdout.write('Saving... ')

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                # Close the options file deterministically.
                with open('%s.pkl' % saveto, 'wb') as options_file:
                    pkl.dump(model_options, options_file, -1)
                print('Done')

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    # The training error is part of the saved results and of the return
    # value, so it has to be measured here.
    kf_train = get_minibatches_idx(len(train[0]), valid_batch_size,
                                   shuffle=True)
    train_err = pred_error(f_pred_prob, prepare_data, train, kf_train,
                           model_options)
    valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid,
                           model_options)
    test_err = pred_error(f_pred_prob, prepare_data, test, kf_test,
                          model_options)
    bckfr_err = backforecast(f_pred_prob, test, model_options)

    print('Valid  %s Test  %s Backforecasting  %s'
          % (valid_err, test_err, bckfr_err))

    numpy.savez(saveto, train_err=train_err,
                valid_err=valid_err, test_err=test_err,
                history_errs=history_errs, **best_p)
    print('The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    sys.stderr.write('Training took %.1fs\n' % (end_time - start_time))
    return train_err, valid_err, test_err
def train_lstm(
    #dim_proj=32,
    dim_proj=124,  # word embedding dimension and LSTM number of hidden units.
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=150,  # The maximum number of epochs to run
    dispFreq=40,  # Write the training progress to the log every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.1,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_input=4,  # Forwarded to prepare_data as its n_input argument
    optimizer=mom_sgd,  # sgd, mom_sgd, adadelta and rmsprop available; sgd
                        # is very hard to use, not recommended (probably
                        # needs momentum and a decaying learning rate).
    encoder='lstm',  # TODO: can be removed must be lstm.
    tick='hour',  # Sampling period; selects the '<exchange>_<tick>.csv' file
    validFreq=5,  # Compute the validation error every validFreq epochs.
    saveFreq=5,  # Save the parameters every saveFreq epochs.
    maxlen=100,  # Sequences longer than this get ignored
    batch_size=50,  # The batch size during training.
    valid_batch_size=50,  # The batch size used for validation/test set.
    exchange='AUDJPY',  # Prefix of the dataset, model and params file names
    # Parameter for extra option
    noise_std=0.,
    use_dropout=False,  # if False slightly faster, but worse test error
                        # This frequently needs a bigger model.
    reload_model="",  # Any true value reloads the parameters from saveto.
    sum_pool=False,
    mom_start=0.5,  # Momentum used for the first epoch
    mom_end=0.99,  # Momentum used from epoch mom_epoch_interval onwards
    mom_epoch_interval=60,  # Number of epochs of the momentum ramp
    learning_rate_decay=0.99995,  # Factor applied to lrate after each epoch
    #learning_rate_decay=0.98,
    predict=False,  # When True, skip training and call predict_lstm
    input_pred=None  # Input forwarded to predict_lstm when predict is True
):
    '''
    Main function for LSTM training

    Every argument, plus the local names defined before the snapshot is
    taken (paths, ``ydim``, ``n_iter``), is collected into ``model_options``
    and handed to ``init_params``, ``build_model``, ``prepare_data`` and the
    evaluation helpers.  Training progress is appended to a log file under
    ``log_path``.

    Returns ``(train_err, valid_err, test_err)`` measured with the best
    parameters found.  Returns ``(1., 1., 1.)`` as soon as a minibatch cost
    is NaN or infinite, and returns ``predict_lstm(input_pred,
    model_options)`` without training when ``predict`` equals True.

    NOTE(review): the indentation of this function was lost in the source;
    the learning-rate decay, validation and checkpoint steps are placed at
    the end of each epoch because their conditions test the epoch index.
    Confirm against the intended schedule.
    '''
    model_path = "/user/j/jgpavez/rnn_trading/models/"
    data_path = "/user/j/jgpavez/rnn_trading/data/"
    log_path = "/user/j/jgpavez/rnn_trading/logs/"
    saveto = exchange + '_model.npz'
    params_file = exchange + '_params.npz'
    dataset = exchange + '_{0}.csv'.format(tick)
    saveto = os.path.join(model_path, saveto)
    params_file = os.path.join(data_path, params_file)
    ydim = 1
    #n_iter = 10
    n_iter = 24

    # Model options: snapshot of every local defined so far.  Do not add
    # new local names above this line, they would leak into the options.
    model_options = locals().copy()

    if predict == True:
        return predict_lstm(input_pred, model_options)

    # Both values are part of the name; the second placeholder must be
    # {1}, otherwise n_iter is silently ignored.
    log_file_name = log_path + 'log_{0}_{1}.log'.format(dim_proj, n_iter)

    print("model options %s" % (model_options,))

    print('Loading data')
    train, valid, test, mean, std = read_data(max_len=n_iter, path=dataset,
                                              params_file=params_file,
                                              min=(tick == 'minute'))

    #YDIM??
    #number of labels (output)
    theano.config.optimizer = 'None'

    print('Building model')
    # This creates the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params(saveto, params)

    # This creates Theano Shared Variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copies of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, y, f_pred_prob, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U']**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    f_grad = theano.function([x, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, y, cost)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size,
                                   shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size,
                                  shuffle=True)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    best_p = None
    # Number of validations without improvement (the counter used below).
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size

    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    mom = 0

    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            # Linear momentum ramp from mom_start to mom_end.  The ratio is
            # computed in floating point: with integer operands it would
            # truncate to 0 and the momentum would never ramp.
            if eidx < mom_epoch_interval:
                ramp = float(eidx) / mom_epoch_interval
                mom = mom_start * (1.0 - ramp) + mom_end * ramp
            else:
                mom = mom_end

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y_batch = [train[1][t] for t in train_index]
                x_batch = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format.
                x_batch, y_batch = prepare_data(x_batch, y_batch,
                                                model_options['n_iter'],
                                                model_options['n_input'])

                if x_batch is None:
                    print('Minibatch with zero sample under length  %s'
                          % maxlen)
                    continue
                n_samples += x_batch.shape[1]

                batch_cost = f_grad_shared(x_batch, y_batch)
                f_update(lrate, mom)

                if numpy.isnan(batch_cost) or numpy.isinf(batch_cost):
                    print('NaN detected')
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    with open(log_file_name, 'a') as log_file:
                        log_file.write(
                            'Epoch {0} Update {1} Cost {2}\n'.format(
                                eidx, uidx, batch_cost))

            #decay
            #TODO: CHECK THIS LEARNING RATE
            lrate = learning_rate_decay * lrate

            if numpy.mod(eidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_err = pred_error(f_pred_prob, prepare_data, valid,
                                       kf_valid, model_options)
                test_err = pred_error(f_pred_prob, prepare_data, test,
                                      kf_test, model_options)
                # Backforecast and R2 evaluation are disabled; placeholders
                # keep the progress messages unchanged.
                #bckfr_err = backforecast(f_pred_prob, test, model_options)
                #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options)
                bckfr_err = 0.
                r2_score = 0.

                # Column 1 must hold the test error: the selection rule
                # below compares test_err with the minimum of that column.
                # Storing the 0. placeholder there makes the rule pass only
                # on the first epoch.
                history_errs.append([valid_err, test_err])

                # Keep the parameters with the lowest test error so far.
                if (eidx == 0 or
                        test_err <= numpy.array(history_errs)[:, 1].min()):
                    best_p = unzip(tparams)
                    bad_counter = 0

                with open(log_file_name, 'a') as log_file:
                    log_file.write('Valid {0} Test {1}\n'.format(valid_err,
                                                                 test_err))
                # One formatted string so the message is not shown as a
                # tuple by the Python 2 print statement.
                print('Valid %s Test  %s Backfore  %s R2 score  %s'
                      % (valid_err, test_err, bckfr_err, r2_score))

                if (len(history_errs) > patience and
                        valid_err >= numpy.array(history_errs)[:-patience,
                                                               0].min()):
                    bad_counter += 1
                    if bad_counter > patience:
                        print('Early Stop!')
                        estop = True

            if numpy.mod(eidx, saveFreq) == 0:
                sys.stdout.write('Saving... ')

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                # Close the options file deterministically.
                with open('%s.pkl' % saveto, 'wb') as options_file:
                    pkl.dump(model_options, options_file, -1)
                print('Done')

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    # The training error is part of the saved results and of the return
    # value, so it has to be measured here.
    kf_train = get_minibatches_idx(len(train[0]), valid_batch_size,
                                   shuffle=True)
    train_err = pred_error(f_pred_prob, prepare_data, train, kf_train,
                           model_options)
    valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid,
                           model_options)
    test_err = pred_error(f_pred_prob, prepare_data, test, kf_test,
                          model_options)
    #bckfr_err = backforecast(f_pred_prob, test, model_options)
    #r2_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options)
    r2_score = 0.
    bckfr_err = 0.

    print('Valid  %s Test  %s Backforecasting  %s  R2 score:  %s'
          % (valid_err, test_err, bckfr_err, r2_score))

    numpy.savez(saveto, train_err=train_err,
                valid_err=valid_err, test_err=test_err,
                history_errs=history_errs, **best_p)
    print('The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    sys.stderr.write('Training took %.1fs\n' % (end_time - start_time))
    return train_err, valid_err, test_err