Example #1
def data_for_sumsharpeLoss(markets, date_train_end, lookback, lookahead,
                           sd_window):
    # the inputs do not need to be pre-aligned by date; common dates are computed below
    test_dict = {}
    train_dict = {}
    # getting common dates:
    data = pd.read_csv(datadir % markets[0])
    for i in range(1, len(markets), 1):
        data1 = pd.read_csv(datadir % markets[i])
        data = pd.merge(data, data1, on='dtStart', how='inner')
    dates = data[['dtStart']]

    x = pd.read_csv(datadir % markets[0])
    x = pd.merge(x, dates, on='dtStart', how='inner')
    curr_market_data = dataProcessing.time_series_toMatrix(
        x,
        date_train_end,
        lookback=lookback,
        look_ahead=lookahead,
        sd_window=sd_window)
    train_dict[markets[0]] = copy.deepcopy(curr_market_data[:4])
    test_dict[markets[0]] = copy.deepcopy(curr_market_data[4:])
    for i in range(1, len(markets), 1):
        x = pd.read_csv(datadir % markets[i])
        x = pd.merge(x, dates, on='dtStart', how='inner')
        curr_market_data = dataProcessing.time_series_toMatrix(
            x,
            date_train_end,
            lookback=lookback,
            look_ahead=lookahead,
            sd_window=sd_window)
        train_dict[markets[i]] = copy.deepcopy(curr_market_data[:4])
        test_dict[markets[i]] = copy.deepcopy(curr_market_data[4:])
    return train_dict, test_dict
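A minimal usage sketch (hypothetical path, markets and parameter values; the function reads the module-level datadir format string, so it is set here, and the returned per-market tuples appear to be (X, Y, 1-day return, dates), as in Example #5):

datadir = 'data/%s_Commision-and-Slippage-limits.csv'   # hypothetical location
markets = ('SQ', 'NQ', 'DQ')                            # hypothetical subset

train_dict, test_dict = data_for_sumsharpeLoss(markets, date_train_end=20070418,
                                               lookback=30, lookahead=1,
                                               sd_window=100)
print(train_dict['SQ'][0].shape)   # trainX for the first market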
Example #2
def data_gen(data, lookback, look_ahead, weired_feat=True, skip=0, every=0):
    if weired_feat:
        lookback = 105
        trainX, trainY, train_prediction_start, testX, testY, test_prediction_start = \
            dataProcessing.time_series_toMatrixWeiredModoFeat(data, lookback=lookback,
                                                              look_ahead=5,
                                                              train_inds=(564, 4070),
                                                              test_inds=(4070, 6489),
                                                              every=every)
    else:
        trainX, trainY, train_prediction_start, testX, testY, test_prediction_start = \
            dataProcessing.time_series_toMatrix(data, skip=skip, lookback=lookback,
                                                look_ahead=look_ahead,
                                                train_inds=(564, 4070),
                                                test_inds=(4070, 6489),
                                                every=every)

    return trainX, trainY, train_prediction_start, testX, testY, test_prediction_start
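A minimal usage sketch for data_gen (hypothetical CSV path; the train/test index ranges are hard-coded inside the function):

import pandas as pd

data = pd.read_csv('data/SQ_Commision-and-Slippage-limits.csv')   # hypothetical path
trainX, trainY, train_start, testX, testY, test_start = \
    data_gen(data, lookback=30, look_ahead=5, weired_feat=False, skip=0, every=0)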
def dataPrep(datadir, markets, dates):
    # dates are common dates between all markets
    for i in range(0, len(markets), 1):
        data = pd.read_csv(datadir % markets[i])
        # Make sure we get data from all markets on exact common dates
        data = pd.merge(data, dates, on='dtStart', how='inner')
        curr_market_data = \
            dataProcessing.time_series_toMatrix(data, date_train_end)
        if i == 0:
            trainX = curr_market_data[0]
            trainY = curr_market_data[1]
            train_Y_op_opD = curr_market_data[2]
            train_dates = curr_market_data[3]
            testX = curr_market_data[4]
            testY = curr_market_data[5]
            test_Y_op_opD = curr_market_data[6]
            test_dates = curr_market_data[7]
        else:
            trainX = np.append(trainX,
                               copy.deepcopy(curr_market_data[0]),
                               axis=0)
            trainY = np.dstack((trainY, copy.deepcopy(curr_market_data[1])))
            train_Y_op_opD = np.dstack(
                (train_Y_op_opD, copy.deepcopy(curr_market_data[2])))
            testX = np.append(testX,
                              copy.deepcopy(curr_market_data[4]),
                              axis=0)
            testY = np.dstack((testY, copy.deepcopy(curr_market_data[5])))
            test_Y_op_opD = np.dstack(
                (test_Y_op_opD, copy.deepcopy(curr_market_data[6])))

    trainY = np.transpose(trainY, [2, 1, 0])
    trainY = np.reshape(trainY, trainY.shape[:2])
    train_Y_op_opD = np.transpose(train_Y_op_opD, [2, 1, 0])
    train_Y_op_opD = np.reshape(train_Y_op_opD, train_Y_op_opD.shape[:2])
    testY = np.transpose(testY, [2, 1, 0])
    testY = np.reshape(testY, testY.shape[:2])
    test_Y_op_opD = np.transpose(test_Y_op_opD, [2, 1, 0])
    test_Y_op_opD = np.reshape(test_Y_op_opD, test_Y_op_opD.shape[:2])
    train_data = (trainX, trainY, train_Y_op_opD, train_dates)
    test_data = (testX, testY, test_Y_op_opD, test_dates)
    return train_data, test_data
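A quick shape check of the stacking pattern used in dataPrep (a standalone sketch with made-up sizes: 3 markets, 5 samples each):

import numpy as np

a, b, c = (np.arange(5.0) for _ in range(3))        # per-market target vectors, shape (5,)
stacked = np.dstack((np.dstack((a, b)), c))         # shape (1, 5, 3)
stacked = np.transpose(stacked, [2, 1, 0])          # shape (3, 5, 1)
stacked = np.reshape(stacked, stacked.shape[:2])    # shape (3, 5): one row of targets per market
print(stacked.shape)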
Example #4
def data_gen(data, lookback, look_ahead, skip=0, lookbackMethod=1,
             every=0, scale=False, feat_inds=[], scaler='MinMaxScaler'):
    if lookbackMethod==2:
        lookback = 105
        trainX, trainY, train_prediction_start, testX, testY, test_prediction_start = \
            dataProcessing.time_series_toMatrixWeiredModoFeat(data, lookback=lookback,
                                                              look_ahead=look_ahead,
                                                              train_inds=(564, 4070),
                                                              test_inds=(4070, 6489),
                                                              every=every, scale=scale,
                                                              feat_inds=feat_inds, scaler=scaler)
    else:
        trainX, trainY, train_prediction_start, testX, testY, test_prediction_start = \
            dataProcessing.time_series_toMatrix(data, skip=skip, lookback=lookback,
                                                look_ahead=look_ahead,
                                                train_inds=(564, 4070),
                                                test_inds=(4070, 6489),
                                                every=every, scale=scale,
                                                feat_inds=feat_inds, scaler=scaler)

    return trainX, trainY, train_prediction_start, testX, testY, test_prediction_start
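A minimal usage sketch for this variant (hypothetical path and feature indices; feat_inds and scaler are passed through to dataProcessing, presumably to scale those feature columns):

import pandas as pd

data = pd.read_csv('data/SQ_Commision-and-Slippage-limits.csv')   # hypothetical path
trainX, trainY, train_start, testX, testY, test_start = \
    data_gen(data, lookback=30, look_ahead=1, lookbackMethod=1,
             scale=True, feat_inds=[0, 1, 2], scaler='MinMaxScaler')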
Example #5
def myMainFunc(market, hidden_size, l2Reg, Files_Folder, learning_rate_grid, epoch_grid):

    # objective function and optimizer
    objective = sharpeLoss
    curr_optimizer = tf.train.AdamOptimizer

    # data parameters
    lookback = 30
    lookahead = 1
    rolling_sd_window = 100
    network_activation = tf.nn.tanh
    test_start_date = 20070418
    random_start_indicies = np.arange(1, 11, 1)
    nRandom_start = len(random_start_indicies)
    batch_size = 100

    # loading data
    datadir = 'C:/behrouz/Projects/DailyModels_new/NeuralNet/tf-SQ-only/data/%s_Commision-and-Slippage-limits.csv'

    data = pd.read_csv(datadir % market)

    curr_market_data = \
        dataProcessing.time_series_toMatrix(data,  test_start_date,
                                            lookback=lookback,
                                            look_ahead=lookahead, sd_window=rolling_sd_window)

    train = curr_market_data[:4]
    test = curr_market_data[4:]

    total_batches = train[0].shape[0] // batch_size
    decay_steps = total_batches
    decay_rate = 0.99
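    # note: with decay_steps equal to one epoch's worth of batches, the learning rate
    # is decayed by decay_rate (0.99) roughly once per epoch, presumably via
    # tf.train.exponential_decay inside MLP_1layerFixedOutBias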

    for LR in learning_rate_grid:

        for training_epochs in epoch_grid:

            market_trainPred = np.zeros((train[0].shape[0], nRandom_start + 2))
            market_testPred = np.zeros((test[0].shape[0], nRandom_start + 2))
            total_loss_matrix = np.zeros((nRandom_start, 6))


            market_trainPred[:, 0] = train[3]  # date
            market_trainPred[:, 1] = train[2]  # 1 day return
            market_testPred[:, 0] = test[3]
            market_testPred[:, 1] = test[2]

            for R in range(len(random_start_indicies)):
                print('Hidden Size =', hidden_size, 'Learning rate=', LR,
                      'TrainingEpochs=', training_epochs, 'L2 Reg=', l2Reg, 'Random Start=', R)

                weights = {
                    'h1': initializers.xavier_from_tf_initializer([lookback, hidden_size], name='W_1'),
                    'out': initializers.xavier_from_tf_initializer([hidden_size, 1], name='W_out')
                }
                biases = {
                    'b1': initializers.bias_initializer([hidden_size], name='B_1')
                    # , 'out': initializers.bias_initializer([1], name='B_out')
                }
                # placeholders
                x = tf.placeholder(tf.float32, [None, lookback])
                y = tf.placeholder(tf.float32, [None])

                optimizer, output, sharpe_loss, sharpe_plus_l2_loss, l2Loss, ema = \
                    MLP_1layerFixedOutBias(x, y, weights, biases, curr_optimizer,
                                           objective, network_activation, l2Reg, l2Reg,
                                           LR, decay_steps, decay_rate)

                # Getting EMA var names:
                ema_dict = {}
                for var in tf.trainable_variables():
                    ema_var_name = ema.average_name(var)
                    ema_dict[ema_var_name] = var
                saver = tf.train.Saver(ema_dict)

                #saver = tf.train.Saver()

                with tf.Session() as sess:
                    try:
                        source_model_loc = 'C:/behrouz/Projects/DailyModels_new/NeuralNet/' \
                                           '/tf-SQ-only/%s/' % Files_Folder

                        saver.restore(sess,
                                      source_model_loc + 'MLP-checkpointFiles/' + str(R + 1) +
                                      '/run%d-s-%d-LR-%.6f-epoch-%d-l2-%.5f.ckpt'
                                      % (R + 1, hidden_size, LR, training_epochs, l2Reg))
                        #print(weights['h1'].eval())

                    except IOError:
                        print('Could not find the checkpoint file, falling back to the previous model..')

                    trainPred, train_loss, train_total_loss, trainL2_loss = \
                        sess.run([output, sharpe_loss, sharpe_plus_l2_loss, l2Loss],
                                 feed_dict={x: train[0], y: train[1]})
                    trainPred = trainPred[:, 0]

                    testPred, test_loss, test_total_loss, test_l2_loss = \
                        sess.run([output, sharpe_loss, sharpe_plus_l2_loss, l2Loss],
                                 feed_dict={x: test[0], y: test[1]})

                    testPred = testPred[:, 0]

                    market_trainPred[:, R + 2] = trainPred
                    market_testPred[:, R + 2] = testPred
                    total_loss_matrix[R, 0:3] = train_loss, trainL2_loss, train_total_loss
                    total_loss_matrix[R, 3:] = test_loss, test_l2_loss, test_total_loss

                tf.reset_default_graph()

            total_loss_matrix_colnames = ['train_loss', 'train_l2_loss', 'train_total_loss',
                                          'test_loss', 'test_l2_loss', 'test_total_loss']
            total_loss_matrix = pd.DataFrame(total_loss_matrix, columns=total_loss_matrix_colnames)

            total_loss_matrix.to_csv('./Results/%s-loss-s-%d-LR-%.6f-epoch-%d-l2-%.5f.csv'
                                     % (market, hidden_size, LR, training_epochs, l2Reg),
                                     index=False)

            predsCols = ['dtStart', '%s-y-true' % market]
            predsCols.extend(['%s-pred%d' % (market, j) for j in range(1, nRandom_start + 1, 1)])

            market_trainPred = pd.DataFrame(market_trainPred, columns=predsCols)
            market_trainPred.to_csv('./Results/%s-trainPreds-s1-%d-LR-%.6f-epoch-%d-l2-%.5f.csv'
                                    % (market, hidden_size, LR, training_epochs, l2Reg),
                                    index=False)

            market_testPred = pd.DataFrame(market_testPred, columns=predsCols)
            market_testPred.to_csv('./Results/%s-testPreds-s1-%d-LR-%.6f-epoch-%d-l2-%.5f.csv'
                                   % (market, hidden_size, LR, training_epochs, l2Reg),
                                   index=False)
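A minimal driver sketch (hypothetical argument values; sharpeLoss, MLP_1layerFixedOutBias, initializers and dataProcessing are assumed to come from the surrounding project):

if __name__ == '__main__':
    myMainFunc(market='SQ', hidden_size=10, l2Reg=0.01,
               Files_Folder='run1',
               learning_rate_grid=[0.001, 0.0005],
               epoch_grid=[100, 200])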
Example #6
def myMainFunc(rand_start, hidden_size, l2Reg, learning_rate_grid, epoch_grid):

    #random.seed(seeds[rand_start - 1])
    #np.random.seed(seeds[rand_start - 1])
    #tf.set_random_seed(seeds[rand_start - 1])
    market = 'SQ'

    if not os.path.exists('./MLP-checkpointFiles/' + str(rand_start)):
        os.makedirs('./MLP-checkpointFiles/' + str(rand_start))

    # objective function
    objective = sharpeLoss
    curr_optimizer = tf.train.AdamOptimizer
    network_activation = tf.nn.tanh

    # data parameters
    lookback = 30
    lookahead = 1
    rolling_sd_window = 100

    # training parameters:
    batch_size = 100
    test_start_date = 20070418

    patience = 20  # stop training if the train loss does not improve for 20 consecutive epochs
    counter = 0
    best_train_loss = np.inf

    # loading data
    datadir = 'C:/behrouz/Projects/DailyModels_new/NeuralNet/tf-SQ-only/data/%s_Commision-and-Slippage-limits.csv'

    # get the common dates and then merge each data making sure they have common dates:
    data = pd.read_csv(datadir % market)

    curr_market_data = \
            dataProcessing.time_series_toMatrix(data,  test_start_date, lookback = lookback,
                                                look_ahead = lookahead, sd_window = rolling_sd_window)
    trainX, trainY, train_ret_1day, train_dates = curr_market_data[:4]

    total_batches = trainX.shape[0] // batch_size
    rem = trainX.shape[0] % batch_size

    decay_steps = total_batches
    decay_rate = 1.0
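    # note: decay_rate = 1.0 above keeps the learning rate constant under the
    # (presumed) exponential-decay schedule built inside the model function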

    for LR in learning_rate_grid:

        for training_epochs in epoch_grid:
            print('Hidden Size =', hidden_size, 'Learning rate=', LR,
                  'TrainingEpochs=', training_epochs, 'L2 Reg=', l2Reg)

            weights = {
                'h1':
                initializers.xavier_from_tf_initializer(
                    [lookback, hidden_size], name='W_1'),
                'out':
                initializers.xavier_from_tf_initializer([hidden_size, 1],
                                                        name='W_out')
            }

            biases = {
                'b1': initializers.bias_initializer([hidden_size], name='B_1')
                #, 'out': initializers.bias_initializer([1], name='B_out')
            }
            # placeholders
            x = tf.placeholder(tf.float32, [None, lookback])
            y = tf.placeholder(tf.float32, [None])

            train_op, output, sharpe_plus_l2_loss, classification_loss = MLP_1layer_fixedBiasOut_sigmoid(
                x, y, weights, biases, curr_optimizer, objective,
                network_activation, l2Reg, l2Reg, LR, decay_steps, decay_rate)

            # initialize all tensors- to be run in Session!
            init = tf.global_variables_initializer()

            # saver for restoring the whole model graph of tensors from the checkpoint file
            saver = tf.train.Saver()

            # launch default graph:
            with tf.Session() as sess:

                sess.run(init)

                # training cycle:
                for epoch in range(training_epochs):

                    # shuffle the training data at the beginning of each epoch
                    # (comment the next four lines out for fully reproducible runs)
                    a = np.arange(trainX.shape[0])
                    np.random.shuffle(a)
                    trainX = trainX[a, :]
                    trainY = trainY[a]
                    # loop over all batches:
                    for batch_number in range(total_batches):
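                        # the last batch is simply the final batch_size rows, so the
                        # leftover rem samples are still trained on and every batch
                        # keeps the same size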

                        if (batch_number + 1) == total_batches and rem != 0:
                            xBatch = trainX[(total_batches - 1) * batch_size +
                                            rem:, :]
                            trainY_batch = trainY[(total_batches - 1) *
                                                  batch_size + rem:]

                        else:
                            xBatch = trainX[batch_number *
                                            batch_size:(batch_number + 1) *
                                            batch_size, :]
                            trainY_batch = trainY[batch_number *
                                                  batch_size:(batch_number +
                                                              1) * batch_size]

                        # run optimization
                        _ = sess.run(train_op,
                                     feed_dict={
                                         x: xBatch,
                                         y: trainY_batch
                                     })

                    #curr_loss = sess.run(sharpe_plus_l2_loss, feed_dict={x: trainX, y: trainY})
                    curr_loss, curr_classification_loss = sess.run(
                        [sharpe_plus_l2_loss, classification_loss],
                        feed_dict={
                            x: trainX,
                            y: trainY
                        })
                    print('=' * 20)
                    print('Epoch=', epoch, 'Current Train Loss=', curr_loss,
                          'Best Train Loss=', best_train_loss)
                    print('Epoch=', epoch, 'Classification Loss=',
                          curr_classification_loss)

                    if curr_loss < best_train_loss:
                        counter = 0
                        best_train_loss = curr_loss
                        saver.save(
                            sess, './MLP-checkpointFiles/' + str(rand_start) +
                            '/run%d-s-%d-LR-%.6f-epoch-%d-l2-%.5f.ckpt' %
                            (rand_start, hidden_size, LR, training_epochs,
                             l2Reg))

                    else:
                        counter += 1
                    if counter >= patience:
                        break

            # resetting the graph to be built again in the next iteration of for loop
            tf.reset_default_graph()
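A minimal driver sketch (hypothetical grid values; each rand_start writes its own checkpoint directory, which the prediction loop in Example #5 appears to restore using the same file-name pattern):

if __name__ == '__main__':
    for rand_start in range(1, 11):
        myMainFunc(rand_start, hidden_size=10, l2Reg=0.01,
                   learning_rate_grid=[0.001], epoch_grid=[200])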
Example #7
trans_data = trans_data.values
transCost_dict = dict(zip(trans_data[:, 0], trans_data[:, 1]))
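# e.g. a two-column array like [['SQ', 1.25], ['NQ', 0.75]] becomes the per-market
# transaction-cost map {'SQ': 1.25, 'NQ': 0.75} (values here are hypothetical)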

test_dict = {}
train_dict = {}

for i in range(0, len(markets), 1):

    data = pd.read_csv(datadir % markets[i])

    # Make sure we get data from all markets on exact common dates
    data = pd.merge(data, dates1, on='dtStart', how='inner')
    data = pd.merge(data, dates2, on='dtStart', how='inner')

    curr_market_data = \
        dataProcessing.time_series_toMatrix(data, 20070418, lookback= lookback, look_ahead=lookahead,
                                            sd_window= rolling_sd_window)

    train_dict[markets[i]] = copy.deepcopy(curr_market_data[:4])
    test_dict[markets[i]] = copy.deepcopy(curr_market_data[4:])

total_batches = train_dict[markets[0]][0].shape[0] // batch_size
rem = train_dict[markets[0]][0].shape[0] % batch_size
print('TOTAL BATCHES+++++++++++++++++++++++', total_batches)

train_basket_sharpes_array = np.zeros(training_epochs)
test_basket_sharpes_array = np.zeros(training_epochs)

train_indiv_sharpes = np.zeros((len(markets), training_epochs, 3))
test_indiv_sharpes = np.zeros((len(markets), training_epochs, 3))

for R in range(nRandom_start):
Example #8
def myparralelFunc(LR_gird, l2Reg, dates, objective, results_path):
    for i in range(0, len(markets), 1):
        data = pd.read_csv(datadir % markets[i])
        # Make sure we get data from all markets on exact common dates
        data = pd.merge(data, dates, on='dtStart', how='inner')
        curr_market_data = dataProcessing.time_series_toMatrix(
            data, date_train_end)
        if i == 0:
            trainX = curr_market_data[0]
            trainY = curr_market_data[1]
            train_Y_op_opD = curr_market_data[2]
            train_dates = curr_market_data[3]
            testX = curr_market_data[4]
            testY = curr_market_data[5]
            test_Y_op_opD = curr_market_data[6]
            test_dates = curr_market_data[7]
        else:
            trainX = np.append(trainX,
                               copy.deepcopy(curr_market_data[0]),
                               axis=0)
            trainY = np.dstack((trainY, copy.deepcopy(curr_market_data[1])))
            train_Y_op_opD = np.dstack(
                (train_Y_op_opD, copy.deepcopy(curr_market_data[2])))
            testX = np.append(testX,
                              copy.deepcopy(curr_market_data[4]),
                              axis=0)
            testY = np.dstack((testY, copy.deepcopy(curr_market_data[5])))
            test_Y_op_opD = np.dstack(
                (test_Y_op_opD, copy.deepcopy(curr_market_data[6])))

    trainY = np.transpose(trainY, [2, 1, 0])
    trainY = np.reshape(trainY, trainY.shape[:2])
    train_Y_op_opD = np.transpose(train_Y_op_opD, [2, 1, 0])
    train_Y_op_opD = np.reshape(train_Y_op_opD, train_Y_op_opD.shape[:2])
    testY = np.transpose(testY, [2, 1, 0])
    testY = np.reshape(testY, testY.shape[:2])
    test_Y_op_opD = np.transpose(test_Y_op_opD, [2, 1, 0])
    test_Y_op_opD = np.reshape(test_Y_op_opD, test_Y_op_opD.shape[:2])

    print(trainX.shape, trainY.shape)
    print(testX.shape, testY.shape)
    print('====')
    train_loss_mat = np.zeros((len(LR_gird), training_epochs))
    for i in range(len(LR_gird)):
        init_lr = LR_gird[i]
        random.seed(12345)
        np.random.seed(12345)
        tf.set_random_seed(12345)

        weights = {
            'out':
            initializers.xavier_from_tf_initializer([hidden1_size, 1],
                                                    name='W_out')
        }
        biases = {'out': initializers.bias_initializer([1], name='B_out')}
        # placeholders
        x = tf.placeholder(tf.float32, [len(markets), None, features])
        y = tf.placeholder(tf.float32, [len(markets), None])
        learning_rate = tf.placeholder(tf.float32)
        keep_prob = tf.placeholder(tf.float32)

        optimizer, output, sharpe_plus_l2_loss = vanilla_RNN(
            x,
            y,
            weights,
            biases,
            keep_prob,
            curr_optimizer,
            learning_rate,
            objective,
            markets=markets,
            activation=tf.nn.tanh,
            l2Reg=l2Reg,
            hidden_size=hidden1_size,
            n_layers=1)
        # initialize all tensors- to be run in Session!
        init = tf.global_variables_initializer()
        # saver for restoring the whole model graph of tensors
        # from the checkpoint file
        saver = tf.train.Saver()

        # launch default graph:
        with tf.Session() as sess:
            sess.run(init)
            # training cycle:
            decay_exponent = 1
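            # manual step decay: once stationary_epochs have passed, the learning rate
            # is shrunk geometrically each epoch, i.e. LR = init_lr * decya_fact ** k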
            for epoch in range(training_epochs):
                if epoch >= stationary_epochs:
                    LR = init_lr * (decya_fact**decay_exponent)
                    decay_exponent += 1
                    feed_dict = {
                        x: trainX,
                        y: train_Y_op_opD,
                        learning_rate: LR,
                        keep_prob: dropout
                    }
                else:
                    feed_dict = {
                        x: trainX,
                        y: train_Y_op_opD,
                        learning_rate: init_lr,
                        keep_prob: dropout
                    }

                _ = sess.run(optimizer, feed_dict=feed_dict)

                train_loss = sess.run(sharpe_plus_l2_loss,
                                      feed_dict={
                                          x: trainX,
                                          y: train_Y_op_opD,
                                          keep_prob: 1.
                                      })
                print('L2 reg=', l2Reg, 'Epoch=', epoch, 'TrainLoss= ',
                      train_loss)
                train_loss_mat[i, epoch] = train_loss

            saver.save(
                sess, results_path + '/checkpointFiles/' + str(l2Reg) +
                '/checkPoint-LR-%.6f-l2-%.4f.ckpt' % (init_lr, l2Reg))

        # resetting the graph to be built again in the next iteration of the for loop
        tf.reset_default_graph()
    return train_loss_mat
Example #9
def myMainFunc(random_start_indicies):
    markets = ('SQ', 'MQ', 'NQ', 'DQ', 'RN')

    # objective function
    # objective = losses_and_metrics.sum_sharpeLoss
    objective = losses_and_metrics.sharpeLoss
    # objective = losses_and_metrics.basket_trading_pos_size_sharpeLoss

    curr_optimizer = tf.train.AdamOptimizer
    # curr_optimizer = tf.train.RMSPropOptimizer

    # data parameters
    Y_toUse = 1  # 1: scaled return, 2:1-day return
    lookback = 30
    lookahead = 1
    rolling_sd_window = 100

    # training parameters:

    batch_size = 100
    # network parameters:
    network_activation = tf.nn.tanh
    dropout = 1.

    input_feats = lookback
    test_start_date = 20070418

    hidden_size_grid = [5, 10, 15, 20]
    learning_rate_grid = [0.001, 0.0005, 0.0001, 0.00005]
    epoch_grid = [50, 100, 150, 200, 300]
    l2_grid = [0, 0.01, 0.1, 1, 5]
    valid_frac = 0.2

    # loading data
    datadir = 'C:/behrouz/Projects/DailyModels_new/NeuralNet/hyper-param-optimization/tf-hyperParam-opt/data/%s_Commision-and-Slippage-limits.csv'

    # get the common dates and then merge each data making sure they have common dates:

    data = pd.read_csv(datadir % markets[0])
    for i in range(1, len(markets), 1):
        data1 = pd.read_csv(datadir % markets[i])
        data = pd.merge(data, data1, on='dtStart', how='inner')

    dates = data[['dtStart']]

    # pick a random set of indices from the training range, shared across all markets, as validation

    test_start_ind = int(np.where(dates.values == test_start_date)
                         [0]) - rolling_sd_window - lookback - lookahead
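    # the subtraction presumably accounts for the rows consumed by the rolling-SD
    # window and the lookback/lookahead windows when raw rows become samples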
    inds = np.arange(test_start_ind)

    valid_inds = pd.read_csv('Validation_indicies.csv').values
    valid_inds = valid_inds.flatten()

    #valid_inds = np.random.choice(inds, size=int(valid_frac * test_start_ind), replace=False)
    #valid_inds = np.sort(valid_inds)
    # writing validation indicies to file
    # valid_inds_df = pd.DataFrame(valid_inds)
    #valid_inds_df.to_csv('Validation_indicies.csv', index=False)
    train_inds = [i for i in inds if i not in valid_inds]
    test_dict = {}
    train_dict = {}
    validation_dict = {}

    for i in range(0, len(markets), 1):
        data = pd.read_csv(datadir % markets[i])

        # Make sure we get data from all markets on exact common dates
        data = pd.merge(data, dates, on='dtStart', how='inner')

        curr_market_data = \
            dataProcessing.time_series_toMatrix(data, train_inds, valid_inds, 20070418, lookback=lookback,
                                                look_ahead=lookahead, sd_window=rolling_sd_window)

        train_dict[markets[i]] = copy.deepcopy(curr_market_data[:4])
        validation_dict[markets[i]] = copy.deepcopy(curr_market_data[4:8])
        test_dict[markets[i]] = copy.deepcopy(curr_market_data[8:])

    total_batches = train_dict[markets[0]][0].shape[0] // batch_size
    rem = train_dict[markets[0]][0].shape[0] % batch_size

    print('TOTAL BATCHES+++++++++++++++++++++++', total_batches)

    for R in random_start_indicies:
        print('RUN %d optimization begins..' % R)

        for hidden_size in hidden_size_grid:

            for LR in learning_rate_grid:

                for training_epochs in epoch_grid:

                    for l2Reg in l2_grid:

                        print('Hidden Size =', hidden_size, 'Learning rate=',
                              LR, 'TrainingEpochs=', training_epochs,
                              'L2 Reg=', l2Reg)

                        weights = {
                            'h1':
                            initializers.xavier_from_tf_initializer(
                                [lookback, hidden_size], name='W_1'),
                            'h2':
                            initializers.xavier_from_tf_initializer(
                                [hidden_size, hidden_size], name='W_2'),
                            'out':
                            initializers.xavier_from_tf_initializer(
                                [hidden_size, 1], name='W_out')
                        }

                        biases = {
                            'b1':
                            initializers.bias_initializer([hidden_size],
                                                          name='B_1'),
                            'b2':
                            initializers.bias_initializer([hidden_size],
                                                          name='B_2'),
                            'out':
                            initializers.bias_initializer([1], name='B_out')
                        }
                        # placeholders
                        x = tf.placeholder(tf.float32, [None, input_feats])
                        y = tf.placeholder(tf.float32, [None])
                        learning_rate = tf.placeholder(tf.float32)
                        keep_prob = tf.placeholder(tf.float32)

                        optimizer, output, sharpe_plus_l2_loss = \
                            MLP_1layer(x, y, weights, biases, keep_prob, curr_optimizer, learning_rate, objective,
                                batch_size=batch_size,
                                markets=markets,
                                activation=network_activation, l2Reg=l2Reg, l2RegOutput=l2Reg * 1.,
                                l2Reg_biases=l2Reg * 1.)

                        # initialize all tensors- to be run in Session!

                        init = tf.global_variables_initializer()

                        # saver for restoring the whole model graph of tensors
                        # from the checkpoint file

                        saver = tf.train.Saver()

                        # launch default graph:
                        with tf.Session() as sess:

                            sess.run(init)

                            # training cycle:
                            for epoch in range(training_epochs):

                                # shuffle the training data at the beginning of each epoch!

                                curr_train_dict = dataProcessing.shuffle_train_dict(
                                    train_dict, markets)

                                # loop over all batches:
                                for batch_number in range(total_batches):
                                    xBatch, trainY_batch = dataProcessing.next_batch_dict(
                                        curr_train_dict, batch_number,
                                        batch_size, rem, Y_toUse,
                                        total_batches, markets)
                                    # run optimization

                                    _ = sess.run(optimizer,
                                                 feed_dict={
                                                     x: xBatch,
                                                     y: trainY_batch,
                                                     learning_rate: LR,
                                                     keep_prob: dropout
                                                 })

                            #print(' Optimization finished! saving model graph of all tensors to file')

                            save_path = saver.save(
                                sess,
                                './MLP-checkpointFiles/run%d-s-%d-LR-%.6f-epoch-%d-l2-%.5f.ckpt'
                                % (R, hidden_size, LR, training_epochs, l2Reg))

                        # resetting the graph to be built again in the next iteration of for loop

                        tf.reset_default_graph()
    return random_start_indicies
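A minimal driver sketch (hypothetical; the function relies on the project's own modules such as dataProcessing, initializers and losses_and_metrics, and the restarts could also be split across processes, e.g. with multiprocessing.Pool, since it simply iterates over whatever indices it is given):

import numpy as np

if __name__ == '__main__':
    myMainFunc(np.arange(1, 11))   # random restarts 1..10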
Example #10
def myparralelFunc(random_start_indicies, l2Reg, results_path):

    train_dict = {}
    test_dict = {}
    aug_multipliers = [2]
    aug_multipliers_val = [2]
    for i in range(len(markets)):
        data = pd.read_csv(datadir % markets[i])
        # Make sure we get data from all markets on exact common dates
        curr_market_data = \
            dataProcessing.time_series_toMatrix(data, 20070418, lookback, lookahead)
        # print(markets[i], curr_market_data_aug[0].shape)
        if i == 0:
            trainX, trainY, trainRetY = curr_market_data[:3]
            testX, testY, testRet = curr_market_data[4:7]
        else:
            trainX = np.append(trainX, curr_market_data[0], axis=0)
            trainY = np.append(trainY, curr_market_data[1], axis=0)
            trainRetY = np.append(trainRetY, curr_market_data[2], axis=0)
            testX = np.append(testX, curr_market_data[4], axis=0)
            testY = np.append(testY, curr_market_data[5], axis=0)
            testRet = np.append(testRet, curr_market_data[6], axis=0)

    for i in range(len(markets)):
        data = pd.read_csv(datadir % markets[i])
        for aug in aug_multipliers:
            curr_market_data_aug = dataProcessing.time_series_toMatrix_AUG(
                data, 20070418, lookback, lookahead, aug)
            curr_market_data_aug_toTrain = dataProcessing.time_series_toMatrix_AUG(
                data, 20070418, lookback, lookahead, aug)

            # print(markets[i], curr_market_data_aug[0].shape)
            if i == 0 and aug == aug_multipliers[0]:
                trainX1, trainY1, retY1 = curr_market_data_aug[:3]
                trainX11, trainY11, trainRetY11 = curr_market_data_aug_toTrain[:3]

            else:
                trainX1 = np.append(trainX1, curr_market_data_aug[0], axis=0)
                trainY1 = np.append(trainY1, curr_market_data_aug[1], axis=0)
                retY1 = np.append(retY1, curr_market_data_aug[2], axis=0)
                trainX11 = np.append(trainX11,
                                     curr_market_data_aug_toTrain[0],
                                     axis=0)
                trainY11 = np.append(trainY11,
                                     curr_market_data_aug_toTrain[1],
                                     axis=0)
                trainRetY11 = np.append(trainRetY11,
                                        curr_market_data_aug_toTrain[2],
                                        axis=0)

        for aug in aug_multipliers_val:
            val_curr_market_data_aug = dataProcessing.time_series_toMatrix_AUG(
                data, 20070418, lookback, lookahead, aug)
            if i == 0 and aug == aug_multipliers_val[0]:
                validX1, validY1, validretY1 = val_curr_market_data_aug[:3]
            else:
                validX1 = np.append(validX1,
                                    val_curr_market_data_aug[0],
                                    axis=0)
                validY1 = np.append(validY1,
                                    val_curr_market_data_aug[1],
                                    axis=0)
                validretY1 = np.append(validretY1,
                                       val_curr_market_data_aug[2],
                                       axis=0)

    print(trainX.shape, trainY.shape)
    train_dict['TrainPurturb'] = copy.deepcopy([trainX1, trainY1, retY1])
    train_dict['Train'] = copy.deepcopy((trainX, trainY, trainRetY))
    test_dict['Test'] = copy.deepcopy((testX, testY, testRet))
    print('Shape of training data=', trainX.shape, trainY.shape,
          trainRetY.shape)
    print('Shape of training perturbed data=', trainX1.shape, trainY1.shape)
    print('Shape of validation data=', validX1.shape, validY1.shape,
          validretY1.shape)
    print('Shape of test data:', testX.shape, testRet.shape)
    finalWeights = np.zeros((total_params, len(random_start_indicies)))
    for R in random_start_indicies:
        print('RUN %d optimization begins..' % R)
        ##################################################
        # a = np.arange(trainX.shape[0])
        # np.random.shuffle(a)
        # trainX = trainX[a,:]
        # trainY = trainY[a]
        # trainRet = retY[a]
        # validInd = int(trainX.shape[0] * valFrac)
        # validX, validY,validRet = trainX[:validInd,:], trainY[:validInd], trainRet[:validInd]
        # trainX, trainY = trainX[validInd:,:], trainY[validInd:]
        ###################################################
        #trainX = np.append(trainX,trainX11, axis = 0)
        #trainY = np.append(trainY,trainY11, axis = 0)
        #trainRetY = np.append(trainRetY,trainRetY11, axis = 0)
        ############################################
        #trainX = trainX11
        #trainY = trainY11
        #trainRetY = trainRetY11
        ##################################################
        weights = {
            'h1':
            glorot_normal_weight_initializer([lookback * 2, hidden1_size]),
            'h2':
            glorot_normal_weight_initializer([hidden1_size, hidden2_size]),
            'out': glorot_normal_weight_initializer([hidden2_size, 1])
        }
        biases = {
            'b1': np.zeros(hidden1_size),
            'b2': np.zeros(hidden2_size),
            'out': np.zeros(1)
        }
        flat_params = weight_flatten(weights, biases)
        print('Length of Flattened parameters=', len(flat_params))
        # retall: returns the solution after each iteration
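        # fprime=None: fmin_cg approximates the gradient numerically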
        finalWeights[:, R - 1] = optimize.fmin_cg(MLP,
                                                  flat_params,
                                                  fprime=None,
                                                  retall=False,
                                                  args=(trainX, trainY, l2Reg,
                                                        sharpeLoss))
    np.save(results_path + '/' + str(l2Reg) + '/FinalWeights-l2-%.2f' % l2Reg,
            finalWeights)
    # prediction and saving to file
    if not os.path.exists(results_path + '/' + str(l2Reg) + '/predictions'):
        os.makedirs(results_path + '/' + str(l2Reg) + '/predictions')
    #finalWeights = np.load(results_path +'/'+ str(l2Reg)+'/FinalWeights-l2-%.2f'%l2Reg+'.npy')
    for i in range(len(markets)):
        data = pd.read_csv(datadir % markets[i])
        # Make sure we get data from all markets on exact common dates
        curr_market_data = dataProcessing.time_series_toMatrix(
            data, 20070418, lookback, lookahead)
        trainX = curr_market_data[0]
        testX = curr_market_data[4]
        trainMat = np.zeros((trainX.shape[0], len(random_start_indicies) + 2))
        testMat = np.zeros((testX.shape[0], len(random_start_indicies) + 2))
        trainMat[:, 0] = curr_market_data[3]
        trainMat[:, 1] = curr_market_data[2]
        testMat[:, 0] = curr_market_data[-1]
        testMat[:, 1] = curr_market_data[-2]
        for R in range(len(random_start_indicies)):
            trainMat[:, R + 2] = MLP_predict(trainX, finalWeights[:, R])
            testMat[:, R + 2] = MLP_predict(testX, finalWeights[:, R])
        predsCols = ['dtStart', '%s-y-true' % markets[i]]
        predsCols.extend([
            '%s-pred%d' % (markets[i], j)
            for j in range(1,
                           len(random_start_indicies) + 1, 1)
        ])
        market_trainPred = pd.DataFrame(trainMat, columns=predsCols)
        market_trainPred.to_csv(results_path + '/' + str(l2Reg) +
                                '/predictions/' +
                                '%s-trainPreds.csv' % markets[i],
                                index=False)

        market_testPred = pd.DataFrame(testMat, columns=predsCols)
        market_testPred.to_csv(results_path + '/' + str(l2Reg) + '/predictions/' +
                               '%s-testPreds.csv' % markets[i],
                               index=False)