def run_gru(s):
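    """Train a GRU forecaster with windowed normalization; reports NRMSE/MAE.

    Note: superseded by the second run_gru definition below, which shadows
    this one when the module is loaded.
    """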

    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)
    rnn = Sequential()
    rnn.add(
        GRU(s.nodes,
            input_shape=(None, x_dims),
            kernel_initializer='he_uniform',
            stateful=False))

    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)  # alternative decay: 1e-3
    rnn.compile(loss='mae', optimizer=opt)

    # load and prepare the dataset
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # standardize data by subtracting mean and dividing by std
    #(meanSeq, stdSeq) = dp.normalize('data', sequence)

    dp.windowed_normalize(sequence)

    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence)

    predictedInput = np.zeros((len(sequence), ))
    targetInput = np.zeros((len(sequence), ))
    trueData = np.zeros((len(sequence), ))

    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence), ))
        targetInputNodiff = np.zeros((len(sequence), ))

    if s.dataSet in differenceSets:
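        # Difference the series (for stationarity); keep the original so
        # predictions can be inverse-differenced when reporting.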
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    allX = getX(sequence, s)
    allY = np.array(sequence['data'])

    # Drop the first 48 samples; they appear to serve as a warm-up /
    # front-buffer window for the feature construction in getX.
    allX = allX[48:]
    allY = allY[48:]
    #if s.dataSet not in x_cols:
    #    allY = allY[s.lookback:]
    trainX = allX[0:s.nTrain]
    trainY = allY[s.predictionStep:s.nTrain + s.predictionStep]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    curBatch = 1.0
    callback = LossCallback()
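    # Pass the series (warm-up window of 48 values plus the training slice
    # and a small margin) to configure_batches; the 48 matches the samples
    # trimmed from allX/allY above.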
    temp_set = np.array(sequence['data'])[:48 + s.nTrain + 5]
    configure_batches(48, s.batch_size,
                      np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
    rnn.fit(trainX,
            trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2),
            callbacks=[callback])
    for i in xrange(0, s.nTrain):
        targetInput[i] = allY[i + s.predictionStep]

    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
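        # When online mode is enabled, periodically refit the network on
        # the most recent nTrain points.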
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain:i]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            temp_set = np.array(sequence['data'])[i - s.nTrain -
                                                  s.predictionStep - 48:i]
            configure_batches(48, s.batch_size,
                              np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
            rnn.fit(trainX,
                    trainY,
                    epochs=s.epochs,
                    batch_size=s.batch_size,
                    verbose=min(s.max_verbosity, 2),
                    callbacks=[callback])

        targetInput[i] = allY[i + s.predictionStep]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                      predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                   targetInput[i], i - 1)
        trueData[i] = sequence['data'][i]

    predictedInput[0] = 0

    #predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    #targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    dp.windowed_denormalize(predictedInput, targetInput)
    if s.dataSet in differenceSets:

        # predictedInputNodiff = dp.denormalize(predictedInputNodiff)
        # targetInputNodiff = dp.denormalize(targetInputNodiff)
        pass
    #trueData = (trueData * stdSeq) + meanSeq

    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
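    # NRMSE: RMSE over the evaluated region divided by the standard
    # deviation of the target series.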
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = np.nan
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)

    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff, targetInputNodiff,
                            'gru_nodiff', s.predictionStep, s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff,
                                                 targetInputNodiff)
        squareDeviation[:skipTrain] = np.nan
        nrmse = np.sqrt(
            np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
        mase = errors.get_mase(predictedInput, targetInput,
                               np.roll(targetInput, 24))
        if s.max_verbosity > 0:
            print "MASE {}".format(mase)
    return nrmse


def run_gru(s):
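    """Train a GRU with full-series standardization and optional online
    retraining.

    This definition shadows the run_gru above; it returns (mase,
    closer_rate) instead of nrmse.
    """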
    global global_step
    global increment_global_step_op
    global reset_global_step_op
    global batches
    global images_placeholder
    global batches_op
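    # TensorFlow bookkeeping exposed as globals (presumably consumed by
    # callbacks elsewhere): a step counter plus a variable holding the
    # reshaped training batches, filled via images_placeholder/batches_op.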
    global_step = tf.Variable(0,
                              name='global_step',
                              trainable=False,
                              dtype=tf.int32)
    increment_global_step_op = tf.assign(global_step, global_step + 1)
    reset_global_step_op = tf.assign(global_step, 0)

    batches = tf.get_variable(
        "batches", [s.nTrain / int(s.batch_size), s.batch_size, 1, 1],
        dtype=tf.float32,
        initializer=tf.zeros_initializer)
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(s.nTrain / int(s.batch_size),
                                               s.batch_size, 1, 1))
    batches_op = tf.assign(batches, images_placeholder)

    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)
    rnn = Sequential()
    rnn.add(
        GRU(s.nodes,
            input_shape=(None, x_dims),
            kernel_initializer='he_uniform',
            stateful=False))

    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)  # alternative decay: 1e-3
    rnn.compile(loss='mae', optimizer=opt)

    # load and prepare the dataset
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # standardize data by subtracting mean and dividing by std
    (meanSeq, stdSeq) = dp.normalize('data', sequence, s.nTrain)

    #dp.windowed_normalize(sequence)

    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence, s.nTrain)

    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence), ))
        targetInputNodiff = np.zeros((len(sequence), ))

    if s.dataSet in differenceSets:
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    seq_full = sequence['data'].values
    seq_actual = seq_full[s.front_buffer:]
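    # Build features and targets; the slicing appears intended so that
    # allY[i] is the value s.predictionStep steps ahead of the window that
    # produced allX[i], with front_buffer keeping the two aligned.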
    allX = getX(seq_full, s)
    allY = seq_actual[s.predictionStep - 1:]
    predictedInput = np.full((len(allY), ), np.nan)

    #if s.dataSet not in x_cols:
    #    allY = allY[s.lookback:]
    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    rnn.fit(trainX,
            trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2))
    #for i in xrange(0,s.nTrain):
    #    targetInput[i] = allY[i+s.predictionStep]
    targetInput = np.copy(allY)  # copy: the in-place inverse-difference edits below must not mutate allY
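    # Diagnostics: record |prediction - last input| and whether each
    # prediction lands closer to the true target than to the last input
    # (i.e. whether the model beats a persistence guess).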
    pred_diffs = []
    pred_closer_to_actual = []
    isFirst = True
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
        #for i in tqdm(xrange(0, len(allX)), disable=s.max_verbosity == 0):
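        # When online mode is enabled, periodically refit on the most
        # recent nTrain points.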
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            rnn.fit(trainX,
                    trainY,
                    epochs=s.epochs,
                    batch_size=s.batch_size,
                    verbose=0)

        #targetInput[i] = allY[i]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if isFirst:
            if s.max_verbosity > 0:
                print predictedInput[i]
            isFirst = False
        #predictedInput[i] = targetInput[i-1440]
        pred_diffs.append(abs(predictedInput[i] - allX[i][-1]))
        pred_closer_to_actual.append(
            abs(predictedInput[i] - targetInput[i]) < abs(predictedInput[i] -
                                                          allX[i][-1]))

        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                      predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                   targetInput[i], i - 1)
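    # Mask out the training region so the metrics below only score
    # genuine out-of-sample predictions.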
    for i in range(s.nTrain + s.predictionStep):
        predictedInput[i] = np.nan
    predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    #dp.windowed_denormalize(predictedInput, targetInput)
    print "FINAL", predictedInput[-1], targetInput[-1], len(
        predictedInput), len(targetInput)
    if s.dataSet in differenceSets:

        # predictedInputNodiff = dp.denormalize(predictedInputNodiff)
        # targetInputNodiff = dp.denormalize(targetInputNodiff)
        pass
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = np.nan
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)
    mase = errors.get_mase(predictedInput, targetInput,
                           np.roll(targetInput, s.season))
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)
    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff, targetInputNodiff,
                            'gru_nodiff', s.predictionStep, s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff,
                                                 targetInputNodiff)
        squareDeviation[:skipTrain] = np.nan
        nrmse = np.sqrt(
            np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
    closer_rate = pred_closer_to_actual.count(True) / float(
        len(pred_closer_to_actual))
    if s.max_verbosity > 0:
        pred_diffs.sort()
        # min, max, and 90th-percentile absolute change from the last input
        print pred_diffs[0], pred_diffs[-1], pred_diffs[int(0.9 *
                                                            len(pred_diffs))]
        print "Good results:", closer_rate
    return mase, closer_rate