Code example #1
def run_gru(s):
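    """Train a GRU/LSTM forecaster (Keras or raw TensorFlow, depending on
    s.implementation) on the first nTrain points, optionally retrain online
    every retrain_interval steps, and return the MASE."""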
    prob = tf.placeholder_with_default(1.0, shape=())  # retain probability for TF dropout (1.0 = no dropout at inference)

    global global_step
    global increment_global_step_op
    global reset_global_step_op
    global batches
    global images_placeholder
    global batches_op
    global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int32)
    increment_global_step_op = tf.assign(global_step, global_step + 1)
    reset_global_step_op = tf.assign(global_step, 0)

    batches = tf.get_variable("batches", [s.nTrain // int(s.batch_size), s.batch_size, 1, 1], dtype=tf.float32,
                              initializer=tf.zeros_initializer)
    images_placeholder = tf.placeholder(tf.float32, shape=(s.nTrain // int(s.batch_size), s.batch_size, 1, 1))
    batches_op = tf.assign(batches, images_placeholder)
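    # The variables above are exposed as module-level globals, presumably so
    # helper code outside this function can run the increment/reset/assign ops
    # (assumption; those consumers are not shown here).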


    x_dims = s.lookback
    random.seed(6)
    np.random.seed(6)
    tf.set_random_seed(6)
    if s.implementation == "keras":
        if s.use_binary:
            raise Exception("Binary Keras not implemented")
        rnn = Sequential()
        if s.rnn_type == "lstm":
            rnn.add(LSTM(s.nodes, input_shape=(None,x_dims), kernel_initializer='he_uniform'))
        elif s.rnn_type == "gru":
            rnn.add(GRU(s.nodes, input_shape=(None, x_dims), kernel_initializer='he_uniform'))

        rnn.add(Dropout(0.5))
        rnn.add(Dense(1, kernel_initializer='he_uniform'))
        opt = rmsprop(lr=s.lr)  # default would be 1e-3
        rnn.compile(loss='mae', optimizer=opt)

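        # NOTE: `rnn` is reassigned below; the functional model (a gated
        # feedforward block built from sigmoid/tanh Dense layers combined with
        # Multiply) replaces the Sequential GRU/LSTM just compiled, so only
        # the functional model is actually trained in this branch.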
        input = Input(shape=(1, x_dims))
        dense1 = Dense(s.nodes, activation='sigmoid')(input)
        dense2 = Dense(s.nodes, activation='sigmoid')(input)
        dense3 = Dense(s.nodes, activation='tanh')(input)
        mult1 = Multiply()([dense2, dense3])
        act1 = Activation('tanh')(mult1)
        mult2 = Multiply()([dense1, act1])
        reshape = Reshape((s.nodes,))(mult2)
        dropout = Dropout(0.5)(reshape)
        dense_out = Dense(1)(dropout)
        rnn = Model(inputs=[input], outputs=[dense_out])
        opt = adam(lr=s.lr)  # 1e-3)
        rnn.compile(loss='mae', optimizer=opt)
        rnn.summary()  # summary() prints itself; `print rnn.summary()` would also emit "None"


    elif s.implementation == "tf":
        data = tf.placeholder(tf.float32, [None, s.lookback, 1])  # (batch, lookback steps, input dimension)
        target = tf.placeholder(tf.float32, [None, 1])
        if s.rnn_type == "lstm" and s.use_binary:
            cell = rnn_tf.LSTMCell(s.nodes)

        elif s.rnn_type == "lstm" and not s.use_binary:
            cell = tf.nn.rnn_cell.LSTMCell(s.nodes)
        elif s.rnn_type == "gru" and s.use_binary:
            cell = rnn_tf.GRUCell(s.nodes)
        elif s.rnn_type == "gru" and not s.use_binary:
            cell = tf.nn.rnn_cell.GRUCell(s.nodes)


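        # Unroll the chosen cell over the lookback window with dynamic_rnn;
        # `val` has shape (batch, time, nodes).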
        val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
        with tf.name_scope('rnn_summaries'):
            var = val
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)
        val = tf.nn.dropout(val, prob)
        last = val[:, -1, :]  # use the final timestep's output for the regression head
        if not s.use_binary:
            dense = tf.layers.dense(last, 1)
        else:
            dense = core_discretize.dense(last, 1)
        with tf.name_scope('dense_summaries'):
            var = dense
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)
        pred = tf.reshape(dense, (tf.shape(dense)[0], 1))
        summary = tf.summary.merge_all()
        optimizer = tf.train.AdamOptimizer(learning_rate=s.lr)
        #cost = tf.losses.mean_squared_error(target, pred)
        cost = tf.reduce_mean(tf.abs(target - pred))  # MAE, matching the Keras branch's loss='mae'
        minimize = optimizer.minimize(cost)

    else:
        raise Exception("Unknown implementation " + s.implementation)


    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    seq_full = sequence['data'].values #use .values to copy

    targetInput = seq_full[s.front_buffer + s.predictionStep - 1:].copy() #grab this now to avoid having to denormalize


    dp = DataProcessor()
    if s.normalization_type == 'default':
        (meanSeq, stdSeq) = dp.normalize('data', sequence, s.nTrain)
    elif s.normalization_type == 'windowed':
        dp.windowed_normalize(sequence)
    elif s.normalization_type == 'AN':
        an = AdaptiveNormalizer(s.lookback, s.lookback + s.predictionStep)
        an.set_pruning(False)
        an.set_source_data(seq_full, s.nTrain)
        an.do_ma('s')
        an.do_stationary()
        an.remove_outliers()
        seq_norm = an.do_adaptive_normalize()
    else:
        raise Exception("Unsupported normalization type: " + s.normalization_type)

    seq_actual = seq_full[s.front_buffer:] #Leave enough headroom for MASE calculation and lookback

    seq_full_norm = sequence['data'].values
    seq_actual_norm = seq_full_norm[s.front_buffer:]

    if s.normalization_type != "AN":
        #Default and windowed change the seq itself but still require creating lookback frames
        allX = getX(seq_full_norm, s)
        allY = seq_actual_norm[s.predictionStep-1:]
    else:
        #AN creates a new array but takes care of lookback internally
        allX = seq_norm[:, 0:-s.predictionStep]
        allY = np.reshape(seq_norm[:,-1], (-1,))
        # TODO FIX PROPERLY (now rolled too far)
        too_long = len(allX) - (len(seq_full) - s.front_buffer - s.predictionStep + 1)
        if too_long > 0:
            allX = allX[too_long:]
            allY = allY[too_long:]

    print len(allX), len(allY), s.front_buffer
    predictedInput = np.full((len(allY),), np.nan) #Initialize all predictions to NaN

    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    if s.implementation == "tf":
        # The TF placeholders expect (N, lookback, 1) inputs and (N, 1) targets
        trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
        trainY = np.reshape(trainY, (trainY.shape[0], 1))
    else:
        # Keras expects (N, 1, x_dims) inputs
        trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        trainY = np.reshape(trainY, (trainY.shape[0],))
    if s.implementation == "keras":
        rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size, verbose=min(s.max_verbosity, 2))
    elif s.implementation == "tf":
        sess = tf.Session()
        writer = tf.summary.FileWriter("results/", graph=sess.graph)
        init = tf.global_variables_initializer()
        sess.run(init)

        for v in tf.trainable_variables():
            print v.name
        for epoch in tqdm(range(s.epochs)):
            the_cost, _, summ = sess.run([cost, minimize, summary], feed_dict={data: trainX, target: trainY, prob: 0.5})
            writer.add_summary(summ, epoch)
            if epoch % 10 == 0:
                print the_cost
            #print(psutil.Process(os.getpid()).memory_percent())
            if s.rnn_type == "gru":
                # Track the weight range of the GRU gate kernel during training
                var = [v for v in tf.trainable_variables() if v.name == "rnn/gru_cell/gates/kernel:0"][0]
                print sess.run(tf.reduce_min(var))
                print sess.run(tf.reduce_max(var))
        var = [v for v in tf.trainable_variables() if v.name == "dense/bias:0"]
        print sess.run(var)

    latestStart = None
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)), disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags+s.nTrain and s.online:
            if s.normalization_type == 'AN':

                predictedInput = np.array(an.do_adaptive_denormalize(predictedInput, therange=(i-s.retrain_interval, i)))
                latestStart = i
                an.set_ignore_first_n(i-s.nTrain-s.predictionStep)
                an.do_ma('s')
                an.do_stationary()
                an.remove_outliers()
                seq_norm = an.do_adaptive_normalize()

                allX = seq_norm[:, 0:-s.predictionStep]
                allY = np.reshape(seq_norm[:, -1], (-1,))

            trainX = allX[i-s.nTrain-s.predictionStep:i-s.predictionStep]
            trainY = allY[i-s.nTrain-s.predictionStep:i-s.predictionStep]
            if s.implementation == "tf":
                trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
            else:
                trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            trainY = np.reshape(trainY, (trainY.shape[0], 1))
            if s.implementation == "keras":
                rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size, verbose=0)
            elif s.implementation == "tf":
                for epoch in range(s.epochs):
                    sess.run(minimize, feed_dict={data: trainX, target: trainY, prob: 0.5})


        if s.implementation == "keras":
            predictedInput[i] = rnn.predict(np.reshape(allX[i], (1,1,x_dims)))

        elif s.implementation == "tf":
            predictedInput[i] = sess.run(dense, feed_dict={data: np.reshape(allX[i], (1, x_dims, 1))})


    if s.normalization_type == 'default':
        predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
        #targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    elif s.normalization_type == 'windowed':
        dp.windowed_denormalize(predictedInput, targetInput,  pred_step=s.predictionStep)
    elif s.normalization_type == 'AN':
        if latestStart:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput, therange=(latestStart, len(predictedInput))))
        else:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput))
        if an.pruning:
            targetInput = np.delete(targetInput, an.deletes)
    print len(predictedInput), len(targetInput), "LENS"
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru', s.predictionStep, s.max_verbosity)
    skipTrain = s.ignore_for_error
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput-predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)
    mape = errors.get_mape(predictedInput,targetInput, s.ignore_for_error)
    if s.max_verbosity > 0:
        print "MAPE {}".format(mape)
    mase = errors.get_mase(predictedInput, targetInput, np.roll(targetInput, s.season), s.ignore_for_error)
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)

    if s.implementation == "tf":
        sess.close()
    return mase
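
A minimal invocation sketch for this example: the settings object `s` is
assumed to be a plain namespace whose field names mirror the attributes the
function reads; all values below are illustrative only.

    from argparse import Namespace

    s = Namespace(
        implementation='keras', rnn_type='gru', use_binary=False,
        nodes=64, lr=1e-3, epochs=10, batch_size=32,
        lookback=24, predictionStep=5, season=24,
        nTrain=5000, retrain_interval=1000, online=True, numLags=0,
        dataSet='my_dataset', dataSetDetailed=None, limit_to=None,
        front_buffer=48, normalization_type='default',
        ignore_for_error=5500, max_verbosity=1)
    mase = run_gru(s)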
Code example #2
def run_gru(s):
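    """Keras-only GRU variant: windowed normalization, optional differencing
    for datasets in `differenceSets`, periodic online retraining, and NRMSE
    as the return value."""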

    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)
    rnn = Sequential()
    rnn.add(
        GRU(s.nodes,
            input_shape=(None, x_dims),
            kernel_initializer='he_uniform',
            stateful=False))

    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)  # default lr would be 1e-3
    rnn.compile(loss='mae', optimizer=opt)

    # load the dataset (the pyBrain-era comment no longer applies)
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # standardize data by subtracting mean and dividing by std
    #(meanSeq, stdSeq) = dp.normalize('data', sequence)

    dp.windowed_normalize(sequence)

    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence)

    predictedInput = np.zeros((len(sequence), ))
    targetInput = np.zeros((len(sequence), ))
    trueData = np.zeros((len(sequence), ))

    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence), ))
        targetInputNodiff = np.zeros((len(sequence), ))

    if s.dataSet in differenceSets:
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    allX = getX(sequence, s)
    allY = np.array(sequence['data'])

    allX = allX[48:]
    allY = allY[48:]
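    # The first 48 samples are skipped; 48 appears to be a hard-coded warm-up /
    # window offset, matching the value passed to configure_batches() below.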
    #if s.dataSet not in x_cols:
    #    allY = allY[s.lookback:]
    trainX = allX[0:s.nTrain]
    trainY = allY[s.predictionStep:s.nTrain + s.predictionStep]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    curBatch = 1.0
    callback = LossCallback()
    temp_set = np.array(sequence['data'])[:48 + s.nTrain + 5]
    configure_batches(48, s.batch_size,
                      np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
    rnn.fit(trainX,
            trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2),
            callbacks=[callback])
    for i in xrange(0, s.nTrain):
        targetInput[i] = allY[i + s.predictionStep]

    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain:i]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            temp_set = np.array(sequence['data'])[i - s.nTrain -
                                                  s.predictionStep - 48:i]
            configure_batches(48, s.batch_size,
                              np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
            rnn.fit(trainX,
                    trainY,
                    epochs=s.epochs,
                    batch_size=s.batch_size,
                    verbose=2,
                    callbacks=[callback])

        targetInput[i] = allY[i + s.predictionStep]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                      predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                   targetInput[i], i - 1)
        predictedInput[0] = 0
        trueData[i] = sequence['data'][i]

    #predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    #targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    dp.windowed_denormalize(predictedInput, targetInput)
    if s.dataSet in differenceSets:

        # predictedInputNodiff = dp.denormalize(predictedInputNodiff)
        # targetInputNodiff = dp.denormalize(targetInputNodiff)
        pass
    #trueData = (trueData * stdSeq) + meanSeq

    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)

    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff, targetInputNodiff,
                            'gru_nodiff', s.predictionStep, s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff,
                                                 targetInputNodiff)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(
            np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
        mase = errors.get_mase(predictedInput, targetInput,
                               np.roll(targetInput, 24))
        if s.max_verbosity > 0:
            print "MASE {}".format(mase)
    return nrmse
Code example #3
def run_gru(s):
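    """Keras functional variant (a gated feedforward block approximating a
    GRU cell) with optional online retraining, optional model reset on
    retrain, and a one-shot non-lookback prediction path; returns the last
    computed MASE."""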
    s.print_settings()
    prob = tf.placeholder_with_default(
        1.0, shape=())  #Retain probability for TF dropout

    start_time = timeit.default_timer()

    if s.implementation == "keras":
        if s.use_binary:
            raise Exception("Binary Keras not implemented")

        input = Input(shape=(1, s.x_dims))
        dense1 = Dense(s.nodes, activation='sigmoid')(input)
        dense2 = Dense(s.nodes, activation='sigmoid')(input)
        dense3 = Dense(s.nodes, activation='tanh')(input)
        mult1 = Multiply()([dense2, dense3])
        act1 = Activation('tanh')(mult1)
        mult2 = Multiply()([dense1, act1])
        reshape = Reshape((s.nodes, ))(mult2)
        dropout = Dropout(0.5)(reshape)
        dense_out = Dense(1)(dropout)
        rnn = Model(inputs=[input], outputs=[dense_out])

        opt = adam(lr=s.lr, decay=0.0,
                   epsilon=s.adam_eps)  # optionally clipvalue=1.; default lr would be 1e-3
        #opt = rmsprop(lr=s.lr)
        rnn.compile(loss=s.loss, optimizer=opt)
        if s.max_verbosity > 0:
            print(rnn.summary())

    else:
        raise Exception("Unknown implementation " + s.implementation)

    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s).values

    if s.limit_to:
        sequence = sequence[:s.limit_to]

    #Get rid of unneeded columns
    sequence = sequence[:, 0:s.feature_count]

    """
    We need to leave some values unpredicted in front so that
      - We can fill the lookback window for each prediction
      - We can get the value from 1 season earlier for MASE
    --> Don't use the first `front_buffer` values as prediction
    --> Independent from `prediction_step`, so the first actual value predicted is `front_buffer`\
        plus however many steps the `prediction_step` is higher than 1
        In other words, the most recent X-value for the first prediction will be the final value in the `front_buffer`
    """

    first_prediction_index = s.front_buffer + s.predictionStep - 1

    targetInput = sequence[first_prediction_index:, 0].copy()  # grab this now to avoid having to denormalize

    dp = DataProcessor()
    if s.normalization_type == 'default':
        (meanSeq, stdSeq) = dp.normalize(
            sequence, s.nTrain if s.cutoff_normalize else len(sequence))

    elif s.normalization_type == 'windowed':
        dp.windowed_normalize(sequence, columns=[0])
        if s.feature_count > 1:
            dp.normalize(sequence, s.nTrain, columns=range(1, s.feature_count))
    elif s.normalization_type == 'AN':
        an = AdaptiveNormalizer(s.lookback, s.lookback + s.predictionStep)
        an.set_pruning(False)
        an.set_source_data(sequence, s.nTrain)
        an.do_ma('s')
        an.do_stationary()
        an.remove_outliers()
        seq_norm = an.do_adaptive_normalize()
        print seq_norm.shape
        if s.feature_count > 1:
            dp.normalize(sequence, s.nTrain, columns=range(1, s.feature_count))
            start = sequence.shape[0] - seq_norm.shape[
                0] - s.lookback - s.predictionStep + 1
            for i in range(seq_norm.shape[0]):
                seq_norm[i, :,
                         1:s.feature_count] = sequence[start + i:start + i +
                                                       seq_norm.shape[1],
                                                       1:s.feature_count]

    else:
        raise Exception("Unsupported normalization type: " +
                        s.normalization_type)

    if s.normalization_type != "AN":
        #Default and windowed change the seq itself but still require creating lookback frames
        allX = getX(sequence, s)
        allY = sequence[first_prediction_index:, 0]
    else:
        #AN creates a new array but takes care of lookback internally
        allX = seq_norm[:, 0:-s.predictionStep]
        allY = np.reshape(seq_norm[:, -1, 0], (-1, ))
    predictedInput = np.full((len(allY), ),
                             np.nan)  #Initialize all predictions to NaN
    #print "TESTT", allX[15000,0,1:]
    print "FIRST", allX[875]
    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    #print "FIRST", trainX[0], trainY[0]
    trainX = np.reshape(trainX, s.actual_input_shape_train)
    trainY = np.reshape(trainY, s.actual_output_shape_train)

    #print "FIRST", trainX[0], trainY[0]
    if s.implementation == "keras":
        for _ in range(1):  # single outer pass; epoch looping is handled by fit()
            rnn.fit(
                trainX,
                trainY,
                epochs=s.epochs,
                batch_size=s.batch_size,
                verbose=min(s.max_verbosity, 2),
                shuffle=not s.stateful)
            if s.stateful:
                rnn_layer.reset_states()  # NOTE: assumes a module-level `rnn_layer`; the functional model above defines none

    latestStart = None
    do_non_lookback = True
    latest_onego = 0
    #buffer = s.retrain_interval / 2
    buffer = 0
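    # Online loop: step through the evaluation range; every `retrain_interval`
    # points, re-fit on the most recent `nTrain` window (re-normalizing first
    # when adaptive normalization is used), then predict the next value(s).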
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and s.online and i > s.nTrain + s.predictionStep + buffer:
            do_non_lookback = True
            if s.normalization_type == 'AN':
                #print "TEST", seq_norm[15000,0,1]
                predictedInput = np.array(
                    an.do_adaptive_denormalize(
                        predictedInput, therange=(i - s.retrain_interval, i)))
                latestStart = i
                an.set_ignore_first_n(i - s.nTrain - s.predictionStep)
                an.do_ma('s')
                an.do_stationary()
                an.remove_outliers()
                seq_norm = an.do_adaptive_normalize()
                print seq_norm[15000, 0, 0]
                print seq_norm.shape
                #exit(1)
                #print "FIRST", seq_norm[i-s.nTrain -s.predictionStep,0]#, trainY[0]
                #print sequence[start+i-s.nTrain-s.predictionStep:start+
                if s.feature_count > 1:
                    #dp.normalize(sequence, s.nTrain, columns=range(1,s.feature_count))
                    start = sequence.shape[0] - seq_norm.shape[
                        0] - s.lookback - s.predictionStep + 1
                    for j in range(seq_norm.shape[0]):
                        seq_norm[j, :, 1:s.feature_count] = sequence[
                            start + j:start + j + seq_norm.shape[1],
                            1:s.feature_count]
                #print "FIRST", seq_norm[i-s.nTrain -s.predictionStep,0]#, trainY[0]
                allX = seq_norm[:, 0:-s.predictionStep]
                allY = np.reshape(seq_norm[:, -1, 0], (-1, ))

            # Note: the lookback and non-lookback paths currently use the same
            # training slice, so no branch is needed here.
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainX = np.reshape(trainX, s.actual_input_shape_train)
            trainY = np.reshape(trainY, s.actual_output_shape_train)
            if s.implementation == "keras":
                if s.reset_on_retrain:
                    input = Input(shape=(1, s.x_dims))
                    dense1 = Dense(s.nodes, activation='sigmoid')(input)
                    dense2 = Dense(s.nodes, activation='sigmoid')(input)
                    dense3 = Dense(s.nodes, activation='tanh')(input)
                    mult1 = Multiply()([dense2, dense3])
                    act1 = Activation('tanh')(mult1)
                    mult2 = Multiply()([dense1, act1])
                    reshape = Reshape((s.nodes, ))(mult2)
                    dropout = Dropout(0.5)(reshape)
                    dense_out = Dense(1)(dropout)
                    rnn = Model(inputs=[input], outputs=[dense_out])
                    opt = adam(lr=s.lr, decay=0.0,
                               epsilon=s.adam_eps)  # , clipvalue=1.)#1e-3)
                    #opt = rmsprop(lr=s.lr)
                    rnn.compile(loss=s.loss, optimizer=opt)
                for _ in range(1):
                    rnn.fit(trainX,
                            trainY,
                            epochs=s.epochs_retrain
                            if s.epochs_retrain else s.epochs,
                            batch_size=s.batch_size,
                            verbose=2,
                            shuffle=not s.stateful)
                    if s.stateful:
                        rnn_layer.reset_states()

        if s.lookback:
            if s.implementation == "keras":
                predictedInput[i] = rnn.predict(
                    np.reshape(allX[i], s.predict_input_shape))

        elif do_non_lookback:
            do_non_lookback = False
            up_to = min(allX.shape[0], i - (i % s.retrain_interval) +
                        s.retrain_interval) if s.online else allX.shape[0]
            start = 0 if s.refeed_on_retrain else latest_onego
            new_predicts = rnn.predict(
                np.reshape(allX[start:up_to], (1, -1, s.x_dims)))
            new_predicts = np.reshape(new_predicts, (new_predicts.shape[1], ))
            predictedInput[i:up_to] = new_predicts[-(up_to - i):]
            latest_onego = up_to

    for i in range(s.nTrain + s.predictionStep):
        predictedInput[i] = np.nan

    if s.normalization_type == 'default':
        predictedInput = dp.denormalize(predictedInput, meanSeq[0], stdSeq[0])
    elif s.normalization_type == 'windowed':
        dp.windowed_denormalize(predictedInput, pred_step=s.predictionStep)
    elif s.normalization_type == 'AN':
        if latestStart:
            predictedInput = np.array(
                an.do_adaptive_denormalize(predictedInput,
                                           therange=(latestStart,
                                                     len(predictedInput))))
        else:
            predictedInput = np.array(
                an.do_adaptive_denormalize(predictedInput))
        if an.pruning:
            targetInput = np.delete(targetInput, an.deletes)

    print "Final time", (timeit.default_timer() - start_time)

    #print "Last not to change:", predictedInput[-996], targetInput[-996]
    #print "First to change:", predictedInput[-995], targetInput[-995]
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    for ignore in s.ignore_for_error:
        skipTrain = ignore
        from plot import computeSquareDeviation
        squareDeviation = computeSquareDeviation(predictedInput, targetInput)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInput - predictedInput))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
        mape = errors.get_mape(predictedInput, targetInput, skipTrain)
        if s.max_verbosity > 0:
            print "MAPE {}".format(mape)
        mase = errors.get_mase(predictedInput, targetInput,
                               np.roll(targetInput, s.season), skipTrain)
        if s.max_verbosity > 0:
            print "MASE {}".format(mase)

    return mase