def run_gru(s):
    prob = tf.placeholder_with_default(1.0, shape=())  # retain probability for TF dropout

    global global_step
    global increment_global_step_op
    global reset_global_step_op
    global batches
    global images_placeholder
    global batches_op
    global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int32)
    increment_global_step_op = tf.assign(global_step, global_step + 1)
    reset_global_step_op = tf.assign(global_step, 0)
    batches = tf.get_variable("batches", [s.nTrain / int(s.batch_size), s.batch_size, 1, 1],
                              dtype=tf.float32, initializer=tf.zeros_initializer)
    images_placeholder = tf.placeholder(tf.float32, shape=(s.nTrain / int(s.batch_size), s.batch_size, 1, 1))
    batches_op = tf.assign(batches, images_placeholder)

    x_dims = s.lookback
    random.seed(6)
    np.random.seed(6)
    tf.set_random_seed(6)

    if s.implementation == "keras":
        if s.use_binary:
            raise Exception("Binary Keras not implemented")
        rnn = Sequential()
        if s.rnn_type == "lstm":
            rnn.add(LSTM(s.nodes, input_shape=(None, x_dims), kernel_initializer='he_uniform'))
        elif s.rnn_type == "gru":
            rnn.add(GRU(s.nodes, input_shape=(None, x_dims), kernel_initializer='he_uniform'))
        rnn.add(Dropout(0.5))
        rnn.add(Dense(1, kernel_initializer='he_uniform'))
        opt = rmsprop(lr=s.lr)
        rnn.compile(loss='mae', optimizer=opt)

        # NOTE: the functional model below rebinds `rnn`, so the Sequential model above
        # is never used. It applies GRU-style gating (sigmoid gates, tanh candidate)
        # built from feedforward layers, with no recurrence.
        input = Input(shape=(1, x_dims))
        dense1 = Dense(s.nodes, activation='sigmoid')(input)
        dense2 = Dense(s.nodes, activation='sigmoid')(input)
        dense3 = Dense(s.nodes, activation='tanh')(input)
        mult1 = Multiply()([dense2, dense3])
        act1 = Activation('tanh')(mult1)
        mult2 = Multiply()([dense1, act1])
        reshape = Reshape((s.nodes,))(mult2)
        dropout = Dropout(0.5)(reshape)
        dense_out = Dense(1)(dropout)
        rnn = Model(inputs=[input], outputs=[dense_out])
        opt = adam(lr=s.lr)
        rnn.compile(loss='mae', optimizer=opt)
        print rnn.summary()
    elif s.implementation == "tf":
        # (number of examples, number of time steps, dimension of each input)
        data = tf.placeholder(tf.float32, [None, s.lookback, 1])
        target = tf.placeholder(tf.float32, [None, 1])
        if s.rnn_type == "lstm" and s.use_binary:
            cell = rnn_tf.LSTMCell(s.nodes)
        elif s.rnn_type == "lstm" and not s.use_binary:
            cell = tf.nn.rnn_cell.LSTMCell(s.nodes)
        elif s.rnn_type == "gru" and s.use_binary:
            cell = rnn_tf.GRUCell(s.nodes)
        elif s.rnn_type == "gru" and not s.use_binary:
            cell = tf.nn.rnn_cell.GRUCell(s.nodes)
        val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)

        with tf.name_scope('rnn_summaries'):
            var = val
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

        val = tf.nn.dropout(val, prob)
        if not s.use_binary:
            dense = tf.layers.dense(val, 1)
        else:
            dense = core_discretize.dense(val, 1)

        with tf.name_scope('dense_summaries'):
            var = dense
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

        pred = tf.reshape(dense, (tf.shape(dense)[0], 1))
        summary = tf.summary.merge_all()
        optimizer = tf.train.AdamOptimizer(learning_rate=s.lr)
        #cost = tf.losses.mean_squared_error(target, pred)
        cost = tf.reduce_mean(tf.abs(target - pred))
        minimize = optimizer.minimize(cost)
    else:
        raise Exception("Unknown implementation " + s.implementation)

    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    seq_full = sequence['data'].values  # use .values to copy
    targetInput = seq_full[s.front_buffer + s.predictionStep - 1:].copy()  # grab this now to avoid having to denormalize

    dp = DataProcessor()
    if s.normalization_type == 'default':
        (meanSeq, stdSeq) = dp.normalize('data', sequence, s.nTrain)
    elif s.normalization_type == 'windowed':
        dp.windowed_normalize(sequence)
    elif s.normalization_type == 'AN':
        an = AdaptiveNormalizer(s.lookback, s.lookback + s.predictionStep)
        an.set_pruning(False)
        an.set_source_data(seq_full, s.nTrain)
        an.do_ma('s')
        an.do_stationary()
        an.remove_outliers()
        seq_norm = an.do_adaptive_normalize()
    else:
        raise Exception("Unsupported normalization type: " + s.normalization_type)

    seq_actual = seq_full[s.front_buffer:]  # leave enough headroom for MASE calculation and lookback
    seq_full_norm = sequence['data'].values
    seq_actual_norm = seq_full_norm[s.front_buffer:]

    if s.normalization_type != "AN":
        # Default and windowed change the seq itself but still require creating lookback frames
        allX = getX(seq_full_norm, s)
        allY = seq_actual_norm[s.predictionStep - 1:]
    else:
        # AN creates a new array but takes care of lookback internally
        allX = seq_norm[:, 0:-s.predictionStep]
        allY = np.reshape(seq_norm[:, -1], (-1,))
        # TODO FIX PROPERLY (now rolled too far)
        too_long = len(allX) - (len(seq_full) - s.front_buffer - s.predictionStep + 1)
        if too_long > 0:
            allX = allX[too_long:]
            allY = allY[too_long:]

    print len(allX), len(allY), s.front_buffer
    predictedInput = np.full((len(allY),), np.nan)  # initialize all predictions to NaN

    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    trainY = np.reshape(trainY, (trainY.shape[0],))

    if s.implementation == "keras":
        rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size, verbose=min(s.max_verbosity, 2))
    elif s.implementation == "tf":
        sess = tf.Session()
        writer = tf.summary.FileWriter("results/", graph=sess.graph)
        init = tf.global_variables_initializer()
        sess.run(init)
        for v in tf.trainable_variables():
            print v.name
        for epoch in tqdm(range(s.epochs)):
            the_cost, _, summ = sess.run([cost, minimize, summary],
                                         feed_dict={data: trainX, target: trainY, prob: 0.5})
            writer.add_summary(summ, epoch)
            if epoch % 10 == 0:
                print the_cost
            # Debug dump of the GRU gate kernel range per epoch
            var = [v for v in tf.trainable_variables() if v.name == "rnn/gru_cell/gates/kernel:0"][0]
            print sess.run(tf.reduce_min(var))
            print sess.run(tf.reduce_max(var))
        var = [v for v in tf.trainable_variables() if v.name == "dense/bias:0"]
        print sess.run(var)

    latestStart = None
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)), disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            if s.normalization_type == 'AN':
                predictedInput = np.array(an.do_adaptive_denormalize(predictedInput,
                                                                     therange=(i - s.retrain_interval, i)))
                latestStart = i
                an.set_ignore_first_n(i - s.nTrain - s.predictionStep)
                an.do_ma('s')
                an.do_stationary()
                an.remove_outliers()
                seq_norm = an.do_adaptive_normalize()
                allX = seq_norm[:, 0:-s.predictionStep]
                allY = np.reshape(seq_norm[:, -1], (-1,))
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            trainY = np.reshape(trainY, (trainY.shape[0], 1))
            if s.implementation == "keras":
                rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size, verbose=0)
            elif s.implementation == "tf":
                for epoch in range(s.epochs):
                    sess.run(minimize, feed_dict={data: trainX, target: trainY, prob: 0.5})
        if s.implementation == "keras":
            predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        elif s.implementation == "tf":
            predictedInput[i] = sess.run(dense, feed_dict={data: np.reshape(allX[i], (1, x_dims, 1))})

    if s.normalization_type == 'default':
        predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    elif s.normalization_type == 'windowed':
        dp.windowed_denormalize(predictedInput, targetInput, pred_step=s.predictionStep)
    elif s.normalization_type == 'AN':
        if latestStart:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput,
                                                                 therange=(latestStart, len(predictedInput))))
        else:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput))
        if an.pruning:
            targetInput = np.delete(targetInput, an.deletes)

    print len(predictedInput), len(targetInput), "LENS"
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru', s.predictionStep, s.max_verbosity)

    skipTrain = s.ignore_for_error
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)
    mape = errors.get_mape(predictedInput, targetInput, s.ignore_for_error)
    if s.max_verbosity > 0:
        print "MAPE {}".format(mape)
    mase = errors.get_mase(predictedInput, targetInput, np.roll(targetInput, s.season), s.ignore_for_error)
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)
    if s.implementation == "tf":
        sess.close()
    return mase
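# The error helpers used above (errors.get_mape, errors.get_mase) live elsewhere in
# the repo. For reference, the call errors.get_mase(pred, target, np.roll(target,
# s.season), ignore) suggests a seasonal-naive MASE; the sketch below is an
# assumption about that helper, not its actual implementation.
def mase_sketch(pred, target, seasonal_baseline, ignore_first_n=0):
    # MASE = MAE(model) / MAE(seasonal-naive baseline), skipping warm-up points.
    p = pred[ignore_first_n:]
    t = target[ignore_first_n:]
    b = seasonal_baseline[ignore_first_n:]
    return np.nanmean(np.abs(t - p)) / np.nanmean(np.abs(t - b))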
def run_gru(s):
    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)

    rnn = Sequential()
    rnn.add(GRU(s.nodes, input_shape=(None, x_dims), kernel_initializer='he_uniform', stateful=False))
    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)
    rnn.compile(loss='mae', optimizer=opt)

    # Prepare the dataset
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # Standardize data by subtracting the mean and dividing by the std
    dp.windowed_normalize(sequence)
    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence)

    predictedInput = np.zeros((len(sequence),))
    targetInput = np.zeros((len(sequence),))
    trueData = np.zeros((len(sequence),))
    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence),))
        targetInputNodiff = np.zeros((len(sequence),))
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    allX = getX(sequence, s)
    allY = np.array(sequence['data'])
    allX = allX[48:]
    allY = allY[48:]
    trainX = allX[0:s.nTrain]
    trainY = allY[s.predictionStep:s.nTrain + s.predictionStep]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))

    callback = LossCallback()
    temp_set = np.array(sequence['data'])[:48 + s.nTrain + 5]
    configure_batches(48, s.batch_size, np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
    rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2), callbacks=[callback])

    for i in xrange(0, s.nTrain):
        targetInput[i] = allY[i + s.predictionStep]

    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)), disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain:i]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            temp_set = np.array(sequence['data'])[i - s.nTrain - s.predictionStep - 48:i]
            configure_batches(48, s.batch_size, np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
            rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size, verbose=2, callbacks=[callback])
        targetInput[i] = allY[i + s.predictionStep]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'], predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'], targetInput[i], i - 1)
        predictedInput[0] = 0
        trueData[i] = sequence['data'][i]

    dp.windowed_denormalize(predictedInput, targetInput)
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru', s.predictionStep, s.max_verbosity)

    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)

    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff, targetInputNodiff, 'gru_nodiff',
                            s.predictionStep, s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff, targetInputNodiff)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)

    mase = errors.get_mase(predictedInput, targetInput, np.roll(targetInput, 24))
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)
    return nrmse
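# DataProcessor.difference / inverse_difference are defined elsewhere; the call above,
# dp.inverse_difference(backup_sequence['data'], value, i - 1), suggests plain
# first-order differencing against a reference point in the undifferenced series.
# A hypothetical sketch of that pair, for reference only:
def difference_sketch(series, interval):
    # y'_t = y_t - y_{t-interval}
    return [series[t] - series[t - interval] for t in xrange(interval, len(series))]

def inverse_difference_sketch(history, diffed_value, index):
    # Recover the level: y_t = y'_t + y_ref, where `index` is the position of the
    # reference value in the original (undifferenced) series.
    return diffed_value + history[index]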
def run_gru(s):
    s.print_settings()
    prob = tf.placeholder_with_default(1.0, shape=())  # retain probability for TF dropout
    start_time = timeit.default_timer()

    if s.implementation == "keras":
        if s.use_binary:
            raise Exception("Binary Keras not implemented")
        # GRU-style gating (sigmoid gates, tanh candidate) built from feedforward
        # layers; there is no recurrence in this model.
        input = Input(shape=(1, s.x_dims))
        dense1 = Dense(s.nodes, activation='sigmoid')(input)
        dense2 = Dense(s.nodes, activation='sigmoid')(input)
        dense3 = Dense(s.nodes, activation='tanh')(input)
        mult1 = Multiply()([dense2, dense3])
        act1 = Activation('tanh')(mult1)
        mult2 = Multiply()([dense1, act1])
        reshape = Reshape((s.nodes,))(mult2)
        dropout = Dropout(0.5)(reshape)
        dense_out = Dense(1)(dropout)
        rnn = Model(inputs=[input], outputs=[dense_out])
        opt = adam(lr=s.lr, decay=0.0, epsilon=s.adam_eps)
        #opt = rmsprop(lr=s.lr)
        rnn.compile(loss=s.loss, optimizer=opt)
        if s.max_verbosity > 0:
            print(rnn.summary())
    else:
        raise Exception("Unknown implementation " + s.implementation)

    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s).values
    if s.limit_to:
        sequence = sequence[:s.limit_to]
    # Get rid of unneeded columns
    sequence = sequence[:, 0:s.feature_count]
    """
    We need to leave some values unpredicted in front so that
      - we can fill the lookback window for each prediction, and
      - we can get the value from one season earlier for MASE.
    --> Don't use the first `front_buffer` values as predictions.
    --> Independent from `prediction_step`, so the first value actually predicted is `front_buffer`
        plus however many steps `prediction_step` is higher than 1.
    In other words, the most recent X-value for the first prediction is the final value in the
    `front_buffer`.
    """
    first_prediction_index = s.front_buffer + s.predictionStep - 1
    targetInput = sequence[first_prediction_index:, 0].copy()  # grab this now to avoid having to denormalize

    dp = DataProcessor()
    if s.normalization_type == 'default':
        (meanSeq, stdSeq) = dp.normalize(sequence, s.nTrain if s.cutoff_normalize else len(sequence))
    elif s.normalization_type == 'windowed':
        dp.windowed_normalize(sequence, columns=[0])
        if s.feature_count > 1:
            dp.normalize(sequence, s.nTrain, columns=range(1, s.feature_count))
    elif s.normalization_type == 'AN':
        an = AdaptiveNormalizer(s.lookback, s.lookback + s.predictionStep)
        an.set_pruning(False)
        an.set_source_data(sequence, s.nTrain)
        an.do_ma('s')
        an.do_stationary()
        an.remove_outliers()
        seq_norm = an.do_adaptive_normalize()
        if s.feature_count > 1:
            # Normalize the exogenous columns and copy them into the normalized windows
            dp.normalize(sequence, s.nTrain, columns=range(1, s.feature_count))
            start = sequence.shape[0] - seq_norm.shape[0] - s.lookback - s.predictionStep + 1
            for i in range(seq_norm.shape[0]):
                seq_norm[i, :, 1:s.feature_count] = sequence[start + i:start + i + seq_norm.shape[1],
                                                             1:s.feature_count]
    else:
        raise Exception("Unsupported normalization type: " + s.normalization_type)

    if s.normalization_type != "AN":
        # Default and windowed change the seq itself but still require creating lookback frames
        allX = getX(sequence, s)
        allY = sequence[first_prediction_index:, 0]
    else:
        # AN creates a new array but takes care of lookback internally
        allX = seq_norm[:, 0:-s.predictionStep]
        allY = np.reshape(seq_norm[:, -1, 0], (-1,))

    predictedInput = np.full((len(allY),), np.nan)  # initialize all predictions to NaN

    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    trainX = np.reshape(trainX, s.actual_input_shape_train)
    trainY = np.reshape(trainY, s.actual_output_shape_train)

    if s.implementation == "keras":
        rnn.fit(trainX, trainY, epochs=s.epochs, batch_size=s.batch_size,
                verbose=min(s.max_verbosity, 2), shuffle=not s.stateful)
        if s.stateful:
            # NOTE: `rnn_layer` must be bound to the stateful recurrent layer elsewhere
            rnn_layer.reset_states()

    latestStart = None
    do_non_lookback = True
    latest_onego = 0
    buffer = 0  # alternative: s.retrain_interval / 2
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)), disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and s.online and i > s.nTrain + s.predictionStep + buffer:
            do_non_lookback = True
            if s.normalization_type == 'AN':
                predictedInput = np.array(an.do_adaptive_denormalize(predictedInput,
                                                                     therange=(i - s.retrain_interval, i)))
                latestStart = i
                an.set_ignore_first_n(i - s.nTrain - s.predictionStep)
                an.do_ma('s')
                an.do_stationary()
                an.remove_outliers()
                seq_norm = an.do_adaptive_normalize()
                if s.feature_count > 1:
                    start = sequence.shape[0] - seq_norm.shape[0] - s.lookback - s.predictionStep + 1
                    for j in range(seq_norm.shape[0]):
                        seq_norm[j, :, 1:s.feature_count] = sequence[start + j:start + j + seq_norm.shape[1],
                                                                     1:s.feature_count]
                allX = seq_norm[:, 0:-s.predictionStep]
                allY = np.reshape(seq_norm[:, -1, 0], (-1,))
            # Same slicing with or without lookback
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainX = np.reshape(trainX, s.actual_input_shape_train)
            trainY = np.reshape(trainY, s.actual_output_shape_train)
            if s.implementation == "keras":
                if s.reset_on_retrain:
                    # Rebuild the model from scratch before retraining
                    input = Input(shape=(1, s.x_dims))
                    dense1 = Dense(s.nodes, activation='sigmoid')(input)
                    dense2 = Dense(s.nodes, activation='sigmoid')(input)
                    dense3 = Dense(s.nodes, activation='tanh')(input)
                    mult1 = Multiply()([dense2, dense3])
                    act1 = Activation('tanh')(mult1)
                    mult2 = Multiply()([dense1, act1])
                    reshape = Reshape((s.nodes,))(mult2)
                    dropout = Dropout(0.5)(reshape)
                    dense_out = Dense(1)(dropout)
                    rnn = Model(inputs=[input], outputs=[dense_out])
                    opt = adam(lr=s.lr, decay=0.0, epsilon=s.adam_eps)
                    rnn.compile(loss=s.loss, optimizer=opt)
                rnn.fit(trainX, trainY, epochs=s.epochs_retrain if s.epochs_retrain else s.epochs,
                        batch_size=s.batch_size, verbose=2, shuffle=not s.stateful)
                if s.stateful:
                    rnn_layer.reset_states()
        if s.lookback:
            if s.implementation == "keras":
                predictedInput[i] = rnn.predict(np.reshape(allX[i], s.predict_input_shape))
        elif do_non_lookback:
            do_non_lookback = False
            up_to = min(allX.shape[0], i - (i % s.retrain_interval) + s.retrain_interval) \
                if s.online else allX.shape[0]
            start_time = time.time()  # NOTE: shadows the run-level timer set above
            start = 0 if s.refeed_on_retrain else latest_onego
            new_predicts = rnn.predict(np.reshape(allX[start:up_to], (1, -1, s.x_dims)))
            new_predicts = np.reshape(new_predicts, (new_predicts.shape[1],))
            predictedInput[i:up_to] = new_predicts[-(up_to - i):]
            latest_onego = up_to

    # Mask the warm-up region that was never predicted
    for i in range(s.nTrain + s.predictionStep):
        predictedInput[i] = np.nan

    if s.normalization_type == 'default':
        predictedInput = dp.denormalize(predictedInput, meanSeq[0], stdSeq[0])
    elif s.normalization_type == 'windowed':
        dp.windowed_denormalize(predictedInput, pred_step=s.predictionStep)
    elif s.normalization_type == 'AN':
        if latestStart:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput,
                                                                 therange=(latestStart, len(predictedInput))))
        else:
            predictedInput = np.array(an.do_adaptive_denormalize(predictedInput))
        if an.pruning:
            targetInput = np.delete(targetInput, an.deletes)

    print "Final time", (timeit.default_timer() - start_time)
    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru', s.predictionStep, s.max_verbosity)

    from plot import computeSquareDeviation
    for skipTrain in s.ignore_for_error:
        squareDeviation = computeSquareDeviation(predictedInput, targetInput)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInput - predictedInput))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
        mape = errors.get_mape(predictedInput, targetInput, skipTrain)
        if s.max_verbosity > 0:
            print "MAPE {}".format(mape)
        mase = errors.get_mase(predictedInput, targetInput, np.roll(targetInput, s.season), skipTrain)
        if s.max_verbosity > 0:
            print "MASE {}".format(mase)
    return mase
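# Hypothetical driver, for illustration only: no `Settings` class is defined in this
# file, so the constructor and field names below are assumptions mirroring the
# attributes run_gru() reads (s.dataSet, s.nodes, s.lr, s.epochs, s.lookback, ...).
if __name__ == '__main__':
    s = Settings(dataSet='nyc_taxi', dataSetDetailed=None, implementation='keras',
                 rnn_type='gru', nodes=100, lr=1e-3, loss='mae', adam_eps=1e-8,
                 epochs=10, epochs_retrain=None, batch_size=32, lookback=50,
                 predictionStep=5, nTrain=5000, front_buffer=60, season=48,
                 feature_count=1, normalization_type='AN', online=True,
                 retrain_interval=1000, ignore_for_error=[5500], max_verbosity=1)
    print "MASE:", run_gru(s)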