Example #1
0
def predict():
    """Restore the saved model, run it over the test batches and plot the result.

    Relies on module-level state: ``records``, ``model_path``, ``batch_size``,
    ``cycle_timesteps``, ``cycle_model``, ``y``, ``y_pre`` and ``loss_func``.
    The ``power`` column of ``records`` is overwritten with its min-max scaled
    values as a side effect. Nothing is returned; the loss/MSE are printed and
    a matplotlib figure is shown.
    """
    # Scale the power column in place; the same scaler is reused below to map
    # targets and predictions back to the original units.
    scaler = MinMaxScaler()
    records['power'] = scaler.fit_transform(records['power'])

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Load the trained weights into this session.
        saver.restore(sess, model_path)

        targets = []
        outputs = []
        batch_losses = []
        batches = get_cycle_time_batch_data(records, batch_size, cycle_timesteps)
        for batch in batches:
            features, labels = batch[0], batch[1]
            feed = {cycle_model.X_: features, y: labels}
            batch_output, batch_loss = sess.run([y_pre, loss_func], feed_dict=feed)
            targets.extend(labels)
            outputs.extend(batch_output)
            batch_losses.append(batch_loss)

        # x axis: one tick per collected target sample
        sample_index = list(range(len(targets)))

        # Undo the normalization so the error is reported in original units.
        targets = np.array(scaler.inverse_transform(targets))
        outputs = np.array(scaler.inverse_transform(outputs))

        mse = np.mean((targets - outputs) ** 2)
        print('---------------- Test Loss:', np.mean(batch_losses), 'MSE:', mse)
        # red = ground truth, blue = prediction
        plt.plot(sample_index, targets, 'r', sample_index, outputs, 'b')
        plt.show()
def test_min_max_scaler_iris():
    """Check MinMaxScaler output range and round trip on the iris data.

    For each feature range the scaled columns must span exactly
    [expected_min, expected_max] and ``inverse_transform`` must recover the
    original data. An inverted range must make ``fit`` raise ``ValueError``.
    """
    X = iris.data

    # (constructor kwargs, expected per-feature minimum, expected maximum).
    # The empty kwargs entry exercises the default parameters.
    cases = [
        ({}, 0, 1),
        ({'feature_range': (1, 2)}, 1, 2),
        ({'feature_range': (-.5, .6)}, -.5, .6),
    ]
    for kwargs, expected_min, expected_max in cases:
        scaler = MinMaxScaler(**kwargs)
        X_trans = scaler.fit_transform(X)
        assert_array_almost_equal(X_trans.min(axis=0), expected_min)
        assert_array_almost_equal(X_trans.max(axis=0), expected_max)
        # The inverse must undo the scaling.
        X_trans_inv = scaler.inverse_transform(X_trans)
        assert_array_almost_equal(X, X_trans_inv)

    # raises on invalid range
    scaler = MinMaxScaler(feature_range=(2, 1))
    assert_raises(ValueError, scaler.fit, X)
def sample_from_generator(history, nb_samples, latent_dim=12, 
                          valid_split=0.3, random_split=True,
                          hidden_dims=None, **kwargs):
    """Fit a generator on min-max scaled ``history`` and draw samples from it.

    ``history`` is scaled, split into train/validation rows, and handed to
    ``build_model``; the returned generator is then fed ``nb_samples``
    standard-normal latent vectors of size ``latent_dim``. The draws are
    mapped back to the original units before being returned.

    ``valid_split`` may be falsy (no validation set), a float, or any other
    value, which is then used directly as the number of validation rows.
    Extra keyword arguments are forwarded to ``build_model``.

    NOTE(review): for a float ``valid_split`` the validation size is computed
    as ``n - floor(n * valid_split)``, i.e. the *complement* of the fraction
    (0.3 -> roughly 70% of the rows are held out). Confirm this is intended.
    """
    scaler = MinMaxScaler()
    scaler.fit(history)
    scaled = scaler.transform(history)

    n_rows = history.shape[0]
    if not valid_split:
        n_valid = 0
    elif isinstance(valid_split, float):
        n_valid = n_rows - int(np.floor(n_rows * valid_split))
    else:
        n_valid = valid_split

    if not n_valid > 0:
        # No validation rows requested: train on everything.
        x_train, x_valid = scaled, None
    elif random_split:
        # Shuffle row indices; the tail of the permutation is the validation set.
        order = np.arange(n_rows)
        np.random.shuffle(order)
        x_valid = scaled[order[-n_valid:], :]
        x_train = scaled[order[:-n_valid], :]
    else:
        # Sequential split: the last n_valid rows are held out.
        x_valid = scaled[-n_valid:, :]
        x_train = scaled[:-n_valid, :]

    _, generator = build_model(latent_dim, x_train, x_valid=x_valid,
                               hidden_dims=hidden_dims, **kwargs)

    latent = np.random.standard_normal((nb_samples, latent_dim))
    return scaler.inverse_transform(generator.predict(latent))
def test_min_max_scaler_zero_variance_features():
    """Check min max scaler on toy data with zero variance features"""
    # The first two columns are constant across the fitting rows.
    fit_rows = [[0.,  1.,  0.5],
                [0.,  1., -0.1],
                [0.,  1.,  1.1]]

    # Rows not seen during fit, including values outside the fitted range.
    unseen_rows = [[+0.,  2.,  0.5],
                   [-1.,  1.,  0.0],
                   [+0.,  1.,  1.5]]

    # default params
    default_scaler = MinMaxScaler()
    scaled = default_scaler.fit_transform(fit_rows)
    assert_array_almost_equal(scaled, [[0.,  0.,  0.5],
                                       [0.,  0.,  0.0],
                                       [0.,  0.,  1.0]])
    # round trip back to the original rows
    restored = default_scaler.inverse_transform(scaled)
    assert_array_almost_equal(fit_rows, restored)

    # transform of unseen rows reuses the fitted statistics
    scaled_unseen = default_scaler.transform(unseen_rows)
    assert_array_almost_equal(scaled_unseen,
                              [[+0.,  1.,  0.500],
                               [-1.,  0.,  0.083],
                               [+0.,  0.,  1.333]],
                              decimal=2)

    # not default params
    shifted_scaler = MinMaxScaler(feature_range=(1, 2))
    scaled = shifted_scaler.fit_transform(fit_rows)
    assert_array_almost_equal(scaled, [[1.,  1.,  1.5],
                                       [1.,  1.,  1.0],
                                       [1.,  1.,  2.0]])
def one_input_LSTM(dataset):
    """Train a one-step-ahead LSTM on ``dataset`` and print train/test RMSE.

    The data is min-max scaled to [0, 1], split 67/33 in order, windowed with
    ``create_dataset`` (look_back of 1) and fitted with a 4-unit LSTM followed
    by a single Dense output. Scores are computed after mapping predictions
    and targets back to the original units.

    The NaN-padded plot arrays built at the end are not returned or plotted
    here; the function returns ``None``.
    """
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # ordered split: first 67% for training, remainder for testing
    split_at = int(len(dataset) * 0.67)
    train = dataset[:split_at, :]
    test = dataset[split_at:, :]

    # windows of X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # Keras expects [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # build and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=20, batch_size=1, verbose=2)

    # predictions and targets, mapped back to the original units
    trainPredict = scaler.inverse_transform(model.predict(trainX))
    testPredict = scaler.inverse_transform(model.predict(testX))
    trainY = scaler.inverse_transform([trainY])
    testY = scaler.inverse_transform([testY])

    # root mean squared error per split
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    # train predictions, shifted by look_back so they line up with the data
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    train_end = len(trainPredict) + look_back
    trainPredictPlot[look_back:train_end, :] = trainPredict

    # test predictions, placed after the training span
    testPredictPlot = np.empty_like(dataset)
    testPredictPlot[:, :] = np.nan
    test_start = len(trainPredict) + (look_back * 2) + 1
    testPredictPlot[test_start:len(dataset) - 1, :] = testPredict
Example #6
0
class SerialDataScaler:
    """Min-max scaler for a single series of values.

    The wrapped ``MinMaxScaler`` works on a single column, so a flat series
    is reshaped to ``(n, 1)`` before it is handed over.
    """

    def __init__(self, data):
        """Fit the scaler on ``data``, treated as one float32 column."""
        data = numpy.reshape(data, (len(data), 1))
        data = data.astype("float32")
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.scaler.fit(data)

    def transform(self, X):
        """Scale ``X`` to [0, 1]; returns an array of shape ``(len(X), 1)``."""
        return self.scaler.transform(numpy.reshape(X, (len(X), 1)))

    def inverse_transform(self, x):
        """Map scaled values back to the original units.

        A 1-D input is reshaped to a single column first, mirroring
        ``transform``, so a flat series can be round-tripped. Inputs that are
        already 2-D are passed through unchanged.
        """
        values = numpy.asarray(x)
        if values.ndim == 1:
            values = values.reshape(-1, 1)
        return self.scaler.inverse_transform(values)
def get_net_prediction( train_data, train_truth, test_data, test_truth
                      , hidden=(5,), weight_decay=0.0, dropout_prob=0.0
                      , learning_rate=None, epochs=25, verbose=False
                      , iter_id=None
                      ):
    """Train a neural net on the training set and predict the test set.

    The training targets are scaled to [-1, 1] before fitting and the
    predictions are mapped back to the original units. When
    ``get_is_time_stats()`` is true, training runs for ``TIMING_EPOCHS``
    epochs with the train check disabled and the elapsed time is printed.

    ``test_truth`` and ``iter_id`` are accepted but not used in this function.

    Returns the predictions for ``test_data`` as a flat array.
    """
    net = NeuralNetContainer()
    net.learning_rate = learning_rate
    net.dropout_prob = dropout_prob
    net.weight_decay = weight_decay
    net.epochs = epochs
    net.hidden_layers = hidden
    net.verbose = verbose
    net.plot = get_should_plot()

    # Scale output from -1 to 1.
    target_scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_truth = target_scaler.fit_transform(train_truth[:, np.newaxis])

    n_features = train_data.shape[1]

    timing = get_is_time_stats()
    if timing:
        start = time.time()

    # Find an effectively initialized network to start from.
    net = _get_initial_net(net, n_features, train_data, scaled_truth)

    # When timing, train a fixed number of epochs and never terminate early;
    # otherwise train normally with all heuristics enabled.
    train_options = {}
    if timing:
        train_options = {'override_epochs': TIMING_EPOCHS, 'is_check_train': False}
    _train_net(net, train_data, scaled_truth, **train_options)

    if timing:
        elapsed = time.time() - start
        print('Fitting took {} seconds'.format(elapsed))
        print(json.dumps({'seconds': elapsed, 'hidden': net.hidden_layers}))

    # Unsupervised (test) dataset: predict, then undo the target scaling.
    scaled_prediction = _predict(net, test_data)
    return target_scaler.inverse_transform(scaled_prediction).ravel()
def get_fast_nn_dom_prediction(train_data, train_truth, test_data, test_truth, hidden=(5,), weight_decay=0.0): 
    """Train a small net on allele-expanded data and predict the test set.

    Both data sets are converted with ``_convert_to_individual_alleles``, the
    targets are scaled to [-1, 1] for training, and each test row is run
    through the trained net one at a time. The collected outputs are mapped
    back to the original target units and returned as a flat array.
    """
    # Convert data to individual alleles to capture dominance.
    train_data, test_data = (
        _convert_to_individual_alleles(data) for data in (train_data, test_data)
    )

    target_scaler = MinMaxScaler(feature_range=(-1, 1))
    train_truth = target_scaler.fit_transform(train_truth)
    # The scaled test targets are not used further in this function.
    test_truth = target_scaler.transform(test_truth)

    net = _get_nn(train_data.shape[1], hidden)
    _train_nn(net, train_data, train_truth, weight_decay)

    # One forward pass per test row.
    outputs = [net.run(test_data[row, :]) for row in range(test_data.shape[0])]

    return target_scaler.inverse_transform(np.array(outputs)).ravel()
# model.load_weights('weights/bitcoin2012_2017_50_30_weights.hdf5')
# model.compile(loss='mse', optimizer='adam')
# scaler = MinMaxScaler(feature_range=(0, 1))
# scaler.fit(datas.reshape(-1))
# predicted_inverted = scaler.inverse_transform(predicted)
# ground_true = scaler.inverse_transform(validation_next_price)
# In[6]:


predicted = model.predict(validation_datas)
predicted_inverted = []

# In[7]:
# Undo the scaling one feature at a time: refit the scaler on that feature's
# original column, then inverse-transform the matching slice of the output.
for i in range(original_datas.shape[1]):
    scaler.fit(original_datas[:, i].reshape(-1, 1))
    predicted_inverted.append(scaler.inverse_transform(predicted[:, :, i]))

# get only the close data
ground_true = ground_true[:, :, 0].reshape(-1)
output_times = output_times.reshape(-1)

# predicted_inverted was built by appending one array per feature, so the
# feature axis is the FIRST axis of the stacked array. Select feature 0 there
# (the same feature index taken from ground_true above); indexing the last
# axis would instead pick one position from every feature.
predicted_inverted = np.array(predicted_inverted)[0, :, :].reshape(-1)



# In[8]:
# Parenthesised so the line parses under both Python 2 and Python 3.
print(output_times.shape, ground_true.shape)

plt.plot(output_times[-1000:], ground_true[-1000:])
plt.plot(output_times[-1000:], predicted_inverted[-1000:])
Example #10
0
# Fit the linear model on the training inputs, with chloride as the target.
model = lm.fit(train_X, train_y)
# Predict on the test inputs (everything except chloride) and turn the
# result into a single column so it can be joined to the feature columns.
predictions = lm.predict(test_X)
predictions = predictions.reshape((predictions.shape[0], 1))

# Column window of test_X that accompanies the target through the scaler.
# start: length of "train" minus the number of features "n_features"
# end:   length of "train"
start = 750
end = 900

# Predicted chloride: append it to the feature window, undo the scaling on
# the combined array, then keep only column 149.
# NOTE(review): the target is appended as the last column; confirm that
# index 149 is that last column for the width the scaler was fitted on.
inv_yhat = scaler.inverse_transform(
    concatenate((test_X[:, start:end], predictions), axis=1))
inv_yhat = inv_yhat[:, 149]

# Real chloride: same procedure with the true test targets.
test_y = test_y.reshape((len(test_y), 1))
inv_y = scaler.inverse_transform(
    concatenate((test_X[:, start:end], test_y), axis=1))
inv_y = inv_y[:, 149]

# Root mean squared error in the original units.
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
#Calculate R squared
from sklearn.metrics import r2_score
Example #11
0
# Take the last (timesteps + testing_records) rows so that every test record
# has a full window of preceding rows, then scale columns 1..6.
dataset_total = dataset_all[len(dataset_all) - (timesteps + testing_records):]
inputs = scale.transform(dataset_total.iloc[:, 1:7].values)

# Separate scaler for the target column (column 3 of the training set); it is
# fitted here only so predictions can be mapped back to price units below.
y_scale = MinMaxScaler(feature_range=(0, 1))
y_training_set_scaled = y_scale.fit_transform(training_set[:, 3:4])

# One window of `timesteps` consecutive rows (all 6 features) per test record.
X_test = np.array([
    inputs[i - timesteps:i, 0:6]
    for i in range(timesteps, inputs.shape[0])
])
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 6))

predicted_price = y_scale.inverse_transform(regressor.predict(X_test))

real_stock_price = dataset_test.iloc[:, 3:4].values
plt.plot(real_stock_price, color='red', label='Real Stock Price')
plt.plot(predicted_price, color='blue', label='Predicted Stock Price')
plt.title('Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Stock Price')
plt.legend()
plt.show()

#with tf.device('/gpu:0'):
# Final layers of the network: LSTM -> dropout -> single-value output.
model_JPM.add(LSTM(units=50))
model_JPM.add(Dropout(0.1))
model_JPM.add(Dense(units=1))
model_JPM.compile(optimizer ='adam', metrics = ['mse'], loss = 'mean_absolute_error')
# training
model_JPM.fit(x_train, y_train, epochs = 200, batch_size = 62)

# Test set: for every row from index 2001 on, the 30 preceding scaled values
# of column 0 are the input window and the row's own value is the target.
test_rows = range(2001, len(JPM_stock))
x_test = np.array([newdata_scaled[i-30:i, 0] for i in test_rows])
y_test = np.array([newdata_scaled[i, 0] for i in test_rows])
# add the trailing feature axis: (samples, window, 1)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

forecast_stock = model_JPM.predict(x_test)

# Back to price units; both series are passed as single columns.
forecast_column = np.array(forecast_stock).reshape(-1, 1)
actual_column = np.array(y_test).reshape(-1, 1)
fore = sc.inverse_transform(forecast_column)
actual = sc.inverse_transform(actual_column)


plt.plot(actual, color='red', label='Actual price')
plt.plot(fore, color='blue', label='forecasted price')
plt.title('JPM stock prediction')
plt.ylabel('Price')
plt.xlabel('Time')
plt.legend()
plt.show()

Example #13
0
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, nb_epoch=100, batch_size=1, verbose=2)
# Estimate model performance.
# model.evaluate returns the mean squared error in *scaled* units. An error
# magnitude must only be rescaled, never shifted: scaled = x * scale_ + min_,
# so an RMSE converts back with a division by scale_. Passing it through
# inverse_transform would also add the data offset and inflate the score.
# The scaler handles a single column here (the original code fed it a 1x1
# array), hence scale_[0].
trainScore = model.evaluate(trainX, trainY, verbose=0)
trainScore = math.sqrt(trainScore) / scaler.scale_[0]
print('Train Score: %.2f RMSE' % (trainScore))
testScore = model.evaluate(testX, testY, verbose=0)
testScore = math.sqrt(testScore) / scaler.scale_[0]
print('Test Score: %.2f RMSE' % (testScore))
# generate predictions for training
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
Example #14
0
model.add(Dropout(0.5))
model.add(Conv1D( strides=2, filters=nb_features, kernel_size=2))
'''
# Load previously trained weights from disk and compile the model.
model.load_weights(
    'weights/bitcoin2015to2017_close_CNN_2_relu-44-0.00023.hdf5')
model.compile(loss='mse', optimizer='adam')

# In[336]:

predicted = model.predict(validation_datas)
predicted_inverted = []

# In[7]:
# Undo the scaling one feature at a time: the scaler is refitted on that
# feature's original column before inverse-transforming the matching slice
# of the prediction.
for i in range(original_datas.shape[1]):
    scaler.fit(original_datas[:, i].reshape(-1, 1))
    predicted_inverted.append(scaler.inverse_transform(predicted[:, :, i]))
print(np.array(predicted_inverted).shape)
#get only the close data
ground_true = ground_true[:, :, 0].reshape(-1)
ground_true_times = ground_true_times.reshape(-1)
# timestamps are interpreted as seconds since the epoch
ground_true_times = pd.to_datetime(ground_true_times, unit='s')
# since we are appending in the first dimension
# (index 0 on the first axis selects the first feature's predictions)
predicted_inverted = np.array(predicted_inverted)[0, :, :].reshape(-1)
print(np.array(predicted_inverted).shape)
validation_output_times = pd.to_datetime(validation_output_times.reshape(-1),
                                         unit='s')

# In[337]:

# Collect the ground-truth series in a DataFrame, starting with its times.
ground_true_df = pd.DataFrame()
ground_true_df['times'] = ground_true_times
Example #15
0
def create_dataset(dataset, look_back):
    """Slice a 2-D array into sliding input windows and next-row targets.

    For each start row ``s`` in ``range(len(dataset) - look_back - 1)`` the
    input is rows ``s .. s + look_back - 1`` and the target is row
    ``s + look_back``. Note the trailing ``- 1``: the final possible window
    is not produced.

    Returns ``(windows, targets)`` as numpy arrays; both are empty when the
    data is too short for a single window.
    """
    n_windows = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, :] for start in range(n_windows)]
    targets = [dataset[start + look_back, :] for start in range(n_windows)]
    return np.array(windows), np.array(targets)


# Load the raw values as float32 and scale every column to [0, 1].
dataset = df.values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# The trained model expects a fixed number of columns.
if n_features != dataset.shape[1]:
    raise ValueError("Excel file format does not match with trained model.")

# Everything from row 35 onwards is used as the test span.
test = dataset[35:, :]

# Window the test span (X = look_back rows, Y = the following row) and predict.
testX, testY = create_dataset(test, look_back)
testPredict = model.predict(testX, batch_size=batch_size)

# Map the predictions back to the original units.
normalizedTestPrediction = scaler.inverse_transform(testPredict)
#normalizedTest           =  scaler.inverse_transform(test[3:])

last_columns = normalizedTestPrediction[:, 1:].astype(dtype=np.int32)
print("Prediction for last 6 columns:\n", last_columns)
Example #16
0
    # Training loop: one optimisation step per iteration on a randomly drawn
    # batch, recording the loss on that same batch after the step.
    for i in xrange(0,numSteps):
        # progress indicator: fraction of steps completed, every 100 steps
        if (i%100==0):
            print float(i)/numSteps
        #batch_xs, batch_ys = GetSubset(trainDataX, trainDataY, batchsize, batchnum)
        batch_xs, batch_ys = GetRandomSubset(trainDataX, trainDataY, batchsize)
        # batchnum is advanced and wrapped here, but only the commented-out
        # GetSubset call above takes it as an argument.
        batchnum += 1
        if batchnum > (float(numPoints) / batchsize):
            batchnum = 0
        sess.run(train, feed_dict={x: batch_xs, y_: batch_ys})
        lossdata[i] = sess.run(loss, feed_dict={x: batch_xs, y_: batch_ys})

    # Model outputs for the full training and test inputs.
    predTrain = sess.run(y, feed_dict={x: trainDataX})
    predTest = sess.run(y, feed_dict={x: testDataX})
    #predKaggleTest = sess.run(y, feed_dict={x: data})

# Map model outputs and the true targets back to price units.
predTrainPrice = scaler.inverse_transform(predTrain)
predTestPrice = scaler.inverse_transform(predTest)
#predKaggleTestPrice = scaler.inverse_transform(predKaggleTest)
realPrice = scaler.inverse_transform(Y)

"""
i=1461
with open("hp_nnet_test.csv", "w") as f:
    f.write("Id,SalePrice\n")
    for x in predKaggleTestPrice:
        f.write(str(i)+","+str(x[0])+"\n")
        i+=1
"""
f, axarr = plt.subplots(2)
# Row index separating the training part of realPrice from the test part.
splitIndex = int(trainSplit*max(realPrice.shape))
# Real vs. predicted price: training rows in blue, test rows in red.
axarr[0].scatter(realPrice[:splitIndex], predTrainPrice, c='Blue')
axarr[0].scatter(realPrice[splitIndex:], predTestPrice, c='Red')
# Single-value output layer, then train on the prepared windows.
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xTrain, yTrain, epochs=100, batch_size=32)

# Join train and test closing prices so that each test row has the 40
# preceding values available as its input window.
datasetTotal = pd.concat(
    (pricesDatasetTrain['adj_close'], pricesDatasetTest['adj_close']), axis=0)
inputs = datasetTotal[len(datasetTotal) - len(pricesDatasetTest) - 40:].values
inputs = min_max_scaler.transform(inputs.reshape(-1, 1))

# One 40-value window per test row, with a trailing feature axis.
xTest = np.array([
    inputs[i - 40:i, 0] for i in range(40, len(pricesDatasetTest) + 40)
])
xTest = np.reshape(xTest, (xTest.shape[0], xTest.shape[1], 1))

# Predict and map back to price units.
predictions = min_max_scaler.inverse_transform(model.predict(xTest))

plt.plot(testset, color='blue', label='Actual S&P500 Prices')
plt.plot(predictions, color='green', label='LSTM Predictions')
plt.title('S&P500 Predictions with Recurrent Neural Network')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
saver = tf.train.Saver()

# Training session: one optimisation step per iteration on a fresh batch,
# printing the error on that batch every 100 iterations.
with tf.Session() as sesion:
    sesion.run(init)
    for iteracion in range(numero_iteraciones):
        lote_x, lote_y = lotes(entrenamiento_normalizado, tamaño_lote,
                               numero_pasos)
        sesion.run(entrenamiento, feed_dict={x: lote_x, y: lote_y})
        if iteracion % 100 == 0:
            error = funcion_error.eval(feed_dict={x: lote_x, y: lote_y})
            print(iteracion, "\t Error", error)

    # Write the checkpoint once, after training has finished. Saving inside
    # the loop rewrote the same file on every iteration; only the last write
    # is ever restored below.
    saver.save(sesion, "./modelo_series_temporales2")

# Generation session: seed with the last 18 normalised training values and
# append 18 predictions, each computed from the most recent numero_pasos
# values of the growing sequence.
with tf.Session() as sesion:
    saver.restore(sesion, "./modelo_series_temporales2")
    entrenamiento_seed = list(entrenamiento_normalizado[-18:])
    for iteracion in range(18):
        lote_x = np.array(entrenamiento_seed[-numero_pasos:]).reshape(
            1, numero_pasos, 1)
        prediccion_y = sesion.run(salidas, feed_dict={x: lote_x})
        # keep only the final time step of the model output
        entrenamiento_seed.append(prediccion_y[0, -1, 0])

# Drop the 18 seed values and map the 18 generated ones back to real units.
resultados = normalizacion.inverse_transform(
    np.array(entrenamiento_seed[18:]).reshape(18, 1))

conjunto_pruebas['predicciones'] = resultados
conjunto_pruebas

conjunto_pruebas.plot()
Example #19
0
class softArm:
    """Simulated soft-arm environment backed by a learned motor-to-shape model.

    Four motor positions (A-D) are fed through a Keras network that outputs a
    12-value vector. Positions and outputs are min-max scaled with scalers
    fitted on the calibration workbook. ``update`` applies one of four
    discrete actions, recomputes the predicted shape and derives a reward
    from the distance between output components 0 and 6 of the current and
    target shapes.

    NOTE(review): ``draw`` plots outputs 0-5 against the negated outputs
    6-11, so these are presumably x and y coordinates of six points along
    the arm; confirm against the workbook layout.
    """

    def __init__(self):
        # parameters
        self.name = 'softArm'
        self.enable_actions = (0, 1, 2, 3)
        self.action_bound = [[1900, 2500], [500, 1100], [500, 1100],
                             [1900, 2500]]
        self.load_model()
        self.reset()

    def load_model(self):
        """Read the workbook, fit both scalers and build the network.

        Columns 0-3 of the first sheet are the network inputs and columns
        4-15 the twelve outputs. The network weights are loaded from
        ``'220.h'``; no training happens here.
        """
        workbook = xlrd.open_workbook('Excel_Workbook220.xls')
        sheet1 = workbook.sheet_by_index(0)

        self.row_num = len(sheet1.col_values(0))

        # Inputs: sheet columns 0-3.
        self.input_data = np.zeros([self.row_num, 4])
        for col in range(4):
            self.input_data[:, col] = array(sheet1.col_values(col))

        self.minMax0 = MinMaxScaler()
        self.input_data1 = self.minMax0.fit_transform(self.input_data)

        # Outputs: sheet columns 4-15.
        output_data = np.zeros([self.row_num, 12])
        for col in range(12):
            output_data[:, col] = array(sheet1.col_values(col + 4))

        self.minMax2 = MinMaxScaler()
        self.output_data1 = self.minMax2.fit_transform(output_data)

        # Six hidden tanh layers of 64 units, then a 12-unit tanh output.
        self.model = keras.Sequential()
        self.model.add(
            keras.layers.Dense(64, input_shape=(4, ), activation='tanh'))
        for _ in range(5):
            self.model.add(keras.layers.Dense(64, activation='tanh'))
        self.model.add(keras.layers.Dense(12, activation='tanh'))
        opt = keras.optimizers.Adam(lr=0.001)
        self.model.compile(loss='mse', optimizer=opt, metrics=['accuracy'])
        # self.model.load_weights('123.h')
        # self.model.fit(self.input_data1,self.output_data1,epochs=10000,verbose=2)
        # self.model.save_weights("123.h")
        self.model.load_weights('220.h')

    def draw(self):
        """Plot the current shape, its first point and the target's first point."""
        # pyplot.hold() was removed in Matplotlib 3.0. Clearing the axes at
        # the start of each call gives the same "redraw from scratch" effect
        # the old hold(True)/hold(False) toggling produced.
        plt.cla()
        aa = self.minMax2.inverse_transform(self.current_pos)
        plt.plot(aa[0][0:6], -aa[0][6:12], '-')
        plt.plot(aa[0][0], -aa[0][6], 'o')
        aa = self.minMax2.inverse_transform(self.target)
        plt.plot(aa[0][0], -aa[0][6], '*')
        plt.title(
            str(self.motorA_pos) + ' ' + str(self.motorB_pos) + ' ' +
            str(self.motorC_pos) + ' ' + str(self.motorD_pos) + ' ' +
            str(self.reward))
        plt.pause(0.001)

    def update(self, action):
        """Apply ``action``, recompute the predicted shape and the reward.

        action:
            0: upper section move left
            1: upper section move right
            2: lower section move left
            3: lower section move right

        Sets ``current_pos``, ``last_pos``, ``reward``, ``terminal``,
        ``step`` and ``over_boundary``, and increments ``run_step``.
        """
        # update player position
        self.over_boundary = 0

        if action == self.enable_actions[0]:
            #A move left
            if self.motorA_pos <= 1900:
                self.over_boundary = 1
            elif self.motorB_pos == 500:
                # check whether the move can be executed; clamp at the bound
                if self.motorA_pos - self.step <= 1900:
                    self.motorA_pos = 1900
                else:
                    self.motorA_pos -= self.step
            else:
                # check whether the move can be executed; clamp at the bound
                if self.motorB_pos - self.step <= 500:
                    self.motorB_pos = 500
                else:
                    self.motorB_pos -= self.step

        elif action == self.enable_actions[1]:
            #A move right
            if self.motorB_pos >= 1100:
                self.over_boundary = 1
            elif self.motorA_pos >= 2500:
                # check whether the move can be executed; clamp at the bound
                if self.motorB_pos + self.step >= 1100:
                    self.motorB_pos = 1100
                else:
                    self.motorB_pos += self.step
            else:
                # check whether the move can be executed; clamp at the bound
                if self.motorA_pos + self.step >= 2500:
                    self.motorA_pos = 2500
                else:
                    self.motorA_pos += self.step

        elif action == self.enable_actions[2]:
            #B move left
            if self.motorC_pos >= 1100:
                self.over_boundary = 1
            elif self.motorD_pos >= 2500:
                # check whether the move can be executed; clamp at the bound
                if self.motorC_pos + self.step >= 1100:
                    self.motorC_pos = 1100
                else:
                    self.motorC_pos += self.step
            else:
                # check whether the move can be executed; clamp at the bound
                if self.motorD_pos + self.step >= 2500:
                    self.motorD_pos = 2500
                else:
                    self.motorD_pos += self.step
        elif action == self.enable_actions[3]:
            #B move right
            if self.motorD_pos <= 1900:
                self.over_boundary = 1
            elif self.motorC_pos <= 500:
                # check whether the move can be executed; clamp at the bound
                if self.motorD_pos - self.step <= 1900:
                    self.motorD_pos = 1900
                else:
                    self.motorD_pos -= self.step
            else:
                # check whether the move can be executed; clamp at the bound
                if self.motorC_pos - self.step <= 500:
                    self.motorC_pos = 500
                else:
                    self.motorC_pos -= self.step
        else:
            # do nothing
            print('?')
        # Sanity check: within each motor pair, at least one motor is
        # expected to sit at its rest bound.
        if self.motorA_pos != 2500 and self.motorB_pos != 500:
            print('position error')
        if self.motorC_pos != 500 and self.motorD_pos != 2500:
            print('position error')
        # collision detection
        self.reward = 0
        q = array([[
            self.motorA_pos, self.motorB_pos, self.motorC_pos, self.motorD_pos
        ]])
        input_ = self.minMax0.transform(q)
        self.current_pos = self.model.predict(input_)
        A_target = self.minMax2.inverse_transform(self.target)
        B_current_pos = self.minMax2.inverse_transform(self.current_pos)

        #-----------------last pos method---------------
        c_last_pos = self.minMax2.inverse_transform(self.last_pos)
        #--------------------------------------------------
        # self.distance = 0
        # self.cur_distance = 0
        self.last_distance = 0
        self.terminal_dis = 0

        # self.distance += abs(A[0][current_point] - B[0][current_point])  # reward is the absolute value of the distance
        #-----------------last pos method---------------------
        # Sum of absolute differences on output components 0 and 6, measured
        # in unscaled units, for the new position and for the previous one.
        self.terminal_dis = abs(A_target[0][0] -
                                B_current_pos[0][0]) + abs(A_target[0][6] -
                                                           B_current_pos[0][6])

        self.last_distance = abs(A_target[0][0] -
                                 c_last_pos[0][0]) + abs(A_target[0][6] -
                                                         c_last_pos[0][6])

        # self.distance = self.last_distance - self.terminal_dis
        self.last_pos = self.current_pos
        #-----------------------------------------------------------

        self.run_step += 1
        self.terminal = False

        if self.terminal_dis < 15:  #15
            # catch
            self.terminal = True
            self.reward = 2
        else:
            # Reward shaping: the closer to the target, the higher the reward
            # and the smaller the step used for the next move.
            if self.over_boundary == 1:
                self.reward = -1
            elif self.terminal_dis < 20:
                self.reward = 1.2 - 0.005 * self.terminal_dis
                self.step = 5
            elif self.terminal_dis < 50:
                self.reward = 1 - 0.005 * self.terminal_dis
                self.step = 10
            elif self.terminal_dis < 90:
                self.reward = 0.5 - 0.002 * self.terminal_dis
                self.step = 25
            elif self.terminal_dis < 150:
                self.reward = 0.1 - 0.001 * self.terminal_dis
                self.step = 50
            else:
                self.reward = -0.001 * self.terminal_dis
                self.step = 75

        # Episode length cap.
        if self.run_step > 150:
            self.terminal = True

    def observe(self):
        """Return ``(state, reward, terminal)``.

        ``state`` is a flat array of four values: components 0 and 6 of the
        (scaled) target followed by components 0 and 6 of the (scaled)
        current position.
        """
        # self.draw()
        envir = []
        target = np.array([self.target[0][0], self.target[0][6]])
        current_pos = np.array(
            [self.current_pos[0][0], self.current_pos[0][6]])
        envir.extend(target.flatten())
        envir.extend(current_pos.flatten())
        envir = np.array(envir)
        return envir, self.reward, self.terminal

    def execute_action(self, action):
        """Apply ``action``; thin wrapper around ``update``."""
        self.update(action)

    def reset(self):
        """Start a new episode from a random workbook row with a random target."""
        # reset player position
        start_pos = np.random.randint(0, self.row_num)
        self.motorA_pos = self.input_data[start_pos, 0]
        self.motorB_pos = self.input_data[start_pos, 1]
        self.motorC_pos = self.input_data[start_pos, 2]
        self.motorD_pos = self.input_data[start_pos, 3]
        end_pos = np.random.randint(0, self.row_num)
        # reset target position: the network output for another random row
        c = self.input_data[end_pos, 0]
        d = self.input_data[end_pos, 1]
        e = self.input_data[end_pos, 2]
        f = self.input_data[end_pos, 3]
        q = array([[c, d, e, f]])
        input_ = self.minMax0.transform(q)
        self.target = self.model.predict(input_)

        q = array([[
            self.motorA_pos, self.motorB_pos, self.motorC_pos, self.motorD_pos
        ]])
        input_ = self.minMax0.transform(q)
        self.current_pos = self.model.predict(input_)
        self.last_pos = self.current_pos
        # reset other variables
        self.reward = 0
        self.run_step = 0
        self.terminal = False
        self.distance = 0
        self.step = 10  # step size

    #function to get_pic from cv
    def get_from_cv(self):
        """Show camera frames with circles at the centres of detected blobs.

        Reads from camera 0 in a loop: crop, grayscale, binary threshold,
        morphological opening, contour detection, then a circle is drawn at
        the bounding-box centre of every contour except the first. The loop
        ends only when a frame can no longer be read (or an exception
        propagates); the camera is released in either case.
        """
        cap = cv2.VideoCapture(0)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        color = (0, 255, 0)
        try:
            # Read and discard the first 50 frames.
            for _ in range(50):
                ret, frame = cap.read()
            while True:
                # get a frame
                ret, frame = cap.read()
                if not ret:
                    # No frame available (camera unplugged / stream ended).
                    break
                # crop to the region of interest
                frame = frame[60:420, :630]
                # cv2.imshow("aaa",frame)

                gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
                # cv2.imshow("aaa",gray)
                # 2-mode
                ret1, binary = cv2.threshold(gray, 23, 255, cv2.THRESH_BINARY)
                # cv2.imshow("aaa",binary)

                # morphological opening
                opened_pic = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
                # cv2.imshow("aaa",opened_pic)
                # find the contours; the contour list is the second-to-last
                # return value in every OpenCV major version, whereas a fixed
                # 3-tuple unpack only works on 3.x.
                boundary = cv2.findContours(
                    opened_pic, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
                cv2.drawContours(opened_pic, boundary, -1, (0, 0, 255), 3)
                # cv2.imshow("bbb",opened_pic)

                # compute the coordinates: bounding-box centre of each
                # contour, skipping the first one as the original loop did
                for contour in boundary[1:]:
                    xs = contour[:, 0, 0]
                    ys = contour[:, 0, 1]
                    x_min, x_max = float(xs.min()), float(xs.max())
                    y_min, y_max = float(ys.min()), float(ys.max())
                    center = (int((x_max - x_min) / 2 + x_min),
                              int((y_max - y_min) / 2 + y_min))
                    cv2.circle(frame, center, 10, color)
                cv2.waitKey(50)
                cv2.imshow("get_characteristic", frame)
        finally:
            cap.release()
Example #20
0
                            mode='min',
                            period=1)
# Train the model, passing the save_best callback and validating on
# (X_val, y_val) each epoch.
ts_model.fit(x=X_train,
             y=y_train,
             batch_size=16,
             epochs=20,
             verbose=1,
             callbacks=[save_best],
             validation_data=(X_val, y_val),
             shuffle=True)

# Reload one specific saved model by its hard-coded file name from
# ~/Documents, predict on the validation inputs and map the result back to
# the original units.
completeName = 'PRSA_data_Air_Pressure_MLP_weights.11-0.0001.hdf5'
best_model = load_model(
    os.path.join(os.path.expanduser('~'), 'Documents', completeName))
preds = best_model.predict(X_val)
pred_PRES = scaler.inverse_transform(preds)
pred_PRES = np.squeeze(pred_PRES)

from sklearn.metrics import r2_score

# Compare against the actual PRES values from index label 7 onwards.
# NOTE(review): the offset of 7 presumably matches the input window length
# used to build X_val; confirm against the windowing code.
r2 = r2_score(df_val['PRES'].loc[7:], pred_PRES)
print('R-squared for the validation set:', round(r2, 4))

#Let's plot the first 50 actual and predicted values of air pressure.
plt.figure(figsize=(5.5, 5.5))
# .loc slicing is label-based and inclusive, so 7:56 selects 50 rows when the
# index labels are consecutive integers.
plt.plot(range(50),
         df_val['PRES'].loc[7:56],
         linestyle='-',
         marker='*',
         color='r')
plt.plot(range(50), pred_PRES[:50], linestyle='-', marker='.', color='b')
def lstm(data,neurons,look_back,ep,batch,ver,plot=False):
    """Fit a one-layer LSTM on a univariate series and score it on a hold-out tail.

    The series is min-max scaled to [0, 1], split 67% / 33% in time order,
    windowed with ``create_dataset`` and fed to ``LSTM(neurons) -> Dense(1)``.

    Args:
        data: object exposing ``.values`` (e.g. a pandas Series) with the series.
        neurons: number of LSTM units.
        look_back: window length passed to ``create_dataset`` and used as the
            feature dimension of the LSTM input.
        ep: number of training epochs.
        batch: training batch size.
        ver: Keras ``verbose`` level for ``fit``.
        plot: when true, plot the original series with train/test predictions.

    Returns:
        ``[explained_variance, mean_absolute_error, r2, pearson_r]`` for the
        test split. All four are computed on the scaled values, i.e. before
        the predictions are mapped back to the original units.
    """
    # load the dataset as a float32 column vector
    dataset = data.values.reshape(-1,1)
    dataset = dataset.astype('float32')

    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # split into train and test sets (chronological, no shuffling)
    train_size = int(len(dataset) * 0.67)
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

    # reshape into X=t and Y=t+1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=ep, batch_size=batch, verbose=ver)

    # make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # score the test split on the normalized scale
    print('Obs: ', look_back)
    ev=explained_variance_score(testY,testPredict)
    mae=mean_absolute_error(testY,testPredict)
    r2=r2_score(testY, testPredict)
    corr=pearsonr(testY,testPredict.reshape(-1))[0]

    # invert predictions back to the original units (only needed for plotting)
    trainPredict = scaler.inverse_transform(trainPredict)
    testPredict = scaler.inverse_transform(testPredict)

    # shift train predictions for plotting
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

    # shift test predictions for plotting; the offsets assume create_dataset
    # yields len(split) - look_back - 1 samples per split
    testPredictPlot = np.empty_like(dataset)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict

    if plot:
        # plot baseline and predictions
        plt.plot(scaler.inverse_transform(dataset))
        plt.plot(trainPredictPlot)
        plt.plot(testPredictPlot)
        plt.show()
    return [ev,mae,r2,corr]
# Build supervised windows for the train and test splits, passing the matching
# slice of the sentiment series to create_dataset (sent=True).
trainX, trainY = create_dataset(train, look_back, sentiment[0:train_size],sent=True)
testX, testY = create_dataset(test, look_back, sentiment[train_size:len(scaled)], sent=True)

# Reshape to [samples, 1 time step, features] for the LSTM.
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# Two stacked 100-unit LSTM layers followed by a single-output dense layer.
model = Sequential()
model.add(LSTM(100, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(LSTM(100))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
# The test split doubles as validation data here; shuffle=False keeps time order.
history = model.fit(trainX, trainY, epochs=300, batch_size=100, validation_data=(testX, testY), verbose=0, shuffle=False)

yhat = model.predict(testX)

# Map predictions and targets back through the scaler before scoring.
yhat_inverse_sent = scaler.inverse_transform(yhat.reshape(-1, 1))
testY_inverse_sent = scaler.inverse_transform(testY.reshape(-1, 1))

rmse_sent = sqrt(mean_squared_error(testY_inverse_sent, yhat_inverse_sent))
print('Test RMSE: %.3f' % rmse_sent)

import MySQLdb
# Enter the values for your database connection.
# NOTE(review): host, user and password are hard-coded in source; consider
# loading them from the environment or a config file kept out of version control.
dsn_database = "bitcoin"         # e.g. "MySQLdbtest"
dsn_hostname = "173.194.231.244"      # e.g.: "mydbinstance.xyz.us-east-1.rds.amazonaws.com"
dsn_port = 3306                  # e.g. 3306
dsn_uid = "demo"             # e.g. "user1"
dsn_pwd = "qwerty@123"              # e.g. "Password123"

# Open the connection at import time; nothing in this fragment closes it.
conn = MySQLdb.connect(host=dsn_hostname, port=dsn_port, user=dsn_uid, passwd=dsn_pwd, db=dsn_database)
# Final single-unit output layer of the regressor built above this fragment.
regressor.add(Dense(1))

regressor.compile(optimizer="adam",loss = "mse")
regressor.fit(x_train,y_train,epochs=100,batch_size=64)

# Take the test period plus the `timestep` rows before it, so the first test
# prediction has a full history window, and scale with the already-fitted scaler.
inputs = data[len(data)-len(test_data)-timestep:]
inputs = inputs.values.reshape(-1,1)
inputs = scaler.transform(inputs)

# One window of `timestep` consecutive scaled values per test row.
x_test = []
for i in range(timestep,inputs.shape[0]):
    x_test.append(inputs[i-timestep:i,0])
x_test = np.array(x_test)
# Reshape to [samples, time steps, 1 feature].
x_test = x_test.reshape(x_test.shape[0],x_test.shape[1],1)

# Predict and map the outputs back through the scaler.
predicted_data = regressor.predict(x_test)
predicted_data = scaler.inverse_transform(predicted_data)

# Ground truth as a column vector so it lines up with predicted_data.
data_test = np.array(test_data)
data_test = data_test.reshape(len(data_test),1)

# Visualizing results: true values in red, predictions in blue.

plt.figure(figsize = (8,4))
plt.plot(data_test,color = "r",label = "true result")
plt.plot(predicted_data,color = "b",label = "predicted result")
plt.legend(loc = "best")
plt.xlabel("Time")
plt.ylabel("Bitcoin Volume")
plt.show()
Example #24
0
# Fit the scaler `sc` (created above this fragment) and scale the features.
X = sc.fit_transform(X)

# Training the SOM: 10x10 grid, 15 input features, 100 random-sample iterations.
from minisom import MiniSom
som = MiniSom(x = 10, y = 10, input_len = 15, sigma = 1.0, learning_rate = 0.5)
som.random_weights_init(X)
som.train_random(data = X, num_iteration = 100)

# Visualizing the results: distance map as background, one marker per sample
# drawn at the centre of its winning node.
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
# y[i] is used as an index into both lists, so it must be 0 or 1.
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markeredgecolor = colors[y[i]],
         markerfacecolor = 'None',
         markersize = 10,
         markeredgewidth = 2)
show()

# Finding the frauds: collect the samples mapped to two specific nodes and
# bring them back to the original feature scale.
# NOTE(review): the node coordinates (8,1) and (6,8) are hard-coded, presumably
# read off the distance map of one particular run - confirm after retraining.
mappings = som.win_map(X)
frauds = np.concatenate((mappings[(8,1)], mappings[(6,8)]), axis = 0)
frauds = sc.inverse_transform(frauds)
# Scale the "a" and "v" input series with the already-fitted scaler
# (inputs_r is prepared above this fragment).
inputs_a = inputs_a.reshape(-1, 1)
inputs_a = scaler.transform(inputs_a)
# Validation period plus the 20 rows before it, so every row has a full window.
inputs_v = g_dv[len(g_dv) - len(valid) - 20:].values
inputs_v = inputs_v.reshape(-1, 1)
inputs_v = scaler.transform(inputs_v)
X_test_r = []
X_test_a = []
X_test_v = []
# Build 20-step windows for all three series in lockstep; the loop bound comes
# from inputs_r, so inputs_a and inputs_v are assumed to have the same length.
for i in range(20, inputs_r.shape[0]):
    X_test_r.append(inputs_r[i - 20:i, 0])
    X_test_a.append(inputs_a[i - 20:i, 0])
    X_test_v.append(inputs_v[i - 20:i, 0])
# For each series: reshape to [samples, 20, 1], predict with the same model,
# and map the output back through the same scaler.
X_test_r = np.array(X_test_r)
X_test_r = np.reshape(X_test_r, (X_test_r.shape[0], X_test_r.shape[1], 1))
growth_r = model.predict(X_test_r)
growth_r = scaler.inverse_transform(growth_r)
X_test_a = np.array(X_test_a)
X_test_a = np.reshape(X_test_a, (X_test_a.shape[0], X_test_a.shape[1], 1))
growth_a = model.predict(X_test_a)
growth_a = scaler.inverse_transform(growth_a)
X_test_v = np.array(X_test_v)
X_test_v = np.reshape(X_test_v, (X_test_v.shape[0], X_test_v.shape[1], 1))
growth_v = model.predict(X_test_v)
growth_v = scaler.inverse_transform(growth_v)
# Boolean masks marking where one series is strictly greater than both others
# (element-wise product of two comparisons acts as a logical AND).
state_a = np.multiply(g_da > g_dr, g_da > g_dv)
state_r = np.multiply(g_dr > g_da, g_dr > g_dv)
state_v = np.multiply(g_dv > g_dr, g_dv > g_da)
# dictionary for encoding and decoding seq.
table = {0: 'A', 1: 'R', 2: 'V'}
# initializing data set, one hot encoded seq. (`1 *` turns the booleans into ints;
# a row is all zeros when no series is strictly the largest)
state = 1 * np.column_stack((state_a, state_r, state_v))
Example #26
0
# Drop the column at position 7 of the reframed frame, in place.
reframed.drop(reframed.columns[[7]], axis=1, inplace=True)

values = reframed.values
# Keep the rows from position 6 onward and reshape to [samples, 1, features].
x_test = values[6:, :]
x_test = x_test.reshape((x_test.shape[0],1,x_test.shape[1]))


def aggValue(x_test, nVal):
    """Slide the first sample's feature window one step left and append nVal.

    Only ``x_test[0][0]`` is modified, in place; every other sample is left
    untouched. The same array object is returned for convenience.
    """
    window = x_test[0][0]
    last = x_test.shape[2] - 1
    # Drop the oldest value; copy the source slice so the overlapping
    # assignment is unambiguous.
    window[:last] = window[1:last + 1].copy()
    # The newest value goes into the freed final slot.
    window[last] = nVal
    return x_test

# Recursive 7-step forecast: predict, record the first output, then feed it
# back into the first sample's window via aggValue before the next step.
results=[]
for i in range(7):
    parcial = model.predict(x_test)
    results.append(parcial[0])
    x_test=aggValue(x_test, parcial[0])

# Shallow copy of the collected predictions, mapped back through the scaler.
adimen = [x for x in results]
inverted = scaler.inverse_transform(adimen)

# Plot the forecast ('Pronostico') next to the values of df for
# 2017-12-01 .. 2017-12-08.
prediccion1SemanaDic = pd.DataFrame(inverted)
prediccion1SemanaDic.columns = ['Pronostico']
prediccion1SemanaDic.plot()
diciembre2017 = df['2017-12-01':'2017-12-08']
plt.plot(diciembre2017.values, color='red', label='2017')
plt.legend()
plt.show()

Example #27
0
    # Get the final accuracy scores by running the "cost" operation on the training and test data sets
    final_training_cost = session.run(cost,
                                      feed_dict={
                                          X: X_scaled_training,
                                          Y: Y_scaled_training
                                      })
    final_testing_cost = session.run(cost,
                                     feed_dict={
                                         X: X_scaled_testing,
                                         Y: Y_scaled_testing
                                     })

    print("Final Training cost: {}".format(final_training_cost))
    print("Final Testing cost: {}".format(final_testing_cost))

    # Now that the neural network is trained, let's use it to make predictions for our test data.
    # Pass in the X testing data and run the "prediciton" operation
    Y_predicted_scaled = session.run(prediction,
                                     feed_dict={X: X_scaled_testing})

    # Unscale the data back to it's original units (dollars)
    Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

    real_earnings = test_data_df['total_earnings'].values[0]
    predicted_earnings = Y_predicted[0][0]

    print("The actual earnings of Game #1 were ${}".format(real_earnings))
    print("Our neural network predicted earnings of ${}".format(
        predicted_earnings))
Example #28
0
# Build the test feature set: each sample holds the 60 scaled values that
# immediately precede position i.
for i in range(60,len(test_data)):
    x_test.append(test_data[i-60:i,0])
# Convert to a NumPy array so it can be reshaped and passed to the model.
x_test = np.array(x_test)

# An LSTM expects 3-D input in the form [samples, time steps, features]:
#   samples    - number of windows built above,
#   time steps - length of each window (60),
#   features   - 1, because a single column is used per time step.
x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1))

# Predict and undo the scaling so the values are back in the original units.
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Root mean squared error of the predictions; lower means a better fit.
# NOTE(review): predictions is 2-D (n, 1); y_test must have the same shape,
# otherwise the subtraction broadcasts to (n, n) - confirm where y_test is built.
rmse=np.sqrt(np.mean((predictions- y_test)**2))
rmse

# Split the frame for plotting: rows before training_data_len were used for
# training, the remainder is the validation period.
train = data[:training_data_len]
# Take an explicit copy so adding a column does not write into a slice of
# `data` (avoids pandas' chained-assignment warning).
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
Example #29
0
# Take the test period plus the 60 rows before it, so the first test row has a
# full 60-step history.
test_inputs = apple_total[len(apple_total) - len(apple_testing_complete) -
                          60:].values

# Column vector, scaled with the already-fitted scaler.
test_inputs = test_inputs.reshape(-1, 1)
test_inputs = scaler.transform(test_inputs)

# One 60-step window per test row.
# NOTE(review): the upper bound 80 hard-codes exactly 20 test rows; presumably
# it should follow len(test_inputs) - confirm against apple_testing_complete.
test_features = []
for i in range(60, 80):
    test_features.append(test_inputs[i - 60:i, 0])

# Reshape to [samples, time steps, 1 feature].
test_features = np.array(test_features)
test_features = np.reshape(test_features,
                           (test_features.shape[0], test_features.shape[1], 1))

## Making Predictions

predictions = model.predict(test_features)

# Undo the scaling so the predictions are in the original units.
predictions = scaler.inverse_transform(predictions)

# Actual values in blue, predictions in red.
plt.figure(figsize=(10, 6))
plt.plot(apple_testing_processed,
         color='blue',
         label='Actual Apple Stock Price')
plt.plot(predictions, color='red', label='Predicted Apple Stock Price')
plt.title('Apple Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Apple Stock Price')
plt.legend()
plt.show()
class PreprocessingDenga:
    """Feature and target preprocessing for the two-city dengue data set.

    The constructor learns everything that depends on the training data
    (NaN fill values, city encoder, week encoder, feature and target scalers);
    ``basic_X_preprocessing`` then replays the same steps on new data.
    """

    def __init__(self, data_X, data_y, nan_fill_method='mean'):
        """Fit all encoders/scalers on the training features and targets.

        Args:
            data_X: DataFrame with at least the columns ``week_start_date``,
                ``year``, ``city``, ``weekofyear`` and the two redundant
                ``reanalysis_*`` columns dropped below.
            data_y: DataFrame containing a ``total_cases`` column.
            nan_fill_method: ``'mean'`` fills NaNs with the column means of the
                training data; any other value leaves NaNs untouched.
        """
        self.raw_data = data_X

        processing_data = self.raw_data.copy()
        processing_data.set_index('week_start_date', inplace=True)
        processing_data.drop('year', axis=1, inplace=True)

        # Drop redundant columns
        processing_data.drop('reanalysis_sat_precip_amt_mm',
                             axis=1,
                             inplace=True)
        processing_data.drop('reanalysis_specific_humidity_g_per_kg',
                             axis=1,
                             inplace=True)

        # Fill nan values; the fill values are kept so new data is filled
        # with the training statistics.
        self.values_to_nan_fill = None
        if nan_fill_method == 'mean':
            self.values_to_nan_fill = processing_data.mean()
        if self.values_to_nan_fill is not None:
            processing_data.fillna(self.values_to_nan_fill, inplace=True)

        # Convert city name to numerical, and week_of_year to circle
        self.train_city_index = processing_data.keys().get_loc('city')
        values = processing_data.values
        self.encoder = LabelBinarizer()
        city = self.encoder.fit_transform(values[:, self.train_city_index])
        city_encoded = np.hstack(
            (city, 1 - city))  # convert 1 column to 2 column each for one city

        self.train_week_index = processing_data.keys().get_loc('weekofyear')
        week_values = values[:, self.train_week_index]
        self.encoder_of_weeks = CircleTransform(week_values)
        week_sin, week_cos = self.encoder_of_weeks.transform(week_values)
        week_sin_cos = np.column_stack((week_sin, week_cos))

        # Replace the raw city/week columns with their encoded versions, which
        # are placed in front of the remaining features.
        values = np.delete(values,
                           np.s_[self.train_city_index, self.train_week_index],
                           axis=1)
        values = np.hstack((city_encoded, week_sin_cos, values))
        values = values.astype('float32')

        # Get final count of features
        self.features_count = values.shape[
            1]  # Warning: Shape may be dependent of transformations before, so check it.

        # normalize features
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.scaler.fit(values)
        self.scaled = self.scaler.transform(
            values)  # TODO: Scaled to normal_version

        # data_y preprocessing:
        self.data_y = data_y['total_cases']
        self.y_scaler = MinMaxScaler(feature_range=(0, 1))
        self.y_scaler.fit(self.data_y.values.reshape(-1, 1))

    def basic_X_preprocessing(self, given_data):
        """Apply the preprocessing learned in ``__init__`` to new feature data.

        Args:
            given_data: DataFrame with the same columns as the training frame.

        Returns:
            The scaled feature matrix laid out as
            ``[city, 1 - city, week_sin, week_cos, remaining features...]``.
        """
        processing_data = given_data.copy()
        processing_data.set_index('week_start_date', inplace=True)
        processing_data.drop('year', axis=1, inplace=True)
        processing_data.drop('reanalysis_sat_precip_amt_mm',
                             axis=1,
                             inplace=True)
        processing_data.drop('reanalysis_specific_humidity_g_per_kg',
                             axis=1,
                             inplace=True)
        if self.values_to_nan_fill is not None:
            processing_data.fillna(self.values_to_nan_fill, inplace=True)

        # Transform of a city and a week attribute:
        city_index = processing_data.keys().get_loc('city')
        values = processing_data.values
        city = self.encoder.transform(values[:, city_index])
        city_encoded = np.hstack((city, 1 - city))

        # Read the week column at its position in *this* frame; the position
        # remembered from training may differ if the column order does.
        week_index = processing_data.keys().get_loc('weekofyear')
        week_values = values[:, week_index]
        week_sin, week_cos = self.encoder_of_weeks.transform(week_values)
        week_sin_cos = np.column_stack((week_sin, week_cos))

        values = np.delete(values, np.s_[city_index, week_index], axis=1)
        values = np.hstack((city_encoded, week_sin_cos, values))

        scaled = self.scaler.transform(values)
        return scaled

    def get_truncated_data_X(self, data_X_prepared, truncated_backprop_length):
        """Delegate to ``PreprocessingSeries.prepare_for_truncated_backpropagation``
        with a history of ``truncated_backprop_length - 1`` and offset 0."""
        data_X_truncated = PreprocessingSeries.prepare_for_truncated_backpropagation(
            data_X_prepared, truncated_backprop_length - 1, 0)
        return data_X_truncated

    def get_split_data_X_by_city(self, data_X):
        """Split a feature matrix into the rows of the first and second city.

        The split point is found in column ``self.train_city_index`` of
        ``data_X``; see ``get_city_split_index``.
        """
        index_of_first_occurrence_next_city = self.get_city_split_index(
            data_X, self.train_city_index)
        city1 = data_X[:index_of_first_occurrence_next_city, :]
        city2 = data_X[index_of_first_occurrence_next_city:, :]
        return city1, city2

    def get_split_and_truncated_data_y_by_city(self, data_y,
                                               truncated_backprop_length):
        """Split ``total_cases`` by city and drop rows lacking enough history.

        Args:
            data_y: DataFrame with ``city`` and ``total_cases`` columns.
            truncated_backprop_length: window length; the first
                ``truncated_backprop_length - 1`` rows of each city are removed.

        Returns:
            ``(y_city_1, y_city_2)``.
        """
        index_of_first_occurrence_next_city = self.get_city_split_index(data_y)
        data_y = data_y['total_cases']
        y_city_1 = data_y[:index_of_first_occurrence_next_city]
        y_city_2 = data_y[index_of_first_occurrence_next_city:]

        # Delete rows with not enough history
        y_city_1 = y_city_1[truncated_backprop_length - 1:]
        y_city_2 = y_city_2[truncated_backprop_length - 1:]
        return y_city_1, y_city_2

    def get_normalized_y(self, given_data_y):
        """Scale targets with the fitted target scaler and return them as 1-D."""
        data_y_norm = self.y_scaler.transform(given_data_y.reshape(-1, 1))
        data_y_norm_and_shape = data_y_norm.reshape(-1)
        return data_y_norm_and_shape

    def get_inverse_y(self, given_y):
        """Map scaled targets back to case counts (truncated to int, 1-D)."""
        y_result_not_shaped = self.y_scaler.inverse_transform(given_y)
        y_result = y_result_not_shaped.astype(int).reshape(-1)
        return y_result

    # TODO: Refactor
    def get_city_split_index(self, data, index=float('nan')):
        """Return the row position of the first entry whose city value is 0.

        Args:
            data: DataFrame (the ``city`` column is looked up when ``index`` is
                omitted) or 2-D array.
            index: column position of the city values; required for arrays.

        Returns:
            Position of the first row whose (encoded) city value equals 0.
            Because ``np.argmax`` is used, 0 is also returned when no such row
            exists.

        Raises:
            AttributeError: if ``index`` is omitted and ``data`` is not a
                DataFrame.
        """
        if math.isnan(index) and not isinstance(data, pd.DataFrame):
            raise AttributeError("Wrong arguments to split_data")
        if math.isnan(index):
            index = data.keys().get_loc('city')
        if isinstance(data, pd.DataFrame):
            data = data.values
        values_column = data[:, index]

        # Non-float columns still hold raw city labels; encode them first.
        if values_column.dtype not in [
                np.dtype(np.float16),
                np.dtype(np.float32),
                np.dtype(np.float64)
        ]:
            values_column = self.encoder.transform(values_column)

        index = np.argmax(values_column == 0)
        return index
Example #31
0
# Loss of the trained model on the scaled test set.
loss = model.evaluate(test_x_scale, test_y_norm)
print("loss (MSE):", loss)

# R2 coefficient
r2 = r2_score(test_y_norm, testPredict)
print("R2:", r2)

import math
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
# calculate root mean squared error (on the normalized targets, printed x100)
trainScore = math.sqrt(mean_squared_error(train_y_norm, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore * 100))
testScore = math.sqrt(mean_squared_error(test_y_norm, testPredict))
print('Test Score: %.2f RMSE' % (testScore * 100))

# NOTE(review): the values below are labelled MAPE but are computed with
# mean_absolute_error, i.e. they are mean absolute errors, not percentages.
# They also compare train_y / test_y (not the *_norm arrays used above) with
# the predictions - presumably a scale mismatch; confirm.
trainMAPE = mean_absolute_error(train_y, trainPredict)
print('train MAPE: %.2f MAPE' % (trainMAPE * 100))
testMAPE = mean_absolute_error(test_y, testPredict)
print('test MAPE: %.2f MAPE' % (testMAPE * 100))

# Get something which has as many features as dataset
# (assumes the scaler was fitted on 5 columns - TODO confirm)
testPredict_extended = np.zeros((len(testPredict), 5))
# Put the predictions there
testPredict_extended[:, 4] = testPredict[:, 0]
# Inverse transform it and select the 5th column (index 4).
testPredict = scaler.inverse_transform(testPredict_extended)[:, 4]
print('testPredict', testPredict)

# Persist the trained model to the working directory.
model.save("prediction.h5")
Example #32
0
model.add(LSTM(
    20,
    input_shape=(1,
                 1)))  # (timestep, feature); layer added with add(): input layer and first hidden layer
model.add(Dense(1))  # output layer (single unit)
# Compile the model (make it executable): loss function and optimization method
# NOTE(review): 'accuracy' is presumably not meaningful for this MSE regression - confirm.
model.compile(loss='mean_squared_error',
              optimizer='adam',
              metrics=['accuracy'])  # metrics: what is reported while the model runs
model.fit(trainX, trainY, epochs=1000, batch_size=50,
          verbose=2)  # actually train the model; batch_size: 50 samples per update
# verbose (logging) 2: one line per epoch

# Evaluate the predictions (how accurate they are)
testPredict = model.predict(testX)
testPredict = scaler.inverse_transform(testPredict)  # testPredict: predicted values
testY = scaler.inverse_transform(testY)  # testY: actual values
testScore = math.sqrt(mean_squared_error(
    testY, testPredict))  # sqrt of mean_squared_error: root-mean-square error
# Print the gap between predicted and actual values.
# NOTE(review): the label says 'Train Score' but the value is the test-set RMSE.
print('Train Score: %.2f RMSE' % testScore)

# Print the forecast: feed the last row of nptf back into the model
lastX = nptf[-1]
lastX = np.reshape(lastX, (1, 1, 1))
lastY = model.predict(lastX)
predict = scaler.inverse_transform(lastY)  # invert the normalization
print('Predict the Close value of final day: %d' %
      predict)  # predicted close for the day after the last input row

# Plot the chart and save it
plt.plot(testPredict)
Example #33
0
def DayAheadMLP(endog,
                exog,
                date,
                lags=range(1, 169),
                hoursAhead=38,
                epochs=200,
                activation='relu',
                optimizer='adam',
                loss='mse',
                verbose=0):
    """
    Trains a fresh MLP and returns a multi-hour-ahead forecast
    endog(pd.Series): Series (usually corresponding to a cluster) of hourly indexed load data
    exog(pd.DataFrame): DataFrame of covariates if applicable
    date(date-like obj): Hour after which to begin the hoursAhead-hour forecast

    lags(List): List of lag features to generate (default=range(1,169))
    hoursAhead(int): number of hours ahead to predict (default=38)
    epochs(int): number of epochs for training (default=200)
    activation(str): key for activation function (default='relu')
    optimizer(str): key for optimizer (default='adam')
    loss(str): key for loss function (default='mse')
    verbose(int): Keras verbosity level used while fitting (default=0)

    Returns a flat array with the forecast in the original units of endog
    (hoursAhead values when exactly one row of the prepared data matches date).
    """
    # force DataFrame dtype for y, copy X
    y = pd.DataFrame(endog)
    X = exog.copy(deep=True)
    testDate = pd.to_datetime(date)

    # scale y (0,1) on annual min and max, important assumption
    scaler = MinMaxScaler().fit(y)
    y = pd.DataFrame(data=scaler.transform(y),
                     index=y.index,
                     columns=y.columns)

    # lagged loads become features; the current value plus the next
    # hoursAhead-1 values become the hoursAhead target columns
    for i in lags:
        X['t-' + str(i)] = y.iloc[:, 0].shift(i)
    for j in range(1, hoursAhead):
        y['t+' + str(j)] = y.iloc[:, 0].shift(-j)

    # truncate on both sides to remove NaNs
    X.dropna(inplace=True)
    y = y.reindex(X.index, axis=0).dropna()
    X = X.reindex(y.index, axis=0).dropna()

    # train/test split, train range includes all available data up to two days prior to test date
    y_train = y.loc[y.index < testDate - pd.Timedelta(days=2)].values
    X_train = X.loc[X.index < testDate - pd.Timedelta(days=2)].values
    X_test = X.loc[X.index == testDate].values

    # set input and hidden layer dimensions (hidden width is halfway between
    # the input and output widths)
    inputDim = X_train.shape[1]
    hiddenDim = (inputDim - hoursAhead) // 2 + hoursAhead

    # define model based on hyperparameters
    model = Sequential()
    model.add(Dense(hiddenDim, activation=activation, input_dim=inputDim))
    model.add(Dense(hoursAhead))
    model.compile(optimizer=optimizer, loss=loss)

    # fit the model and make a prediction
    model.fit(X_train, y_train, epochs=epochs, verbose=verbose)
    y_pred = model.predict(X_test, verbose=0)

    # return result after reverse scaling; the scaler was fitted on a single
    # column, so present the predictions to it as one column
    return scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
Example #34
0
class NeuralNetwork(BaseEstimator, RegressorMixin, object):

    _maxSteps = None
    _maxNonChangingSteps = None
    _learningRate = None
    _shrinkage = None
    _architecture = None
    _momentum = None
    _useDropout = None
    _alphaStandout = None
    _betaStandout = None
    _warmStart = None
    _batchSize = None

    _weights = None
    _inputDimension = None
    _outputDimension = None
    _step = None
    _lastDelta = None

    _percentageDropout = None

    _inputNormalizer = None
    _outputNormalizer = None

    def __init__(self, maxSteps=50, maxNonChangingSteps=5, learningRate=1e-6, shrinkage=0.9, architecture=[10],
                 momentum=0.7, useDropout=False, alphaStandout=0, betaStandout=0.5, warmStart=False,
                 startingWeights=None, batchSize = 1, step = 0, lastDelta = None, percentageDropout=1):
        """Store the hyper-parameters and any warm-start state on the estimator.

        Every argument is stored unmodified in the matching ``_name``
        attribute; ``startingWeights`` is stored as ``_weights``. The
        ``architecture`` list is kept by reference and must not be mutated.
        ``lastDelta`` is stored as given so that a value passed to the
        constructor, or round-tripped through ``get_params``, is not lost.
        """
        self._maxSteps = maxSteps
        self._maxNonChangingSteps = maxNonChangingSteps
        self._learningRate = learningRate
        self._shrinkage = shrinkage
        self._architecture = architecture
        self._momentum = momentum
        self._useDropout = useDropout
        self._alphaStandout = alphaStandout
        self._betaStandout = betaStandout
        self._warmStart = warmStart
        self._weights = startingWeights
        self._batchSize = batchSize
        self._step = step
        self._lastDelta = lastDelta
        self._percentageDropout = percentageDropout

    def get_params(self, deep=True):
        """Return the current hyper-parameters keyed by constructor argument name.

        ``deep`` is accepted for interface compatibility and is not used.
        """
        return {
            "maxSteps": self._maxSteps,
            "maxNonChangingSteps": self._maxNonChangingSteps,
            "learningRate": self._learningRate,
            "shrinkage": self._shrinkage,
            "architecture": self._architecture,
            "momentum": self._momentum,
            "useDropout": self._useDropout,
            "alphaStandout": self._alphaStandout,
            "betaStandout": self._betaStandout,
            "warmStart": self._warmStart,
            "batchSize": self._batchSize,
            "step": self._step,
            "lastDelta": self._lastDelta,
            "percentageDropout": self._percentageDropout,
        }

    def _initializeWeights(self, randomState=0):
        """Create one weight matrix per layer transition and fill it randomly.

        The matrix for transition ``k`` has shape ``(sizes[k] + 1, sizes[k+1])``
        (the extra row belongs to the bias neuron). Each entry is drawn from
        U(0, 1) and divided by the square root of ``sizes[k+1]``.
        """
        rng = check_random_state(randomState)
        layer_sizes = self.neuronsPerLayer()

        self._weights = [
            np.ones(shape=(fan_in + 1, fan_out))
            for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]
        for layer, matrix in enumerate(self._weights):
            # Starting weights are set randomly, scaled by the width of the
            # layer they feed into. Compare lecture 17, neuralnetworks slide 10.
            scale = layer_sizes[layer + 1] ** 0.5
            for row in range(len(matrix)):
                for col in range(len(matrix[row])):
                    matrix[row][col] = rng.uniform(0, 1) / scale

    def _batchify(self, X, batchSize, y=None):
        """Cut X (and optionally y) into column-major mini-batches.

        Args:
            X: 2-D array of examples, one row per example; the first
                ``self._inputDimension`` columns are used.
            batchSize: examples per batch; 0 puts all examples in one batch.
            y: optional 2-D array of targets, one row per example; the first
                ``self._outputDimension`` columns are used.

        Returns:
            ``batchFeatures`` - a list of ``(inputDimension + 1, n)`` matrices
            whose i-th column is the i-th example of the batch and whose last
            row is all ones (bias input). When ``y`` is given, the tuple
            ``(batchFeatures, batchTargets)`` is returned, where each target
            matrix has shape ``(outputDimension, n)``.
        """
        # `y is not None` rather than `y == None`: comparing an ndarray with
        # == is element-wise, so its truth value is ambiguous.
        hasTargets = y is not None

        batchFeatures = []
        batchTargets = []
        total = len(X)
        index = 0
        while index < total:
            if batchSize != 0:
                numberExamples = min(batchSize, total - index)
            else:
                numberExamples = total
            stop = index + numberExamples

            # Rows are features, columns are examples; the extra last row stays
            # at 1 and acts as the bias input.
            features = np.ones(shape=(self._inputDimension + 1, numberExamples))
            features[:self._inputDimension, :] = X[index:stop, :self._inputDimension].T
            batchFeatures.append(features)

            if hasTargets:
                targets = np.zeros(shape=(self._outputDimension, numberExamples))
                targets[:, :] = y[index:stop, :self._outputDimension].T
                batchTargets.append(targets)

            index = stop

        if hasTargets:
            return batchFeatures, batchTargets
        return batchFeatures

    def neuronsPerLayer(self):
        """Return the layer widths: input size, hidden sizes, output size (bias neurons excluded)."""
        return [self._inputDimension, *self._architecture, self._outputDimension]


    def set_params(self, **parameters):
        """Update hyper-parameters by name and return ``self``.

        Names that are not known hyper-parameters are ignored silently.
        """
        attribute_for = {
            'maxSteps': '_maxSteps',
            'maxNonChangingSteps': '_maxNonChangingSteps',
            'learningRate': '_learningRate',
            'shrinkage': '_shrinkage',
            'architecture': '_architecture',
            'momentum': '_momentum',
            'useDropout': '_useDropout',
            'alphaStandout': '_alphaStandout',
            'betaStandout': '_betaStandout',
            'warmStart': '_warmStart',
            'batchSize': '_batchSize',
            'step': '_step',
            'lastDelta': '_lastDelta',
            'percentageDropout': '_percentageDropout',
        }
        for name, value in parameters.items():
            target = attribute_for.get(name)
            if target is not None:
                setattr(self, target, value)

        return self

    def calcLayerOutputsBatch(self, batchFeatures, doDropout, randomState = 0):
        """Forward-propagate one batch and return every layer's output.

        Args:
            batchFeatures: matrix with one column per example (its second
                dimension is used as the number of examples); used as the
                output of layer 0.
            doDropout: when true (and ``self._useDropout`` is set), the dropout
                probabilities are turned into random 0/1 masks; otherwise the
                probabilities themselves scale the hidden outputs.
            randomState: validated with ``check_random_state`` but not used for
                sampling - the masks are drawn with the global ``np.random.rand``.
                NOTE(review): presumably unintended; confirm before relying on
                reproducibility.

        Returns:
            ``(outputsPerLayer, dropoutVectors)``; both branches at the end
            return the same pair regardless of ``doDropout``.
        """
        randomState = check_random_state(randomState)

        dropoutVectors = []

        # One all-ones dropout vector per layer; layers other than the output
        # layer get an extra row for the bias neuron.
        numExamples = batchFeatures.shape[1]
        for k in range(len(self.neuronsPerLayer())):
            if (k != len(self.neuronsPerLayer())-1):
                #if a bias neuron exists.
                dropoutVectors.append(np.ones((self.neuronsPerLayer()[k]+1, numExamples)))
            else:
                #else.
                dropoutVectors.append(np.ones((self.neuronsPerLayer()[k], numExamples)))

        outputsPerLayer = []
        outputsPerLayer.append(batchFeatures)
        for k in range(0, len(self._weights)): #All the same except for the output layer.
            if (k == len(self._weights)-1): # Do not append the bias.
                # Earlier (disabled) variants of this line:
                #outputsPerLayer.append(np.maximum(np.matrix(np.dot(self._weights[k].transpose(), outputsPerLayer[k])), 0))
                #outputsPerLayer.append(self.sigmoid(np.dot(self._weights[k].transpose(), outputsPerLayer[k])))
                outputsPerLayer.append(self.sigmoid(np.dot(self._weights[k].transpose(), outputsPerLayer[k])))

            else: #Do append the bias neuron.
                # Start from all ones so the last row (bias) stays 1, then
                # overwrite every other row with the activations.
                outputsPerLayer.append(np.ones((self.neuronsPerLayer()[k+1]+1, numExamples)))
                inputThisLayer = np.dot(self._weights[k].transpose(), outputsPerLayer[k])
                #outputsPerLayer[k+1][:self.neuronsPerLayer()[k+1]] = np.maximum(inputThisLayer[:self.neuronsPerLayer()[k+1]], 0)
                outputsPerLayer[k+1][:-1] = self.sigmoid(inputThisLayer)
                if (self._useDropout):
                    # Only the first dropoutNeuronNumber neurons of the layer get
                    # a computed keep value (clipped to [0, 1]); the rest keep 1.
                    dropoutNeuronNumber = int(self.neuronsPerLayer()[k+1]*self._percentageDropout)
                    dropoutVectors[k+1][:dropoutNeuronNumber] = np.clip(self.sigmoidStandout(self._alphaStandout * inputThisLayer + self._betaStandout), 0, 1)[:dropoutNeuronNumber]
                    if (doDropout):
                        # Sample a boolean mask: keep where value > uniform noise.
                        # Precedence: (ones * vector) > rand.
                        dropoutVectors[k+1] = np.ones((dropoutVectors[k+1].shape[0], dropoutVectors[k+1].shape[1])) * dropoutVectors[k+1] > np.random.rand(dropoutVectors[k+1].shape[0], dropoutVectors[k+1].shape[1])
                    outputsPerLayer[k+1] = np.multiply(outputsPerLayer[k+1], dropoutVectors[k+1])
        if (doDropout):
            return outputsPerLayer, dropoutVectors
        else:
            return outputsPerLayer, dropoutVectors



    def _learnFromBatch(self, batchFeatures, batchTargets):
        """Compute the weight updates for one mini-batch.

        Runs a forward pass with dropout enabled, propagates the output error
        (batchTargets - network output) back through the weights, and returns
        one update matrix per weight matrix. self._weights is not modified.

        Args:
            batchFeatures: Inputs forwarded to calcLayerOutputsBatch.
            batchTargets: Targets subtracted element-wise from the output
                layer (presumably one column per example; verify against
                _batchify).

        Returns:
            List with one entry per entry of self._weights.
        """
        layerOutputs, dropoutMasks = self.calcLayerOutputsBatch(batchFeatures, True)
        weights = self._weights
        lastIdx = len(weights) - 1

        # Placeholders for every layer below the output; each of them is
        # overwritten by the backward sweep (assuming one more output entry
        # than weight matrices).
        layerErrors = [np.zeros((out.shape[0], len(batchTargets))) for out in layerOutputs[:-1]]
        # Error of the output layer.
        layerErrors.append(batchTargets - layerOutputs[-1])

        # Backward sweep over the errors. Dropout applies here too: a neuron
        # that was not active carries no error.
        for k in reversed(range(len(weights))):
            upstream = layerErrors[k + 1]
            if k != lastIdx:
                # Hidden layers carry a bias row as their last row; it has no
                # incoming weights, so it is dropped.
                upstream = upstream[0:-1]
            projected = np.dot(weights[k], upstream)
            if self._useDropout:
                projected = np.multiply(projected, dropoutMasks[k])
            layerErrors[k] = projected

        # Weight updates: error * output * (1 - output) of the receiving
        # layer, combined with the outputs of the sending layer.
        deltaW = [np.zeros(shape=w.shape) for w in weights]
        for k in reversed(range(len(weights))):
            received = layerOutputs[k + 1]
            scaled = np.multiply(np.multiply(layerErrors[k + 1], received), 1 - received)
            if k != lastIdx:
                scaled = scaled[0:-1]
            scaledT = scaled.transpose()
            sent = layerOutputs[k]
            # A neuron that was dropped must not cause weight changes.
            if self._useDropout:
                sent = np.multiply(sent, dropoutMasks[k])
            deltaW[k] = np.dot(sent, scaledT)
        return deltaW

    def fit(self, X, y):
        """Train the network on X / y with mini-batch updates.

        Fits fresh MinMaxScaler instances on X and y, (re)initializes the
        weights unless a warm start with existing weights is requested, and
        then performs self._maxSteps passes over the batches. The learning
        rate of a pass is self._learningRate * self._shrinkage ** self._step,
        and each update is blended with the previous one through
        self._momentum.

        Args:
            X: Feature data; converted with np.matrix (rows are examples).
            y: Target data; converted with np.matrix.

        Returns:
            self
        """
        X = np.matrix(X)
        y = np.matrix(y)
        self._outputNormalizer = MinMaxScaler()
        self._inputNormalizer = MinMaxScaler()
        self._outputNormalizer = self._outputNormalizer.fit(y)
        self._inputNormalizer = self._inputNormalizer.fit(X)
        self._inputDimension = X.shape[1]
        self._outputDimension = y.shape[1]
        # Identity test instead of "== None": equality against an array is
        # element-wise and has no single truth value.
        if not self._warmStart or self._weights is None:
            self._initializeWeights()
            self._lastDelta = None
        batchFeatures, batchTargets = self._batchify(np.matrix(self._inputNormalizer.transform(X)), self._batchSize,
                                                     np.matrix(self._outputNormalizer.transform(y)))

        for _ in range(self._maxSteps):
            reducedLearningRate = self._learningRate * self._shrinkage ** self._step
            for j in range(len(batchFeatures)):
                deltaW = self._learnFromBatch(batchFeatures[j], batchTargets[j])
                if self._lastDelta is None:
                    # First update ever: there is no previous delta to blend
                    # with, so the momentum term reuses the current one.
                    self._lastDelta = deltaW
                for k in range(len(self._weights)):
                    self._lastDelta[k] = ((1-self._momentum) * deltaW[k] + self._momentum * self._lastDelta[k])
                    self._weights[k] = self._weights[k] + reducedLearningRate * self._lastDelta[k]
            self._step += 1
        return self

    def predict(self, X, debug=False):
        """Predict targets for X with dropout sampling disabled.

        X is normalized with the fitted input scaler, split into batches by
        self._batchify, pushed through the network, and the collected outputs
        are mapped back through the fitted output scaler.

        Args:
            X: Feature data; converted with np.matrix (rows are examples).
            debug: If True, additionally return a list of zero column vectors,
                one per layer (hidden/input layers get one extra row for the
                bias neuron). They are placeholders only; nothing in this
                method accumulates into them.

        Returns:
            The inverse-transformed predictions, or the tuple
            (predictions, dropoutResults) when debug is True.
        """
        X = np.matrix(X)
        batchFeatures = self._batchify(self._inputNormalizer.transform(X), self._batchSize)
        batchResults = np.zeros((X.shape[0], self._outputDimension))

        layerSizes = self.neuronsPerLayer()
        lastLayer = len(layerSizes) - 1
        dropoutResults = []
        for k, size in enumerate(layerSizes):
            # Every layer except the last one carries a bias neuron.
            rows = size if k == lastLayer else size + 1
            dropoutResults.append(np.zeros((rows, 1)))

        begin = 0
        for features in batchFeatures:
            layerOutputs, _ = self.calcLayerOutputsBatch(features, False)
            # The window is sized by the batch being written. Deriving it from
            # the previous batch only works while all batches but the last
            # have the same size.
            end = begin + features.shape[1]
            batchResults[begin:end, :] = layerOutputs[-1].transpose()
            begin = end

        batchResults = np.matrix(batchResults)
        restored = self._outputNormalizer.inverse_transform(batchResults)
        if debug:
            return restored, dropoutResults
        return restored

    def sigmoid(self, X):
        """Return the activation 0.5 * (X / (1 + |X|) + 1) for X.

        This is a rescaled X / (1 + |X|) curve with values between 0 and 1,
        used in place of the logistic function 1 / (1 + exp(-X)).
        """
        squashed = X / (1 + abs(X))
        return 0.5 * (squashed + 1)

    def sigmoidStandout(self, X):
        """Return 0.5 * (X / (1 + |X|) + 1), the activation used for standout.

        The formula is the same as the one in sigmoid(); it is kept as a
        separate method so the standout curve can be changed independently.
        """
        denominator = 1 + abs(X)
        return 0.5 * (X / denominator + 1)
Example #35
0
            saver.save(sess, model_path)

            # display
            if False:
                x = list(range(len(y_list)))

                plt.subplot(211)
                plt.plot(x, y_list, 'r', x, y_pre_list, 'b')
                plt.subplot(212)
                plt.plot(list(range(len(all_loss))), all_loss, 'g')
                plt.show()

            # predict
            if False:
                test_data = get_both_batch_data(test_records, batch_size, series_timesteps, cycle_timesteps)
                test_y_list = [] 
                test_y_pre_list = []
                test_all_loss = []
                for batch in test_data:
                    predict, loss = sess.run([y_pre, loss_func], feed_dict = {series_model.X_ : batch[0], cycle_model.X_ : batch[1], y : batch[2]})
                    test_y_list.extend(batch[2])
                    test_y_pre_list.extend(predict)
                    test_all_loss.append(loss)

                # display
                test_x = list(range(len(test_y_list)))

                print('---------------- Test Loss:', np.mean(test_all_loss))
                plt.plot(test_x, scaler.inverse_transform(test_y_list), 'r', test_x, scaler.inverse_transform(test_y_pre_list), 'b')
                plt.show()
Example #36
0
                axis=1)
x_1
yhat[0] = model.predict(x_1)

### predict the next X steps
# Each iteration feeds the previous prediction back in as input: the newest
# step (previous prediction joined with zsy[i - 1, :]) is placed in front of
# the first (timesteps - 1) steps of the last window in x_1, giving a new
# window with `timesteps` steps along axis 1.
for i in range(1, steps):
    x_n = np.append(np.append(yhat[i - 1],
                              zsy[i - 1, :]).reshape(1, 1, features + 1),
                    x_1[-1,
                        0:(timesteps - 1), :].reshape(1, (timesteps - 1),
                                                      features + 1),
                    axis=1)
    yhat[i] = model.predict(x_n)
    # The window just used becomes the basis for the next iteration.
    x_1 = x_n

# Map the predictions back through the scaler as a single column.
yhat = scaler.inverse_transform(yhat.reshape(steps, 1))

# Column of NaNs holding `train` in its first len(train) rows and `test` in
# the following `steps` rows.
# NOTE(review): the last assignment only fits when len(test) == steps - confirm.
trainPredictPlot = numpy.zeros(shape=(len(train) + steps, 1))
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[:len(train), :] = train.reshape(len(train), 1)
trainPredictPlot[len(train):len(train) + steps, :] = test.reshape(len(test), 1)

# Same layout, but only the `steps` rows after the training range are filled,
# with the predictions; the NaN rows are left empty.
testPredictPlot = numpy.zeros(shape=(len(train) + steps, 1))
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(train):len(train) + steps, :] = yhat
# yhat0=yhat

# Draw both columns in the same figure.
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
Example #37
0
class RNNGRU(object):
    """Process data for ingestion.

    Roughly based on:

    https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/23_Time-Series-Prediction.ipynb

    """

    def __init__(
            self, _data, batch_size=64, epochs=20,
            sequence_length=20, warmup_steps=50, dropout=0,
            layers=1, patience=10, units=512, display=False, binary=False):
        """Instantiate the class.

        Stores the hyperparameters, configures the TensorFlow session, splits
        the data into training / validation / test sets and scales all of
        them with MinMaxScaler.

        Args:
            _data: Data object. This method calls its values(), vectors(),
                classes() and train_validation_test_split() methods
            batch_size: Size of batch per GPU. The stored hyperparameter is
                this value multiplied by the number of available GPUs
                (counted as 1 when no GPU is found)
            epochs: Number of training epochs
            sequence_length: Length of vectors for for each target
            warmup_steps: Stored as self._warmup_steps (presumably consumed
                by the warmup loss function; it is not used in this method)
            dropout: Recurrent dropout applied to the GRU layers
            layers: Number of GRU layers; stored as int(abs(layers))
            patience: Early stopping patience
            units: Number of units in each GRU layer
            display: Show charts of results if True
            binary: Process data for predicting boolean up / down movement vs
                actual values if True

        Returns:
            None

        """
        # Initialize key variables
        self._warmup_steps = warmup_steps
        self._binary = binary
        self._display = display
        self._data = _data
        self._gpus = len(general.get_available_gpus())

        # Set key file locations
        path_prefix = '/tmp/keras-{}'.format(int(time.time()))
        self._path_checkpoint = '{}.checkpoint'.format(path_prefix)
        self._path_model_weights = '{}.weights.h5'.format(path_prefix)
        self._path_model_parameters = '{}.model'.format(path_prefix)

        # Initialize parameters. The batch is scaled by the GPU count, but
        # never by zero: a CPU-only host would otherwise end up with a batch
        # size of 0 and a division by zero when the steps per epoch are
        # calculated.
        self.hyperparameters = {
            'units': units,
            'dropout': dropout,
            'layers': int(abs(layers)),
            'sequence_length': sequence_length,
            'patience': patience,
            'batch_size': batch_size * max(1, self._gpus),
            'epochs': epochs
        }

        # Delete any stale checkpoint file
        if os.path.exists(self._path_checkpoint):
            os.remove(self._path_checkpoint)

        ###################################
        # TensorFlow wizardry
        config = tf.ConfigProto()

        # Don't pre-allocate memory; allocate as-needed
        config.gpu_options.allow_growth = True

        # Only allow a total of 80% of the GPU memory to be allocated
        config.gpu_options.per_process_gpu_memory_fraction = 0.8

        # Crash with DeadlineExceeded instead of hanging forever when your
        # queues get full/empty
        config.operation_timeout_in_ms = 60000

        # Create a session with the above options specified.
        backend.tensorflow_backend.set_session(tf.Session(config=config))
        ###################################

        # Get data
        self._y_current = self._data.values()

        # Create test and training arrays for VALIDATION and EVALUATION
        (x_train,
         x_validation,
         _x_test,
         self._y_train,
         self._y_validation,
         self._y_test) = self._data.train_validation_test_split()

        (self.training_rows, self._training_vector_count) = x_train.shape
        (self.test_rows, _) = _x_test.shape
        (_, self._training_class_count) = self._y_train.shape

        # The neural network works best on small values, so the data is
        # scaled before it is fed to the network. A scaler's fit() only
        # calculates and stores the scaling parameters; transform() then
        # applies those same parameters to any set of examples, which is why
        # one fitted scaler is reused for the training, validation and test
        # sets.
        # NOTE(review): the scaler is fitted on self._data.vectors(), not on
        # x_train; if that covers the validation / test rows too, confirm
        # that this is intended.
        self._x_scaler = MinMaxScaler()
        self._x_scaler.fit(self._data.vectors())
        self._x_train_scaled = self._x_scaler.transform(x_train)
        self._x_validation_scaled = self._x_scaler.transform(x_validation)
        self._x_test_scaled = self._x_scaler.transform(_x_test)

        # The target-data could have different value-ranges from the
        # input-signals, so a separate scaler-object is used for it.
        self._y_scaler = MinMaxScaler()
        self._y_scaler.fit(self._data.classes())
        self._y_train_scaled = self._y_scaler.transform(self._y_train)
        self._y_validation_scaled = self._y_scaler.transform(
            self._y_validation)
        self._y_test_scaled = self._y_scaler.transform(self._y_test)

        # Print stuff
        print('\n> Numpy Data Type: {}'.format(type(x_train)))
        print("> Numpy Data Shape: {}".format(x_train.shape))
        print("> Numpy Data Row[0]: {}".format(x_train[0]))
        print("> Numpy Data Row[Last]: {}".format(x_train[-1]))
        print('> Numpy Targets Type: {}'.format(type(self._y_train)))
        print("> Numpy Vector Feature Type: {}".format(type(x_train[0][0])))
        print("> Numpy Targets Shape: {}".format(self._y_train.shape))

        print('> Number of Samples: {}'.format(self._y_current.shape[0]))
        print('> Number of Training Samples: {}'.format(x_train.shape[0]))
        print('> Number of Training Classes: {}'.format(
            self._training_class_count))
        print('> Number of Test Samples: {}'.format(self.test_rows))
        print("> Training Minimum Value:", np.min(x_train))
        print("> Training Maximum Value:", np.max(x_train))
        print('> Number X signals: {}'.format(self._training_vector_count))
        print('> Number Y signals: {}'.format(self._training_class_count))

        # Print epoch related data
        print('> Epochs:', self.hyperparameters['epochs'])
        print('> Batch Size:', self.hyperparameters['batch_size'])

        # Display estimated memory footprint of training data.
        print("> Data size: {:.2f} Bytes".format(x_train.nbytes))

        print('> Scaled Training Minimum Value: {}'.format(
            np.min(self._x_train_scaled)))
        print('> Scaled Training Maximum Value: {}'.format(
            np.max(self._x_train_scaled)))

        # The data-set has now been prepared as 2-dimensional numpy arrays.
        # These are the array-shapes of the input and output data:
        print('> Scaled Training Data Shape: {}'.format(
            self._x_train_scaled.shape))
        print('> Scaled Training Targets Shape: {}'.format(
            self._y_train_scaled.shape))

    def model(self, params=None):
        """Create and train the Recurrent Neural Network.

        Args:
            params: Optional dict of hyperparameters with the keys 'units',
                'dropout', 'layers', 'sequence_length', 'patience',
                'batch_size' and 'epochs'. Its 'batch_size' is per GPU and is
                multiplied by the number of available GPUs (counted as 1 when
                no GPU is found). The dict itself is not modified. When None,
                self.hyperparameters is used unchanged.

        Returns:
            serial_model: The single-device RNN model. Training runs on the
                multi-GPU wrapper of this model when one can be created, and
                on the model itself otherwise.

        """
        # Initialize key variables
        if params is None:
            _hyperparameters = self.hyperparameters
        else:
            # Work on a copy: scaling the caller's dict in place would
            # compound the multiplication every time the same dict is passed
            # in again. The GPU count is floored at 1 so that a CPU-only host
            # does not end up with a batch size of 0.
            _hyperparameters = dict(params)
            _hyperparameters['batch_size'] = (
                params['batch_size'] * max(1, self._gpus))

        # Calculate the steps per epoch
        epoch_steps = int(
            self.training_rows / _hyperparameters['batch_size']) + 1

        # Instantiate the base model (or "template" model) under a CPU device
        # scope, so that the model's weights are hosted on CPU memory.
        # Otherwise they may end up hosted on a GPU, which would complicate
        # weight sharing.
        #
        # NOTE: multi_gpu_model values will be way off if you don't do this.
        with tf.device('/cpu:0'):
            serial_model = Sequential()

        # First Gated Recurrent Unit (GRU) layer. Because this is the first
        # layer in the model, Keras needs to know the shape of its input,
        # which is a batch of sequences of arbitrary length (indicated by
        # None), where each observation has a number of input-signals.
        serial_model.add(GRU(
            _hyperparameters['units'],
            stateful=True,
            batch_size=_hyperparameters['batch_size'],
            return_sequences=True,
            recurrent_dropout=_hyperparameters['dropout'],
            input_shape=(None, self._training_vector_count,)))

        # Any additional GRU layers
        for _ in range(1, _hyperparameters['layers']):
            serial_model.add(GRU(
                _hyperparameters['units'],
                stateful=True,
                batch_size=_hyperparameters['batch_size'],
                recurrent_dropout=_hyperparameters['dropout'],
                return_sequences=True))

        # Fully-connected (dense) output layer with one value per target
        # signal. A Sigmoid activation could only output values in the range
        # seen in the training-data after inverse scaling, so a linear
        # activation is used instead. The weights are initialized with small
        # values to help avoid NaN values during training.
        init = RandomUniform(minval=-0.05, maxval=0.05)
        serial_model.add(Dense(
            self._training_class_count,
            activation='linear',
            kernel_initializer=init))

        # Apply multi-GPU logic. When the wrapper cannot be created
        # (ValueError) the serial model is trained directly.
        try:
            parallel_model = multi_gpu_model(
                serial_model,
                cpu_relocation=True,
                gpus=self._gpus)
            print('> Training using multiple GPUs...')
        except ValueError:
            parallel_model = serial_model
            print('> Training using single GPU or CPU...')

        # Compile Model with the optimizer and the beginning learning-rate
        optimizer = RMSprop(lr=1e-3)
        if self._binary is True:
            parallel_model.compile(
                loss='binary_crossentropy',
                optimizer=optimizer,
                metrics=['accuracy'])
        else:
            parallel_model.compile(
                loss=self._loss_mse_warmup,
                optimizer=optimizer,
                metrics=['accuracy'])

        # summary() prints the table itself and returns None, so it is not
        # wrapped in print().
        print('\n> Model Summary (Serial):\n')
        serial_model.summary()
        print('\n> Model Summary (Parallel):\n')
        parallel_model.summary()

        # Create the batch-generator.
        generator = self._batch_generator(
            _hyperparameters['batch_size'],
            _hyperparameters['sequence_length'])

        # Validation Set
        #
        # The batch-generator supplies the batches used during training. For
        # validation the entire validation sequence is run through instead,
        # as a batch holding that one sequence. The validation loss is what
        # the callbacks below monitor to limit overfitting.
        validation_data = (np.expand_dims(self._x_validation_scaled, axis=0),
                           np.expand_dims(self._y_validation_scaled, axis=0))

        # Callback Functions

        # Callback for writing checkpoints during training. Only the weights
        # with the best validation loss so far are kept.
        callback_checkpoint = ModelCheckpoint(
            filepath=self._path_checkpoint,
            monitor='val_loss',
            verbose=1,
            save_weights_only=True,
            save_best_only=True)

        # Callback for stopping the optimization when performance worsens on
        # the validation-set.
        callback_early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=_hyperparameters['patience'],
            verbose=1)

        # Callback for writing the TensorBoard log during training.
        callback_tensorboard = TensorBoard(
            log_dir='/tmp/23_logs/',
            histogram_freq=0,
            write_graph=False)

        # This callback reduces the learning-rate for the optimizer if the
        # validation-loss has not improved since the last epoch (as indicated
        # by patience=0). The learning-rate is multiplied by the given
        # factor: the start learning-rate of 1e-3 becomes 1e-4, which is also
        # the lowest value allowed.
        callback_reduce_lr = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.1,
            min_lr=1e-4,
            patience=0,
            verbose=1)

        callbacks = [
            callback_early_stopping,
            callback_checkpoint,
            callback_tensorboard,
            callback_reduce_lr]

        # Train the Recurrent Neural Network
        #
        # Note that a single "epoch" does not correspond to a single
        # processing of the training-set; it is epoch_steps batches drawn
        # from the batch-generator.
        #
        # Also note that the loss sometimes becomes NaN (not-a-number). This
        # may be caused by the neural network architecture, learning-rate,
        # batch-size, sequence-length, etc. in which case those settings may
        # have to be modified.
        print('\n> Parameters for training\n')
        pprint(_hyperparameters)
        print('\n> Starting data training\n')

        parallel_model.fit_generator(
            generator=generator,
            epochs=_hyperparameters['epochs'],
            steps_per_epoch=epoch_steps,
            use_multiprocessing=True,
            validation_data=validation_data,
            callbacks=callbacks)

        # Determine whether the weights of the parallel and serial models match
        if general.weights_match(serial_model, parallel_model) is True:
            print('> Weights match (Parallel / Serial)')
        else:
            print('> Weights different (Parallel / Serial)')

        # Return
        return serial_model

    def save(self, _model):
        """Save the Recurrent Neural Network model to disk.

        The architecture is written as YAML to self._path_model_parameters
        and the weights are written to self._path_model_weights.

        Args:
            _model: RNN model to save

        Returns:
            None

        """
        # Serialize model architecture to YAML
        with open(self._path_model_parameters, 'w') as handle:
            handle.write(_model.to_yaml())

        # Serialize weights to HDF5
        weights_path = self._path_model_weights
        _model.save_weights(weights_path)
        print('> Saved model to disk')

    def load_model(self):
        """Load the Recurrent Neural Network model from disk.

        Reads the YAML architecture from self._path_model_parameters, builds
        the model from it and loads the weights stored at
        self._path_model_weights.

        Args:
            None

        Returns:
            restored: RNN model

        """
        print('> Loading model from disk')

        # Rebuild the architecture from its YAML description.
        # NOTE(review): building objects from YAML can be unsafe if the file
        # could have been written by someone else - confirm the file under
        # self._path_model_parameters is trusted.
        with open(self._path_model_parameters, 'r') as handle:
            restored = model_from_yaml(handle.read())

        # Load weights into new model
        restored.load_weights(self._path_model_weights)
        print('> Finished loading model from disk')

        return restored

    def evaluate(self, _model):
        """Evaluate the model and print its metrics.

        The checkpoint weights are loaded first when a checkpoint file
        exists. When self._binary is False the scaled test set is used;
        otherwise the vectors and classes returned by
        self._data.stochastic_vectors_classes() are scaled and used.

        Args:
            _model: Model to evaluate

        Returns:
            None

        """
        # Because of early-stopping the last epochs may have been worse than
        # the best one, so the saved checkpoint is reloaded when there is one.
        print('> Loading model weights')
        checkpoint = self._path_checkpoint
        if os.path.exists(checkpoint):
            _model.load_weights(checkpoint)

        # Select the data to evaluate on
        if self._binary is not False:
            # Get the filtered vectors and classes, then scale them
            (filtered_vectors,
             filtered_classes) = self._data.stochastic_vectors_classes()
            x_scaled = self._x_scaler.transform(filtered_vectors)
            y_scaled = self._y_scaler.transform(filtered_classes)
        else:
            x_scaled = self._x_test_scaled
            y_scaled = self._y_test_scaled

        # The model expects a batch of data, so the array-dimensionality is
        # expanded to create a batch with one long sequence.
        x_batch = np.expand_dims(x_scaled, axis=0)
        result = _model.evaluate(
            x=x_batch,
            y=np.expand_dims(y_scaled, axis=0))

        # One line per metric
        print('> Metrics (test-set):')
        for metric_value, metric_name in zip(result, _model.metrics_names):
            print('\t{}: {:.10f}'.format(metric_name, metric_value))

        if self._binary is not True:
            return

        # Get the predictions and map them back to the scale of the
        # original data-set.
        predictions_scaled = _model.predict_classes(x_batch, verbose=1)
        predictions = self._y_scaler.inverse_transform(predictions_scaled[0])

        # Print meaningful human accuracy values
        human_accuracy = general.binary_accuracy(
            predictions, filtered_classes) * 100
        print('> Human accuracy {:.3f} %'.format(human_accuracy))

    def objective(self, params=None):
        """Score one set of hyperparameters.

        Builds a model from ``params``, predicts over the evaluation data,
        maps the predictions back to the original scale and measures the
        mean absolute error against the expected classes.

        The result dict ('loss' / 'status' keys with STATUS_OK) looks like
        the hyperopt objective convention, where 'loss' is minimized --
        TODO confirm against the caller.

        Args:
            params: Hyperparameters handed to ``self.model``. Echoed back
                unchanged in the result.

        Returns:
            result: Dict with the keys 'loss', 'status',
                'estimated_accuracy' and 'hyperparameters'.

        """
        # Work on a private copy so it can be released once scored
        model = deepcopy(self.model(params=params))

        # Evaluate the flag once so both decisions below agree
        binary = bool(self._binary)

        if binary:
            # Get the filtered vectors and classes, then scale the vectors
            (filtered_vectors,
             test_classes) = self._data.stochastic_vectors_classes()
            scaled_vectors = self._x_scaler.transform(filtered_vectors)
        else:
            scaled_vectors = self._x_test_scaled
            test_classes = self._y_test

        # The model expects a batch, so wrap the single sequence in one
        x_values = np.expand_dims(scaled_vectors, axis=0)

        # Get the predictions
        predictions_scaled = model.predict(x_values, verbose=1)

        # Do an inverse map to get the predictions back to the scale
        # of the original data-set.
        predictions = self._y_scaler.inverse_transform(
            predictions_scaled[0])

        # Mean absolute error: lower is better
        error = mean_absolute_error(test_classes, predictions)

        # Free object memory
        del model
        gc.collect()

        # Print meaningful human accuracy values
        if binary:
            print(
                '> Human accuracy {:.5f} %'
                ''.format(general.binary_accuracy(
                    predictions, test_classes) * 100))

        # The error itself is the loss. Negating it (as was done before)
        # would make a minimizer favor the parameters with the LARGEST error.
        return {
            'loss': error,
            'status': STATUS_OK,
            'estimated_accuracy': error,
            'hyperparameters': params}

    def cleanup(self):
        """Delete the checkpoint file.

        Safe to call more than once: a checkpoint that does not exist
        (never written, or already removed) is silently ignored. Any other
        OS error (e.g. permissions) still propagates.

        Args:
            None

        Returns:
            None

        """
        # Delete
        try:
            os.remove(self._path_checkpoint)
        except FileNotFoundError:
            # Nothing left to clean up
            pass

    def stationary(self):
        """Evaluate whether the timeseries is stationary.

        Runs ``adfuller`` on the current values, prints the statistic,
        p-value and critical values, and reports the series as stationary
        only when the statistic is below the smallest critical value.

        Non-stationary timeseries are probably random walks and not
        suitable for forecasting.

        Args:
            None

        Returns:
            state: True if stationary

        """
        # Statistical test
        outcome = adfuller(self._y_current)
        statistic = outcome[0]
        critical = outcome[4]

        # Report
        print('> Stationarity Test:')
        print('  ADF Statistic: {:.3f}'.format(statistic))
        print('  p-value: {:.3f}'.format(outcome[1]))
        print('  Critical Values:')
        for level, threshold in critical.items():
            print('\t{}: {:.3f}'.format(level, threshold))

        # Stationary only if the statistic beats every critical value
        state = bool(statistic < min(critical.values()))
        print('  Stationarity: {}'.format(state))
        return state

    def _batch_generator(self, batch_size, sequence_length):
        """Generate an endless stream of random batches of training-data.

        Every batch holds ``batch_size`` windows of ``sequence_length``
        consecutive rows, each cut from the scaled training data at a
        random start index.

        Args:
            batch_size: Size of batch
            sequence_length: Length of sequence

        Yields:
            (x_batch, y_batch): float16 arrays of shape
                (batch_size, sequence_length, vector count) and
                (batch_size, sequence_length, class count).

        """
        x_shape = (
            batch_size, sequence_length, self._training_vector_count)
        y_shape = (batch_size, sequence_length, self._training_class_count)

        # Infinite loop.
        while True:
            # Allocate new arrays for the input- and output-signals.
            x_batch = np.zeros(shape=x_shape, dtype=np.float16)
            y_batch = np.zeros(shape=y_shape, dtype=np.float16)

            # Number of valid start indices. randint() excludes its upper
            # bound, so the "+ 1" keeps the last full-length window
            # reachable and allows sequence_length == training_rows.
            start_count = self.training_rows - sequence_length + 1

            # Fill the batch with random sequences of data.
            for i in range(batch_size):
                # Get a random start-index.
                # This points somewhere into the training-data.
                idx = np.random.randint(start_count)

                # Copy the sequences of data starting at this index.
                x_batch[i] = self._x_train_scaled[idx:idx+sequence_length]
                y_batch[i] = self._y_train_scaled[idx:idx+sequence_length]

            yield (x_batch, y_batch)

    def _loss_mse_warmup(self, y_true, y_pred):
        """Calculate the Mean Squared Error after the warmup period.

        At the beginning of a sequence the model has only seen a few
        time-steps of input, so its output may be very inaccurate.
        Penalizing those early steps may distort the later output, so the
        first ``self._warmup_steps`` time-steps of every sequence are
        dropped before the error is measured.

        Both tensors are indexed as
        [batch_size, sequence_length, num_y_signals].

        Args:
            y_true: Desired output.
            y_pred: Model's output.

        Returns:
            loss_mean: Mean Squared Error

        """
        skip = self._warmup_steps

        # MSE over the post-warmup slices only, i.e.
        # [batch_size, sequence_length - warmup_steps, num_y_signals].
        # NOTE(review): in TF 1.x tf.losses.mean_squared_error appears to
        # reduce to a scalar by default, which would make the reduce_mean
        # below a harmless safeguard -- confirm for the installed version.
        loss = tf.losses.mean_squared_error(
            labels=y_true[:, skip:, :],
            predictions=y_pred[:, skip:, :])

        # Collapse to a single scalar regardless of how Keras would
        # otherwise reduce across the batch axis.
        return tf.reduce_mean(loss)

    def plot_train(self, model, start_idx, length=100):
        """Plot predicted against true output-signals for training data.

        Thin wrapper that delegates to ``_plot_comparison`` with
        ``train=True``.

        Args:
            model: Model used to generate the predictions.
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.

        Returns:
            None

        """
        # Delegate, selecting the training-set
        self._plot_comparison(
            model,
            start_idx,
            length=length,
            train=True)

    def plot_test(self, model, start_idx, length=100):
        """Plot predicted against true output-signals for test data.

        Thin wrapper that delegates to ``_plot_comparison`` with
        ``train=False``.

        Args:
            model: Model used to generate the predictions.
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.

        Returns:
            None

        """
        # Delegate, selecting the test-set
        self._plot_comparison(
            model,
            start_idx,
            length=length,
            train=False)

    def _plot_comparison(self, model, start_idx, length=100, train=True):
        """Plot the predicted and true output-signals.

        For every label reported by ``self._data.labels()`` one chart is
        drawn with three lines: the true output-signal, the model's
        rescaled prediction and the current values. Each chart is saved as
        a PNG under /tmp and additionally shown when ``self._display`` is
        True. Nothing is plotted when ``self._binary`` is truthy.

        Args:
            model: Model used to generate the predictions.
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.
            train: Boolean whether to use training- or test-set.

        Returns:
            None

        """
        # Initialize key variables
        datetimes = {}
        num_train = self.training_rows

        # Don't plot if we are looking at binary classes
        if bool(self._binary) is True:
            print('> Will not plot charts for binary class values.')
            return

        # End-index for the sequences.
        end_idx = start_idx + length

        # Variables for date formatting
        days = mdates.DayLocator()   # Every day
        months = mdates.MonthLocator()  # Every month
        months_format = mdates.DateFormatter('%b %Y')
        days_format = mdates.DateFormatter('%d')

        # Assign other variables dependent on the type of data we are plotting
        if train is True:
            # Use training-data.
            x_values = self._x_train_scaled[start_idx:end_idx]
            y_true = self._y_train[start_idx:end_idx]
            shim = 'Train'

            # Datetimes to use for training
            datetimes[shim] = self._data.datetime()[
                :num_train][start_idx:end_idx]

        else:
            # Scale the data
            x_test_scaled = self._x_scaler.transform(
                self._data.vectors_test_all())

            # Use test-data.
            x_values = x_test_scaled[start_idx:end_idx]
            y_true = self._y_test[start_idx:end_idx]
            shim = 'Test'

            # Datetimes to use for testing
            # NOTE(review): this window starts one row earlier
            # (-test_rows-1) than the 'actual' window further down
            # (-test_rows) -- confirm the offset is intentional.
            datetimes[shim] = self._data.datetime()[
                -self.test_rows-1:][start_idx:end_idx]

        # Input-signals for the model.
        # The model is fed a batch, so wrap the single sequence in one.
        x_values = np.expand_dims(x_values, axis=0)

        # Use the model to predict the output-signals.
        y_pred = model.predict(x_values)

        # The output of the model is between 0 and 1.
        # Do an inverse map to get it back to the scale
        # of the original data-set.
        y_pred_rescaled = self._y_scaler.inverse_transform(y_pred[0])

        # For each output-signal.
        for signal in range(len(self._data.labels())):
            # Assign other variables dependent on the type of data plot
            # (these values do not depend on `signal`; they are simply
            # recomputed on every pass)
            if train is True:
                # Only get current values that are a part of the training data
                current = self._y_current[:num_train][start_idx:end_idx]

                # The number of datetimes for the 'actual' plot must match
                # that of current values
                datetimes['actual'] = self._data.datetime()[
                    :num_train][start_idx:end_idx]

            else:
                # Only get current values that are a part of the test data.
                # NOTE(review): unlike the training branch this slice is
                # not capped at end_idx, so it runs to the end of the
                # test data -- confirm this is wanted.
                current = self._y_current[
                    -self.test_rows:][start_idx:]

                # The number of datetimes for the 'actual' plot must match
                # that of current values
                datetimes['actual'] = self._data.datetime()[
                    -self.test_rows:][start_idx:]

            # Create a filename
            # The timestamp keeps repeated runs from overwriting each other
            # (at one-second resolution).
            filename = (
                '/tmp/batch_{}_epochs_{}_training_{}_{}_{}_{}.png').format(
                    self.hyperparameters['batch_size'],
                    self.hyperparameters['epochs'],
                    num_train,
                    signal,
                    int(time.time()),
                    shim)

            # Get the output-signal predicted by the model.
            signal_pred = y_pred_rescaled[:, signal]

            # Get the true output-signal from the data-set.
            signal_true = y_true[:, signal]

            # Create a new chart
            (fig, axis) = plt.subplots(figsize=(15, 5))

            # Plot and compare the two signals.
            # The datetimes are trimmed to the length of each signal so
            # both plot arguments always have the same number of points.
            axis.plot(
                datetimes[shim][:len(signal_true)],
                signal_true,
                label='Current +{}'.format(self._data.labels()[signal]))
            axis.plot(
                datetimes[shim][:len(signal_pred)],
                signal_pred,
                label='Prediction')
            axis.plot(datetimes['actual'], current, label='Current')

            # Set plot labels and titles
            axis.set_title('{1}ing Forecast ({0} Future Intervals)'.format(
                self._data.labels()[signal], shim))
            axis.set_ylabel('Values')
            axis.legend(
                bbox_to_anchor=(1.04, 0.5),
                loc='center left', borderaxespad=0)

            # Add gridlines and ticks
            ax = plt.gca()
            ax.grid(True)

            # Add major gridlines
            ax.xaxis.grid(which='major', color='black', alpha=0.2)
            ax.yaxis.grid(which='major', color='black', alpha=0.2)

            # Add minor ticks (They must be turned on first)
            ax.minorticks_on()
            ax.xaxis.grid(which='minor', color='black', alpha=0.1)
            ax.yaxis.grid(which='minor', color='black', alpha=0.1)

            # Format the tick labels
            ax.xaxis.set_major_locator(months)
            ax.xaxis.set_major_formatter(months_format)
            ax.xaxis.set_minor_locator(days)

            # Remove tick marks
            ax.tick_params(axis='both', which='both', length=0)

            # Print day numbers on xaxis for Test data only
            if train is False:
                ax.xaxis.set_minor_formatter(days_format)
                plt.setp(ax.xaxis.get_minorticklabels(), rotation=90)

            # Rotates and right aligns the x labels, and moves the bottom of
            # the axes up to make room for them
            fig.autofmt_xdate()

            # Plot grey box for warmup-period if we are working with training
            # data and the start is within the warmup-period
            # NOTE(review): datetimes[shim] was already sliced from
            # start_idx, so indexing it with start_idx / _warmup_steps is
            # relative to the slice rather than to the full series (and
            # needs length > _warmup_steps). A start_idx of 0 also skips
            # the box -- confirm both are intended.
            if (0 < start_idx < self._warmup_steps):
                if train is True:
                    plt.axvspan(
                        datetimes[shim][start_idx],
                        datetimes[shim][self._warmup_steps],
                        facecolor='black', alpha=0.15)

            # Show and save the image
            # (the file is saved in both cases; it is only shown on screen
            # when display is enabled)
            if self._display is True:
                fig.savefig(filename, bbox_inches='tight')
                plt.show()
            else:
                fig.savefig(filename, bbox_inches='tight')
            print('> Saving file: {}'.format(filename))

            # Close figure
            plt.close(fig=fig)
Example #38
0
# Accumulator for the rescaled predictions of every pass; averaged below.
X_final = np.zeros_like(X_test)
print(X_final.shape)
# Add a trailing axis of size 1: (rows, cols) -> (rows, cols, 1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
# Fit and predict. For volume data use a high number of epochs (~500-800)
# to reproduce the shape of the graph.
no_of_models = 5
for k in range(no_of_models):
    # NOTE(review): the same `model` object is fitted on every pass, so
    # the passes are presumably not independent models -- confirm.
    history = model.fit(X_train,
                        y_train,
                        epochs=20,
                        validation_split=0.015,
                        shuffle=False)
    Xt = model.predict(X_test)
    score = model.evaluate(X_test, y_test, verbose=1)
    # Back to the original scale, one row per sample
    Xt = scl.inverse_transform(Xt.reshape(-1, look_forward))
    # print(Xt.shape)
    # print(Xt)
    X_final += Xt

# Average of the predictions over all passes
Xt = X_final / no_of_models
y_test = scl.inverse_transform(y_test.reshape(-1, look_forward))
model.save('open_lstm.h5')
plt.figure(figsize=(18, 9))
plt.subplot(1, 2, 1)
plt.plot(y_test, color='red', label='actual')
plt.plot(Xt, color='blue', label='predict')
plt.legend()

# Element-wise absolute relative error of the averaged prediction
error = abs((y_test - Xt) / y_test)
for i in range(len(Xt)):
# NOTE: this fragment uses Python 2 print statements.
dataset = df.values
dataset = dataset.astype('float32')

# normalize the dataset into the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# prepare the X and y arrays
X,y = create_dataset(dataset, 1)

print X.shape, 'XXXXX', y.shape

# Take the first 80% of the data as the training sample and the last 20%
# as the testing sample (no shuffling, so the order is kept)
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.20, shuffle=False)

# Debug output: first training rows in original units, test-set size
print scaler.inverse_transform(trainX[:10])
print len(testY), 'testY length'

# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
print trainX[:3],trainX.shape, 'ttttttttttX'
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# The test split doubles as validation data while fitting
history=model.fit(trainX, trainY, nb_epoch=5, batch_size=1, validation_data=(testX, testY), verbose=2)

# Training loss per epoch
plt.plot(history.history['loss'], label='train')
Example #40
0
    # Add a trailing feature axis: (samples, steps) -> (samples, steps, 1)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # LSTM with a single dense output
    model = Sequential()
    model.add(LSTM(32, input_dim=1)) #look_back))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, y_train, nb_epoch=100, batch_size=5, verbose=2)

    # Predictions for both splits (still in scaled units)
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    # scale predictions and targets back to the original units
    train_pred = scaler.inverse_transform(train_pred)
    y_train = scaler.inverse_transform(y_train)
    test_pred = scaler.inverse_transform(test_pred)
    y_test = scaler.inverse_transform(y_test)

    # shift predictions for plotting: NaN-filled arrays shaped like the
    # dataset, with the predictions written at their offset positions
    train_pred_plot = np.empty_like(dataset)
    train_pred_plot[:,:] = np.nan
    train_pred_plot[look_back:len(train_pred)+look_back,:] = train_pred

    test_pred_plot = np.empty_like(dataset)
    test_pred_plot[:,:] = np.nan
    test_pred_plot[len(train_pred)+(look_back*2)+1:len(dataset)-1,:] = test_pred

    # Plot the complete series in original units
    f = plt.figure()
    plt.plot(scaler.inverse_transform(dataset), color='b', lw=2.0, label='S&P 500')
Example #41
0
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(6, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, nb_epoch=100, batch_size=1, verbose=2)


# Estimate model performance
# NOTE(review): evaluate() reports the compiled loss (mean squared error
# on the scaled values); passing that number through inverse_transform
# treats it like a data value -- confirm this is the intended report.
trainScore = model.evaluate(trainX, trainY, verbose=0)
print('Train Score: ', scaler.inverse_transform(numpy.array([[trainScore]])))
testScore = model.evaluate(testX, testY, verbose=0)
print('Test Score: ', scaler.inverse_transform(numpy.array([[testScore]])))

# generate predictions for the training and test inputs
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

#print(scaler.inverse_transform(testPredict))

# shift train predictions for plotting: NaN-filled array shaped like the
# dataset, with the predictions written from offset look_back
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

# shift test predictions for plotting
# training set followed by the test set
dataset_total = pd.concat((prices_dataset_train['adj_close'],prices_dataset_test['adj_close']), axis=0) #vertical axis=0 horizontal axis=1
# all inputs for the test set: the test rows plus the 40 rows before them,
# so that the first test row also has a full 40-step window
inputs = dataset_total[len(dataset_total)-len(prices_dataset_test)-40:].values
inputs = inputs.reshape(-1,1)

# the neural net was trained on scaled values, so the inputs have to be
# min-max normalized as well; the scaler is already fitted, so transform
# can be used directly
inputs = min_max_scaler.transform(inputs)

X_test = []

# one window of the 40 preceding values per test row
for i in range(40,len(prices_dataset_test)+40):
    X_test.append(inputs[i-40:i,0])

X_test = np.array(X_test)
# add a trailing feature axis: (samples, 40) -> (samples, 40, 1)
X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))

predictions = model.predict(X_test)

# invert the predictions, because normalization was applied but we want to
# compare with the original prices
predictions = min_max_scaler.inverse_transform(predictions)

# plotting the results
plt.plot(testset, color='blue', label='Actual S&P500 Prices')
plt.plot(predictions, color='green', label='LSTM Predictions')
plt.title('S&P500 Predictions with Reccurent Neural Network')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
Example #43
0
prediction. The default sigmoid activation function is used for the
LSTM blocks. The network is trained for 100 epochs and a batch size of
1 is used."""

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, nb_epoch=100, batch_size=1, verbose=2)

# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions back to the original units
# (the targets are wrapped in a list so they are passed as a single row;
# that row is read back as trainY[0] / testY[0] below)
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error in original units
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print(trainY[0])
print(trainPredict[:,0])
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print(testY[0])
print(testPredict[:,0])
print('Test Score: %.2f RMSE' % (testScore))

# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
file.close()

# Loss curves recorded during training
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

################## predict ######################
# Feed the test set into the model to get predictions
predicted_stock_price = model.predict(x_test)
# Restore the predictions: inverse-normalize from (0, 1) to the original range
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
# Restore the real data: inverse-normalize from (0, 1) to the original range
# (the first 60 rows of the test set are skipped)
real_stock_price = sc.inverse_transform(test_set[60:])
# Plot the real data against the predicted data
plt.plot(real_stock_price, color='red', label='MaoTai Stock Price')
plt.plot(predicted_stock_price,
         color='blue',
         label='Predicted MaoTai Stock Price')
plt.title('MaoTai Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('MaoTai Stock Price')
plt.legend()
plt.show()

##########evaluate##############
# calculate MSE (mean squared error) ---> E[(predicted - actual)^2]
# (mean of the squared differences between predicted and actual values)
Example #45
0
# Min-max normalization of each series into the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))

# One MAPE value per series
MAPE_list = [0] * 120
for i in range(120):
    # Fit the scaler on this series' training window and flatten the
    # column-shaped result into a plain list of floats
    train_data_norm = scaler.fit_transform(pd.DataFrame(data_list[i][1:145]))
    train_data_norm = np.array(train_data_norm).tolist()
    train_data_norm = list(chain.from_iterable(train_data_norm))
    X, y = split_sequence(train_data_norm, 24)
    n_features = 1
    # reshape input to be [samples, time steps, features]
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model.fit(X, y, epochs=50, batch_size=1, verbose=2)
    pred_val = []
    pred_data = train_data_norm[120:145]
    # Predict one day (24 steps): every prediction is appended to the
    # window and the oldest value dropped, so each step feeds the next.
    # The loop variable must NOT be called `i`: that would overwrite the
    # series index used for MAPE_list[i] / data_list[i] below.
    for _ in range(24):
        pred_X = array(pred_data).reshape((1, 24, 1))
        pred = model.predict(pred_X)[:, 0].tolist()[0]
        pred_val.append(pred)
        del (pred_data[0])
        pred_data.append(pred)
    # Back to the original units, flattened into a plain list
    pred_val = scaler.inverse_transform(pd.DataFrame(pred_val))
    pred_val = np.array(pred_val).tolist()
    pred_val = list(chain.from_iterable(pred_val))

    # Compute the MAPE of this series against the held-out values
    MAPE_list[i] = MAPE(pred_val, data_list[i][145:169])
    print(MAPE_list[i])
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# The test split doubles as validation data while fitting
history=model.fit(trainX, trainY, nb_epoch=5, batch_size=1, validation_data=(testX, testY), verbose=2)

# plot the loss vs val_loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

# predict the bitcoin price in train_predict and test_predict
train_predict = model.predict(trainX)
test_predict = model.predict(testX)

# Feed the last (still scaled) test prediction back into the model to get
# one further step, then convert it to original units
futurePredict = model.predict(np.asarray([[test_predict[-1]]]))
futurePredict = scaler.inverse_transform(futurePredict)

# invert predictions and targets back to the original units
train_predict = scaler.inverse_transform(train_predict)
trainY = scaler.inverse_transform(trainY)

test_predict = scaler.inverse_transform(test_predict)
testY = scaler.inverse_transform(testY)

# NOTE(review): the values printed under this heading are the last 10
# test predictions, not observed prices -- confirm the wording.
print("Price for last 10 days: ")
print(test_predict[-10:])
print("Bitcoin price for tomorrow: ", futurePredict)
print ('test prdict length', len(test_predict))

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[:,0], train_predict[:,0]))
Example #47
0
# LSTM creation
model = Sequential()
model.add(LSTM(10, input_shape=(1, timesteps)))
model.add(Dense(units=1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=50, batch_size=1)

# Prediction

trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Invert scaling
# (the targets are wrapped in a list so they are passed as a single row;
# that row is read back as trainY[0] / testY[0] below)
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

# Error calculation (RMSE in original units)
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train score:%.2f RMSE' % (trainScore))

testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test score:%.2f RMSE' % (testScore))

# Shifting and Plotting
plt.plot(scaler.inverse_transform(dataset))
# NaN-filled array shaped like the dataset
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
Example #48
0
def main():
    """Fit a small LSTM to a hard-coded oscillating series and plot the fit.

    The ``--gpus`` command-line option selects how many GPUs to train on.
    With one GPU the plain model is trained directly; with more, the model
    is wrapped with ``multi_gpu_model`` for training, while prediction is
    always done with the underlying serial model.

    The series is min-max scaled, split 67% / 33% into train and test
    parts, turned into (t -> t+1) samples and fitted for 100 epochs. The
    RMSE of both parts is printed in original units and the predictions
    are plotted over the complete series.

    Returns:
        None

    """
    # fix random seed for reproducibility
    numpy.random.seed(7)

    # Get CLI arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gpus',
        help='Number of GPUs to use.',
        type=int, default=1)
    args = parser.parse_args()
    gpus = args.gpus

    # Fail early with a usage message: a count below one would otherwise
    # surface later as a zero or negative batch size.
    if gpus < 1:
        parser.error('--gpus must be a positive integer')

    # load the dataset
    dataframe = DataFrame(
        [0.00000, 5.99000, 11.92016, 17.73121, 23.36510, 28.76553, 33.87855,
         38.65306, 43.04137, 46.99961, 50.48826, 53.47244, 55.92235, 57.81349,
         59.12698, 59.84970, 59.97442, 59.49989, 58.43086, 56.77801, 54.55785,
         51.79256, 48.50978, 44.74231, 40.52779, 35.90833, 30.93008, 25.64279,
         20.09929, 14.35496, 8.46720, 2.49484, -3.50245, -9.46474, -15.33247,
         -21.04699, -26.55123, -31.79017, -36.71147, -41.26597, -45.40815,
         -49.09663, -52.29455, -54.96996, -57.09612, -58.65181, -59.62146,
         -59.99540, -59.76988, -58.94716, -57.53546, -55.54888, -53.00728,
         -49.93605, -46.36587, -42.33242, -37.87600, -33.04113, -27.87613,
         -22.43260, -16.76493, -10.92975, -4.98536, 1.00883, 6.99295, 12.90720,
         18.69248, 24.29100, 29.64680, 34.70639, 39.41920, 43.73814, 47.62007,
         51.02620, 53.92249, 56.28000, 58.07518, 59.29009, 59.91260, 59.93648,
         59.36149, 58.19339, 56.44383, 54.13031, 51.27593, 47.90923, 44.06383,
         39.77815, 35.09503, 30.06125, 24.72711, 19.14590, 13.37339, 7.46727,
         1.48653, -4.50907, -10.45961, -16.30564, -21.98875, -27.45215,
         -32.64127, -37.50424, -41.99248, -46.06115, -49.66959, -52.78175,
         -55.36653, -57.39810, -58.85617, -59.72618, -59.99941, -59.67316,
         -58.75066, -57.24115, -55.15971, -52.52713, -49.36972, -45.71902,
         -41.61151, -37.08823, -32.19438, -26.97885, -21.49376, -15.79391,
         -9.93625, -3.97931, 2.01738, 7.99392, 13.89059, 19.64847, 25.21002,
         30.51969, 35.52441, 40.17419, 44.42255, 48.22707, 51.54971, 54.35728,
         56.62174, 58.32045, 59.43644, 59.95856, 59.88160, 59.20632, 57.93947,
         56.09370, 53.68747, 50.74481, 47.29512, 43.37288, 39.01727, 34.27181,
         29.18392, 23.80443, 18.18710, 12.38805, 6.46522, 0.47779, -5.51441,
         -11.45151])
    dataset = dataframe.values
    dataset = dataset.astype('float32')

    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # split into train and test sets (first 67% / remainder)
    train_size = int(len(dataset) * 0.67)
    train, test = dataset[:train_size, :], dataset[train_size:, :]

    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # Create layers for model
    x_tensor = Input(shape=(1, look_back))
    layer_1 = LSTM(4)(x_tensor)
    y_tensor = Dense(1)(layer_1)

    # Create the LSTM network with the CPU as the device scope
    with tf.device('/cpu:0'):
        serial_model = Model(inputs=x_tensor, outputs=y_tensor)

    # Modify model for GPUs if necessary
    if gpus == 1:
        parallel_model = serial_model
    else:
        parallel_model = multi_gpu_model(
            serial_model,
            cpu_relocation=True,
            gpus=gpus)
    parallel_model.compile(
        loss='mean_squared_error', optimizer='adam')
    parallel_model.fit(
        trainX, trainY,
        epochs=100,
        batch_size=int(dataset.size * gpus / 20),
        verbose=2)

    # make predictions
    # Always predict with the serial model: with one GPU it is the very
    # same object as parallel_model, so no branch is needed.
    trainPredict = serial_model.predict(trainX)
    testPredict = serial_model.predict(testX)

    # invert predictions back to the original units
    # (the targets are wrapped in a list so they are passed as a single
    # row; that row is read back as trainY[0] / testY[0] below)
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])

    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[
        len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict

    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset), label='Complete Data')
    plt.plot(trainPredictPlot, label='Training Data')
    plt.plot(testPredictPlot, label='Prediction Data')
    plt.legend(loc='upper left')
    plt.title('Using {} GPUs'.format(gpus))
    plt.show()
Example #49
0
# Fit without shuffling; the test split doubles as validation data
history = model.fit(train_X,
                    train_y,
                    epochs=50,
                    batch_size=20,
                    validation_data=(test_X, test_y),
                    verbose=2,
                    shuffle=False)
# plot training and test loss curve
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()

# make a prediction
yhat = model.predict(test_X)
# drop the middle axis: (samples, 1, features) -> (samples, features)
# NOTE(review): assumes test_X.shape[1] == 1 -- confirm
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for forecast
# The value to invert goes into the first column and the remaining columns
# are filled from test_X, so the row has the width the scaler is applied
# to; only the first column is kept afterwards.
inv_yhat = np.concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:, 0]
# invert scaling for actual (same padding trick as for the forecast)
test_y = test_y.reshape((len(test_y), 1))
inv_y = np.concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:, 0]
# calculate RMSE in original units
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

# model.save('baseline_model')
    # Training is now complete!

    # Get the final cost values by running the "cost" operation on the
    # training and test data sets
    final_training_cost = session.run(cost, feed_dict={X: X_scaled_training, Y: Y_scaled_training})
    final_testing_cost = session.run(cost, feed_dict={X: X_scaled_testing, Y: Y_scaled_testing})

    print("Final Training cost: {}".format(final_training_cost))
    print("Final Testing cost: {}".format(final_testing_cost))

    # Now that the neural network is trained, let's use it to make predictions for our test data.
    # Pass in the X testing data and run the "prediction" operation
    Y_predicted_scaled = session.run(prediction, feed_dict={X: X_scaled_testing})

    # Unscale the data back to its original units (dollars)
    Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

    # Compare the first test row with its prediction
    real_earnings = test_data_df['total_earnings'].values[0]
    predicted_earnings = Y_predicted[0][0]

    print("The actual earnings of Game #1 were ${}".format(real_earnings))
    print("Our neural network predicted earnings of ${}".format(predicted_earnings))

    # Builder for exporting the model to the "exported_model" directory
    model_builder = tf.saved_model.builder.SavedModelBuilder("exported_model")

    # Tensor descriptions for the model's input and output
    inputs = {
        'input': tf.saved_model.utils.build_tensor_info(X)
        }
    outputs = {
        'earnings': tf.saved_model.utils.build_tensor_info(prediction)
        }
Example #51
0


# Part 3 - Making the predictions and visualising the results

# Getting the real stock price of 2018 feb
dataset_test = pd.read_csv('enscotest.csv')
real_stock_price = dataset_test.iloc[:, 1:2].values

# Getting the predicted stock price of 2018 feb
dataset_total = pd.concat((dataset_train['open'], dataset_test['open']), axis = 0)
# The test rows plus the 60 rows before them, so that the first test row
# also has a full 60-step window
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
# One window of the 60 preceding values per predicted row
# NOTE(review): the upper bound 80 hard-codes 20 test rows -- confirm it
# matches len(dataset_test).
for i in range(60, 80):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
# Add a trailing feature axis: (samples, 60) -> (samples, 60, 1)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
# Back to the original price scale
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Visualising the results
plt.plot(real_stock_price, color = 'red', label = 'Real Ensco Stock Price')
plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Ensco Stock Price')
plt.title('Ensco Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Ensco Stock Price')
plt.legend()
plt.show()
# verify: plot training and validation loss per epoch
import matplotlib.pyplot as plt
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('loss func')
plt.ylabel('loss/val_loss')
plt.xlabel('epochs')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

# NOTE(review): '%.3f' needs `scores` to be a single number, i.e. a model
# compiled without extra metrics -- confirm.
scores = model.evaluate(tX_test, ty_test)
print('\n%s: %.3f' % (model.metrics_names, scores))

# Step 4: test

y_pred = model.predict(tX_test)
# Back to the original scale of the targets
y_pred = y_scaler.inverse_transform(y_pred)
print('y_pred: \n', y_pred[:5])

# Side-by-side frame of the first column of truth and prediction
import pandas as pd
dfy = pd.DataFrame({'Truth class': y_test[...,0],
                  'Pred class': y_pred[...,0]})

# Long format: one row per (variable, value) pair for plotting
long_dfy = pd.melt(dfy, value_vars=['Truth class', 'Pred class'])

# Overlaid density plot of both distributions
from plotnine import *
(ggplot(long_dfy, aes(x='value', color='variable', fill='variable'))
  + geom_density(alpha=0.5)
  + theme_bw())
Example #53
0
class Train:
    """Prepare a CSV time series and fit a one-layer LSTM forecaster on it.

    The CSV is expected to have six columns. Column 0 holds a label that is
    translated to a number through ``maps``; columns 1 and 2 are used as sort
    keys and are not fed to the network.
    """

    def __init__(self, f, maps):
        """Load the raw data.

        Args:
            f: Path or buffer accepted by ``read_csv``; the first row is
                the header.
            maps: Mapping from lower-cased label (column 0) to its numeric
                code.
        """
        self.data = read_csv(f, header=0)
        # Fitted in data_prepro(); None until then.
        self.scaler = None
        self.map_condition = maps

    def series2supervise(self, data, n_in=24, n_out=6, is_drop=True):
        """Turn a time series into a supervised-learning table.

        Each output row contains the ``n_in`` previous observations
        (oldest first), followed by the current observation and the next
        ``n_out - 1`` observations.

        Args:
            data: Anything ``pandas.DataFrame`` accepts; one row per step.
            n_in: Number of lagged steps used as input.
            n_out: Number of steps (starting at the current one) used as
                output.
            is_drop: Drop the rows that contain NaN because the shifted
                window ran off either end of the series.

        Returns:
            DataFrame with ``(n_in + n_out) * n_columns`` columns.
        """
        frame = pandas.DataFrame(data)
        # Input patterns: t-n_in ... t-1
        lagged = [frame.shift(step) for step in range(n_in, 0, -1)]
        # Output patterns: t ... t+n_out-1
        ahead = [frame.shift(-step) for step in range(n_out)]
        res = concat(lagged + ahead, axis=1)
        if is_drop:
            res.dropna(inplace=True)
        return res

    def data_prepro(self):
        """Sort, encode, window and scale the raw data.

        Side effect: fits ``self.scaler`` on the windowed table.

        Returns:
            DataFrame of values scaled to [0, 1]. For every one of the 24
            input steps it keeps four columns (0, 3, 4, 5 of the raw row);
            for every one of the 6 output steps it keeps column 0 only.
        """
        n_in, n_out, n_features = 24, 6, 6

        dt = self.data.sort_values([self.data.columns[1], self.data.columns[2]], ascending=True)
        v = dt.values
        # Replace the textual label in column 0 by its numeric code.
        v[:, 0] = [self.map_condition[x.lower()] for x in v[:, 0]]
        v = v.astype('float32')

        reframed = self.series2supervise(v, n_in=n_in, n_out=n_out)
        reframed.columns = range(n_features * (n_in + n_out))

        drop = []
        for step in range(n_in + n_out):
            first = step * n_features
            if step < n_in:
                # Input step: discard the two sort-key columns.
                drop += [first + 1, first + 2]
            else:
                # Output step: keep only the label column.
                drop += [first + offset for offset in range(1, n_features)]

        print(reframed.values[10, :])
        reframed.drop(reframed.columns[drop], axis=1, inplace=True)
        print(reframed.shape)
        print(reframed.values[10, :])

        self.scaler = MinMaxScaler(feature_range=(0, 1))
        reframed = pandas.DataFrame(self.scaler.fit_transform(reframed))
        return reframed

    def model(self):
        """Train the LSTM and report the RMSE on the held-out tail.

        The last ``30 * 24`` rows of the prepared table are used as the
        test/validation set; everything before them is training data.

        Returns:
            Tuple ``(model, scaler)``: the fitted Keras model and the
            fitted MinMaxScaler.
        """
        reframed = self.data_prepro()
        values = reframed.values
        n_test = 30*24
        train = values[:-n_test, :]
        test = values[-n_test:, :]
        # The last six columns are the targets, the rest are inputs.
        train_X, train_y = train[:, :-6], train[:, -6:]
        test_X, test_y = test[:, :-6], test[:, -6:]

        print(train.shape)
        # reshape to (samples, 1 timestep, features)
        train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])
        test_X = test_X.reshape(test_X.shape[0], 1, test_X.shape[1])

        # build network
        model = Sequential()
        model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
        model.add(Dense(6))
        model.compile(loss='mae', optimizer='adam')

        # fit
        model.fit(train_X, train_y, epochs=1, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
        print(" -------train loss-------")

        # predict
        res_y = model.predict(test_X)

        # The scaler was fitted on inputs and targets together, so the input
        # columns are glued back on before inverting and then cut off again.
        test_X = test_X.reshape(test_X.shape[0], test_X.shape[2])
        y = concatenate((test_X[:, :], res_y), axis=1)
        y = self.scaler.inverse_transform(y)
        y = y[:, -6:]

        # Same round trip for the ground truth.
        ground_y = test_y.reshape((len(test_y), 6))
        ground_y = concatenate((test_X[:, :], ground_y), axis=1)
        ground_y = self.scaler.inverse_transform(ground_y)
        ground_y = ground_y[:, -6:]

        print(" -------PREDICTED Condition-------")
        print(" -------ERROR DELTA------")
        rmse = sqrt(mean_squared_error(y, ground_y))
        print(" -------RMSE------")
        print(rmse)
        return model, self.scaler
# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, batch_size = 32, epochs = 200)

# Part 3 - Making the predictions and visualising the results

# Getting the real stock price of 2017
test_set = pd.read_csv('Google_Stock_Price_Test.csv')
real_stock_price = test_set.iloc[:,1:2].values

# Getting the predicted stock price of 2017
# Scale with the already-fitted scaler `sc` (applied only, not re-fitted).
inputs = sc.transform(real_stock_price)
# One timestep and one feature per sample. -1 lets NumPy infer the number of
# samples, so the script no longer requires the test file to have exactly
# 20 rows.
inputs = np.reshape(inputs, (-1, 1, 1))
predicted_stock_price = regressor.predict(inputs)
# Map the scaled predictions back to price units.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Visualising the results
plt.plot(real_stock_price, color = 'red', label = 'Real Google Stock Price')
plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()

# Homework

# Getting the real stock price of 2012 - 2016
real_stock_price_train = pd.read_csv('Google_Stock_Price_Train.csv')
real_stock_price_train = real_stock_price_train.iloc[:,1:2].values
Example #55
0
def trainTestAndPredict(startDateForPrediction, dateToPredict, tick,
                        dim_window):
    """Train an MLP on ``stock_prices.csv`` and plot its test-set results.

    The features and the targets each get their own MinMaxScaler, fitted on
    the training split only; the test split is transformed with those same
    fits, and predictions are mapped back with the target scaler. Several
    PNG files (MAE/MSE history, true-vs-predicted scatter, error histogram,
    prediction history) are written to the working directory.

    Args:
        startDateForPrediction: Not used by this function.
        dateToPredict: Not used by this function.
        tick: Ticker symbol passed to ``ist.get_data``.
        dim_window: Not used by this function.

    Returns:
        Always 0.
    """
    fix.pdr_override()

    start = "2005-01-01"
    end = str(date.today())

    # NOTE(review): the downloaded data is never used below -- training reads
    # "stock_prices.csv" instead. The call is kept because the original made
    # it; confirm whether it can be dropped.
    ist.get_data(tick, start_date=start, end_date=end)
    process = DataProcessing("stock_prices.csv", 0.9)

    process.generate_test(30)
    process.generate_train(30)

    # Separate scalers for inputs and targets, fitted on the training data
    # only. Re-fitting one shared scaler on every split would scale the test
    # data with its own min/max and make inverse_transform use the wrong fit.
    x_scaler = MinMaxScaler(feature_range=(0, 1))
    y_scaler = MinMaxScaler(feature_range=(0, 1))
    X_train = x_scaler.fit_transform(process.X_train)
    Y_train = y_scaler.fit_transform(process.Y_train)
    X_test = x_scaler.transform(process.X_test)
    Y_test = y_scaler.transform(process.Y_test)

    model = Sequential()
    model.add(Dense(60, activation=tf.nn.selu))
    model.add(Dropout(0.2))
    model.add(Dense(40, activation=tf.nn.selu))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation=tf.nn.selu))

    model.compile(optimizer="Adam",
                  loss="mean_squared_error",
                  metrics=['mean_absolute_error', 'mean_squared_error'])

    history = model.fit(X_train,
                        Y_train,
                        epochs=100,
                        validation_split=0.2,
                        batch_size=30)
    print(model.summary())
    model.evaluate(X_test, Y_test)

    image_suffix = 'Adam MLP.png'
    run_label = 'Adam MLP'

    def _show_and_save(filename):
        # Grab the figure handle before show() so it can still be saved
        # afterwards.
        fig = plt.gcf()
        plt.show()
        plt.draw()
        fig.savefig(filename)

    # Plot the MAE and MSE training history
    def plot_history(storia):
        hist = pd.DataFrame(storia.history)
        hist['epoch'] = storia.epoch

        plt.figure()
        plt.title("Mean Absolute Error " + run_label)
        plt.xlabel('Epoch')
        plt.ylabel('Mean Abs Error')
        plt.plot(hist['epoch'],
                 hist['mean_absolute_error'],
                 label='Train Error')
        plt.plot(hist['epoch'],
                 hist['val_mean_absolute_error'],
                 label='Val Error')
        plt.legend()
        plt.savefig('MAE' + image_suffix)

        plt.figure()
        plt.title("Mean Squared Error " + run_label)
        plt.xlabel('Epoch')
        plt.ylabel('Mean Square Error')
        plt.plot(hist['epoch'],
                 hist['mean_squared_error'],
                 label='Train Error')
        plt.plot(hist['epoch'],
                 hist['val_mean_squared_error'],
                 label='Val Error')
        plt.legend()
        _show_and_save('MSE' + image_suffix)

    plot_history(history)

    # Scatter plot of predicted values against real values.
    # Both arrays are flattened to 1-D so that the subtraction further down
    # is element-wise instead of broadcasting (n,) against (n, 1).
    test_predictions = y_scaler.inverse_transform(
        model.predict(X_test)).flatten()
    Y_test = y_scaler.inverse_transform(Y_test).flatten()

    plt.figure()
    plt.title("True Value VS Predicted Value " + run_label)
    plt.scatter(Y_test, test_predictions)
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.axis('equal')
    plt.axis('square')
    _ = plt.plot([-100, 100], [-100, 100])
    _show_and_save('TrueVSPredicted' + image_suffix)

    # Histogram of the prediction errors
    plt.figure()
    plt.title("Error Distribution " + run_label)
    error = test_predictions - Y_test
    plt.hist(error, bins=25)
    plt.xlabel("Prediction Error")
    _ = plt.ylabel("Count")
    _show_and_save('PredictionError' + image_suffix)

    # Predicted test values against the real test values
    plt.figure()
    plt.title("True vs Predicted Test " + run_label)
    plt.plot(test_predictions, label="Predicted")
    plt.plot(Y_test, label="Real")
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend(loc='upper left')
    _show_and_save('History' + image_suffix)
    return 0
class RNNGRU(object):
    """Process data for ingestion."""

    def __init__(
            self, data, periods=288, batch_size=64, sequence_length=20,
            warmup_steps=50, epochs=20, display=False):
        """Instantiate the class.

        Args:
            data: Dict of values keyed by timestamp
            periods: Number of timestamp data points per vector
            batch_size: Size of batch
            sequence_length: Length of vectors for for each target
            warmup_steps:

        Returns:
            None

        """
        # Initialize key variables
        self.periods = periods
        self.target_names = ['value']
        self.warmup_steps = warmup_steps
        self.epochs = epochs
        self.batch_size = batch_size
        self.display = display

        ###################################
        # TensorFlow wizardry
        config = tf.ConfigProto()

        # Don't pre-allocate memory; allocate as-needed
        config.gpu_options.allow_growth = True

        # Only allow a total of half the GPU memory to be allocated
        config.gpu_options.per_process_gpu_memory_fraction = 0.95

        # Crash with DeadlineExceeded instead of hanging forever when your
        # queues get full/empty
        config.operation_timeout_in_ms = 60000

        # Create a session with the above options specified.
        backend.tensorflow_backend.set_session(tf.Session(config=config))
        ###################################

        # Get data
        (x_data, y_data) = convert_data(data, periods, self.target_names)

        print('\n> Numpy Data Type: {}'.format(type(x_data)))
        print("> Numpy Data Shape: {}".format(x_data.shape))
        print("> Numpy Data Row[0]: {}".format(x_data[0]))
        print('> Numpy Targets Type: {}'.format(type(y_data)))
        print("> Numpy Targets Shape: {}".format(y_data.shape))

        '''
        This is the number of observations (aka. data-points or samples) in
        the data-set:
        '''

        num_data = len(x_data)

        '''
        This is the fraction of the data-set that will be used for the
        training-set:
        '''

        train_split = 0.9

        '''
        This is the number of observations in the training-set:
        '''

        self.num_train = int(train_split * num_data)

        '''
        This is the number of observations in the test-set:
        '''

        num_test = num_data - self.num_train

        print('> Number of Samples: {}'.format(num_data))
        print("> Number of Training Samples: {}".format(self.num_train))
        print("> Number of Test Samples: {}".format(num_test))

        # Create test and training data
        x_train = x_data[0:self.num_train]
        x_test = x_data[self.num_train:]
        self.y_train = y_data[0:self.num_train]
        self.y_test = y_data[self.num_train:]
        self.num_x_signals = x_data.shape[1]
        self.num_y_signals = y_data.shape[1]

        print("> Training Minimum Value:", np.min(x_train))
        print("> Training Maximum Value:", np.max(x_train))

        '''
        steps_per_epoch is the number of batch iterations before a training
        epoch is considered finished.
        '''

        self.steps_per_epoch = int(self.num_train / batch_size) + 1
        print("> Epochs:", epochs)
        print("> Batch Size:", batch_size)
        print("> Steps:", self.steps_per_epoch)

        '''
        Calculate the estimated memory footprint.
        '''

        print("> Data size: {:.2f} Bytes".format(x_data.nbytes))

        '''
        if memory_footprint > 7:
            print('\n\n{}\n\n'.format(
                '> Estimated GPU memory usage too large. Use new parameters '
                'to reduce the footprint.'))
            sys.exit(0)
        '''

        '''
        The neural network works best on values roughly between -1 and 1, so we
        need to scale the data before it is being input to the neural network.
        We can use scikit-learn for this.

        We first create a scaler-object for the input-signals.

        Then we detect the range of values from the training-data and scale
        the training-data.
        '''

        x_scaler = MinMaxScaler()
        self.x_train_scaled = x_scaler.fit_transform(x_train)

        print('> Scaled Training Minimum Value: {}'.format(
            np.min(self.x_train_scaled)))
        print('> Scaled Training Maximum Value: {}'.format(
            np.max(self.x_train_scaled)))

        self.x_test_scaled = x_scaler.transform(x_test)

        '''
        The target-data comes from the same data-set as the input-signals,
        because it is the weather-data for one of the cities that is merely
        time-shifted. But the target-data could be from a different source with
        different value-ranges, so we create a separate scaler-object for the
        target-data.
        '''

        self.y_scaler = MinMaxScaler()
        self.y_train_scaled = self.y_scaler.fit_transform(self.y_train)
        y_test_scaled = self.y_scaler.transform(self.y_test)

        # Data Generator

        '''
        The data-set has now been prepared as 2-dimensional numpy arrays. The
        training-data has almost 300k observations, consisting of 20
        input-signals and 3 output-signals.

        These are the array-shapes of the input and output data:
        '''

        print('> Scaled Training Data Shape: {}'.format(
            self.x_train_scaled.shape))
        print('> Scaled Training Targets Shape: {}'.format(
            self.y_train_scaled.shape))

        # We then create the batch-generator.

        generator = self.batch_generator(batch_size, sequence_length)

        # Validation Set

        '''
        The neural network trains quickly so we can easily run many training
        epochs. But then there is a risk of overfitting the model to the
        training-set so it does not generalize well to unseen data. We will
        therefore monitor the model's performance on the test-set after each
        epoch and only save the model's weights if the performance is improved
        on the test-set.

        The batch-generator randomly selects a batch of short sequences from
        the training-data and uses that during training. But for the
        validation-data we will instead run through the entire sequence from
        the test-set and measure the prediction accuracy on that entire
        sequence.
        '''

        validation_data = (np.expand_dims(self.x_test_scaled, axis=0),
                           np.expand_dims(y_test_scaled, axis=0))

        # Create the Recurrent Neural Network

        self.model = Sequential()

        '''
        We can now add a Gated Recurrent Unit (GRU) to the network. This will
        have 512 outputs for each time-step in the sequence.

        Note that because this is the first layer in the model, Keras needs to
        know the shape of its input, which is a batch of sequences of arbitrary
        length (indicated by None), where each observation has a number of
        input-signals (num_x_signals).
        '''

        self.model.add(GRU(
            units=512,
            return_sequences=True,
            input_shape=(None, self.num_x_signals,)))

        '''
        The GRU outputs a batch of sequences of 512 values. We want to predict
        3 output-signals, so we add a fully-connected (or dense) layer which
        maps 512 values down to only 3 values.

        The output-signals in the data-set have been limited to be between 0
        and 1 using a scaler-object. So we also limit the output of the neural
        network using the Sigmoid activation function, which squashes the
        output to be between 0 and 1.'''

        self.model.add(Dense(self.num_y_signals, activation='sigmoid'))

        '''
        A problem with using the Sigmoid activation function, is that we can
        now only output values in the same range as the training-data.

        For example, if the training-data only has temperatures between -20
        and +30 degrees, then the scaler-object will map -20 to 0 and +30 to 1.
        So if we limit the output of the neural network to be between 0 and 1
        using the Sigmoid function, this can only be mapped back to temperature
        values between -20 and +30.

        We can use a linear activation function on the output instead. This
        allows for the output to take on arbitrary values. It might work with
        the standard initialization for a simple network architecture, but for
        more complicated network architectures e.g. with more layers, it might
        be necessary to initialize the weights with smaller values to avoid
        NaN values during training. You may need to experiment with this to
        get it working.
        '''

        if False:
            # Maybe use lower init-ranges.
            # init = RandomUniform(minval=-0.05, maxval=0.05)
            init = RandomUniform(minval=-0.05, maxval=0.05)

            self.model.add(Dense(
                self.num_y_signals,
                activation='linear',
                kernel_initializer=init))

        # Compile Model

        '''
        This is the optimizer and the beginning learning-rate that we will use.
        We then compile the Keras model so it is ready for training.
        '''
        optimizer = RMSprop(lr=1e-3)
        self.model.compile(loss=self.loss_mse_warmup, optimizer=optimizer)

        '''
        This is a very small model with only two layers. The output shape of
        (None, None, 3) means that the model will output a batch with an
        arbitrary number of sequences, each of which has an arbitrary number of
        observations, and each observation has 3 signals. This corresponds to
        the 3 target signals we want to predict.
        '''
        print('> Model Summary:\n')
        print(self.model.summary())

        # Callback Functions

        '''
        During training we want to save checkpoints and log the progress to
        TensorBoard so we create the appropriate callbacks for Keras.

        This is the callback for writing checkpoints during training.
        '''

        path_checkpoint = '/tmp/23_checkpoint.keras'
        callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                              monitor='val_loss',
                                              verbose=1,
                                              save_weights_only=True,
                                              save_best_only=True)

        '''
        This is the callback for stopping the optimization when performance
        worsens on the validation-set.
        '''

        callback_early_stopping = EarlyStopping(monitor='val_loss',
                                                patience=5, verbose=1)

        '''
        This is the callback for writing the TensorBoard log during training.
        '''

        callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                           histogram_freq=0,
                                           write_graph=False)

        '''
        This callback reduces the learning-rate for the optimizer if the
        validation-loss has not improved since the last epoch
        (as indicated by patience=0). The learning-rate will be reduced by
        multiplying it with the given factor. We set a start learning-rate of
        1e-3 above, so multiplying it by 0.1 gives a learning-rate of 1e-4.
        We don't want the learning-rate to go any lower than this.
        '''

        callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                               factor=0.1,
                                               min_lr=1e-4,
                                               patience=0,
                                               verbose=1)

        callbacks = [callback_early_stopping,
                     callback_checkpoint,
                     callback_tensorboard,
                     callback_reduce_lr]

        # Train the Recurrent Neural Network

        '''We can now train the neural network.

        Note that a single "epoch" does not correspond to a single processing
        of the training-set, because of how the batch-generator randomly
        selects sub-sequences from the training-set. Instead we have selected
        steps_per_epoch so that one "epoch" is processed in a few minutes.

        With these settings, each "epoch" took about 2.5 minutes to process on
        a GTX 1070. After 14 "epochs" the optimization was stopped because the
        validation-loss had not decreased for 5 "epochs". This optimization
        took about 35 minutes to finish.

        Also note that the loss sometimes becomes NaN (not-a-number). This is
        often resolved by restarting and running the Notebook again. But it may
        also be caused by your neural network architecture, learning-rate,
        batch-size, sequence-length, etc. in which case you may have to modify
        those settings.
        '''

        print('\n> Starting data training\n')

        try:
            self.model.fit_generator(
                generator=generator,
                epochs=self.epochs,
                steps_per_epoch=self.steps_per_epoch,
                validation_data=validation_data,
                callbacks=callbacks)
        except Exception as error:
            print('\n>{}\n'.format(error))
            traceback.print_exc()
            sys.exit(0)

        # Load Checkpoint

        '''
        Because we use early-stopping when training the model, it is possible
        that the model's performance has worsened on the test-set for several
        epochs before training was stopped. We therefore reload the last saved
        checkpoint, which should have the best performance on the test-set.
        '''

        print('> Loading model weights')

        try:
            self.model.load_weights(path_checkpoint)
        except Exception as error:
            print('\n> Error trying to load checkpoint.\n\n{}'.format(error))
            traceback.print_exc()
            sys.exit(0)

        # Performance on Test-Set

        '''
        We can now evaluate the model's performance on the test-set. This
        function expects a batch of data, but we will just use one long
        time-series for the test-set, so we just expand the
        array-dimensionality to create a batch with that one sequence.
        '''

        result = self.model.evaluate(
            x=np.expand_dims(self.x_test_scaled, axis=0),
            y=np.expand_dims(y_test_scaled, axis=0))

        print('> Loss (test-set): {}'.format(result))

        # If you have several metrics you can use this instead.
        if False:
            for res, metric in zip(result, self.model.metrics_names):
                print('{0}: {1:.3e}'.format(metric, res))

    def batch_generator(self, batch_size, sequence_length):
        """Create generator function to create random batches of training-data.

        Args:
            batch_size: Size of batch
            sequence_length: Length of sequence

        Returns:
            (x_batch, y_batch)

        """
        # Infinite loop.
        while True:
            # Allocate a new array for the batch of input-signals.
            x_shape = (batch_size, sequence_length, self.num_x_signals)
            x_batch = np.zeros(shape=x_shape, dtype=np.float16)

            # Allocate a new array for the batch of output-signals.
            y_shape = (batch_size, sequence_length, self.num_y_signals)
            y_batch = np.zeros(shape=y_shape, dtype=np.float16)

            # Fill the batch with random sequences of data.
            for i in range(batch_size):
                # Get a random start-index.
                # This points somewhere into the training-data.
                idx = np.random.randint(self.num_train - sequence_length)

                # Copy the sequences of data starting at this index.
                x_batch[i] = self.x_train_scaled[idx:idx+sequence_length]
                y_batch[i] = self.y_train_scaled[idx:idx+sequence_length]

            yield (x_batch, y_batch)

    def loss_mse_warmup(self, y_true, y_pred):
        """Calculate the Mean Squared Error after the warm-up period.

        At the start of a sequence the model has seen only a few
        time-steps, so its early outputs may be very inaccurate. To keep
        those outputs from influencing training, the first
        ``self.warmup_steps`` time-steps of every sequence are cut off
        before the error is computed.

        Both tensors are indexed as
        [batch_size, sequence_length, num_y_signals].

        Args:
            y_true: Desired output.
            y_pred: Model's output.

        Returns:
            loss_mean: Mean Squared Error as a single scalar

        """
        skip = self.warmup_steps

        # Drop the warm-up part along the time axis, then compare what is
        # left of the two tensors.
        mse = tf.losses.mean_squared_error(
            labels=y_true[:, skip:, :],
            predictions=y_pred[:, skip:, :])

        # Collapse whatever the loss op returned into one scalar so the
        # result does not depend on how Keras would reduce it.
        return tf.reduce_mean(mse)

    def plot_comparison(self, start_idx, length=100, train=True):
        """Plot predicted against true output-signals and save the figures.

        One PNG per target signal is written to /tmp. The figure is also
        shown on screen when ``self.display`` is True.

        Args:
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.
            train: Boolean whether to use training- or test-set.

        Returns:
            None

        """
        # Choose the data split: scaled inputs for the model, unscaled
        # targets for the comparison.
        if train:
            inputs, targets, shim = (
                self.x_train_scaled, self.y_train, 'Train')
        else:
            inputs, targets, shim = (
                self.x_test_scaled, self.y_test, 'Test')

        # Cut out the requested window.
        window = slice(start_idx, start_idx + length)
        inputs = inputs[window]
        y_true = targets[window]

        # The model expects a batch, so add a leading axis of size one and
        # take the single sequence back out of the prediction.
        y_pred = self.model.predict(np.expand_dims(inputs, axis=0))

        # Undo the target scaling so predictions are in the units of the
        # original data-set.
        y_pred_rescaled = self.y_scaler.inverse_transform(y_pred[0])

        for signal, target_name in enumerate(self.target_names):
            filename = (
                '/tmp/batch_{}_epochs_{}_training_{}_{}_{}_{}.png').format(
                    self.batch_size, self.epochs, self.num_train, signal,
                    int(time.time()), shim)

            # Predicted and true series for this signal.
            signal_pred = y_pred_rescaled[:, signal]
            signal_true = y_true[:, signal]

            # Wide canvas with both curves.
            plt.figure(figsize=(15, 5))
            plt.plot(signal_true, label='true')
            plt.plot(signal_pred, label='pred')

            # Shade the warm-up period.
            _ = plt.axvspan(
                0, self.warmup_steps, facecolor='black', alpha=0.15)

            plt.ylabel(target_name)
            plt.legend()

            # Always save; additionally show when display is enabled.
            plt.savefig(filename, bbox_inches='tight')
            if self.display is True:
                plt.show()
            print('> Saving file: {}'.format(filename))
Example #57
0
testX, testY = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

# simple lstm network learning
model = Sequential()
model.add(LSTM(16, input_shape=(1, look_back)))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer=Adam(lr=0.05))  # lr = learning rate
model.fit(trainX, trainY, epochs=500, batch_size=1, verbose=2)

# make prediction and map it back to the original units
trainPredict = model.predict(trainX)
trainPredict = scaler.inverse_transform(trainPredict)

testPredict = model.predict(testX)
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform(testY)
# RMSE between the un-scaled test targets and test predictions; this is the
# test score, so it is labelled as such.
testScore = math.sqrt(mean_squared_error(testY, testPredict))
print('Test Score: %.2f RMSE' % testScore)

# plot
fig = plt.figure(facecolor='white')
plt.plot(testY, label='test data')
plt.plot(testPredict, label='Predict')
plt.legend()
plt.show()

#save plot to 'png'file
Example #58
0
class RNNGRU(DataGRU):
    """Process data for ingestion."""

    def __init__(
            self, filename, lookahead_periods, batch_size=64, epochs=20,
            sequence_length=20, warmup_steps=50, dropout=0,
            layers=1, patience=10, units=512, display=False):
        """Instantiate the class.

        Loads the data through DataGRU, configures the TensorFlow session,
        splits and scales the data, prints a summary of it and builds the
        model.

        Args:
            filename: Passed unchanged to DataGRU.__init__
            lookahead_periods: Passed unchanged to DataGRU.__init__
            batch_size: Number of sequences in each training batch
            epochs: Number of epochs requested from Keras when training
            sequence_length: Number of time-steps in each training sequence
            warmup_steps: Number of leading time-steps of each sequence
                that the loss function ignores. Should be smaller than
                sequence_length, otherwise the training batches contribute
                nothing to the loss.
            dropout: Recurrent dropout value given to every GRU layer
            layers: Number of GRU layers added after the input GRU layer
                (the absolute value, truncated to an integer, is used)
            patience: Patience, in epochs, of the early-stopping callback
            units: Number of units in every GRU layer
            display: If True, plots are shown on screen as well as saved

        Returns:
            None

        """
        # Setup inheritance
        DataGRU.__init__(self, filename, lookahead_periods)

        # Initialize key variables
        self._warmup_steps = warmup_steps
        self._epochs = epochs
        self._batch_size = batch_size
        self._patience = patience
        self._sequence_length = sequence_length
        self._units = units
        self._dropout = dropout
        self._layers = int(abs(layers))
        self.display = display
        self._path_checkpoint = '/tmp/checkpoint.keras'

        # The loss function discards the first `warmup_steps` time-steps of
        # every sequence and the training batches are `sequence_length`
        # time-steps long. When the warm-up is not shorter than the sequence
        # nothing is left of a training batch to compute the loss from, so
        # make that misconfiguration visible instead of failing silently.
        if self._warmup_steps >= self._sequence_length:
            import warnings
            warnings.warn(
                'warmup_steps ({}) is not smaller than sequence_length ({}): '
                'the loss ignores the first warmup_steps time-steps, so '
                'training batches will not contribute to the '
                'loss.'.format(self._warmup_steps, self._sequence_length),
                RuntimeWarning, stacklevel=2)

        # Delete any stale checkpoint file
        if os.path.exists(self._path_checkpoint):
            os.remove(self._path_checkpoint)

        ###################################
        # TensorFlow session configuration
        config = tf.ConfigProto()

        # Don't pre-allocate memory; allocate as-needed
        config.gpu_options.allow_growth = True

        # Cap this process at 80% of the GPU memory
        config.gpu_options.per_process_gpu_memory_fraction = 0.8

        # Crash with DeadlineExceeded instead of hanging forever when your
        # queues get full/empty
        config.operation_timeout_in_ms = 60000

        # Create a session with the above options specified.
        backend.tensorflow_backend.set_session(tf.Session(config=config))
        ###################################

        # Get data
        self._y_current = self.close()

        # Create test and training arrays for VALIDATION and EVALUATION
        (x_train, xv_test,
         self._y_train, self._yv_test) = self.train_test_split(.1)

        (self.training_rows, self._training_vector_count) = x_train.shape
        (self.test_rows, _) = xv_test.shape
        (_, self._training_class_count) = self._y_train.shape

        # Define the number of steps per epoch
        self._epoch_steps = int(self.training_rows / self._batch_size) + 1

        # Print stuff
        print('\n> Numpy Data Type: {}'.format(type(x_train)))
        print("> Numpy Data Shape: {}".format(x_train.shape))
        print("> Numpy Data Row[0]: {}".format(x_train[0]))
        print("> Numpy Data Row[Last]: {}".format(x_train[-1]))
        print('> Numpy Targets Type: {}'.format(type(self._y_train)))
        print("> Numpy Targets Shape: {}".format(self._y_train.shape))

        print('> Number of Samples: {}'.format(self._y_current.shape[0]))
        print('> Number of Training Samples: {}'.format(x_train.shape[0]))
        print('> Number of Training Classes: {}'.format(
            self._training_class_count))
        print('> Number of Test Samples: {}'.format(self.test_rows))
        print("> Training Minimum Value:", np.min(x_train))
        print("> Training Maximum Value:", np.max(x_train))
        print('> Number X signals: {}'.format(self._training_vector_count))
        print('> Number Y signals: {}'.format(self._training_class_count))

        # Print epoch related data
        print('> Epochs:', self._epochs)
        print('> Batch Size:', self._batch_size)
        print('> Steps:', self._epoch_steps)

        # Display estimated memory footprint of training data.
        print("> Data size: {:.2f} Bytes".format(x_train.nbytes))

        # Scale the input-signals before they are given to the network.
        # The scaler learns the value range from the training data only and
        # the same mapping is then applied to the test data.
        self._x_scaler = MinMaxScaler()
        self._x_train_scaled = self._x_scaler.fit_transform(x_train)

        print('> Scaled Training Minimum Value: {}'.format(
            np.min(self._x_train_scaled)))
        print('> Scaled Training Maximum Value: {}'.format(
            np.max(self._x_train_scaled)))

        self._xv_test_scaled = self._x_scaler.transform(xv_test)

        # The targets may have different value ranges than the
        # input-signals, so they get a separate scaler-object. It is also
        # fitted on the training data only.
        self._y_scaler = MinMaxScaler()
        self._y_train_scaled = self._y_scaler.fit_transform(self._y_train)
        self._yv_test_scaled = self._y_scaler.transform(self._yv_test)

        # The data is now prepared as 2-dimensional arrays of
        # (rows, signals). These are the shapes of the scaled training data:
        print('> Scaled Training Data Shape: {}'.format(
            self._x_train_scaled.shape))
        print('> Scaled Training Targets Shape: {}'.format(
            self._y_train_scaled.shape))

        # Create RNN model
        self.model = self._model()

    def _model(self):
        """Create the Recurrent Neural Network.

        The network consists of an input GRU layer, `self._layers` further
        GRU layers and a Dense output layer with a sigmoid activation and
        one output per target signal. It is compiled with the RMSprop
        optimizer and the warm-up aware MSE loss of this class.

        Args:
            None

        Returns:
            network: Compiled Keras Sequential model

        """
        # Create the model object
        network = Sequential()

        # Input GRU layer. Because this is the first layer in the model,
        # Keras needs to know the shape of its input: sequences of arbitrary
        # length (indicated by None) in which every observation has one
        # value per input-signal.
        network.add(GRU(
            units=self._units,
            return_sequences=True,
            recurrent_dropout=self._dropout,
            input_shape=(None, self._training_vector_count,)))

        # Further GRU layers stacked on top of the input layer. Every layer
        # returns the full sequence so that the network produces one output
        # per time-step.
        for _ in range(self._layers):
            network.add(GRU(
                units=self._units,
                recurrent_dropout=self._dropout,
                return_sequences=True))

        # Map the GRU outputs down to one value per target signal. The
        # targets are scaled with a MinMaxScaler fitted on the training
        # data, and the Sigmoid activation limits the output to the range
        # between 0 and 1.
        #
        # A consequence is that the network can only output values inside
        # the range seen in the training data. A Dense layer with
        # activation='linear' would lift that restriction, but may need a
        # kernel initializer with a small range, for example
        # RandomUniform(minval=-0.05, maxval=0.05), to avoid NaN values
        # during training.
        network.add(
            Dense(self._training_class_count, activation='sigmoid'))

        # Compile Model
        #
        # This is the optimizer and the beginning learning-rate that we will
        # use. We then compile the Keras model so it is ready for training.
        #
        # NOTE(review): 'accuracy' is a classification metric and carries
        # little meaning for this regression output. It is kept so that the
        # values reported by evaluate() do not change.
        optimizer = RMSprop(lr=1e-3)
        network.compile(
            loss=self._loss_mse_warmup,
            optimizer=optimizer,
            metrics=['accuracy'])

        # summary() prints the table itself and returns None, so it must not
        # be wrapped in print().
        print('> Model Summary:\n')
        network.summary()

        # Return
        return network

    def test(self):
        """Train the Recurrent Neural Network and evaluate it.

        Trains self.model on randomly sampled training batches while
        validating on the scaled test data, reloads the checkpoint written
        during training (when there is one) and prints the evaluation
        results for the test data.

        Args:
            None

        Returns:
            None

        """
        # Create the batch-generator.
        generator = self._batch_generator(
            self._batch_size, self._sequence_length)

        # Validation Set
        #
        # The batch-generator randomly selects a batch of short sequences
        # from the training-data and uses that during training. For the
        # validation-data we instead run through the entire sequence from
        # the test-set, so the arrays are expanded to a batch that holds
        # that one sequence. The model is only checkpointed when its
        # performance on this data improves.
        validation_data = (np.expand_dims(self._xv_test_scaled, axis=0),
                           np.expand_dims(self._yv_test_scaled, axis=0))

        # Callback Functions

        # Write a checkpoint whenever the validation-loss improves.
        callback_checkpoint = ModelCheckpoint(filepath=self._path_checkpoint,
                                              monitor='val_loss',
                                              verbose=1,
                                              save_weights_only=True,
                                              save_best_only=True)

        # Stop the optimization when performance worsens on the
        # validation-set.
        callback_early_stopping = EarlyStopping(monitor='val_loss',
                                                patience=self._patience,
                                                verbose=1)

        # Write the TensorBoard log during training.
        callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                           histogram_freq=0,
                                           write_graph=False)

        # Reduce the learning-rate of the optimizer if the validation-loss
        # has not improved since the last epoch (as indicated by
        # patience=0). The learning-rate is multiplied by the given factor
        # and is not allowed to go lower than min_lr.
        callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                               factor=0.1,
                                               min_lr=1e-4,
                                               patience=0,
                                               verbose=1)

        callbacks = [callback_early_stopping,
                     callback_checkpoint,
                     callback_tensorboard,
                     callback_reduce_lr]

        # Train the Recurrent Neural Network
        #
        # Note that a single "epoch" does not correspond to a single
        # processing of the training-set, because the batch-generator
        # randomly selects sub-sequences from the training-set.
        print('\n> Starting data training\n')

        self.model.fit_generator(
            generator=generator,
            epochs=self._epochs,
            steps_per_epoch=self._epoch_steps,
            validation_data=validation_data,
            callbacks=callbacks)

        # Load Checkpoint
        #
        # Because we use early-stopping, the performance may have worsened
        # for several epochs before training was stopped. We therefore
        # reload the last saved checkpoint, which is only written when the
        # validation-loss improves.
        print('> Loading model weights')
        if os.path.exists(self._path_checkpoint):
            self.model.load_weights(self._path_checkpoint)

        # Performance on Test-Set
        #
        # The same single-sequence batch that was used for validation is
        # evaluated here.
        result = self.model.evaluate(
            x=validation_data[0],
            y=validation_data[1])

        # evaluate() returns a bare loss when the model has no metrics and a
        # list of [loss, metric, ...] when it has. Report the loss on its
        # own line either way and name any further metrics.
        if isinstance(result, (list, tuple)):
            print('> Loss (test-set): {}'.format(result[0]))
            extra = zip(self.model.metrics_names[1:], result[1:])
            for metric, res in extra:
                print('> {0} (test-set): {1:.3e}'.format(metric, res))
        else:
            print('> Loss (test-set): {}'.format(result))

    def _batch_generator(self, batch_size, sequence_length):
        """Create generator function to create random batches of training-data.

        Args:
            batch_size: Size of batch
            sequence_length: Length of sequence

        Returns:
            (x_batch, y_batch)

        """
        # Infinite loop.
        while True:
            # Allocate a new array for the batch of input-signals.
            x_shape = (
                batch_size, sequence_length, self._training_vector_count)
            x_batch = np.zeros(shape=x_shape, dtype=np.float16)

            # Allocate a new array for the batch of output-signals.
            y_shape = (batch_size, sequence_length, self._training_class_count)
            y_batch = np.zeros(shape=y_shape, dtype=np.float16)

            # Fill the batch with random sequences of data.
            for i in range(batch_size):
                # Get a random start-index.
                # This points somewhere into the training-data.
                idx = np.random.randint(
                    self.training_rows - sequence_length)

                # Copy the sequences of data starting at this index.
                x_batch[i] = self._x_train_scaled[idx:idx+sequence_length]
                y_batch[i] = self._y_train_scaled[idx:idx+sequence_length]

            yield (x_batch, y_batch)

    def _loss_mse_warmup(self, y_true, y_pred):
        """Calculate the Mean Squared Error after the warm-up period.

        Mean Squared Error (MSE) is the loss-function that is minimized
        during training. It measures how closely the model's output matches
        the true output signals.

        At the beginning of a sequence the model has only seen input-signals
        for a few time-steps, so its output may be very inaccurate. Those
        early time-steps are therefore left out: the first
        `self._warmup_steps` time-steps of every sequence are dropped from
        both tensors before the error is calculated.

        Args:
            y_true: Desired output.
            y_pred: Model's output.

        Returns:
            loss_mean: Mean Squared Error

        """
        # Both tensors are indexed as
        # [batch_size, sequence_length, num_y_signals], so slicing the second
        # axis removes the warm-up part of every sequence.
        skip = self._warmup_steps

        # MSE between the remaining parts of the two tensors.
        squared_error = tf.losses.mean_squared_error(
            labels=y_true[:, skip:, :],
            predictions=y_pred[:, skip:, :])

        # Reduce to a single scalar over the entire tensor so that the result
        # does not depend on how Keras would reduce it across the batch.
        return tf.reduce_mean(squared_error)

    def plot_train(self, start_idx, length=100):
        """Plot the predicted and true output-signals.

        Args:
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.

        Returns:
            None

        """
        # Plot
        self._plot_comparison(start_idx, length=length, train=True)

    def plot_test(self, start_idx, length=100):
        """Plot the predicted and true output-signals.

        Args:
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.

        Returns:
            None

        """
        # Plot
        self._plot_comparison(start_idx, length=length, train=False)

    def _plot_comparison(self, start_idx, length=100, train=True):
        """Plot the predicted and true output-signals.

        One chart is created for every entry of self.labels(). Each chart
        is saved as a PNG file under /tmp and is additionally shown on
        screen when self.display is True.

        Args:
            start_idx: Start-index for the time-series.
            length: Sequence-length to process and plot.
            train: Boolean whether to use training- or test-set.

        Returns:
            None

        """
        # Rows before this index belong to the training data
        train_count = self.training_rows

        # End-index for the sequences.
        stop_idx = start_idx + length

        # Datetimes for the x-axis, keyed by the kind of line being plotted
        timestamps = {}

        # Tick locators and formatters for the date axis
        day_locator = mdates.DayLocator()   # Every day
        month_locator = mdates.MonthLocator()  # Every month
        month_formatter = mdates.DateFormatter('%b %Y')
        day_formatter = mdates.DateFormatter('%d')

        # Select the inputs, targets and datetimes of the requested window
        if train is True:
            # Use training-data.
            shim = 'Train'
            x_values = self._x_train_scaled[start_idx:stop_idx]
            y_true = self._y_train[start_idx:stop_idx]
            timestamps[shim] = self.datetime()[
                :train_count][start_idx:stop_idx]
        else:
            # Use test-data, scaled with the scaler of the training inputs
            shim = 'Test'
            scaled_inputs = self._x_scaler.transform(
                self.vectors_test_all())
            x_values = scaled_inputs[start_idx:stop_idx]
            y_true = self._yv_test[start_idx:stop_idx]
            timestamps[shim] = self.datetime()[
                train_count:][start_idx:stop_idx]

        # The model expects a batch, so wrap the single sequence in one and
        # map the prediction for it back to the scale of the original data.
        y_pred = self.model.predict(np.expand_dims(x_values, axis=0))
        y_pred_rescaled = self._y_scaler.inverse_transform(y_pred[0])

        # For each output-signal.
        for signal in range(len(self.labels())):
            label = self.labels()[signal]

            # Current values and their datetimes. Both are sliced the same
            # way so that their lengths match.
            if train is True:
                # Restricted to the plotted window of the training data
                current = self._y_current[:train_count][start_idx:stop_idx]
                timestamps['actual'] = self.datetime()[
                    :train_count][start_idx:stop_idx]
            else:
                # From the start of the window to the end of the test data
                current = self._y_current[train_count:][start_idx:]
                timestamps['actual'] = self.datetime()[
                    train_count:][start_idx:]

            # Create a filename
            filename = (
                '/tmp/batch_{}_epochs_{}_training_{}_{}_{}_{}.png').format(
                    self._batch_size,
                    self._epochs,
                    train_count,
                    signal,
                    int(time.time()),
                    shim)

            # Predicted and true values of this output-signal
            signal_pred = y_pred_rescaled[:, signal]
            signal_true = y_true[:, signal]

            # Create a new chart
            (fig, axis) = plt.subplots(figsize=(15, 5))

            # Plot and compare the signals.
            axis.plot(
                timestamps[shim][:len(signal_true)],
                signal_true,
                label='Current +{}'.format(label))
            axis.plot(
                timestamps[shim][:len(signal_pred)],
                signal_pred,
                label='Prediction')
            axis.plot(timestamps['actual'], current, label='Current')

            # Set plot labels and titles
            axis.set_title('{1}ing Forecast ({0} Future Intervals)'.format(
                label, shim))
            axis.set_ylabel('Values')
            axis.legend(
                bbox_to_anchor=(1.04, 0.5),
                loc='center left', borderaxespad=0)

            # Major gridlines, then minor ones (minor ticks must be turned
            # on before their gridlines can be drawn)
            axis.grid(True)
            axis.xaxis.grid(which='major', color='black', alpha=0.2)
            axis.yaxis.grid(which='major', color='black', alpha=0.2)
            axis.minorticks_on()
            axis.xaxis.grid(which='minor', color='black', alpha=0.1)
            axis.yaxis.grid(which='minor', color='black', alpha=0.1)

            # Format the tick labels
            axis.xaxis.set_major_locator(month_locator)
            axis.xaxis.set_major_formatter(month_formatter)
            axis.xaxis.set_minor_locator(day_locator)

            # Remove tick marks
            axis.tick_params(axis='both', which='both', length=0)

            # Print day numbers on xaxis for Test data only
            if train is False:
                axis.xaxis.set_minor_formatter(day_formatter)
                plt.setp(axis.xaxis.get_minorticklabels(), rotation=90)

            # Rotates and right aligns the x labels, and moves the bottom of
            # the axes up to make room for them
            fig.autofmt_xdate()

            # Plot grey box for warmup-period if we are working with training
            # data and the start is within the warmup-period.
            # NOTE(review): timestamps[shim] is already sliced to the plotted
            # window, yet it is indexed here with start_idx and the warm-up
            # length. Whether that selects the intended datetimes depends on
            # the type returned by self.datetime() - confirm.
            if train is True and 0 < start_idx < self._warmup_steps:
                plt.axvspan(
                    timestamps[shim][start_idx],
                    timestamps[shim][self._warmup_steps],
                    facecolor='black', alpha=0.15)

            # Always save the image, and show it as well when requested
            fig.savefig(filename, bbox_inches='tight')
            if self.display is True:
                plt.show()
            print('> Saving file: {}'.format(filename))

            # Close figure
            plt.close(fig=fig)
Example #59
0
# Build a single-layer LSTM regressor and train it
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=6000, batch_size=1, verbose=2)

# Predict every data split with the trained model
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
showPredict = model.predict(showX)

# Undo the scaling. Each set of values is replicated into three identical
# columns before it is handed to the scaler, and only the first column of
# the result is kept. The number of copies needs to change for a different
# look_back.

# Training data: predictions, then targets
tmp3 = numpy.concatenate([trainPredict] * 3, axis=1)
trainPredict = scaler.inverse_transform(tmp3)[:, 0]

tmp4 = numpy.stack((trainY, trainY, trainY), axis=0).transpose()
trainY = scaler.inverse_transform(tmp4)[:, 0]

# Test data: predictions, then targets
tmp5 = numpy.concatenate([testPredict] * 3, axis=1)
testPredict = scaler.inverse_transform(tmp5)[:, 0]

tmp6 = numpy.stack((testY, testY, testY), axis=0).transpose()
testY = scaler.inverse_transform(tmp6)
Example #60
-1
 def predict_new(self, input):
     """Print the model's prediction for one new observation.

     A model is obtained from self.train_model(), `input` is scaled with
     a MinMaxScaler fitted on self.data, and the prediction is mapped
     back through the same scaler before it is printed.

     Args:
         input: List of exactly five values (enforced with an assert)

     Returns:
         None

     """
     model = self.train_model()
     assert len(input) == 5 and type(input) == list

     # Fit the scaler on the stored data and scale the new observation
     scaler = MinMaxScaler(feature_range=(0, 1))
     scaler.fit(self.data)
     scaled = scaler.transform([input])

     # The model is given a single sample with one time-step of 5 features
     features = numpy.array(scaled).reshape(1, 1, 5)
     prediction = model.predict(features)
     print(scaler.inverse_transform(prediction))