def main():
    """Classify every ``.jpg`` under the test directory and dump the results.

    Each image is resized to 56x56 RGB, scaled to [0, 1] and passed on its
    own to the saved estimator.  One CSV row per image (file name followed by
    the predicted class probabilities) is written to ``output.csv`` without
    header or index.  A timestamp is printed at the start and at the end.

    Files that OpenCV cannot decode are reported and skipped instead of
    aborting the whole run.
    """
    print(time.asctime(time.localtime(time.time())))

    explore_path = "/Users/praneet/Documents/Kaggle/Amazon/test"
    fixed_size = (56, 56)
    classifier = SKCompat(
        learn.Estimator(model_fn=train,
                        model_dir="/Users/praneet/Downloads/model"))

    outputlist = []
    filecount = 0  # number of .jpg files encountered (including skipped ones)
    for root, _dirs, files in os.walk(explore_path):
        for file_name in files:
            if not file_name.endswith(".jpg"):
                continue
            filecount += 1
            file_path = os.path.abspath(os.path.join(root, file_name))

            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread reports an unreadable/corrupt file by returning
                # None; cvtColor would raise on it and kill the batch.
                print("Skipping unreadable image: %s" % file_path)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, dsize=fixed_size)

            # Batch of one image, pixel values scaled to [0, 1].
            eval_data = np.array([img], dtype=np.float32) / 255.
            predictions = classifier.predict(x=eval_data)
            print(file_name)

            row = [file_name]
            for probabilities in predictions['probabilities']:
                row.extend(probabilities)
            outputlist.append(row)

    print(filecount)
    pd.DataFrame(outputlist).to_csv('output.csv', index=False, header=False)
    print(time.asctime(time.localtime(time.time())))
def predict_done():
    """Train an LSTM regressor on the electricity price CSV and plot its predictions.

    Reads ``RealMarketPriceDataPT.csv``, merging the ``date`` and ``(UTC)``
    columns into a ``timeline`` index, splits it with ``load_csvdata``, fits
    the model with a validation monitor, prints the mean absolute error on
    the test split and shows a plot of the predicted series.
    """
    # Local import: the block previously relied on the ``pd.datetime`` alias,
    # which newer pandas releases no longer provide.
    from datetime import datetime

    TIMESTEPS = 10
    RNN_LAYERS = [{'num_units': TIMESTEPS}]
    DENSE_LAYERS = [10, 10]
    TRAINING_STEPS = 1000
    BATCH_SIZE = 100
    # Integer division keeps the step interval an int on Python 3.
    PRINT_STEPS = TRAINING_STEPS // 100

    def dateparse(dates):
        return datetime.strptime(dates, '%d/%m/%Y %H:%M')

    rawdata = pd.read_csv(
        "./model/input/ElectricityPrice/RealMarketPriceDataPT.csv",
        parse_dates={'timeline': ['date', '(UTC)']},
        index_col='timeline',
        date_parser=dateparse)

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    regressor = SKCompat(
        learn.Estimator(
            model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))

    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'], y['val'],
        every_n_steps=PRINT_STEPS,
        early_stopping_rounds=1000)

    # fit() is called for its training side effect; the old code wrapped its
    # return value in a second SKCompat that was immediately thrown away.
    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    predicted = regressor.predict(X['test'])
    # This is the mean *absolute* error; it used to be stored in a variable
    # called ``mse`` and never reported.
    mae = mean_absolute_error(y['test'], predicted)
    print("MAE: %f" % mae)

    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted])
    plt.show()
def test():
    """Train the LSTM regressor on the CSV data and print RMSE / NMSE / MSE.

    Targets of every split are reshaped to 8 columns before use.  Shapes of
    the intermediate arrays are printed for debugging.
    """
    # NOTE(review): data is only gathered when BOTH files are missing; if
    # gatherData() is what creates them, ``or`` may have been intended.
    if not os.path.isfile("input.csv") and not os.path.isfile("output.csv"):
        gatherData()

    regressor = SKCompat(
        learn.Estimator(
            model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))

    X, y = load_csvdata(TIMESTEPS, seperate=False)

    print('-----------------------------------------')
    print('train y shape', y['train'].shape)
    print('train y shape_num', y['train'][1:5])
    print(y['val'].shape)

    # -1 lets numpy infer the row count instead of hard-coding the size of
    # one particular dataset (359 / 3239 / 399 rows).
    y['val'] = y['val'].reshape(-1, 8)
    y['train'] = y['train'].reshape(-1, 8)

    # create a lstm instance and validation monitor
    validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'])

    # fit() trains in place; re-wrapping its result in SKCompat was a no-op.
    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    print('X train shape', X['train'].shape)
    print('y train shape', y['train'].shape)

    y['test'] = y['test'].reshape(-1, 8)
    print('X test shape', X['test'].shape)
    print('y test shape', y['test'].shape)

    # Plain 2-D ndarray rather than np.matrix, so ``** 2`` is element-wise
    # (on a matrix it would be a matrix power).
    predicted = np.atleast_2d(
        np.asarray(regressor.predict(X['test']), dtype=np.float32))

    squared_error = (predicted - y['test']) ** 2
    rmse = np.sqrt(squared_error.mean())
    print(rmse.shape)

    score = mean_squared_error(y['test'], predicted)
    # should be variance of original data and not data from fitted model,
    # worth to double check
    nmse = score / np.var(y['test'])

    print("RMSE: %f" % rmse)
    print("NMSE: %f" % nmse)
    print("MSE: %f" % score)
# Split the supervised frame: the first n_lag columns are the inputs, the
# remaining columns are the targets.
train_X, train_y = train[:, 0:n_lag], train[:, n_lag:]
# Insert a length-1 middle axis: (rows, n_lag) -> (rows, 1, n_lag).
train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])

# fit() is called for its training side effect; wrapping its return value in
# another SKCompat only built an object that was discarded straight away.
regressor.fit(train_X, train_y,
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', train_X.shape)
print('y train shape', train_y.shape)
print('X test shape', test_X.shape)
print('y test shape', test_y.shape)

predicted = np.asmatrix(regressor.predict(test_X), dtype=np.float32)
predicted = np.transpose(predicted)

# The difference is converted to an ndarray before squaring: on an np.matrix
# ``** 2`` is a matrix product, not an element-wise square.
rmse = np.sqrt((np.asarray(np.subtract(predicted, test_y)) ** 2).mean())
score = mean_squared_error(predicted, test_y)
# should be variance of original data and not data from fitted model,
# worth to double check
nmse = score / np.var(test_y)

print("RMSE: %f" % rmse)
print("NMSE: %f" % nmse)
print("MSE: %f" % score)

plot_test, = plt.plot(test_y, label='test')
# Length of the training split: the first three quarters of the data.
# int() keeps the value usable as a slice bound even where round() returns
# a float (Python 2).
train_len = int(round(3 * data_len / 4))

x_train = data[0:train_len, :, :]
# start with just predicting the opening value the next day
# (targets are the inputs shifted by one step, taking index 1 of the last axis)
y_train = data[1:train_len + 1, :, 1]

x_test = data[train_len + 1:-1, :, :]
# start with just predicting the opening value the next day
y_test = data[train_len + 2:, :, 1]

# let's start by just flattening the data
x_train = np.reshape(x_train, (train_len, -1))
x_test = np.reshape(x_test, (len(y_test), -1))

# Specify that all features have real-value data
feature_columns = [
    tf.contrib.layers.real_valued_column("", dimension=x_train.shape[1])
]
classifier = SKCompat(
    learn.DNNRegressor(label_dimension=y_train.shape[1],
                       feature_columns=feature_columns,
                       hidden_units=[100, 50, 20]))
classifier.fit(x_train, y_train, steps=100000, batch_size=100)

# Run inference once and reuse the result for both metrics.
test_scores = classifier.predict(x_test)['scores']
score = metrics.r2_score(y_test, test_scores)
# NOTE: ``accuracy`` holds the mean squared error (lower is better); the name
# and the printed label are kept as they were.
accuracy = metrics.mean_squared_error(y_test, test_scores)
# score= np.linalg.norm(y_test-classifier.predict(x_test)['scores'])/np.linalg.norm(y_test)
print("Score: %f, Accuracy: %f" % (score, accuracy))
# create a lstm instance and validation monitor validation_monitor = learn.monitors.ValidationMonitor( X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000) #validation_monitor = tf.train.SessionRunHook(X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000) # print(X['train']) # print(y['train']) for batch_idx in range(1): print(batch_idx) regressor.fit(X['train'], y['train'], monitors=[validation_monitor], batch_size=BATCH_SIZE, steps=TRAINING_STEPS) print(X['test'].shape) print(y['test'].shape) predicted = regressor.predict(X['test']) # ,as_iterable=False) #rmse = np.sqrt(((predicted - y['test']) ** 2).mean(axis=0)) score = mean_squared_error(predicted, y['test']) print("MSE: %f" % score) print(predicted) plot_predicted, = plt.plot(predicted, label='predicted') plot_test, = plt.plot(y['test'], label='test') plt.legend([plot_predicted, plot_test], ['predicted', 'real']) plt.show()
def _load_gray_image(file_path, size=(256, 256)):
    """Return the image at *file_path* as a float32 grayscale array, or None.

    The image is converted BGR -> RGB, passed through ``processImage``,
    resized to *size*, converted to grayscale and divided by 255.
    None is returned when OpenCV cannot read the file.
    """
    img = cv2.imread(file_path)
    if img is None:
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = processImage(img)
    img = cv2.resize(img, dsize=size)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # One vectorised division instead of a Python loop over every pixel.
    return (np.asarray(img, dtype=np.float64) / 255.0).astype(np.float32)


def _one_hot_row(file_name, row, rules):
    """Return a one-hot row for the first rule that *row* satisfies.

    *rules* is an ordered sequence of ``(column, threshold)`` pairs.  The
    first pair with ``row[column] > threshold`` yields
    ``[file_name, 0, 0, 0]`` with that column set to 1.  If no rule matches,
    *row* itself is returned unchanged.
    """
    for column, threshold in rules:
        if row[column] > threshold:
            encoded = [file_name, 0, 0, 0]
            encoded[column] = 1
            return encoded
    return row


def main(unused_arguments):
    """Predict the type of every ``.jpg`` under the test folder and write two CSVs.

    ``output_integers_check.csv`` receives the raw probabilities per image;
    ``output_normalized_check.csv`` receives the same rows with any
    probability above 0.5 turned into a one-hot encoding.  Both files start
    with the header row ``image_name, Type_1, Type_2, Type_3``.
    Unreadable images are reported and skipped.
    """
    # NOTE(review): the thresholded 0/1 rows go to the "normalized" file and
    # the raw probabilities to the "integers" file - confirm the file names
    # are not swapped.
    mnist_classifier = SKCompat(
        learn.Estimator(model_fn=trainTypes,
                        model_dir="/Users/praneet/Downloads/model"))
    print('Predictions: ')

    evaluatePath = "/Users/praneet/Downloads/test/"
    header = ["image_name", "Type_1", "Type_2", "Type_3"]
    evallst = [header]
    outputlst = [header]
    # Checked in this order: Type_2, Type_3, Type_1 (columns 2, 3, 1).
    one_hot_rules = ((2, 0.5), (3, 0.5), (1, 0.5))

    # Start at 0 so that 'Total files' reports the number of images actually
    # processed (it used to start at 1 and over-count by one).
    fileCount = 0
    for root, _dirs, files in os.walk(evaluatePath):
        for fileName in files:
            if not fileName.endswith(".jpg"):
                continue
            filePath = os.path.abspath(os.path.join(root, fileName))
            image = _load_gray_image(filePath)
            if image is None:
                print('Skipping unreadable image: ', filePath)
                continue

            # Predict values for each image (batch of one).
            eval_data = np.array([image])
            predictions = mnist_classifier.predict(x=eval_data)
            print(fileName, predictions)

            raw_row = [fileName]
            for probabilities in predictions['probabilities']:
                raw_row.extend(probabilities)
            outputlst.append(raw_row)

            row = _one_hot_row(fileName, raw_row, one_hot_rules)
            if row[1] == 0 or row[1] == 1:
                print("Non Ambiguous Prediction")
            evallst.append(row)
            fileCount += 1

    print('Total files: ', fileCount)
    pd.DataFrame(evallst).to_csv('output_normalized_check.csv',
                                 index=False, header=False)
    pd.DataFrame(outputlst).to_csv('output_integers_check.csv',
                                   index=False, header=False)
# The monitor class is ``ValidationMonitor`` (singular); the plural spelling
# used before does not exist in learn.monitors and raised AttributeError.
validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'], every_n_steps=PRINT_STEPS)

# fit() is called for its training side effect; re-wrapping its return value
# in SKCompat only created an object that was discarded.
regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', X['train'].shape)
print('y_train shape', y['train'].shape)
print('X test shape', X['test'].shape)
print('y test shape', y['test'].shape)

predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)
predicted = np.transpose(predicted)

# Convert the difference to an ndarray before squaring: ``** 2`` on an
# np.matrix is a matrix product, not an element-wise square.
RMSE = np.sqrt((np.asarray(np.subtract(predicted, y['test'])) ** 2).mean())
score = MSE(predicted, y['test'])
NMSE = score / np.var(y['test'])
print("RMSE: %f" % RMSE)
print("NMSE: %f" % NMSE)
print("MSE: %f" % score)

# plt.plot returns a *list* of lines; unpack the single line so that
# legend(handles=...) receives artists rather than lists.
plot_test, = plt.plot(y['test'], label='test')
plot_predicted, = plt.plot(predicted, label='predicted')
plt.legend(handles=[plot_predicted, plot_test])
plt.show()
def bag_of_words_model(features, target):
    """Bag-of-words text classifier: encode the tokens, then one linear layer.

    Returns a ``(predictions, loss, train_op)`` tuple where ``predictions``
    holds the arg-max class under ``'class'`` and the softmax output under
    ``'prob'``.
    """
    num_classes = 15

    # Labels become one-hot vectors (on value 1, off value 0).
    onehot_target = tf.one_hot(target, num_classes, 1, 0)
    bow = encoders.bow_encoder(features,
                               vocab_size=n_words,
                               embed_dim=EMBEDDING_SIZE)
    logits = tf.contrib.layers.fully_connected(bow, num_classes,
                                               activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, onehot_target)

    train_op = tf.contrib.layers.optimize_loss(
        loss,
        tf.contrib.framework.get_global_step(),
        optimizer='Adam',
        learning_rate=0.01)

    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)


from tensorflow.contrib.learn.python import SKCompat

model_fn = bag_of_words_model
classifier = SKCompat(learn.Estimator(model_fn=model_fn))

# Train on the training split, then score the predicted classes on the test split.
classifier.fit(x_train, y_train, steps=1000)
y_predicted = classifier.predict(x_test)['class']
score = metrics.accuracy_score(y_test, y_predicted)
print('Accuracy: {0:f}'.format(score))
# Tail of a disabled argument list and disabled debug output:
# every_n_steps=PRINT_STEPS,)
# early_stopping_rounds=1000)
# print(X['train'])
# print(y['train'])

# fit() is called for its training side effect; wrapping its return value in
# another SKCompat only built an object that was discarded straight away.
regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', X['train'].shape)
print('y train shape', y['train'].shape)
print('X test shape', X['test'].shape)
print('y test shape', y['test'].shape)

predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)
predicted = np.transpose(predicted)

# The difference is converted to an ndarray before squaring: on an np.matrix
# ``** 2`` is a matrix product, not an element-wise square.
rmse = np.sqrt((np.asarray(np.subtract(predicted, y['test'])) ** 2).mean())
score = mean_squared_error(predicted, y['test'])
# should be variance of original data and not data from fitted model,
# worth to double check
nmse = score / np.var(y['test'])

print("RMSE: %f" % rmse)
print("NMSE: %f" % nmse)
print("MSE: %f" % score)

plot_test, = plt.plot(y['test'], label='test')
plot_predicted, = plt.plot(predicted, label='predicted')
plt.legend(handles=[plot_predicted, plot_test])
y['val'], )  # closes a call that begins before this fragment (not visible here)

# Get the RNN model: an Estimator built from lstm_model(...), wrapped in SKCompat.
model = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, fcDim)))

# TRAIN START HERE ===========================================
# Fit on the 'train' split, passing the validation monitor defined earlier.
model.fit(x['train'], y['train'], monitors=[validation_monitor], batch_size=BATCH_SIZE, steps=TRAINING_STEPS)

# TEST START HERE ===========================================
predicted = np.asmatrix(model.predict(x['test']), dtype=np.float32)

# Analyse Test Result
# The difference is converted to an ndarray before squaring, so the square
# is element-wise rather than a matrix product.
rmse = np.sqrt((np.asarray((np.subtract(predicted, y['test'])))**2).mean())
score = mean_squared_error(predicted, y['test'])
nmse = score / np.var(
    y['test']
)  # should be variance of original data and not data from fitted model, worth to double check

# NOTE(review): the labels below read "RSME"/"NSME"; the values printed are
# the rmse and nmse computed above.
print("RSME: %f" % rmse)
print("NSME: %f" % nmse)
print("MSE: %f" % score)

# Plot the result: column 0 of the targets against column 0 of the predictions.
plot_test1, = plt.plot(y['test'][:, 0], label='y0_actual')
plot_predicted1, = plt.plot(predicted[:, 0], label='y0_predicted')
# Fixed index split of the prepared arrays:
# rows 0-11999 -> train, 12000-12999 -> test, 13000-13999 -> validation.
X['train'] = arrayX[0:12000]
X['test'] = arrayX[12000:13000]
X['val'] = arrayX[13000:14000]
y['train'] = arrayy[0:12000]
y['test'] = arrayy[12000:13000]
y['val'] = arrayy[13000:14000]
# Disabled debug output and an alternative synthetic data source:
# print y['test'][0]
# print y2['test'][0]
# X1, y2 = generate_data(np.sin, np.linspace(0, 100, 10000), TIMESTEPS, seperate=False)

# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'], every_n_steps=PRINT_STEPS, early_stopping_rounds=1000)

# NOTE(review): no ``steps``/``batch_size`` are passed here, so training
# length depends on fit()'s defaults and on the monitor - confirm intended.
regressor.fit(X['train'], y['train'], monitors=[validation_monitor])

predicted = regressor.predict(X['test'])
# RMSE averaged along axis 0 (not used again within this fragment).
rmse = np.sqrt(((predicted - y['test'])**2).mean(axis=0))
score = mean_squared_error(predicted, y['test'])
print(("MSE: %f" % score))

# Disabled earlier plot:
# plot_predicted, = plt.plot(array[:1000], label='predicted')
plt.subplot()
# Plot predictions and test targets with a shared legend.
plot_predicted, = plt.plot(predicted, label='predicted')
plot_test, = plt.plot(y['test'], label='test')
plt.legend(handles=[plot_predicted, plot_test])
class Cloud_Forecaster(Base_Forecaster):
    '''
    Description: Cloud cover forecast based on LSTM - RNN
    '''

    def __init__(self, raw_data, configs, mode, scale=1, outpath='../outputs'):
        '''
        raw_data, configs, outpath: forwarded unchanged to Base_Forecaster
        mode: passed to the preprocessor and used in the results file name
        scale: relationship bet. time steps and data interval (1/60)
        outpath: directory that also receives the test results (.mat file)
        '''
        logging.info('Initializing Cloud Forecaster ...')
        super(Cloud_Forecaster, self).__init__(raw_data, configs, outpath)
        # load configurations
        self.scale = scale
        self.mode = mode
        # Remembered here so get_test_score() writes where the caller asked,
        # instead of always writing to '../outputs'.
        self._results_dir = outpath
        logging.info('Cloud Forecaster is initialized.')

    def fit(self, datakey='total_cloud_fraction', ubd_min=8, lbd_max=15):
        '''
        Build the model, preprocess the raw data and train on it.

        datakey, ubd_min, lbd_max: handed to Cloud_Cover_Preprocessor as-is.
        The resulting features/labels are stored on self.feats / self.labels.
        '''
        logging.info('Start fitting data ...')
        # create tensorflow model
        self._init_model()
        # preprocess data
        ccp = Cloud_Cover_Preprocessor(self.raw_data,
                                       self.configs['time_steps'], datakey,
                                       self.scale, ubd_min, lbd_max, self.mode)
        self.feats, self.labels = ccp.preprocess()
        self._fit()
        logging.info('Fitting data is complete.')

    def _init_model(self):
        '''
        Create self.regressor: an Estimator built from lstm_model(...) using
        the 'time_steps', 'rnn_layers', 'dense_layers' and 'learning_rate'
        entries of self.configs, wrapped in SKCompat.
        '''
        logging.info('Initializing LSTM model ...')
        model_fn = lstm_model(self.configs['time_steps'],
                              self.configs['rnn_layers'],
                              dense_layers=self.configs['dense_layers'],
                              learning_rate=self.configs['learning_rate'])
        self.regressor = SKCompat(learn.Estimator(model_fn=model_fn))
        logging.info('LSTM model is initialized.')

    def _fit(self):
        '''
        Train self.regressor on the 'train' split of self.feats / self.labels.
        '''
        logging.info('Fitting training data ...')
        # adjust matrix dimensions: append a trailing axis at position 2
        x_train = np.expand_dims(self.feats['train'], axis=2)
        y_train = np.expand_dims(self.labels['train'], axis=2)
        # Lazy %-args: the message is only formatted when DEBUG is enabled.
        logging.debug('Shape of x_train is: %s', x_train.shape)
        logging.debug('Shape of y_train is: %s', y_train.shape)
        # start training
        self.regressor.fit(x_train,
                           y_train,
                           batch_size=self.configs['batch_size'],
                           steps=self.configs['training_steps'])
        logging.info('Training data is fitted.')

    def get_test_score(self):
        '''
        Predict on the 'test' split, save predictions and labels to
        <outpath>/data_<data_name>_fmt_<mode>.mat and return the root of the
        mean squared error between labels and predictions.
        '''
        import os  # local import: only needed to build the results path

        logging.info('Testing on test data sets ...')
        x_test = np.expand_dims(self.feats['test'], axis=2)
        # start prediction
        preds = self.regressor.predict(x_test)
        # calculate MSE error
        mse = mean_squared_error(self.labels['test'], preds)
        rst_dict = {'preds': preds, 'labels': self.labels['test']}
        mat_name = 'data_%s_fmt_%s.mat' % (self.configs['data_name'],
                                           self.mode)
        sio.savemat(os.path.join(self._results_dir, mat_name), rst_dict)
        logging.info('Testing is completed.')
        return np.sqrt(mse)
# Disabled debug output:
# print(X['train'])
# print(y['train'])

# fit() is called for its training side effect; wrapping its return value in
# another SKCompat only built an object that was discarded straight away.
regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', X['train'].shape)
print('y train shape', y['train'].shape)
print('X test shape', X['test'].shape)
print('y test shape', y['test'].shape)

predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)
predicted = np.transpose(predicted)

# The difference is converted to an ndarray before squaring: on an np.matrix
# ``** 2`` is a matrix product, not an element-wise square.
rmse = np.sqrt((np.asarray(np.subtract(predicted, y['test'])) ** 2).mean())
score = mean_squared_error(predicted, y['test'])
# should be variance of original data and not data from fitted model,
# worth to double check
nmse = score / np.var(y['test'])

print("RMSE: %f" % rmse)
print("NMSE: %f" % nmse)
print("MSE: %f" % score)

plot_test, = plt.plot(y['test'], label='test')
# Disabled debug output:
# print(y['train'].shape)

# fit() is called for its training side effect; wrapping its return value in
# another SKCompat only built an object that was discarded straight away.
regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

print('X train shape', X['train'].shape)
print('y train shape', y['train'].shape)

# print_tab(y['test'])
# -1 lets numpy infer the row count instead of hard-coding 399 rows.
y['test'] = y['test'].reshape(-1, 8)
# print_tab(y['test'])
print('X test shape', X['test'].shape)
print('y test shape', y['test'].shape)

# Shape tracing.  Two of the literals used the invalid escape '\e'
# ('C\est'), which printed a stray backslash; they now read "C'est".
print("C'est un pic !", X['test'].shape)
lol2 = regressor.predict(X['test'])
print("C'est un roc ! ", lol2.shape)
predicted = np.asmatrix(lol2, dtype=np.float32)
print("C'est un cap ! ", predicted.shape)
# predicted = np.transpose(predicted)
print('Que dis-je ? ', predicted.shape, "C'est une péninsule ! ", y['test'].shape)

# Convert to an ndarray before squaring: on an np.matrix ``** 2`` is a
# matrix product, not an element-wise square.
lol = np.asarray(predicted - y['test']) ** 2
print('substract array : ', lol.shape)
rmse = np.sqrt(lol.mean())
print(rmse.shape)

score = mean_squared_error(predicted, y['test'])
# should be variance of original data and not data from fitted model,
# worth to double check
nmse = score / np.var(y['test'])
print("RMSE: %f" % rmse)
def _group_rows(content, group_size=256):
    """Stack consecutive runs of *group_size* items of *content* as float32.

    Item ``i`` of the result holds items ``i*group_size`` up to
    ``(i+1)*group_size - 1`` of *content*; a trailing incomplete run is
    dropped.
    """
    content = np.asarray(content, dtype=np.float32)
    n_groups = len(content) // group_size
    grouped_shape = (n_groups, group_size) + content.shape[1:]
    return content[:n_groups * group_size].reshape(grouped_shape)


def _load_gray_image(file_path, size=(256, 256)):
    """Return the image at *file_path* as a float32 grayscale array, or None.

    The image is converted BGR -> RGB, passed through ``processImage``,
    resized to *size*, converted to grayscale and divided by 255.
    None is returned when OpenCV cannot read the file.
    """
    img = cv2.imread(file_path)
    if img is None:
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = processImage(img)
    img = cv2.resize(img, dsize=size)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # One vectorised division instead of a Python loop over every pixel.
    return (np.asarray(img, dtype=np.float64) / 255.0).astype(np.float32)


def _one_hot_row(file_name, row, rules):
    """Return a one-hot row for the first rule that *row* satisfies.

    *rules* is an ordered sequence of ``(column, threshold)`` pairs.  The
    first pair with ``row[column] > threshold`` yields
    ``[file_name, 0, 0, 0]`` with that column set to 1.  If no rule matches,
    *row* itself is returned unchanged.
    """
    for column, threshold in rules:
        if row[column] > threshold:
            encoded = [file_name, 0, 0, 0]
            encoded[column] = 1
            return encoded
    return row


def main(unused_arguments):
    """Train the classifier, evaluate it, then label every test image.

    1. Load ``train_data`` / ``train_labels`` and fit for 2500 steps.
    2. Load ``eval_data`` / ``eval_labels`` and print the accuracy metric.
    3. Predict each ``.jpg`` under the test folder and write
       ``output_integers.csv`` (raw probabilities) and
       ``output_normalized.csv`` (one-hot where a threshold is exceeded).
    A timestamp is printed at the start and at the end.
    """
    print(time.asctime(time.localtime(time.time())))

    # --- Training data: every 256 consecutive rows form one sample. ---
    train_data = _group_rows(np.loadtxt('train_data'))
    # NOTE: pickle executes arbitrary code on load; only use label files
    # produced by this project.
    with open('train_labels', 'rb') as fp:
        train_labels = np.array(pickle.load(fp), dtype=np.int32)

    # Create the Estimator
    mnist_classifier = SKCompat(
        learn.Estimator(model_fn=trainTypes,
                        model_dir="/Users/praneet/Downloads/model"))

    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    # Train the model
    mnist_classifier.fit(x=train_data,
                         y=train_labels,
                         batch_size=100,
                         steps=2500,
                         monitors=[logging_hook])

    # Accuracy metric for evaluation
    metrics = {
        "accuracy": learn.MetricSpec(metric_fn=tf.metrics.accuracy,
                                     prediction_key="classes"),
    }

    # --- Evaluation ---
    print('Evaluation: ')
    eval_data = _group_rows(np.loadtxt('eval_data'))
    with open('eval_labels', 'rb') as fp:
        eval_labels = np.array(pickle.load(fp), dtype=np.int32)

    # Evaluate the model
    eval_results = mnist_classifier.score(x=eval_data,
                                          y=eval_labels,
                                          metrics=metrics)
    print(eval_results)

    # --- Predictions ---
    # NOTE(review): the thresholded 0/1 rows go to the "normalized" file and
    # the raw probabilities to the "integers" file - confirm the file names
    # are not swapped.
    print('Predictions: ')
    evaluatePath = "/Users/praneet/Downloads/test/"
    header = ["image_name", "Type_1", "Type_2", "Type_3"]
    evallst = [header]
    outputlst = [header]
    # Checked in this order: Type_1 > 0.25, then Type_3 > 0.29, then Type_2 > 0.58.
    one_hot_rules = ((1, 0.25), (3, 0.29), (2, 0.58))

    # Start at 0 so that 'Total files' reports the number of images actually
    # processed (it used to start at 1 and over-count by one).
    fileCount = 0
    for root, _dirs, files in os.walk(evaluatePath):
        for fileName in files:
            if not fileName.endswith(".jpg"):
                continue
            filePath = os.path.abspath(os.path.join(root, fileName))
            image = _load_gray_image(filePath)
            if image is None:
                print('Skipping unreadable image: ', filePath)
                continue

            # Predict values for each image (batch of one).
            eval_data = np.array([image])
            predictions = mnist_classifier.predict(x=eval_data)
            print(fileName, predictions)

            raw_row = [fileName]
            for probabilities in predictions['probabilities']:
                raw_row.extend(probabilities)
            outputlst.append(raw_row)

            row = _one_hot_row(fileName, raw_row, one_hot_rules)
            if row[1] == 0 or row[1] == 1:
                print("Non Ambiguous Prediction")
            evallst.append(row)
            fileCount += 1

    print('Total files: ', fileCount)
    pd.DataFrame(evallst).to_csv('output_normalized.csv',
                                 index=False, header=False)
    pd.DataFrame(outputlst).to_csv('output_integers.csv',
                                   index=False, header=False)

    print(time.asctime(time.localtime(time.time())))