def predict_done():
    """Train an LSTM regressor on the PT electricity-market price series and
    plot its predictions on the test split.

    Reads RealMarketPriceDataPT.csv (the 'date' and '(UTC)' columns are merged
    into a 'timeline' datetime index), fits a tf.contrib.learn estimator
    wrapped in SKCompat, computes the mean absolute error on the test split,
    and shows a plot of the predicted values.

    NOTE(review): relies on module-level names defined elsewhere in this file:
    pd, plt, learn, SKCompat, lstm_model, load_csvdata, mean_absolute_error.
    """
    LOG_DIR = './ops_logs'  # unused here; kept for parity with the other cells
    TIMESTEPS = 10          # RNN sequence length
    RNN_LAYERS = [{'num_units': TIMESTEPS}]
    DENSE_LAYERS = [10, 10]  # two dense layers with 10 hidden units each
    TRAINING_STEPS = 1000
    BATCH_SIZE = 100
    PRINT_STEPS = TRAINING_STEPS / 100  # report once per 1% of training

    # Timestamps are split across the 'date' and '(UTC)' columns,
    # e.g. '21/12/2015' + '00:45' -> '%d/%m/%Y %H:%M'.
    dateparse = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y %H:%M')
    rawdata = pd.read_csv(
        "./model/input/ElectricityPrice/RealMarketPriceDataPT.csv",
        parse_dates={'timeline': ['date', '(UTC)']},
        index_col='timeline',
        date_parser=dateparse)

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    regressor = SKCompat(
        learn.Estimator(
            model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))

    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'], y['val'],
        every_n_steps=PRINT_STEPS,
        early_stopping_rounds=1000)

    # BUG FIX: the original wrapped the *return value* of fit() in
    # SKCompat(...); the regressor is already SKCompat-wrapped, so the extra
    # wrapper was a no-op at best.  Call fit directly.
    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    predicted = regressor.predict(X['test'])
    # BUG FIX: the original stored mean_absolute_error under the name 'mse',
    # mislabelling the metric; name it 'mae'.
    mae = mean_absolute_error(y['test'], predicted)

    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted])
    plt.show()
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS / 100

# ## Parameter definitions
#
# - LOG_DIR: log file
# - TIMESTEPS: RNN time steps
# - RNN_LAYERS: RNN layer information
# - DENSE_LAYERS: DNN sizes; [10, 10]: two dense layers with 10 hidden units each
# - TRAINING_STEPS: number of training steps
# - BATCH_SIZE: mini-batch size used during training
# - PRINT_STEPS: interval for intermediate progress output (print once per
#   1% of the total training steps)

# In[15]:

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(
    TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=1,
                                      steps=TRAINING_STEPS,
                                      optimizer='Adagrad',
                                      learning_rate=0.03,
                                      batch_size=BATCH_SIZE)

# ## Create a regressor with TF Learn
#
# : Build the model used for prediction, using the TensorFlowEstimator
#   provided by the TF Learn library.
#
# **Parameters**:
#
# - model_fn: the model used for training and prediction
# - n_classes: number of label classes (0: prediction/regression,
#   1 or more: classification) -- needs verification
from sklearn.metrics import mean_squared_error
from lstm_predictor import generate_data, lstm_model

warnings.filterwarnings("ignore")

# Experiment configuration: fit an LSTM to a (noisy) sine wave.
LOG_DIR = 'resources/logs/'
TIMESTEPS = 1                      # sequence length fed to the LSTM
RNN_LAYERS = [{'num_units': 400}]  # single recurrent layer with 400 units
DENSE_LAYERS = None                # no dense layers on top
TRAINING_STEPS = 500
PRINT_STEPS = TRAINING_STEPS  # / 10
BATCH_SIZE = 100

regressor = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS), ))
# model_dir=LOG_DIR)

# 10,000 samples of sin(x) over [0, 100]; generate_data splits them into
# train/val/test dictionaries keyed the same way as X and y below.
X, y = generate_data(np.sin, np.linspace(0, 100, 10000, dtype=np.float32),
                     TIMESTEPS, seperate=False)

# Gaussian noise (mean 0, sigma 0.2), one sample per target value, to be
# added to each split's targets.  Built as matrices, hence the transposes.
noise_train = np.asmatrix(np.random.normal(0, 0.2, len(y['train'])),
                          dtype=np.float32)
noise_val = np.asmatrix(np.random.normal(0, 0.2, len(y['val'])),
                        dtype=np.float32)
noise_test = np.asmatrix(np.random.normal(0, 0.2, len(y['test'])),
                         dtype=np.float32)
#asmatrix
noise_train = np.transpose(noise_train)
# Parse the combined 'date' + '(UTC)' columns, e.g. '21/12/2015 00:45'.
dateparse = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y %H:%M')
rawdata = pd.read_csv("./TotalLoad.csv",
                      parse_dates={'timeline': ['date', '(UTC)']},
                      index_col='timeline',
                      date_parser=dateparse)
# parsing errors in pandas -- fall back to a plain two-column index
rawdata = pd.read_csv("./TotalLoad.csv", index_col=['date', '(UTC)'])

df = pd.read_csv("./TotalLoad.csv")
df  # notebook-style display; no effect when run as a script

X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

# Earlier estimator experiments, kept for reference but commented out: both
# calls crash at runtime (TensorFlowEstimator is gone / learn.Estimator does
# not accept these keyword arguments), masking the working call below.
# See https://www.tensorflow.org/api_guides/python/contrib.learn
# regressor = learn.TensorFlowEstimator(
#     model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
#     n_classes=0, verbose=1, steps=TRAINING_STEPS,
#     optimizer='Adagrad', learning_rate=0.03, batch_size=BATCH_SIZE)
# regressor = learn.Estimator(
#     model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
#     n_classes=0, verbose=1, steps=TRAINING_STEPS,
#     optimizer='Adagrad', learning_rate=0.03, batch_size=BATCH_SIZE)

regressor = learn.Estimator(
    model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS))

# SYNTAX FIX: the console transcript below was pasted into the source as bare
# text, which made the file unparseable; it is preserved here as a comment.
# WARNING:tensorflow:Using temporary folder as model directory:
#   C:\Users\aroug\AppData\Local\Temp\tmp3nwb8sg9
# INFO:tensorflow:Using default config.
# INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_task_type': None,
#   '_master': '', '_tf_config': gpu_options {
#   per_process_gpu_memory_fraction: 1 }, '_environment': 'local',
#   '_cluster_spec': <...ClusterSpec object...>,
#   '_save_checkpoints_secs': 600, '_task_id': 0, '_keep_checkpoint_ev...
def learningpercell(cell_name):
    """Train an LSTM regressor on one cell's counter-value time series.

    Walks `working_dir` for '|'-delimited CSV exports, keeps rows whose
    "cellname" column contains `cell_name`, concatenates every matching
    file's "value" column into one series, fits an LSTM regressor on it,
    and reports RMSE / NMSE / MSE on the test split together with a plot.

    NOTE(review): relies on module-level names defined elsewhere in this
    file: working_dir, pd, np, os, plt, warnings, learn, SKCompat,
    lstm_model, load_csvdata, mean_squared_error.
    """
    warnings.filterwarnings("ignore")

    LOG_DIR = 'resources/logs/'
    TIMESTEPS = 100  # RNN sequence length
    RNN_LAYERS = [{'num_units': 256}, {'num_units': 256}]
    DENSE_LAYERS = [32]
    TRAINING_STEPS = 100
    PRINT_STEPS = TRAINING_STEPS  # / 10
    BATCH_SIZE = 64

    # BUG FIX: the original wrapped the estimator in `skflow(...)` -- skflow
    # is a module, not a callable, so this raised a TypeError.  The sibling
    # cells wrap the estimator in SKCompat; do the same here.
    regressor = SKCompat(
        learn.Estimator(
            model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))
    # model_dir=LOG_DIR)

    # Timestamps look like 201512200045 ('%Y%m%d%H%M'); the parser is kept
    # for reference but the CSVs below are indexed without date parsing.
    dateparse = lambda dates: pd.datetime.strptime(dates, '%Y%m%d%H%M')

    rawdata = None
    df_list = []
    for root, dirs, files in os.walk(working_dir):
        file_list = []
        for filename in files:
            if filename.endswith('.csv'):
                file_list.append(os.path.join(root, filename))
        for file in file_list:
            df = pd.read_csv(
                file,
                delimiter="|",
                usecols=[1, 2, 5, 6, 7],
                header=None,
                na_values=["NIL"],
                na_filter=True,
                names=["meas_info", "counter", "cellname", "value", "time"],
                index_col='time')
            # df = df[df["counter"] == 67179778]
            df = df[df["cellname"].str.contains(cell_name)]
            # df.drop('cellname', axis=1, inplace=True)
            # df = df[df["counter"] == 50331671]
            # print(df[["value"]])
            if not df.empty:
                df_list.append(df[["value"]])
    if df_list:
        rawdata = pd.concat(df_list)
    # else:
    #     return

    # BUG FIX: when no file matched, rawdata stayed None and the original
    # `print(len(rawdata))` raised TypeError before the <= 0 check could
    # fire.  Bail out explicitly on both "no data" cases instead.
    if rawdata is None or len(rawdata) == 0:
        print("Den ftanni")
        return
    print(len(rawdata))

    # rawdata = pd.read_csv("./input/fakehdfs/nms/ystr=2015/ymstr=12/ymdstr=20/hive_0_201512200030.csv",
    #                       delimiter="|", usecols=[7], header=None)
    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    # noise_train = np.asmatrix(np.random.normal(0, 0.2, len(y['train'])), dtype=np.float32)
    # noise_val = np.asmatrix(np.random.normal(0, 0.2, len(y['val'])), dtype=np.float32)
    # noise_test = np.asmatrix(np.random.normal(0, 0.2, len(y['test'])), dtype=np.float32)
    # asmatrix
    # noise_train = np.transpose(noise_train)
    # noise_val = np.transpose(noise_val)
    # noise_test = np.transpose(noise_test)
    # y['train'] = np.add(y['train'], noise_train)
    # y['val'] = np.add(y['val'], noise_val)
    # y['test'] = np.add(y['test'], noise_test)
    # print(type(y['train']))

    print('-----------------------------------------')
    print('train y shape', y['train'].shape)
    print('train y shape_num', y['train'][1:5])
    # print('noise_train shape', noise_train.shape)
    # print('noise_train shape_num', noise_train.shape[1:5])

    # create a lstm instance and validation monitor
    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'],
        y['val'],
    )
    # every_n_steps=PRINT_STEPS,)
    # early_stopping_rounds=1000)
    # print(X['train'])
    # print(y['train'])

    # BUG FIX: the original wrapped the fit() call in `skflow(...)` (a
    # module, not a callable); call fit directly.
    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    print('X train shape', X['train'].shape)
    print('y train shape', y['train'].shape)
    print('X test shape', X['test'].shape)
    print('y test shape', y['test'].shape)

    predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)
    # ,as_iterable=False))
    predicted = np.transpose(predicted)

    rmse = np.sqrt((np.asarray((np.subtract(predicted, y['test'])))**2).mean())
    # this previous code for rmse was incorrect, array and not matricies was
    # needed: rmse = np.sqrt(((predicted - y['test']) ** 2).mean())
    score = mean_squared_error(predicted, y['test'])
    nmse = score / np.var(y['test'])
    # should be variance of original data and not data from fitted model,
    # worth to double check

    print("RSME: %f" % rmse)
    print("NSME: %f" % nmse)
    print("MSE: %f" % score)

    plot_test, = plt.plot(y['test'], label='test')
    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted, plot_test])
    plt.show()