def create():
    # print(request)
    print(request.form)
    print("___")
    print(request.files.to_dict())

    # Stage the uploaded file locally, then move it to MinIO under
    # <name>/raw/<name><ext>
    f = request.files.to_dict()['data']
    fullname = f.filename
    filename, file_extension = os.path.splitext(fullname)
    from_path = 'tmp/' + fullname
    to_path = filename + '/raw/' + filename + file_extension
    f.save(from_path)
    Minio_Handler.upload(from_path, to_path)
    os.remove(from_path)

    # Announce the new raw file to the preprocessor queue and log it
    msg = {
        "name": filename,
        "type": file_extension,
        "job": 'create',
        "date": request.form['date'],
        "file_uri": to_path
    }
    Message_Handler = MessageHandler(config.RABBITMQ_CONNECTION)
    Message_Handler.sendMessage('from_client', json.dumps(msg))
    Message_Handler.close()
    Database_Handler.insert(msg)
    return 'Hello'
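# Hedged usage sketch: one way a client could exercise create() above,
# assuming it is registered as POST /create on a Flask app at
# http://localhost:5000 (route, host, and port are assumptions, not
# confirmed by this file). The field names 'data' and 'date' match
# request.files['data'] and request.form['date'] read by create().
import requests

def upload_raw_file(path, date):
    with open(path, 'rb') as fh:
        resp = requests.post('http://localhost:5000/create',
                             files={'data': fh},
                             data={'date': date})
    resp.raise_for_status()
    return resp.text

# Example: upload_raw_file('AAPL.json', '2020-01-01')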
def callback(channel, method, properties, body):
    print(f'[x] Received {body} from {properties}')
    received_msg = json.loads(body)
    from_path = received_msg['file_uri']
    files = received_msg['files']

    # Pull the trained model files from MinIO into the local tmp/ folder
    for file in files:
        Minio_Handler.download(from_path + file, 'tmp/' + file)

    # Re-upload everything to S3 under <name>/model/ and clean up tmp/
    dest = received_msg['name'] + '/model/'
    for filename in os.listdir('tmp/'):
        S3_Handler.upload('tmp/' + filename, dest + filename)
        os.remove('tmp/' + filename)

    logs = {
        'name': received_msg['name'],
        'type': received_msg['type'],
        'file_uri': received_msg['name'] + '/model/',
        'date': time.strftime("%Y-%m-%d %H:%M:%S"),
        'creator_id': received_msg.get('creator_id', '')
    }
    Database_Handler.insert(config.MONGO_COLLECTION, logs)

    # Tell the edge server where to fetch the model from
    data = {
        'name': received_msg['name'],
        'type': received_msg['type'],
        'file_uri': received_msg['name'] + '/model/',
        'files': files,
        'S3_ACCESS_KEY': config.S3_ACCESS_KEY,
        'S3_SECRET_KEY': config.S3_SECRET_KEY,
        'S3_BUCKET': config.S3_BUCKET
    }
    r = requests.post(url=config.EDGE_ENDPOINT, data=json.dumps(data))
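# Hedged sketch: one way callback() above could be wired to RabbitMQ
# with pika. The queue name 'from_creator' matches the routing key the
# model-creator service publishes on below; whether
# config.RABBITMQ_CONNECTION is a URL or a bare host name is an
# assumption here.
import pika

def consume_forever():
    connection = pika.BlockingConnection(
        pika.URLParameters(config.RABBITMQ_CONNECTION))
    channel = connection.channel()
    channel.queue_declare(queue='from_creator')
    # auto_ack=True matches the fire-and-forget style of callback()
    channel.basic_consume(queue='from_creator',
                          on_message_callback=callback,
                          auto_ack=True)
    channel.start_consuming()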
def save(stockname, filename):
    to_path = 'data/' + stockname + '/' + filename
    Minio_Handler.upload(filename, to_path)
    logs = {
        "name": stockname,
        "file_uri": to_path
    }
    os.remove(filename)
    Database_Handler.insert('edge-data', logs)
def update():
    print(f'Received {request.data}')
    msg = json.loads(request.data)
    from_path = msg['file_uri']

    # Download from S3 using the credentials passed in the message
    S3_Handler = DataStoreHandler(config.S3_ENDPOINT, msg['S3_ACCESS_KEY'],
                                  msg['S3_SECRET_KEY'], msg['S3_BUCKET'])
    files = msg['files']
    for file in files:
        S3_Handler.download(from_path + file, 'tmp/' + file)
    if not os.path.exists('tmp/' + msg['name']):
        os.makedirs('tmp/' + msg['name'])

    # Upload to Minio, then move each file into the model's own folder
    dest = msg['name'] + '/model/'
    for filename in files:
        Minio_Handler.upload('tmp/' + filename, dest + filename)
        os.rename('tmp/' + filename, 'tmp/' + msg['name'] + '/' + filename)

    print("Save model to memory")
    # Give the model its own Graph/Session pair so predictions can run
    # from different threads (TF1-style threading workaround)
    model_graphs[msg['name']] = Graph()
    with model_graphs[msg['name']].as_default():
        model_session[msg['name']] = Session()
        with model_session[msg['name']].as_default():
            model = load_model('tmp/' + msg['name'] + '/' + 'model.h5')
            model._make_predict_function()  # have to initialize before threading
            model_objects[msg['name']] = model
    # K.clear_session()

    logs = {
        'name': msg['name'],
        'type': msg['type'],
        'file_uri': dest,
        'files': files
    }
    Database_Handler.update_by_name(config.MONGO_COLLECTION, msg['name'], logs)
    return 'OK'
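# Hedged sketch: the per-model registries update() and predict() rely
# on. With TF1-style Keras, each model keeps its own Graph/Session pair
# so predictions issued from different Flask worker threads don't
# collide. These are assumed to live at module level; on a TF2 install
# the same names are under tensorflow.compat.v1.
from tensorflow import Graph, Session

model_graphs = {}   # model name -> tf.Graph
model_session = {}  # model name -> tf.Session bound to that graph
model_objects = {}  # model name -> loaded Keras model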
def callback(channel, method, properties, body):
    print(f'[x] Received {body} from {properties}')
    received_msg = json.loads(body)
    to_path = 'tmp/' + received_msg['name'] + received_msg['type']
    DataStore_Handler.download(received_msg['file_uri'], to_path)

    # PREPROCESSING DATA
    convert_file_to_csv(to_path)
    csv_filename = to_path.replace(received_msg['type'], '.csv')
    data_cleaning = DataCleaning(csv_filename)
    data_cleaning.handle_missing_data()
    data_cleaning.handle_outlier_data()
    data_cleaning.drop_unwanted_columns()
    data_cleaning.save_preprocessed_file(to_path)

    # THEN UPLOAD TO MINIO
    filename = received_msg['name']
    from_path = to_path  # dummy test
    to_path = filename + '/preprocessed/' + filename + '.csv'
    DataStore_Handler.upload(from_path, to_path)
    os.remove(from_path)

    # SAVE LOGS TO MONGO
    logs = {
        "name": filename,
        "type": '.csv',
        'date': time.strftime("%Y-%m-%d %H:%M:%S"),
        "file_uri": to_path,
        'cloud_server_id': received_msg.get('cloud_server_id', '')
    }
    logged_info = Database_Handler.insert(config.MONGO_COLLECTION, logs)

    # SEND MESSAGE TO MODEL CREATOR
    msg = {
        "name": filename,
        "type": '.csv',
        'date': time.strftime("%Y-%m-%d %H:%M:%S"),
        "file_uri": to_path,
        'preprocessor_id': str(logged_info.inserted_id)
    }
    MessageHandler(config.RABBITMQ_CONNECTION).sendMessage(
        'from_preprocessor', json.dumps(msg))
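# Hedged sketch: a minimal DataCleaning matching the method names used
# above. The concrete rules (forward-fill, IQR clipping, which columns
# count as "unwanted") are assumptions; the project's real class may
# differ.
import pandas as pd

class DataCleaning:
    def __init__(self, csv_path):
        self.df = pd.read_csv(csv_path)

    def handle_missing_data(self):
        # forward-fill gaps, then drop rows that are still empty
        self.df = self.df.ffill().dropna()

    def handle_outlier_data(self):
        # clip numeric columns to 1.5 * IQR around the quartiles
        num = self.df.select_dtypes('number')
        q1, q3 = num.quantile(0.25), num.quantile(0.75)
        iqr = q3 - q1
        self.df[num.columns] = num.clip(q1 - 1.5 * iqr,
                                        q3 + 1.5 * iqr, axis=1)

    def drop_unwanted_columns(self):
        # placeholder rule: keep only numeric columns
        self.df = self.df.select_dtypes('number')

    def save_preprocessed_file(self, path):
        # headerless CSV, matching the header=None reads downstream
        self.df.to_csv(path, index=False, header=False)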
def callback(channel, method, properties, body):
    print(f'[x] Received {body} from {properties}')

    # Clear stale files in the local tmp/ folder
    for f in os.listdir('tmp/'):
        os.remove('tmp/' + f)

    # LOAD DATA FROM MINIO --> CREATE - TRAIN - SAVE MODEL
    # --> UPLOAD MODEL TO MINIO
    received_msg = json.loads(body)
    to_path = 'tmp/' + received_msg['name'] + received_msg['type']
    from_path = received_msg['file_uri']

    # download data from minio
    DataStore_Handler.download(from_path, to_path)

    # read data from downloaded file
    data = pandas.read_csv(to_path, header=None)
    data = data.to_numpy()
    print(data[0])

    # split data into train set and test set (no shuffle: time series)
    train_data, test_data = train_test_split(data, test_size=0.2,
                                             shuffle=False)

    scaler_file = 'scaler.pkl'
    model_file = 'model.h5'

    # ==================================================================
    # PREDICTION FOR THREE MONTHS
    # ==================================================================
    # train models
    model_lstm = LSTMModel(train_data, test_data)
    model_lstm.compile()
    model_lstm.train()
    # save the best model
    model_lstm.save()
    K.clear_session()

    # upload model and necessary files to minio, keeping only the files
    # that actually exist (some models don't have scaler.pkl, etc.)
    filename = received_msg['name']
    file_extension = '.' + model_file.split('.')[-1]
    dest = filename + '/model/'
    uploaded_files = []
    for fname in [model_file, scaler_file]:
        if os.path.isfile('tmp/' + fname):
            DataStore_Handler.upload('tmp/' + fname, dest + fname)
            os.remove('tmp/' + fname)
            uploaded_files.append(fname)
    files = uploaded_files  # filelist for forwarding to edge-server

    # SAVE LOGS TO MONGO
    logs = {
        "name": filename,
        "type": file_extension,
        'date': time.strftime("%Y-%m-%d %H:%M:%S"),
        "file_uri": dest,
        'preprocessor_id': received_msg.get('preprocessor_id', '')
    }
    logged_info = Database_Handler.insert(config.MONGO_COLLECTION, logs)

    # send notification
    msg = {
        "name": filename,
        "type": file_extension,
        'date': time.strftime("%Y-%m-%d %H:%M:%S"),
        "file_uri": dest,
        'files': files,
        'creator_id': str(logged_info.inserted_id)
    }
    Message_Handler.sendMessage('from_creator', json.dumps(msg))
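# Hedged sketch: the contract LSTMModel is assumed to satisfy, inferred
# only from the calls in the callback above; the real implementation
# lives elsewhere in the repo, and its details (architecture, where the
# best weights are written) are assumptions.
class LSTMModel:
    def __init__(self, train_data, test_data):
        # numpy arrays produced by train_test_split above
        self.train_data = train_data
        self.test_data = test_data

    def compile(self):
        # build the Keras LSTM and call keras_model.compile(...)
        raise NotImplementedError

    def train(self):
        # fit on train_data, validating against test_data
        raise NotImplementedError

    def save(self):
        # expected side effects, per the upload loop above: write
        # tmp/model.h5 and, when a scaler is used, tmp/scaler.pkl
        raise NotImplementedError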
def predict(model_name, data_name, periods):
    periods = int(periods)
    result = 0

    # Get model metadata by model name
    model_info = Database_Handler.find_by_name(config.MONGO_COLLECTION,
                                               model_name)
    if model_info is None:
        return json.dumps(result)
    model_name = model_info['name']
    stock_name = model_name.split('.')[0].upper()
    model_type = model_info['type']
    model_path = model_info['file_uri']
    files = model_info['files']
    to_path = 'tmp/' + model_name + '/'

    # Download model files if they are not cached locally yet
    if not os.path.exists(to_path + files[0]):
        for file in files:
            Minio_Handler.download(model_path + file, to_path + file)

    # Get data for prediction, downloading it if necessary
    data_to_path = 'tmp/data/' + stock_name + '/' + data_name + '.csv'
    data_folder = 'tmp/data/' + stock_name
    print("Data to path:", data_to_path)
    if not os.path.exists(data_to_path):
        if not os.path.exists(data_folder):
            os.makedirs(data_folder)
        params = {
            "function": "TIME_SERIES_DAILY",
            "symbol": stock_name,
            "apikey": config.API_KEY,
            "datatype": "csv"
        }
        do_job(params, stock_name, data_name)
        data_info = Database_Handler.find_by_name('edge-data', stock_name)
        data_name, data_path = data_info['name'], data_info['file_uri']
        Minio_Handler.download(data_path, data_to_path)
    pred_data = pd.read_csv(data_to_path, header=None)

    # Predict
    if model_type == '.pkl':
        with open(to_path + 'model.pkl', 'rb') as pkl:
            result = pickle.load(pkl).predict(n_periods=periods)
    elif model_type == '.h5':  # Keras model
        # Load scaler
        with open(to_path + 'scaler.pkl', 'rb') as pkl:
            scaler = pickle.load(pkl)
        # Scale data: expand to (1, timesteps, features) to fit the
        # input shape of the LSTM model
        pred_data_expanded = np.expand_dims(pred_data, 0)
        scaled_data = scaler.transform(
            pred_data_expanded.reshape(
                pred_data_expanded.shape[0] * pred_data_expanded.shape[1],
                pred_data_expanded.shape[2]))
        scaled_data = scaled_data.reshape(pred_data_expanded.shape[0],
                                          pred_data_expanded.shape[1],
                                          pred_data_expanded.shape[2])
        # Load model (from file on first use, from memory afterwards)
        if model_name not in model_objects:
            print("Load model from file")
            model_graphs[model_name] = Graph()
            with model_graphs[model_name].as_default():
                model_session[model_name] = Session()
                with model_session[model_name].as_default():
                    model = load_model(to_path + 'model.h5')
                    model._make_predict_function()  # have to initialize before threading
                    model_objects[model_name] = model
        else:
            print("Load model from memory")
            model = model_objects[model_name]
        # Predict inside the model's own graph and session
        with model_graphs[model_name].as_default():
            with model_session[model_name].as_default():
                result = predict_keras(scaled_data, model, scaler, periods)
    else:
        # Load other model type - currently there is no other model type
        result = None
    print("\n\n\nPrediction done!\n\n\n")
    # K.clear_session()
    return json.dumps({
        "input": pred_data[-periods:][3].tolist(),
        "output": result.tolist()
    })
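# Hedged sketch of predict_keras(), which predict() above calls but does
# not define here: a plain autoregressive loop that rolls the input
# window forward one step per period. It assumes a single-feature window
# of shape (1, timesteps, 1) and a scaler fitted on one column; the
# project's real helper may handle the multi-column case differently.
import numpy as np

def predict_keras(scaled_data, model, scaler, periods):
    window = scaled_data.copy()  # (1, timesteps, 1)
    preds = []
    for _ in range(periods):
        y = float(model.predict(window)[0, -1])  # next scaled value
        preds.append(y)
        # drop the oldest timestep, append the prediction as the newest
        window = np.roll(window, -1, axis=1)
        window[0, -1, 0] = y
    preds = np.array(preds).reshape(-1, 1)
    return scaler.inverse_transform(preds).ravel()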
def hello():
    models_info = list(Database_Handler.find_all(config.MONGO_COLLECTION))
    models = (model['name'] for model in models_info)
    return render_template('home.html', models=models)
def save(filename):
    to_path = 'data/' + filename
    Minio_Handler.upload(filename, to_path)
    logs = {"name": filename, "file_uri": to_path}
    os.remove(filename)
    Database_Handler.insert(config.MONGO_COLLECTION, logs)