def summary(self, name):
    """Return AVG/MIN/MAX/SUM aggregates for every numeric column of table *name*.

    Only columns whose SQL type starts with ``int`` or ``float`` are
    aggregated.  Returns a DataFrame with columns
    ``['Column', 'AVG', 'MIN', 'MAX', 'SUM']`` (empty when the table has no
    numeric column), or ``None`` when the table does not exist.
    """
    if _check_table_not_exist(self.query_handler, name):
        print("Warning: Table {} not exists!".format(name))
        self.query_handler.flush_cursor()
        return
    # Discover column names and SQL types; each cursor row starts with
    # (column_name, column_type, ...).
    select_query = "SHOW COLUMNS FROM " + name
    self.query_handler.flush_cursor()
    self.query_handler.run_query(select_query)
    rows = [item[:2] for item in self.query_handler.cursor]
    # Keep only numeric columns; the type string looks like "int(11)".
    aggregate_col = []
    for col_name, col_type in rows:
        if col_type.split("(")[0] in ("int", "float"):
            aggregate_col.append((col_name, col_type))
    columns = ['Column', 'AVG', 'MIN', 'MAX', 'SUM']
    # Bug fix: the original indexed aggregate_col[:, 0] on an empty array and
    # raised IndexError for tables without numeric columns.
    if not aggregate_col:
        return pd.DataFrame(columns=columns)
    data = []
    for col_name, _ in aggregate_col:
        select_query = ("SELECT AVG({0}), MIN({0}), MAX({0}), SUM({0}) "
                        "FROM {1}").format(col_name, name)
        self.query_handler.run_query(select_query)
        # One aggregate row comes back per query.
        for result in self.query_handler.cursor:
            data.append(result)
    data = np.array(data)
    first_col = np.array(aggregate_col)[:, 0].reshape(-1, 1)
    result = np.hstack((first_col, data))
    return pd.DataFrame(result, columns=columns)
def save_to_database(self, csv_file, table_name):
    """Create MySQL table *table_name* from *csv_file* and bulk-load the data.

    Column SQL types are inferred from the CSV dtypes
    (int64 -> INT, float64 -> FLOAT, otherwise VARCHAR(100)).  Prints a
    warning and does nothing when the table already exists; when the
    LOAD DATA step fails the freshly created table is dropped again.
    """
    if not _check_table_not_exist(self.query_handler, table_name):
        print("Warning: Table {} already existed!".format(table_name))
        self.query_handler.flush_cursor()
        return
    # NOTE(review): dtype inference samples only the first 2 data rows, so a
    # column that starts with ints but later holds floats or text gets the
    # wrong SQL type -- confirm this sampling is acceptable.
    data = pd.read_csv(csv_file, nrows=2)
    col_defs = []
    for name in data.columns:
        if data[name].dtype == "int64":
            sql_type = "INT"
        elif data[name].dtype == "float64":
            sql_type = "FLOAT"
        else:
            sql_type = "VARCHAR(100)"
        # Bug fix: back-quote identifiers so CSV headers containing spaces or
        # MySQL reserved words do not break the generated statement.
        col_defs.append("`{}` {}".format(name, sql_type))
    sql_query = "CREATE TABLE " + table_name + " (" + ",".join(col_defs) + ")"
    self.query_handler.flush_cursor()
    self.query_handler.run_query(sql_query)
    load_query = ("LOAD DATA LOCAL INFILE '" + csv_file +
                  "' INTO TABLE " + table_name)
    load_query += (" FIELDS TERMINATED BY ',' ENCLOSED BY '\"' " +
                   "LINES TERMINATED BY '\r\n' IGNORE 1 LINES")
    result = self.query_handler.run_query(load_query)
    # run_query signals failure with False; drop the half-made table so a
    # retry starts clean.
    if result is False:
        self.delete_data(table_name)
def _save_to_db(self, clf, data_table_name):
    """Pickle *clf* to the saved_model directory and record its metadata.

    A row (model name, save time, source table, pickle path) is inserted
    into the bookkeeping table ``self.table_name``, which is created first
    if it does not exist yet.
    """
    # Make sure the bookkeeping table is there before inserting into it.
    if _check_table_not_exist(self.query_handler, self.table_name):
        self.query_handler.flush_cursor()
        _create_model_table(self.query_handler, self.table_name)
    else:
        print("Table already existed!")
    timestamp = _current_time()
    file_name = '{}_{}_{}.pickle'.format(timestamp, data_table_name,
                                         self.model_name)
    model_dir = os.path.abspath('./../saved_model/')
    model_path = os.path.join(model_dir, file_name)
    with open(model_path, 'wb') as handle:
        pickle.dump(clf, handle)
    # MySQL DATETIME wants 'YYYY-MM-DD hh:mm:ss', not the ISO 'T' separator.
    timestamp = timestamp.replace("T", ' ')
    query = "INSERT INTO {} VALUES ('{}','{}','{}','{}')".format(
        self.table_name, self.model_name, timestamp, data_table_name,
        model_path)
    self.query_handler.flush_cursor()
    self.query_handler.run_query(query)
    print(query)
    print("Trained model is saved in database {}, table {}.".format(
        self.query_handler.connector.database, self.table_name))
def reference(self, model_name, time, data_table_name, test_data_table, feature_col):
    """Look up a saved model by (name, save time, training table), load its
    pickle from disk, and print its predictions on *test_data_table*.

    ``feature_col`` selects the feature columns of the test table that are
    fed to ``clf.predict``.  Prints a message and returns early when the
    bookkeeping table is missing or the pickle file cannot be opened.
    """
    if _check_table_not_exist(self.query_handler, self.table_name):
        print("Table not exists!")
    else:
        query_1 = "SELECT model_path FROM {} WHERE name='{}' "
        query_2 = "AND savetime='{}' AND dataset='{}'"
        query = query_1 + query_2
        query = query.format(self.table_name, model_name, time, data_table_name)
        self.query_handler.flush_cursor()
        # NOTE(review): assumes run_query returns an iterable of result rows
        # on success -- confirm it cannot return False here as it can for a
        # failed LOAD DATA elsewhere in this module.
        _db_result = self.query_handler.run_query(query)
        _model_path = ''
        # Each result row is a tuple; if several rows match, the last wins.
        # When nothing matches, _model_path stays '' and open() below fails
        # into the IOError handler.
        for result in _db_result:
            _model_path = result[0]
        try:
            # pickle.load on a path taken from the database: only safe while
            # the saved_model directory is fully trusted.
            with open(_model_path, 'rb') as f:
                clf = pickle.load(f)
        except IOError as err:
            print('IOError({}): {}'.format(err.errno, err.strerror))
            return
        test_data = self.dataset.load_from_database(test_data_table)
        test_feature = test_data[feature_col].values
        _ans = clf.predict(test_feature)
        print(_ans)
def delete_data(self, name):
    """Drop table *name* from the database, warning when it is absent."""
    # Guard: nothing to drop if the table never existed.
    if _check_table_not_exist(self.query_handler, name):
        print("Warning: Table {} not exists!".format(name))
        self.query_handler.flush_cursor()
        return
    self.query_handler.flush_cursor()
    self.query_handler.run_query("DROP TABLE {};".format(name))
def _show_model(self, model_table_name):
    """Print every row of the model bookkeeping table *model_table_name*."""
    if _check_table_not_exist(self.query_handler, model_table_name):
        print("Table not exists!")
        return
    self.query_handler.flush_cursor()
    rows = self.query_handler.run_query(
        "SELECT * FROM {}".format(model_table_name))
    for row in rows:
        print(row)
def load_from_database(self, name):
    """Fetch the whole table *name* into a pandas DataFrame.

    Returns ``None`` (after a warning) when the table does not exist.
    Column labels come from ``SHOW COLUMNS``.
    """
    if _check_table_not_exist(self.query_handler, name):
        print("Warning: Table {} not exists!".format(name))
        self.query_handler.flush_cursor()
        return
    self.query_handler.flush_cursor()
    self.query_handler.run_query("SHOW COLUMNS FROM " + name)
    # First field of each SHOW COLUMNS row is the column name.
    col_names = [item[0] for item in self.query_handler.cursor]
    self.query_handler.run_query("SELECT * FROM " + name)
    data = list(self.query_handler.cursor)
    # Bug fix: routing the rows through np.array coerced every column to one
    # common dtype (numeric columns came back as strings on mixed tables);
    # pandas infers per-column dtypes correctly from the row tuples.
    return pd.DataFrame(data, columns=col_names)
def reference(self, model_name, time, data_table_name, test_data_table, feature_col):
    """Look up a saved model by (name, save time, training table), restore it
    via ``self.load_op``, and print its predictions on *test_data_table*.

    ``load_op`` is either the string ``'pickle'`` (model was pickled to
    disk) or a callable that takes the stored path and returns a loaded
    model; any other value is rejected with a message.  ``feature_col``
    selects the feature columns fed to ``predict``.
    """
    if _check_table_not_exist(self.query_handler, self.model_table_name):
        print("Table not exists!")
    else:
        query_1 = "SELECT model_path FROM {} WHERE name='{}' "
        query_2 = "AND savetime='{}' AND dataset='{}'"
        query = query_1 + query_2
        query = query.format(self.model_table_name, model_name, time, data_table_name)
        # Load the testing data first because it is the same for every
        # type of model.
        test_data = self.dataset.load_from_database(test_data_table)
        test_feature = test_data[feature_col].values
        self.query_handler.flush_cursor()
        # NOTE(review): assumes run_query returns an iterable of result rows
        # here -- confirm it cannot return False as it can for a failed load.
        _db_result = self.query_handler.run_query(query)
        _model_path = ''
        # Each result row is a tuple; if several rows match, the last wins.
        for result in _db_result:
            _model_path = result[0]
        if self.load_op == 'pickle':
            try:
                # pickle.load on a path read from the database: only safe
                # while the saved_model directory is fully trusted.
                with open(_model_path, 'rb') as f:
                    _model = pickle.load(f)
            except IOError as err:
                print('IOError({}): {}'.format(err.errno, err.strerror))
                return
            _ans = _model.predict(test_feature)
        else:
            if callable(self.load_op):
                _model = self.load_op(_model_path)
                # verbose=1 suggests a Keras-style predict -- TODO confirm.
                _ans = _model.predict(test_feature, verbose=1)
            else:
                print(
                    "Currently not support this type of model, we will extend it ASAP."
                )
                return
        print(_ans)
def _save_to_db(self, mode_object, data_table_name):
    """Persist ``self.model_object`` and record its metadata in the model table.

    When ``self.save_op`` is ``'pickle'`` the model is serialized to bytes
    and stored in the table row; otherwise the method named by ``save_op``
    is called on the model to write an ``.h5`` file whose path is stored.
    (``mode_object`` is accepted for interface compatibility but unused;
    the instance attribute ``self.model_object`` is what gets saved.)
    """
    if _check_table_not_exist(self.query_handler, self.table_name):
        self.query_handler.flush_cursor()
        _create_model_table(self.query_handler, self.table_name)
    else:
        print("Table already existed!")
    current_time = _current_time()
    saved_model_name = current_time + '_' + data_table_name + '_' + self.model_name
    root_path = os.path.abspath('./../saved_model/')
    saved_model_path = os.path.join(root_path, saved_model_name)
    model_bytes = 'NULL'
    # sklearn-style models are pickled; Keras-style models save themselves.
    if self.save_op == 'pickle':
        # Bug fix: pickle.dump() requires a file object and raised TypeError
        # here; pickle.dumps() returns the serialized bytes as intended.
        # NOTE(review): interpolating raw bytes into the INSERT string below
        # is fragile (quoting/encoding) -- a parameterized query would be
        # safer; confirm whether query_handler supports one.
        model_bytes = pickle.dumps(self.model_object)
    else:
        try:
            saved_model_path += '.h5'
            getattr(self.model_object, self.save_op)(saved_model_path)
        except IOError:
            print("Something wrong while saving model {} (type {})".format(
                self.model_name, str(type(self.model_object))))
    # MySQL DATETIME wants 'YYYY-MM-DD hh:mm:ss', not the ISO 'T' separator.
    current_time = current_time.replace("T", ' ')
    query = "INSERT INTO {} VALUES ('{}','{}','{}','{}','{}')".format(
        self.table_name, self.model_name, current_time, data_table_name,
        saved_model_path, model_bytes)
    self.query_handler.flush_cursor()
    self.query_handler.run_query(query)
    print(query)
    print("Trained model is saved in database {}, table {}.".format(
        self.query_handler.connector.database, self.table_name))