Esempio n. 1
0
 def summary(self, name):
     if _check_table_not_exist(self.query_handler, name):
         print("Warning: Table {} not exists!".format(name))
         self.query_handler.flush_cursor()
         return
     select_query = "SHOW COLUMNS FROM " + name
     self.query_handler.flush_cursor()
     self.query_handler.run_query(select_query)
     rows = []
     aggregate_col = []
     data = []
     for item in self.query_handler.cursor:
         rows.append(item[:2])
     for item in rows:
         col_type = item[1].split("(")
         if col_type[0] == "int" or col_type[0] == "float":
             aggregate_col.append(item)
     for item in aggregate_col:
         select_query = ("SELECT AVG(" + item[0] + "), "
                         + "MIN(" + item[0] + "), " + "MAX(" + item[0] + ")"
                         + ", SUM(" + item[0] + ") " + "FROM " + name)
         self.query_handler.run_query(select_query)
         for result in self.query_handler.cursor:
             data.append(result)
     data = np.array(data)
     aggregate_col = np.array(aggregate_col)
     col_name = aggregate_col[:, 0].reshape(-1, 1)
     result = np.hstack((col_name, data))
     columns = ['Column', 'AVG', 'MIN', 'MAX', 'SUM']
     return pd.DataFrame(result, columns=columns)
Esempio n. 2
0
 def save_to_database(self, csv_file, table_name):
     if not _check_table_not_exist(self.query_handler, table_name):
         print("Warning: Table {} already existed!".format(table_name))
         self.query_handler.flush_cursor()
         return
     data = pd.read_csv(csv_file, nrows=2)
     col_name = []
     col_type = []
     for name in data.columns:
         col_name.append(name)
         if data[name].dtype == "int64":
             col_type.append("INT")
         elif data[name].dtype == "float64":
             col_type.append("FLOAT")
         else:
             col_type.append("VARCHAR(100)")
     sql_query = "CREATE TABLE " + table_name + " ("
     for i in range(len(col_name)):
         attribute = col_name[i] + " " + col_type[i] + ","
         sql_query += attribute
     sql_query = sql_query[:-1]
     sql_query += ")"
     self.query_handler.flush_cursor()
     self.query_handler.run_query(sql_query)
     load_query = ("LOAD DATA LOCAL INFILE '" + csv_file + "' INTO TABLE "
                   + table_name)
     load_query += (" FIELDS TERMINATED BY ',' ENCLOSED BY '\"' "
                    + "LINES TERMINATED BY '\r\n' IGNORE 1 LINES")
     result = self.query_handler.run_query(load_query)          
     if result is False:
         self.delete_data(table_name)
Esempio n. 3
0
    def _save_to_db(self, clf, data_table_name):
        if _check_table_not_exist(self.query_handler, self.table_name):
            self.query_handler.flush_cursor()
            _create_model_table(self.query_handler, self.table_name)
        else:
            print("Table already existed!")

        current_time = _current_time()
        saved_model_name = current_time + '_' + data_table_name + '_' + self.model_name + '.pickle'

        root_path = os.path.abspath('./../saved_model/')
        saved_model_path = os.path.join(root_path, saved_model_name)
        with open(saved_model_path, 'wb') as f:
            pickle.dump(clf, f)

        current_time = current_time.replace("T", ' ')

        query = "INSERT INTO {} VALUES ('{}','{}','{}','{}')".format(
            self.table_name, self.model_name, current_time, data_table_name,
            saved_model_path)
        self.query_handler.flush_cursor()
        self.query_handler.run_query(query)

        print(query)

        print("Trained model is saved in database {}, table {}.".format(
            self.query_handler.connector.database, self.table_name))
Esempio n. 4
0
    def reference(self, model_name, time, data_table_name, test_data_table,
                  feature_col):
        if _check_table_not_exist(self.query_handler, self.table_name):
            print("Table not exists!")
        else:
            query_1 = "SELECT model_path FROM {} WHERE name='{}' "
            query_2 = "AND savetime='{}' AND dataset='{}'"
            query = query_1 + query_2
            query = query.format(self.table_name, model_name, time,
                                 data_table_name)

            self.query_handler.flush_cursor()
            _db_result = self.query_handler.run_query(query)

            _model_path = ''
            # The return result is tuple
            for result in _db_result:
                _model_path = result[0]

            try:
                with open(_model_path, 'rb') as f:
                    clf = pickle.load(f)
            except IOError as err:
                print('IOError({}): {}'.format(err.errno, err.strerror))
                return

            test_data = self.dataset.load_from_database(test_data_table)
            test_feature = test_data[feature_col].values
            _ans = clf.predict(test_feature)
            print(_ans)
Esempio n. 5
0
 def delete_data(self, name):
     if _check_table_not_exist(self.query_handler, name):
         print("Warning: Table {} not exists!".format(name))
         self.query_handler.flush_cursor()
         return
     delete_query = "DROP TABLE {};".format(name)
     self.query_handler.flush_cursor()
     self.query_handler.run_query(delete_query)
Esempio n. 6
0
 def _show_model(self, model_table_name):
     if _check_table_not_exist(self.query_handler, model_table_name):
         print("Table not exists!")
     else:
         query = "SELECT * FROM {}".format(model_table_name)
         self.query_handler.flush_cursor()
         _result = self.query_handler.run_query(query)
         for result in _result:
             print(result)
Esempio n. 7
0
    def load_from_database(self, name):
        if _check_table_not_exist(self.query_handler, name):
            print("Warning: Table {} not exists!".format(name))
            self.query_handler.flush_cursor()
            return
        select_query = "SHOW COLUMNS FROM " + name
        self.query_handler.flush_cursor()
        self.query_handler.run_query(select_query)
        rows = []
        data = []
        for item in self.query_handler.cursor:
            rows.append(item[0])
        select_query = "SELECT * FROM " + name
        self.query_handler.run_query(select_query)
        for item in self.query_handler.cursor:
            data.append(item)
        data = np.array(data)

        return pd.DataFrame(data, columns=rows)
Esempio n. 8
0
    def reference(self, model_name, time, data_table_name, test_data_table,
                  feature_col):
        if _check_table_not_exist(self.query_handler, self.model_table_name):
            print("Table not exists!")
        else:
            query_1 = "SELECT model_path FROM {} WHERE name='{}' "
            query_2 = "AND savetime='{}' AND dataset='{}'"
            query = query_1 + query_2
            query = query.format(self.model_table_name, model_name, time,
                                 data_table_name)

            # load testing data first because the it is the same for all type of model
            test_data = self.dataset.load_from_database(test_data_table)
            test_feature = test_data[feature_col].values

            self.query_handler.flush_cursor()
            _db_result = self.query_handler.run_query(query)

            _model_path = ''
            # The return result is tuple
            for result in _db_result:
                _model_path = result[0]

            if self.load_op == 'pickle':
                try:
                    with open(_model_path, 'rb') as f:
                        _model = pickle.load(f)
                except IOError as err:
                    print('IOError({}): {}'.format(err.errno, err.strerror))
                    return

                _ans = _model.predict(test_feature)
            else:
                if callable(self.load_op):
                    _model = self.load_op(_model_path)
                    _ans = _model.predict(test_feature, verbose=1)
                else:
                    print(
                        "Currently not support this type of model, we will extend it ASAP."
                    )
                    return

            print(_ans)
Esempio n. 9
0
    def _save_to_db(self, mode_object, data_table_name):
        if _check_table_not_exist(self.query_handler, self.table_name):
            self.query_handler.flush_cursor()
            _create_model_table(self.query_handler, self.table_name)
        else:
            print("Table already existed!")

        current_time = _current_time()
        saved_model_name = current_time + '_' + data_table_name + '_' + self.model_name

        root_path = os.path.abspath('./../saved_model/')
        saved_model_path = os.path.join(root_path, saved_model_name)

        model_bytes = 'NULL'

        # sklearn model needs pickle, so we have to check here
        if self.save_op == 'pickle':
            model_bytes = pickle.dump(self.model_object)
            # saved_model_path += '.pickle'
            # with open(saved_model_path, 'wb') as f:
            # pickle.dump(self.model_object, f)
        else:
            try:
                saved_model_path += '.h5'
                getattr(self.model_object, self.save_op)(saved_model_path)
            except IOError:
                print("Something wrong while saving model {} (type {})".format(
                    self.model_name, str(type(self.model_object))))

        current_time = current_time.replace("T", ' ')

        query = "INSERT INTO {} VALUES ('{}','{}','{}','{}','{}')".format(
            self.table_name, self.model_name, current_time, data_table_name,
            saved_model_path, model_bytes)
        self.query_handler.flush_cursor()
        self.query_handler.run_query(query)

        print(query)

        print("Trained model is saved in database {}, table {}.".format(
            self.query_handler.connector.database, self.table_name))