예제 #1
0
 def __init__(self,
              dataID,
              b_allow_repeated_value=True,
              logger_file=logger_file,
              num_of_points=None,
              base_models=None):
     """Load a 2-D dataset, fit the regression model and build the query engine.

     Args:
         dataID: Dataset identifier handed to ``dl.load2d``.
         b_allow_repeated_value: When false, ``remove_repeated_x_1d()`` is
             called on the loaded data before fitting.
         logger_file: Log target passed to ``logs.QueryLogs(log=...)``.
         num_of_points: Optional value forwarded to ``QueryEngine`` as
             ``num_training_points``; when ``None`` the keyword is omitted
             so ``QueryEngine`` applies its own default.
         base_models: Base models forwarded to ``CRegression``. ``None``
             (the default) means ``[tools.app_linear]``.
     """
     # Build the default list per call: a list literal in the signature is
     # created once at definition time and shared by every invocation.
     if base_models is None:
         base_models = [tools.app_linear]

     self.logger = logs.QueryLogs(log=logger_file)

     self.data = dl.load2d(dataID)
     if not b_allow_repeated_value:
         self.data.remove_repeated_x_1d()

     self.cRegression = CRegression(logger_object=self.logger,
                                    base_models=base_models)
     self.cRegression.fit(self.data)

     # Only pass num_training_points when the caller supplied a value.
     engine_kwargs = {"logger_object": self.logger}
     if num_of_points is not None:
         engine_kwargs["num_training_points"] = num_of_points
     self.qe = QueryEngine(self.cRegression, **engine_kwargs)
     self.qe.density_estimation()

     # Record the feature bounds before dropping this object's reference
     # to the loaded data.
     self.q_min = min(self.data.features)
     self.q_max = max(self.data.features)
     self.dataID = dataID
     del self.data
예제 #2
0
    def __init__(self, logger_object=None):
        """Set ``num_datasetaset`` and attach a query logger.

        Args:
            logger_object: Object exposing a ``logger_name`` attribute. Any
                falsy value (including the default ``None``) causes a new
                ``logs.QueryLogs()`` to be created instead.
        """
        self.num_datasetaset = 8

        # Fall back to a fresh QueryLogs when no usable logger was given.
        self.logger = logger_object or logs.QueryLogs()
        self.logger_name = self.logger.logger_name
예제 #3
0
파일: DBEst.py 프로젝트: qingzma/DBEstPy
 def __init__(self, dataset="tpcds", logger_file=logger_file, base_models=None):
     """Set up logging and the empty bookkeeping containers.

     Args:
         dataset: Dataset name stored on ``self.dataset``.
         logger_file: Log target passed to ``logs.QueryLogs(log=...)``.
         base_models: Base models stored on ``self.base_models``. ``None``
             (the default) means ``[tools.app_xgboost]``.
     """
     # Build the default list per instance: a list literal in the signature
     # is created once, so every default-constructed object would otherwise
     # share (and could mutate) the same ``self.base_models`` list.
     if base_models is None:
         base_models = [tools.app_xgboost]

     self.dataset = dataset
     self.logger = logs.QueryLogs(log=logger_file)
     self.logger.set_level("INFO")
     self.logger.logger.info("Initialising DBEst...")
     self.group_names = {}
     self.num_of_points_per_group_tbls = {}
     self.num_of_points = {}
     self.df = {}
     self.DBEstClients = {}
     self.tableColumnSets = []  # store all QueryEngines, for each table
     self.base_models = base_models
예제 #4
0
    def __init__(self,
                 cregression,
                 logger_object=None,
                 b_print_time_cost=True,
                 num_training_points=None):
        """Bind the engine to a fitted regression object and its training data.

        Args:
            cregression: Object providing ``training_data`` and, when
                ``num_training_points`` is ``None``,
                ``num_total_training_points``.
            logger_object: Wrapper whose ``.logger`` attribute is used; any
                falsy value falls back to ``logs.QueryLogs().logger``.
            b_print_time_cost: Stored on ``self.b_print_time_cost``.
            num_training_points: Explicit training-point count; ``None``
                means take it from ``cregression``.
        """
        self.num_training_points = (
            cregression.num_total_training_points
            if num_training_points is None
            else num_training_points
        )

        # Density-related state starts empty.
        self.log_dens = None
        self.kde = None  # kernel density object

        self.cregression = cregression
        self.training_data = cregression.training_data
        self.dimension = self.training_data.features.shape[1]

        log_source = logger_object if logger_object else logs.QueryLogs()
        self.logger = log_source.logger
        self.b_print_time_cost = b_print_time_cost

        # Registers a global warnings filter that ignores DeprecationWarning.
        warnings.filterwarnings(action='ignore', category=DeprecationWarning)
예제 #5
0
                                            q_max_boundary,
                                            steps=1000,
                                            n_bisect=50)
        end = datetime.now()
        time_cost = (end - start).total_seconds()
        if self.b_print_time_cost:
            self.logger.info("Time spent for approximate CORR: %.4fs." %
                             time_cost)
        return result, time_cost


if __name__ == "__main__":
    import generate_random
    warnings.filterwarnings(action='ignore', category=DeprecationWarning)

    logger = logs.QueryLogs()
    logger.set_no_output()
    data = dl.load2d(5)
    cRegression = CRegression(logger_object=logger)
    cRegression.fit(data)

    # cRegression.plot_training_data_2d()
    logger.set_logging()
    qe = QueryEngine(cRegression, logger_object=logger)
    qe.density_estimation()
    # qe.desngity_estimation_plt2d()

    r = generate_random.percentile(0.9,
                                   qe.kde,
                                   30,
                                   100,