def __init__(self, dataID, b_allow_repeated_value=True, logger_file=logger_file,
             num_of_points=None, base_models=[tools.app_linear]):
    self.logger = logs.QueryLogs(log=logger_file)
    # self.logger.set_no_output()
    self.data = dl.load2d(dataID)
    if not b_allow_repeated_value:
        self.data.remove_repeated_x_1d()
    # Fit the regression ensemble on the loaded 2-D dataset.
    self.cRegression = CRegression(logger_object=self.logger, base_models=base_models)
    self.cRegression.fit(self.data)
    # self.logger.set_logging(file_name=logger_file)
    if num_of_points is None:
        self.qe = QueryEngine(self.cRegression, logger_object=self.logger)
    else:
        self.qe = QueryEngine(self.cRegression, logger_object=self.logger,
                              num_training_points=num_of_points)
    self.qe.density_estimation()
    # Record the feature range for query bounds before discarding the raw data.
    self.q_min = min(self.data.features)
    self.q_max = max(self.data.features)
    self.dataID = dataID
    del self.data
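# A minimal usage sketch (hypothetical: the enclosing class name "Client1D" and
# the dataset ID below are assumptions, not taken from this file). The
# constructor loads the 2-D dataset, fits the regression ensemble, builds the
# query engine with its density estimator, and keeps only the query range plus
# the engine itself:
#
#     client = Client1D(dataID=5, b_allow_repeated_value=False, num_of_points=10000)
#     print(client.q_min, client.q_max)  # query range of the original features
#     # client.qe is a QueryEngine ready for approximate aggregate queries.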
def __init__(self, logger_object=None):
    self.num_datasetaset = 8
    if not logger_object:
        logger_object = logs.QueryLogs()
    self.logger = logger_object
    self.logger_name = logger_object.logger_name
def __init__(self, dataset="tpcds", logger_file=logger_file,
             base_models=[tools.app_xgboost]):
    self.dataset = dataset
    self.logger = logs.QueryLogs(log=logger_file)
    self.logger.set_level("INFO")
    self.logger.logger.info("Initialising DBEst...")
    self.group_names = {}
    self.num_of_points_per_group_tbls = {}
    self.num_of_points = {}
    self.df = {}
    self.DBEstClients = {}  # store all QueryEngines, one for each table
    self.tableColumnSets = []
    self.base_models = base_models
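# A minimal usage sketch (hypothetical call; the class name "DBEst" is inferred
# from the log message above). The constructor only prepares empty registries;
# tables, group names, and per-table QueryEngines are expected to be filled in
# by later methods:
#
#     db_est = DBEst(dataset="tpcds", base_models=[tools.app_xgboost])
#     # db_est.DBEstClients: table name -> query engine(s), populated later
#     # db_est.df:           table name -> cached DataFrame, populated later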
def __init__(self, cregression, logger_object=None, b_print_time_cost=True,
             num_training_points=None):
    if num_training_points is None:
        self.num_training_points = cregression.num_total_training_points
    else:
        self.num_training_points = num_training_points
    self.log_dens = None
    self.training_data = cregression.training_data
    self.kde = None  # kernel density object
    self.dimension = self.training_data.features.shape[1]
    self.cregression = cregression
    if logger_object:
        self.logger = logger_object.logger
    else:
        self.logger = logs.QueryLogs().logger
    self.b_print_time_cost = b_print_time_cost
    warnings.filterwarnings(action='ignore', category=DeprecationWarning)
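# A minimal usage sketch (it mirrors the __main__ driver below; the concrete
# arguments are assumptions). Note that the constructor only records the
# training data: self.kde stays None until density_estimation() is called, so
# that call must precede any approximate query:
#
#     qe = QueryEngine(cRegression, logger_object=logger, num_training_points=10000)
#     qe.density_estimation()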
        q_max_boundary, steps=1000, n_bisect=50)
    end = datetime.now()
    time_cost = (end - start).total_seconds()
    if self.b_print_time_cost:
        self.logger.info("Time spent for approximate CORR: %.4fs." % time_cost)
    return result, time_cost


if __name__ == "__main__":
    import generate_random
    warnings.filterwarnings(action='ignore', category=DeprecationWarning)
    logger = logs.QueryLogs()
    logger.set_no_output()
    data = dl.load2d(5)
    cRegression = CRegression(logger_object=logger)
    cRegression.fit(data)
    # cRegression.plot_training_data_2d()
    logger.set_logging()
    qe = QueryEngine(cRegression, logger_object=logger)
    qe.density_estimation()
    # qe.desngity_estimation_plt2d()
    r = generate_random.percentile(0.9, qe.kde, 30, 100,