def evaluate_spn_statistics(spn_path, target_csv_path, build_time_path):
    csv_list = []

    # SPN learn times
    for filename in os.listdir(spn_path):
        logger.debug(f'Reading {filename}')
        if not filename.startswith("ensemble") or filename.endswith('.zip'):
            continue

        spn_ensemble = read_ensemble(os.path.join(spn_path, filename))
        for spn in spn_ensemble.spns:
            num_nodes = len(get_nodes_by_type(spn.mspn, Node))
            upper_bound = 200 * len(spn.column_names) - 1
            # assert num_nodes <= upper_bound, "Num of nodes upper bound is wrong"
            csv_list.append((filename, spn.learn_time, spn.full_sample_size, spn.min_instances_slice,
                             spn.rdc_threshold, len(spn.relationship_set), len(spn.table_set),
                             " - ".join([table for table in spn.table_set]), len(spn.column_names),
                             num_nodes, upper_bound))

    # HDF create times
    with open(build_time_path) as f:
        hdf_preprocessing_time = int(f.readlines()[0])
        # padded with zeros so the row has the same width as the header below
        csv_list += [('generate_hdf', hdf_preprocessing_time, 0, 0, 0, 0, 0, "", 0, 0, 0)]

    with open(target_csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['filename', 'learn_time', 'full_sample_size', 'min_instances_slice', 'rdc_threshold',
                         'no_joins', 'no_tables', 'tables', 'no_columns', 'structure_stats', 'upper_bound'])
        writer.writerows(csv_list)

def load_deepdb(dataset: str, model_name: str) -> Tuple[Estimator, Dict[str, Any]]:
    model_file = MODEL_ROOT / dataset / f"{model_name}.pkl"
    L.info(f"load model from {model_file} ...")
    spn_ensemble = read_ensemble(model_file, build_reverse_dict=True)
    L.info(f'Get SPN: {get_structure_stats_dict(spn_ensemble.spns[0].mspn)}')

    state = spn_ensemble.state
    table = load_table(state['dataset'], state['version'])
    schema = construct_schema(table)
    estimator = DeepDB(spn_ensemble, table, schema, model_name)
    return estimator, state

def test_deepdb(dataset: str, version: str, workload: str, params: Dict[str, Any], overwrite: bool) -> None:
    """
    params:
        model: model file name
    """
    model_file = MODEL_ROOT / dataset / f"{params['model']}.pkl"
    L.info(f"load model from {model_file} ...")
    spn_ensemble = read_ensemble(model_file, build_reverse_dict=True)
    L.info(f'Get SPN: {get_structure_stats_dict(spn_ensemble.spns[0].mspn)}')

    state = spn_ensemble.state
    table = load_table(state['dataset'], state['version'])
    schema = construct_schema(table)
    estimator = DeepDB(spn_ensemble, table, schema, params['model'])

    run_test(dataset, version, workload, estimator, overwrite)

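# Illustrative usage sketch, not part of the original module: one way load_deepdb / test_deepdb
# might be invoked from a driver script. The dataset, version, workload, and model names below
# are hypothetical placeholders, not values shipped with the repository.
def _example_usage() -> None:
    # load a previously trained SPN ensemble and wrap it as an estimator
    estimator, state = load_deepdb(dataset='census', model_name='deepdb_example')
    print(f"loaded model trained on {state['dataset']} (version {state['version']})")

    # run the stored workload against the estimator and persist the results
    test_deepdb(dataset='census', version='original', workload='base',
                params={'model': 'deepdb_example'}, overwrite=False)
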
            args.ensemble_max_no_joins,
            args.rdc_threshold,
            args.pairwise_rdc_path,
            incremental_learning_rate=args.incremental_learning_rate,
            incremental_condition=args.incremental_condition)  # additional incremental-learning parameters
    else:
        raise NotImplementedError

    # training time
    timetrain = time.time()
    # fmetric.write('Traintime: ' + str(timetrain - time1) + '\n')
    print('Traintime: \n', timetrain - time1)
    # fmetric.close()

# Read pre-trained ensemble and generate SPN inference code
if args.code_generation:
    spn_ensemble = read_ensemble(args.ensemble_path, build_reverse_dict=True)
    generate_ensemble_code(spn_ensemble, floating_data_type='float', ensemble_path=args.ensemble_path)

# Read pre-trained ensemble and evaluate cardinality queries at scale
if args.evaluate_cardinalities_scale:
    from evaluation.cardinality_evaluation import evaluate_cardinalities

    for i in [3, 4, 5, 6]:
        for j in [1, 2, 3, 4, 5]:
            target_path = args.target_path.format(i, j)
            query_file_location = args.query_file_location.format(i, j)
            true_cardinalities_path = args.ground_truth_file_location.format(i, j)
            evaluate_cardinalities(
def evaluate_aqp_queries(ensemble_location, query_filename, target_path, schema, ground_truth_path,
                         rdc_spn_selection, pairwise_rdc_path, max_variants=5, merge_indicator_exp=False,
                         exploit_overlapping=False, min_sample_ratio=0, debug=False,
                         show_confidence_intervals=True):
    """
    Loads the ensemble and computes metrics for AQP query evaluation.
    :param ensemble_location:
    :param query_filename:
    :param target_path:
    :param schema:
    :param ground_truth_path:
    :param max_variants:
    :param merge_indicator_exp:
    :param exploit_overlapping:
    :param min_sample_ratio:
    :return:
    """
    spn_ensemble = read_ensemble(ensemble_location, build_reverse_dict=True)
    csv_rows = []

    # read all queries
    with open(query_filename) as f:
        queries = f.readlines()

    # read ground truth
    with open(ground_truth_path, 'rb') as handle:
        ground_truth = pickle.load(handle)

    for query_no, query_str in enumerate(queries):
        query_str = query_str.strip()
        logger.info(f"Evaluating AQP query {query_no}: {query_str}")

        query = parse_query(query_str.strip(), schema)
        aqp_start_t = perf_counter()
        confidence_intervals, aqp_result = spn_ensemble.evaluate_query(query,
                                                                       rdc_spn_selection=rdc_spn_selection,
                                                                       pairwise_rdc_path=pairwise_rdc_path,
                                                                       merge_indicator_exp=merge_indicator_exp,
                                                                       max_variants=max_variants,
                                                                       exploit_overlapping=exploit_overlapping,
                                                                       debug=debug,
                                                                       confidence_intervals=show_confidence_intervals)
        aqp_end_t = perf_counter()
        latency = aqp_end_t - aqp_start_t
        logger.info(f"\t\t{'total_time:':<32}{latency} secs")

        if ground_truth is not None:
            true_result = ground_truth[query_no]
            if isinstance(aqp_result, list):
                average_relative_error, bin_completeness, false_bin_percentage, total_bins, \
                    confidence_interval_precision, confidence_interval_length, _ = \
                    evaluate_group_by(aqp_result, true_result, confidence_intervals)
                logger.info(f"\t\t{'total_bins: ':<32}{total_bins}")
                logger.info(f"\t\t{'bin_completeness: ':<32}{bin_completeness * 100:.2f}%")
                logger.info(f"\t\t{'average_relative_error: ':<32}{average_relative_error * 100:.2f}%")
                logger.info(f"\t\t{'false_bin_percentage: ':<32}{false_bin_percentage * 100:.2f}%")
                if show_confidence_intervals:
                    logger.info(
                        f"\t\t{'confidence_interval_precision: ':<32}{confidence_interval_precision * 100:>.2f}%")
                    logger.info(f"\t\t{'confidence_interval_length: ':<32}{confidence_interval_length * 100:>.2f}%")
            else:
                true_result = true_result[0][0]
                predicted_value = aqp_result
                logger.info(f"\t\t{'predicted:':<32}{predicted_value}")
                logger.info(f"\t\t{'true:':<32}{true_result}")
                # logger.info(f"\t\t{'confidence_interval:':<32}{confidence_intervals}")

                relative_error = compute_relative_error(true_result, predicted_value)
                logger.info(f"\t\t{'relative_error:':<32}{relative_error * 100:.2f}%")

                if show_confidence_intervals:
                    confidence_interval_precision, confidence_interval_length = evaluate_confidence_interval(
                        confidence_intervals, true_result, predicted_value)
                    logger.info(
                        f"\t\t{'confidence_interval_precision:':<32}{confidence_interval_precision * 100:>.2f}%")
                    logger.info(f"\t\t{'confidence_interval_length: ':<32}{confidence_interval_length * 100:>.2f}%")

                total_bins = 1
                bin_completeness = 1
                average_relative_error = relative_error

            csv_rows.append({'approach': ApproachType.MODEL_BASED,
                             'query_no': query_no,
                             'latency': latency,
                             'average_relative_error': average_relative_error * 100,
                             'bin_completeness': bin_completeness * 100,
                             'total_bins': total_bins,
                             'query': query_str,
                             'sample_percentage': 100})
        else:
            logger.info(f"\t\tpredicted: {aqp_result}")

    save_csv(csv_rows, target_path)

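# Hedged sketch, not the repository's implementation: one plausible definition of the
# relative-error metric referenced above, assuming it is the absolute deviation normalised
# by the true value. The actual compute_relative_error helper is defined elsewhere in the
# code base and may differ in edge-case handling.
def _relative_error_sketch(true_result: float, predicted_value: float) -> float:
    if true_result == 0:
        # undefined for a true value of zero; fall back to the absolute deviation
        return abs(true_result - predicted_value)
    return abs(true_result - predicted_value) / true_result
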
def evaluate_cardinalities(version, ensemble_location, physical_db_name, query_filename, target_csv_path, schema,
                           rdc_spn_selection, pairwise_rdc_path, use_generated_code=False,
                           true_cardinalities_path='./benchmarks/job-light/sql/job_light_true_cardinalities.csv',
                           max_variants=1, merge_indicator_exp=False, exploit_overlapping=False, min_sample_ratio=0):
    """
    Loads the ensemble and evaluates the cardinality for every query in query_filename.
    :param exploit_overlapping:
    :param min_sample_ratio:
    :param max_variants:
    :param merge_indicator_exp:
    :param target_csv_path:
    :param query_filename:
    :param true_cardinalities_path:
    :param ensemble_location:
    :param physical_db_name:
    :param schema:
    :return:
    """
    df_true_card = None
    true_estimator = None
    if true_cardinalities_path is not None:
        # true cardinalities are read from a csv file
        df_true_card = pd.read_csv(true_cardinalities_path)
    else:
        # true cardinalities are obtained from the database
        db_connection = DBConnection(db=physical_db_name)
        true_estimator = TrueCardinalityEstimator(schema, db_connection)

    # load ensemble
    spn_ensemble = read_ensemble(ensemble_location, build_reverse_dict=True)

    csv_rows = []
    q_errors = []

    # read all queries
    with open(query_filename) as f:
        queries = f.readlines()

    if use_generated_code:
        spn_ensemble.use_generated_code()

    latencies = []
    # true and predicted cardinalities, collected for MSE/MAPE/PCC
    met = []  # true cardinalities
    mee = []  # predicted cardinalities
    for query_no, query_str in enumerate(queries):
        query_str = query_str.strip()
        logger.debug(f"Predicting cardinality for query {query_no}: {query_str}")

        # parse the query
        query = parse_query(query_str.strip(), schema)
        # print('query:\n')  # important
        # print(query)  # important
        assert query.query_type == QueryType.CARDINALITY

        if df_true_card is None:
            assert true_estimator is not None
            _, cardinality_true = true_estimator.true_cardinality(query)
        else:
            print('df_tcard:\n',
                  df_true_card.loc[df_true_card['query_no'] == query_no, ['cardinality_true']].values)
            cardinality_true = df_true_card.loc[df_true_card['query_no'] == query_no,
                                                ['cardinality_true']].values[0][0]

        # only relevant for generated code
        gen_code_stats = GenCodeStats()

        card_start_t = perf_counter()
        # estimate the cardinality with the SPN ensemble
        _, factors, cardinality_predict, factor_values = spn_ensemble \
            .cardinality(query, rdc_spn_selection=rdc_spn_selection, pairwise_rdc_path=pairwise_rdc_path,
                         merge_indicator_exp=merge_indicator_exp, max_variants=max_variants,
                         exploit_overlapping=exploit_overlapping, return_factor_values=True,
                         gen_code_stats=gen_code_stats)
        card_end_t = perf_counter()
        latency_ms = (card_end_t - card_start_t) * 1000

        mee.append(cardinality_predict)
        met.append(cardinality_true)

        logger.debug(f"\t\tLatency: {latency_ms:.2f}ms")
        logger.debug(f"\t\tTrue: {cardinality_true}")
        logger.debug(f"\t\tPredicted: {cardinality_predict}")

        # check for the all-zero case first to avoid a division by zero
        if cardinality_predict == 0 and cardinality_true == 0:
            q_error = 1.0
        else:
            q_error = max(cardinality_predict / cardinality_true, cardinality_true / cardinality_predict)

        logger.debug(f"Q-Error was: {q_error}")
        q_errors.append(q_error)
        csv_rows.append({'query_no': query_no,
                         'query': query_str,
                         'cardinality_predict': cardinality_predict,
                         'cardinality_true': cardinality_true,
                         'latency_ms': latency_ms,
                         'generated_spn_calls': gen_code_stats.calls,
                         'latency_generated_code': gen_code_stats.total_time * 1000})
        latencies.append(latency_ms)

    # fmetric = open('/home/zhangjintao/Benchmark3/metric_result/' + version + '.deepdb.txt', 'a')
    mse = mean_squared_error(mee, met)
    mee = np.array(mee)
    met = np.array(met)

    # Pearson correlation coefficient
    PCCs = sc.stats.pearsonr(mee, met)
    # fmetric.write('PCCs:' + str(PCCs[0]) + '\n')
    print('PCCs:', PCCs[0])

    # mse = sum(np.square(met - mee)) / len(met)
    mape = sum(np.abs((met - mee) / met)) / len(met) * 100
    # fmetric.write('MSE: ' + str(mse) + '\n')
    # fmetric.write('MAPE: ' + str(mape) + '\n')
    print('MSE: ', mse)
    print('MAPE: ', mape)

    # print percentiles and compare against published JOB-light results
    q_errors = np.array(q_errors)
    q_errors.sort()
    logger.info(f"{q_errors[-10:]}")

    # https://arxiv.org/pdf/1809.00677.pdf
    ibjs_vals = [1.59, 150, 3198, 14309, 590]
    mcsn_vals = [3.82, 78.4, 362, 927, 57.9]
    for i, percentile in enumerate([50, 90, 95, 99, 100]):
        # fmetric.write(f"Q-Error {percentile}%-Percentile: {np.percentile(q_errors, percentile)}\n")
        logger.info(f"Q-Error {percentile}%-Percentile: {np.percentile(q_errors, percentile)} (vs. "
                    f"MCSN: {mcsn_vals[i]} and IBJS: {ibjs_vals[i]})")
    # fmetric.write(f"Q-Mean wo inf {np.mean(q_errors[np.isfinite(q_errors)])}\n")
    logger.info(f"Q-Mean wo inf {np.mean(q_errors[np.isfinite(q_errors)])} (vs. "
                f"MCSN: {mcsn_vals[-1]} and IBJS: {ibjs_vals[-1]})")
    # fmetric.write(f"Latency avg: {np.mean(latencies):.2f}ms\n")
    logger.info(f"Latency avg: {np.mean(latencies):.2f}ms")
    # fmetric.close()

    # write to csv
    save_csv(csv_rows, target_csv_path)

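# Minimal sketch, not from the original code base: the error metrics used above, written as
# standalone helpers so the definitions are explicit. Assumes non-negative cardinalities; the
# q-error zero guard mirrors the handling inside evaluate_cardinalities.
def _q_error(cardinality_true: float, cardinality_predict: float) -> float:
    # q-error = max(pred/true, true/pred); defined as 1.0 when both cardinalities are zero
    if cardinality_true == 0 and cardinality_predict == 0:
        return 1.0
    return max(cardinality_predict / cardinality_true, cardinality_true / cardinality_predict)


def _mape(cardinalities_true: np.ndarray, cardinalities_predict: np.ndarray) -> float:
    # mean absolute percentage error over all queries, in percent
    return float(np.mean(np.abs((cardinalities_true - cardinalities_predict) / cardinalities_true)) * 100)
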
def evaluate_confidence_intervals(ensemble_location, query_filename, target_path, schema, ground_truth_path,
                                  confidence_sample_size, rdc_spn_selection, pairwise_rdc_path, max_variants=5,
                                  merge_indicator_exp=False, exploit_overlapping=False, min_sample_ratio=0,
                                  sample_size=10000000, true_result_upsampling_factor=300):  # 100
    """
    Loads the ensemble and computes metrics for confidence interval evaluation.
    :param ensemble_location:
    :param query_filename:
    :param target_path:
    :param schema:
    :param max_variants:
    :param merge_indicator_exp:
    :param exploit_overlapping:
    :param min_sample_ratio:
    :return:
    """
    spn_ensemble = read_ensemble(ensemble_location, build_reverse_dict=True)
    csv_rows = []

    # read all queries
    with open(query_filename) as f:
        queries = f.readlines()

    # read ground truth
    with open(ground_truth_path, 'rb') as handle:
        ground_truth = pickle.load(handle)

    for query_no, query_str in enumerate(queries):
        query_str = query_str.strip()
        logger.info(f"Evaluating the confidence intervals for query {query_no}: {query_str}")

        query = parse_query(query_str.strip(), schema)
        aqp_start_t = perf_counter()
        confidence_intervals, aqp_result = spn_ensemble.evaluate_query(query,
                                                                       rdc_spn_selection=rdc_spn_selection,
                                                                       pairwise_rdc_path=pairwise_rdc_path,
                                                                       merge_indicator_exp=merge_indicator_exp,
                                                                       max_variants=max_variants,
                                                                       exploit_overlapping=exploit_overlapping,
                                                                       debug=False,
                                                                       confidence_intervals=True,
                                                                       confidence_sample_size=confidence_sample_size)
        aqp_end_t = perf_counter()
        latency = aqp_end_t - aqp_start_t
        logger.info(f"\t\t{'total_time:':<32}{latency} secs")

        true_result = ground_truth[query_no]

        type_all_ops = None
        if all_operations_of_type(AggregationType.SUM, query):
            type_all_ops = AggregationType.SUM
        elif all_operations_of_type(AggregationType.AVG, query):
            type_all_ops = AggregationType.AVG
        elif all_operations_of_type(AggregationType.COUNT, query):
            type_all_ops = AggregationType.COUNT

        if isinstance(aqp_result, list):
            for result_row in true_result:
                group_by_attributes = result_row[:-3]
                matching_aqp_rows = [(matching_idx, aqp_row) for matching_idx, aqp_row in enumerate(aqp_result)
                                     if aqp_row[:-1] == group_by_attributes]
                assert len(matching_aqp_rows) <= 1, "Multiple possible group by attributes found."
                if len(matching_aqp_rows) == 1:
                    matching_idx, matching_aqp_row = matching_aqp_rows[0]
                    true_aggregate, std, count = result_row[-3:]
                    if count <= 1:
                        # std is not defined in this case
                        continue

                    interval = confidence_intervals[matching_idx]
                    aqp_std, true_std, relative_confidence_interval_error, true_result, aqp_aggregate = evaluate_stds(
                        matching_aqp_row[-1], interval, count, sample_size, std, true_aggregate, type_all_ops,
                        true_result_upsampling_factor)
                    logger.debug(f"\t\taqp_std: {aqp_std}")
                    logger.debug(f"\t\ttrue_std: {true_std}")

                    csv_rows.append({'query_no': query_no,
                                     'latency': latency,
                                     'aqp_std': aqp_std,
                                     'aqp_aggregate': aqp_aggregate,
                                     'true_std': true_std,
                                     'true_aggregate': true_result,
                                     'count': count,
                                     'relative_confidence_interval_error': relative_confidence_interval_error})
        else:
            true_aggregate, std, count = true_result[0][-3:]
            aqp_std, true_std, relative_confidence_interval_error, true_result, aqp_aggregate = evaluate_stds(
                aqp_result, confidence_intervals, count, sample_size, std, true_aggregate, type_all_ops,
                true_result_upsampling_factor)
            logger.debug(f"\t\taqp_std: {aqp_std}")
            logger.debug(f"\t\ttrue_std: {true_std}")

            csv_rows.append({'query_no': query_no,
                             'latency': latency,
                             'aqp_std': aqp_std,
                             'aqp_aggregate': aqp_aggregate,
                             'true_std': true_std,
                             'true_aggregate': true_result,
                             'count': count,
                             'relative_confidence_interval_error': relative_confidence_interval_error})

    save_csv(csv_rows, target_path)