Example no. 1
    def evaluate(self, num_queries, done, estimators=None):
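        """Runs the loaded queries through each estimator and collects error statistics."""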
        model = self.model
        if isinstance(model, DataParallelPassthrough):
            model = model.module
        model.eval()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        results = {}
        if num_queries:
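            # Build the progressive-sampling estimators lazily when none are supplied
            # (or a single JoinSampling estimator if eval_join_sampling is set).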
            if estimators is None:
                estimators = self.MakeProgressiveSamplers(
                    model,
                    self.train_data if self.factorize else self.table,
                    do_fanout_scaling=(self.dataset == 'tpcds'))
                if self.eval_join_sampling:  # None or an int.
                    estimators = [
                        estimators_lib.JoinSampling(self.train_data,
                                                    self.table,
                                                    self.eval_join_sampling)
                    ]

            assert self.loaded_queries is not None
            num_queries = min(len(self.loaded_queries), num_queries)
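            # Evaluate each loaded query with every estimator; print an intermediate report every 100 queries.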
            for i in range(num_queries):
                print('Query {}:'.format(i), end=' ')
                query = self.loaded_queries[i]
                self.Query(estimators,
                           oracle_card=None if self.oracle_cards is None else
                           self.oracle_cards[i],
                           query=query,
                           table=self.table,
                           oracle_est=self.oracle)
                if i % 100 == 0:
                    for est in estimators:
                        est.report()

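            # Summarize per-estimator error quantiles and dump per-query latencies (ms) to CSV.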
            for est in estimators:
                results[str(est) + '_max'] = np.max(est.errs)
                results[str(est) + '_p99'] = np.quantile(est.errs, 0.99)
                results[str(est) + '_p95'] = np.quantile(est.errs, 0.95)
                results[str(est) + '_median'] = np.median(est.errs)
                est.report()

                series = pd.Series(est.query_dur_ms)
                print(series.describe())
                series.to_csv(str(est) + '.csv', index=False, header=False)

        return results

    def evaluate(self, num_queries, done, estimators=None):
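        """Variant of evaluate() that additionally reports MSE, MAPE, and Pearson correlation for the collected estimates."""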
        global met0, mee0, met1, mee1
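        # met* hold ground-truth cardinalities and mee* the corresponding estimates
        # (the 0/1 suffixes track two separate query groups; group 1 may stay empty).
        # They appear to be appended per query, e.g. inside Query().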
        met0 = []
        mee0 = []
        met1 = []
        mee1 = []
        model = self.model
        if isinstance(model, DataParallelPassthrough):
            model = model.module
        model.eval()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        results = {}
        if num_queries:
            if estimators is None:
                estimators = self.MakeProgressiveSamplers(
                    model,
                    self.train_data if self.factorize else self.table,
                    do_fanout_scaling=(self.dataset == 'imdb'))
                if self.eval_join_sampling:  # None or an int.
                    estimators = [
                        estimators_lib.JoinSampling(self.train_data, self.table,
                                                    self.eval_join_sampling)
                    ]

            assert self.loaded_queries is not None
            num_queries = min(len(self.loaded_queries), num_queries)
            for i in range(num_queries):
                print('Query {}:'.format(i), end=' ')
                query = self.loaded_queries[i]
                self.Query(estimators,
                           oracle_card=None if self.oracle_cards is None else
                           self.oracle_cards[i],
                           query=query,
                           table=self.table,
                           oracle_est=self.oracle)
                if i % 100 == 0:
                    for est in estimators:
                        est.report()

            # for est in estimators:  # temporarily commented out
            # Compute MSE, MAPE, and Pearson correlation (PCCs) between true and estimated cardinalities.
            print('len0: ', len(mee0))
            print('len1: ', len(mee1))
            mse0 = mean_squared_error(mee0, met0)
            if len(mee1) != 0:
                mse1 = mean_squared_error(mee1, met1)
            met0 = np.array(met0)
            mee0 = np.array(mee0)
            met1 = np.array(met1)
            mee1 = np.array(mee1)
            PCCs0 = sc.stats.pearsonr(mee0, met0)  # Pearson correlation coefficient
            print('PCCs0:', PCCs0[0])
            if len(mee1) != 0:
                PCCs1 = sc.stats.pearsonr(mee1, met1)  # Pearson correlation coefficient
                print('PCCs1:', PCCs1[0])
            # mse = sum(np.square(met - mee))/len(met)
            mape0 = sum(np.abs((met0 - mee0) / met0)) / len(met0) * 100
            if len(mee1) != 0:
                mape1 = sum(np.abs((met1 - mee1) / met1)) / len(met1) * 100
            print('MSE0: ', mse0)
            print('MAPE0: ', mape0)
            if len(mee1) != 0:
                print('MSE1: ', mse1)
                print('MAPE1: ', mape1)

            dictest = {'est': mee0, 'tr': met0}
            dfest = pd.DataFrame(dictest)
            dfest.to_csv('result' + str(args.update) + '.csv', index=False, header=False)

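            # Per-estimator error quantiles and per-query latency dump, mirroring the first evaluate().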
            for est in estimators:
                results[str(est) + '_max'] = np.max(est.errs)
                results[str(est) + '_p99'] = np.quantile(est.errs, 0.99)
                results[str(est) + '_p95'] = np.quantile(est.errs, 0.95)
                results[str(est) + '_p90'] = np.quantile(est.errs, 0.90)
                results[str(est) + '_mean'] = np.mean(est.errs)
                results[str(est) + '_median'] = np.median(est.errs)
                est.report()

                series = pd.Series(est.query_dur_ms)
                print(series.describe())
                series.to_csv(str(est) + '.csv', index=False, header=False)

        return results