Example #1
    def calc_reference_per_predictor(self, predictor, query_group, oracle=False):
        max_list = []
        _results = defaultdict()

        ap_file = os.path.normpath(f'{self.test_dir}/ref/QLmap1000-{query_group}')
        ensure_file(ap_file)
        base_pred_dir = f'{self.base_dir}/basicPredictions/{query_group}'

        """This part calculates and adds title queries column"""
        _base_predictions_dir = os.path.normpath('{}/{}/predictions'.format(base_pred_dir, predictor))
        cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_base_predictions_dir,
                                           folds_map_file=self.cv_map_f, load=True, ap_file=ap_file, test=self.corr_measure)
        _mean = cv_obj.calc_test_results()
        max_list.append(_mean)

        for quant in QUANTILES:

            if oracle:
                predictions_dir = f'{self.base_dir}/uqvPredictions/referenceLists/{query_group}/{quant}_vars/oracle'
            else:
                predictions_dir = f'{self.base_dir}/uqvPredictions/referenceLists/{query_group}/{quant}_vars/general'
            # list to save results for a specific predictor with different quantile variations
            _quant_res = [_mean]
            _index = [QUERY_GROUPS[query_group]]

            for ref_func, func_name in zip(REFERENCE_FUNCTIONS, REFERENCE_TITLES):
                _predictions_dir = os.path.normpath(f'{predictions_dir}/{ref_func}/{predictor}/predictions')
                _uef_predictions_dir = os.path.normpath(f'{predictions_dir}/{ref_func}/uef/{predictor}/predictions')  # computed but not used in this method
                cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_predictions_dir,
                                                   folds_map_file=self.cv_map_f, load=True, ap_file=ap_file, test=self.corr_measure)
                mean = cv_obj.calc_test_results()
                max_list.append(mean)
                _quant_res.append(mean)
                _index.append(func_name)

            sr = pd.Series(_quant_res)
            sr.index = _index
            sr.name = quant
            _results[quant] = sr

        res_df = pd.DataFrame.from_dict(_results, orient='index')
        res_df = res_df.reindex(index=QUANTILES)
        res_df.index = res_df.index.str.title()
        res_df.index.name = 'Quantile'
        res_df.reset_index(inplace=True)
        res_df = res_df.reindex(['Quantile', QUERY_GROUPS[query_group]] + REFERENCE_TITLES, axis='columns')
        res_df.insert(loc=0, column='Predictor', value=predictor.upper())
        return res_df, max(max_list)
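
All of the examples on this page drive the same InterTopicCrossValidation pattern: point the object at a directory of prediction files, a folds-map file, and an AP (average precision) file, then call calc_test_results() to get the mean test correlation. Below is a minimal sketch of that shared pattern; the import paths, the folds-map and AP paths, and the 'pearson' measure are assumptions not shown in the snippets, and SPLITS/REPEATS mirror the k=2, rep=30 used in Examples #2, #6, and #9.

    import os

    # Assumed imports; the real module paths are not shown in these snippets:
    # from crossval import InterTopicCrossValidation
    # from dataparser import ensure_file

    SPLITS, REPEATS = 2, 30  # assumption: same folds/repeats as Examples #2, #6, #9

    predictions_dir = os.path.normpath('base/basicPredictions/title/clarity/predictions')  # placeholder
    ap_file = ensure_file('tests/ref/QLmap1000-title')  # placeholder; ensure_file validates and returns the path

    cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS,
                                       predictions_dir=predictions_dir,
                                       folds_map_file='cv_map.json',  # placeholder folds map
                                       load=True, ap_file=ap_file, test='pearson')
    mean = cv_obj.calc_test_results()
    print(f'mean correlation over {REPEATS} repeats of {SPLITS}-fold CV: {mean}')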
Example #2
    def _calc_general_model_result(self, direct, predictor, sim_func):
        print(f'\n---Generating {predictor}-{sim_func} {direct} results---\n')
        _dict = defaultdict(list)

        def append_to_full_results_dict(_mean, _n):
            _dict['direction'].append(direct)
            _dict['predictor'].append(predictor)
            _dict['sim_func'].append(sim_func)
            _dict['n_vars'].append(_n)
            _dict['result'].append(_mean)

        mean = self.basic_results_dict.get(predictor, None)
        # A 0.0 correlation is a legitimate result, so test for None rather than truthiness.
        assert mean is not None, f"self.basic_results_dict couldn't get {predictor}"
        append_to_full_results_dict(mean, 0)
        _dir = f'{self.results_dir}/{direct}'
        for n in range(1, self.max_n + 1):
            _predictions_dir = dp.ensure_dir(
                f'{_dir}/{n}_vars/general/{sim_func}/{predictor}/predictions')
            cv_obj = InterTopicCrossValidation(
                k=2,
                rep=30,
                folds_map_file=self.cv_map_file,
                predictions_dir=_predictions_dir,
                load=True,
                ap_file=self.query_ap_file,
                test=self.corr_measure)
            mean = cv_obj.calc_test_results()
            append_to_full_results_dict(mean, n)
        _df = pd.DataFrame.from_dict(_dict)
        return _df
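
The frames this method returns are in long format, one row per (direction, predictor, sim_func, n_vars) combination. A plausible way to combine several of them and inspect the trend over n_vars, not taken from the source, is a concat-and-pivot with pandas; the values and direction labels below are made up:

    import pandas as pd

    # Two dummy frames shaped like _calc_general_model_result's output (values are invented).
    df_a = pd.DataFrame({'direction': ['asce'] * 3, 'predictor': ['clarity'] * 3,
                         'sim_func': ['rbo'] * 3, 'n_vars': [0, 1, 2],
                         'result': [0.41, 0.45, 0.47]})
    df_b = pd.DataFrame({'direction': ['desc'] * 3, 'predictor': ['clarity'] * 3,
                         'sim_func': ['rbo'] * 3, 'n_vars': [0, 1, 2],
                         'result': [0.41, 0.44, 0.48]})

    full_df = pd.concat([df_a, df_b], ignore_index=True)
    # One row per (direction, predictor, sim_func), one column per number of variants.
    table = full_df.pivot_table(index=['direction', 'predictor', 'sim_func'],
                                columns='n_vars', values='result')
    print(table)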
Example #3
    def calc_sim_ref_per_predictor(self, predictor):
        _results = defaultdict()
        ref_dir = f'{self.base_dir}/uqvPredictions/referenceLists'
        for quant in ['all', 'med', 'top', 'low', 'low-0']:
            # list to save results for a specific predictor with different quantile variations
            _quant_res = list()
            _index = list()
            for qgroup, query_group in QUERY_GROUPS.items():
                _predictions_dir = os.path.normpath(
                    f'{ref_dir}/{qgroup}/{quant}_vars/sim_as_pred/{predictor}/predictions')
                ap_file = os.path.normpath(f'{self.test_dir}/ref/QLmap1000-{qgroup}')
                ensure_file(ap_file)
                cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_predictions_dir,
                                                   folds_map_file=self.cv_map_f, load=True, ap_file=ap_file,
                                                   test=self.corr_measure)
                mean = cv_obj.calc_test_results()
                _quant_res.append(f'${mean}$')  # wrap the score for LaTeX math rendering
                _index.append(query_group)
            sr = pd.Series(_quant_res)
            sr.name = quant
            sr.index = _index
            _results[quant] = sr
        res_df = pd.DataFrame.from_dict(_results, orient='index')
        res_df = res_df.reindex(index=['all', 'med', 'top', 'low', 'low-0'])
        res_df.index = res_df.index.str.title()
        res_df.index.name = 'Quantile'
        res_df.reset_index(inplace=True)
        # _index still holds the query-group titles from the last loop iteration
        res_df = res_df.reindex(['Quantile'] + _index, axis='columns')
        res_df.insert(loc=0, column='Predictor', value=predictor)
        return res_df
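
A small aside on the recurring three-step pd.Series construction in these snippets (create, then assign .index, then .name): pandas accepts all three in one call, which reads a bit tighter. The values below are dummies:

    import pandas as pd

    # Equivalent to: sr = pd.Series(_quant_res); sr.index = _index; sr.name = quant
    sr = pd.Series(['$0.41$', '$0.45$'], index=['title', 'top'], name='all')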
Example #4
    def calc_sim_ref_per_group(self, qgroup):
        max_list = []
        _results = defaultdict()
        ref_dir = f'{self.base_dir}/uqvPredictions/referenceLists'
        ap_file = os.path.normpath(f'{self.test_dir}/ref/QLmap1000-{qgroup}')
        ensure_file(ap_file)
        for quant in QUANTILES:
            # list to save results for a specific predictor with different quantile variations
            _quant_res = list()
            _index = list()
            for predictor in SIM_REF_PREDICTORS:
                _predictions_dir = os.path.normpath(
                    f'{ref_dir}/{qgroup}/{quant}_vars/sim_as_pred/{predictor}/predictions')
                cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_predictions_dir,
                                                   folds_map_file=self.cv_map_f, load=True, ap_file=ap_file,
                                                   test=self.corr_measure)
                mean = cv_obj.calc_test_results()
                max_list.append(mean)
                _quant_res.append(mean)
                _index.append(SIM_REF_PREDICTORS[predictor])
            sr = pd.Series(_quant_res)
            sr.name = quant
            sr.index = _index
            _results[quant] = sr
        res_df = pd.DataFrame.from_dict(_results, orient='index')
        res_df = res_df.reindex(index=QUANTILES)
        res_df.index = res_df.index.str.title()
        res_df.index.name = 'Quantile'
        res_df.reset_index(inplace=True)
        res_df.insert(loc=0, column='Uniform', value='-')
        res_df = res_df.reindex(['Quantile'] + REFERENCE_TITLES, axis='columns')
        res_df.insert(loc=1, column=QUERY_GROUPS[qgroup], value='-')
        res_df.insert(loc=0, column='Predictor', value='SimilarityOnly')
        return res_df, max(max_list)
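
Like Example #1, this method returns the running maximum of all means alongside the table, presumably so a caller can highlight the best score when rendering. A hypothetical sketch of that use; the bolding convention, the 'title' group key, and the host object `obj` are assumptions:

    df, best = obj.calc_sim_ref_per_group('title')  # 'title' is an assumed query-group key
    # Bold the single best score before the table is written out (LaTeX convention assumed).
    df = df.applymap(lambda v: f'\\textbf{{{v}}}' if v == best else v)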
Example #5
    def calc_basic(self):
        test_dir = os.path.normpath('{}/basic'.format(self.test_dir))
        predictions_dir = os.path.normpath('{}/basicPredictions/title/'.format(self.base_dir))
        ap_score = ensure_file('{}/QLmap1000'.format(test_dir))

        _results = defaultdict()

        for p in PREDICTORS:
            # dir of aggregated prediction results
            _predictions_dir = os.path.normpath('{}/{}/predictions'.format(predictions_dir, p))
            # dir of aggregated uef prediction results
            _uef_predictions_dir = os.path.normpath('{}/uef/{}/predictions'.format(predictions_dir, p))
            # list to save non uef results for a specific predictor with different AP files
            _p_res = list()
            # list to save uef results
            _uef_p_res = list()
            for measure in CORR_MEASURES:
                cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_predictions_dir,
                                                   folds_map_file=self.cv_map_f, load=True, test=measure,
                                                   ap_file=ap_score)
                uef_cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_uef_predictions_dir,
                                                       folds_map_file=self.cv_map_f, load=True, test=measure,
                                                       ap_file=ap_score)
                mean = cv_obj.calc_test_results()
                uef_mean = uef_cv_obj.calc_test_results()
                _p_res.append('${}$'.format(mean))
                _uef_p_res.append('${}$'.format(uef_mean))

            sr = pd.Series(_p_res)
            uef_sr = pd.Series(_uef_p_res)
            sr.name = p
            uef_p = 'uef({})'.format(p)
            uef_sr.name = uef_p
            _results[p] = sr
            _results[uef_p] = uef_sr

        res_df = pd.DataFrame.from_dict(_results, orient='index')
        _uef_predictors = ['uef({})'.format(p) for p in PREDICTORS]
        res_df = res_df.reindex(index=PREDICTORS + _uef_predictors)
        res_df.index = res_df.index.str.upper()
        res_df.reset_index(inplace=True)
        res_df.columns = ['predictor'] + CORR_MEASURES
        res_df.insert(loc=0, column='Function', value='basic')
        return res_df
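
The '${}$' wrapping above (also used in Example #3) only pays off if the frame is later rendered to LaTeX with escaping disabled, so the scores come out as inline math rather than literal dollar signs. That rendering step is not shown in the source; a minimal sketch with dummy values:

    import pandas as pd

    df = pd.DataFrame({'predictor': ['CLARITY', 'NQC'],
                       'pearson': ['$0.54$', '$0.61$']})
    # escape=False keeps the $...$ wrappers as LaTeX inline math instead of \$...\$.
    print(df.to_latex(index=False, escape=False))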
Example #6
    def calc_single_query_result(self, predictor):
        print(f'\n---Generating {predictor} 0 vars results---\n')
        _predictions_dir = dp.ensure_dir(
            f'{self.basic_predictions_dir}/{predictor}/predictions')
        cv_obj = InterTopicCrossValidation(k=2,
                                           rep=30,
                                           folds_map_file=self.cv_map_file,
                                           predictions_dir=_predictions_dir,
                                           load=True,
                                           ap_file=self.query_ap_file,
                                           test=self.corr_measure)
        mean = cv_obj.calc_test_results()
        # cache the no-variants baseline; Example #2 reads it back as the n=0 result
        self.basic_results_dict[predictor] = mean
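
Note the coupling: this method caches the no-variants baseline in self.basic_results_dict, which _calc_general_model_result in Example #2 asserts on, so it has to run first for every predictor. A sketch of the assumed call order; the predictor names, direction, and sim_func strings are placeholders:

    for predictor in ['clarity', 'nqc', 'wig']:  # hypothetical predictor list
        obj.calc_single_query_result(predictor)  # fills basic_results_dict[predictor]
    df = obj._calc_general_model_result('asce', 'clarity', 'rbo')  # now the n=0 assert passes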
Example #7
    def calc_aggregated(self, aggregation):
        test_dir = os.path.normpath('{}/aggregated'.format(self.test_dir))
        predictions_dir = '{}/uqvPredictions/aggregated/{}'.format(self.base_dir, aggregation)
        _results = defaultdict()

        # uef variants are evaluated inline via UEF_PREDICTORS, so no separate uef pass is needed
        for p in PREDICTORS + UEF_PREDICTORS:
            # dir of aggregated prediction results
            _predictions_dir = os.path.normpath('{}/{}/predictions'.format(predictions_dir, p))
            # list to save results for a specific predictor across the different AP files
            _p_res = list()
            _index = list()
            for agg in AGGREGATE_FUNCTIONS + ['combsum']:
                ap_score = ensure_file('{}/map1000-{}'.format(test_dir, agg))
                cv_obj = InterTopicCrossValidation(k=SPLITS, rep=REPEATS, predictions_dir=_predictions_dir,
                                                   folds_map_file=self.cv_map_f, load=True, test=self.corr_measure,
                                                   ap_file=ap_score)
                mean = cv_obj.calc_test_results()
                _p_res.append(mean)
                _index.append(agg)

            sr = pd.Series(_p_res)
            sr.name = p
            sr.index = _index
            _results[p] = sr

        res_df = pd.DataFrame.from_dict(_results, orient='index')
        res_df = res_df.reindex(index=PREDICTORS + UEF_PREDICTORS)
        res_df.index.name = 'predictor'
        res_df.index = res_df.index.str.upper()
        res_df.reset_index(inplace=True)
        res_df.insert(loc=0, column='predictor-agg', value=aggregation)
        res_df = res_df.reindex(['predictor-agg', 'predictor'] + AGGREGATE_FUNCTIONS + ['combsum'], axis='columns')
        return res_df
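
Since calc_aggregated returns one frame per aggregation method, a caller would presumably build the full table by looping over the aggregation names and concatenating. A sketch; the aggregation names and the host object `results` are assumptions, the real list lives in the project's constants:

    import pandas as pd

    aggregations = ['avg', 'max', 'min', 'med']  # hypothetical aggregation names
    frames = [results.calc_aggregated(agg) for agg in aggregations]
    full_table = pd.concat(frames, ignore_index=True)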
Example #8
def calc_s(cv_obj: InterTopicCrossValidation, full_scores_df: pd.DataFrame):
    # Drop the cached correlation frame so it is recomputed for the new scores.
    if hasattr(cv_obj, 'corr_df'):
        del cv_obj.corr_df
    cv_obj.full_set = full_scores_df
    score = cv_obj.calc_test_results()
    return float(score)
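
Because calc_s injects full_set directly and invalidates the cached corr_df, a single cross-validation object can score many candidate prediction frames without re-reading anything from disk, e.g. in a parameter sweep. A sketch; candidate_frames is a hypothetical dict mapping a parameter to a full scores DataFrame:

    best_key, best_score = None, float('-inf')
    for key, scores_df in candidate_frames.items():  # hypothetical dict of DataFrames
        s = calc_s(cv_obj, scores_df)
        if s > best_score:
            best_key, best_score = key, s
    print(best_key, best_score)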
Example #9
    def _calc_cv_result(self, similarity, predictor, lambda_param):
        predictions_dir = self.results_dirs_dict.get((similarity, predictor, lambda_param))
        cv_obj = InterTopicCrossValidation(k=2, rep=30, folds_map_file=self.cv_map_file,
                                           predictions_dir=predictions_dir, load=True,
                                           ap_file=self.query_ap_file, test=self.corr_measure)
        mean = cv_obj.calc_test_results()
        return mean
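
Since results_dirs_dict is keyed by (similarity, predictor, lambda_param), a natural caller sweeps the lambda grid for one pair and tabulates the means. A sketch; the grid values, the 'rbo'/'clarity' keys, and the host object `obj` are assumptions:

    import pandas as pd

    lambdas = [0.0, 0.25, 0.5, 0.75, 1.0]  # hypothetical lambda grid
    res = pd.DataFrame({'lambda': lambdas,
                        'mean': [obj._calc_cv_result('rbo', 'clarity', lam) for lam in lambdas]})
    print(res.sort_values('mean', ascending=False))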