Example #1
class _nERR_IA(measures.BaseMeasure):
    """
    Intent-Aware Expected Reciprocal Rank with collection-dependent normalisation.

::

    @inproceedings{10.1145/1645953.1646033,
      author = {Chapelle, Olivier and Metlzer, Donald and Zhang, Ya and Grinspan, Pierre},
      title = {Expected Reciprocal Rank for Graded Relevance},
      booktitle = {CIKM},
      year = {2009}
    }
    """
    __name__ = 'nERR_IA'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=False,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        ),
        'judged_only':
        measures.ParamInfo(
            dtype=bool,
            default=False,
            desc=
            'calculate measure using only judged documents (i.e., discard unjudged documents)'
        ),
    }
Example #2
class _P_IA(measures.BaseMeasure):
    """
    Intent-aware Precision@k.
    """
    __name__ = 'P_IA'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=False,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        ),
        'judged_only':
        measures.ParamInfo(
            dtype=bool,
            default=False,
            desc=
            'calculate measure using only judged documents (i.e., discard unjudged documents)'
        ),
    }
Example #3
class _INST(measures.Measure):
    """
    INST

::

     @inproceedings{Moffat:2012:MMI:2407085.2407092,
       author = {Moffat, Alistair and Scholer, Falk and Thomas, Paul},
       title = {Models and Metrics: IR Evaluation As a User Process},
       booktitle = {Proceedings of the Seventeenth Australasian Document Computing Symposium},
       year = {2012},
       url = {http://doi.acm.org/10.1145/2407085.2407092}
     }
    """
    __name__ = 'INST'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'T':
        measures.ParamInfo(dtype=float, default=1.0, desc='TODO'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #4
class _nNRBP(measures.BaseMeasure):
    """
    Novelty- and Rank-Biased Precision with collection-dependent normalisation.

::

    @InProceedings{10.1007/978-3-642-04417-5_17,
      author="Clarke, Charles L. A. and Kolla, Maheedhar and Vechtomova, Olga",
      title="An Effectiveness Measure for Ambiguous and Underspecified Queries ",
      booktitle="ICTIR",
      year="2009"
    }
    """
    __name__ = 'nNRBP'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        ),
        'alpha':
        measures.ParamInfo(dtype=float,
                           default=0.5,
                           desc='Redundancy intolerance'),
        'beta':
        measures.ParamInfo(dtype=float, default=0.5, desc='Patience'),
    }
Example #5
class _NERR11(measures.Measure):
    """
    Version of the Not (but Nearly) Expected Reciprocal Rank (NERR) measure, from Equation (12) of the following paper.

::

     @inproceedings{Azzopardi:2021:ECE:3471158.3472239,
       author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair},
       title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics},
       booktitle = {ICTIR},
       year = {2021},
       url = {https://doi.org/10.1145/3471158.3472239}
     }
    """
    __name__ = 'NERR11'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'T':
        measures.ParamInfo(dtype=float,
                           default=1.0,
                           desc='total desired gain (normalized)'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #6
class _SDCG(measures.Measure):
    """
    The Scaled Discounted Cumulative Gain (SDCG), a variant of nDCG that assumes more
    fully-relevant documents exist but are not labeled.
    """
    __name__ = 'SDCG'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='ranking cutoff threshold'),
        'dcg':
        measures.ParamInfo(dtype=str,
                           choices=['log2'],
                           default='log2',
                           desc='DCG formulation'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #7
class _Compat(measures.Measure):
    """
    Compatibility measure described in:

::

    @article{10.1145/3451161,
      author = {Clarke, Charles L. A. and Vtyurina, Alexandra and Smucker, Mark D.},
      title = {Assessing Top-k Preferences},
      journal = {ACM Transactions on Information Systems},
      volume = {39},
      number = {3},
      articleno = {33},
      numpages = {21},
      year = {2021},
      url = {https://doi.org/10.1145/3451161},
    }
    """
    __name__ = 'Compat'
    NAME = __name__
    PRETTY_NAME = 'Compatibility'
    SHORT_DESC = 'The Rank Biased Overlap between the results and an ideal ranking.'
    SUPPORTED_PARAMS = {
        'p':
        measures.ParamInfo(dtype=float, default=0.95, desc='persistence'),
        'normalize':
        measures.ParamInfo(
            dtype=bool,
            default=True,
            desc='apply normalization for finite ideal rankings'),
    }
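As a rough illustration of the idea behind Compatibility, the sketch below computes a truncated Rank-Biased Overlap (RBO) between a run and an ideal ranking and, when normalize is set, divides by the ideal ranking's self-overlap. The function names and the simple truncated RBO (with no extrapolated residual) are assumptions for illustration, not the library's implementation.

def rbo(run, ideal, p=0.95, depth=None):
    # Truncated RBO: (1 - p) * sum over depths d of p^(d-1) * |overlap at depth d| / d
    depth = depth or max(len(run), len(ideal))
    seen_run, seen_ideal, score = set(), set(), 0.0
    for d in range(1, depth + 1):
        if d <= len(run):
            seen_run.add(run[d - 1])
        if d <= len(ideal):
            seen_ideal.add(ideal[d - 1])
        score += p ** (d - 1) * len(seen_run & seen_ideal) / d
    return (1 - p) * score

def compatibility(run, ideal, p=0.95, normalize=True):
    # Compatibility: RBO against the ideal ranking; normalization accounts for a finite ideal
    score = rbo(run, ideal, p)
    if normalize:
        best = rbo(ideal, ideal, p)  # best score achievable for this ideal ranking
        return score / best if best > 0 else 0.0
    return score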
Example #8
class _NERR10(measures.Measure):
    """
    Version of the Not (but Nearly) Expected Reciprocal Rank (NERR) measure, from Equation (10) of the following paper.

::

     @inproceedings{Azzopardi:2021:ECE:3471158.3472239,
       author = {Azzopardi, Leif and Mackenzie, Joel and Moffat, Alistair},
       title = {{ERR} is not {C/W/L}: Exploring the Relationship Between Expected Reciprocal Rank and Other Metrics},
       booktitle = {ICTIR},
       year = {2021},
       url = {https://doi.org/10.1145/3471158.3472239}
     }
    """
    __name__ = 'NERR10'
    NAME = __name__
    PRETTY_NAME = 'Nearly Expected Reciprocal Rank Eq 10'
    SHORT_DESC = 'A C/W/L approximation of ERR using gain-based stopping and RBP patience (p).'
    SUPPORTED_PARAMS = {
        'p':
        measures.ParamInfo(dtype=float, default=0.9, desc='persistence'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #9
class _AP(measures.Measure):
    """
    The [Mean] Average Precision ([M]AP). The average precision of a single query is the mean
    of the precision scores at each relevant item returned in a search results list.
    
    AP is typically used for adhoc ranking tasks where retrieving as many relevant items as possible is desired.
    It is commonly referred to as MAP when the mean of the per-query AP scores is taken over the query set.

::

    @article{Harman:1992:ESIR,
      author = {Donna Harman},
      title = {Evaluation Issues in Information Retrieval},
      journal = {Information Processing and Management},
      volume = {28},
      number = {4},
      pages = {439--440},
      year = {1992},
    }
    """
    __name__ = 'AP'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=False,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
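A minimal sketch of how Average Precision could be computed for a single query, assuming ranking is an ordered list of document ids and qrels maps document ids to relevance grades (these names are illustrative, not the library's internals):

def average_precision(ranking, qrels, rel=1):
    # AP for one query: mean of the precision values at the ranks of relevant documents
    relevant = {doc for doc, grade in qrels.items() if grade >= rel}
    if not relevant:
        return 0.0
    hits, total = 0, 0.0
    for rank, doc in enumerate(ranking, start=1):
        if doc in relevant:
            hits += 1
            total += hits / rank  # precision at this rank
    return total / len(relevant)

# Example: relevant documents at ranks 1 and 3 -> (1/1 + 2/3) / 2 = 0.833...
print(average_precision(['d1', 'd5', 'd2'], {'d1': 1, 'd2': 1}))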
Example #10
class _P(measures.Measure):
    """
    Basic measure that computes the percentage of documents in the top cutoff results
    that are labeled as relevant. cutoff is a required parameter, and can be provided as
    P@cutoff.

::

    @misc{rijsbergen:1979:ir,
      title={Information Retrieval.},
      author={Van Rijsbergen, Cornelis J},
      year={1979},
      publisher={USA: Butterworth-Heinemann}
    }
    """
    __name__ = 'P'
    NAME = __name__
    PRETTY_NAME = 'Precision at k'
    SHORT_DESC = 'The percentage of documents in the top k results that are relevant.'
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
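Using the same illustrative ranking/qrels conventions as the AP sketch above, Precision@k could be sketched as:

def precision_at_k(ranking, qrels, k, rel=1):
    # Fraction of the top-k retrieved documents judged relevant (missing positions count as non-relevant)
    hits = sum(1 for doc in ranking[:k] if qrels.get(doc, 0) >= rel)
    return hits / k

# Example: 2 of the top 3 documents are relevant -> P@3 = 0.667
print(precision_at_k(['d1', 'd4', 'd2'], {'d1': 1, 'd2': 2}, k=3))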
Example #11
class _INST(measures.Measure):
    """
    INST, a variant of INSQ

::

     @inproceedings{10.1145/2766462.2767728,
       author = {Bailey, Peter and Moffat, Alistair and Scholer, Falk and Thomas, Paul},
       title = {User Variability and IR System Evaluation},
       year = {2015},
       booktitle = {Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval},
       pages = {625–634},
       series = {SIGIR '15},
       url = {https://doi.org/10.1145/2766462.2767728}
     }
    """
    __name__ = 'INST'
    NAME = __name__
    PRETTY_NAME = 'INST'
    SHORT_DESC = 'An improved version of INSQ that better handles when either no documents or all retrieved documents are relevant.'
    SUPPORTED_PARAMS = {
        'T':
        measures.ParamInfo(dtype=float,
                           default=1.0,
                           desc='total desired gain (normalized)'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #12
class _INSQ(measures.Measure):
    """
    INSQ

::

     @inproceedings{Moffat:2012:MMI:2407085.2407092,
       author = {Moffat, Alistair and Scholer, Falk and Thomas, Paul},
       title = {Models and Metrics: IR Evaluation As a User Process},
       booktitle = {Proceedings of the Seventeenth Australasian Document Computing Symposium},
       year = {2012},
       url = {http://doi.acm.org/10.1145/2407085.2407092}
     }
    """
    __name__ = 'INSQ'
    NAME = __name__
    PRETTY_NAME = 'INSQ'
    SHORT_DESC = 'A weighted precision measure based on the conditional probability of the user continuing to the next item.'
    SUPPORTED_PARAMS = {
        'T':
        measures.ParamInfo(dtype=float,
                           default=1.0,
                           desc='total desired gain (normalized)'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #13
class _nDCG(measures.Measure):
    """
    The normalized Discounted Cumulative Gain (nDCG).
    Uses graded labels, rewarding systems that put the highest-graded documents at the top of the ranking.
    It is normalized with respect to the ideal DCG, i.e., the DCG obtained when documents are ranked in descending order of graded label.

::

    @article{Jarvelin:2002:CGE:582415.582418,
      author = {J\"{a}rvelin, Kalervo and Kek\"{a}l\"{a}inen, Jaana},
      title = {Cumulated Gain-based Evaluation of IR Techniques},
      journal = {ACM Trans. Inf. Syst.},
      volume = {20},
      number = {4},
      year = {2002},
      pages = {422--446},
      numpages = {25},
      url = {http://doi.acm.org/10.1145/582415.582418},
    }
    """
    __name__ = 'nDCG'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff': measures.ParamInfo(dtype=int, required=False, desc='ranking cutoff threshold'),
        'dcg': measures.ParamInfo(dtype=str, choices=['log2', 'exp-log2'], default='log2', desc='DCG formulation')
    }
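A sketch of the 'log2' nDCG formulation under the same illustrative conventions; the 'exp-log2' variant would use 2**label - 1 as the gain instead of the raw label:

import math

def dcg(gains):
    # log2-discounted cumulative gain of a list of graded labels
    return sum(g / math.log2(rank + 1) for rank, g in enumerate(gains, start=1))

def ndcg(ranking, qrels, k=None):
    # nDCG: DCG of the run divided by the DCG of the ideal (label-sorted) ranking
    run_gains = [qrels.get(doc, 0) for doc in ranking][:k]
    ideal_gains = sorted(qrels.values(), reverse=True)[:k]
    ideal = dcg(ideal_gains)
    return dcg(run_gains) / ideal if ideal > 0 else 0.0

# Example: graded labels 0..2; the run places the grade-1 document above the grade-2 one.
print(ndcg(['d2', 'd1', 'd3'], {'d1': 2, 'd2': 1, 'd3': 0}))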
Example #14
class _RR(measures.Measure):
    """
    The [Mean] Reciprocal Rank ([M]RR) is a precision-focused measure that scores based on the reciprocal of the rank of the
    highest-ranked relevant document. An optional cutoff can be provided to limit the
    depth explored. rel (default 1) controls which relevance level is considered relevant.

::

    @article{kantor2000trec,
      title={The TREC-5 Confusion Track},
      author={Kantor, Paul and Voorhees, Ellen},
      journal={Information Retrieval},
      volume={2},
      number={2-3},
      pages={165--176},
      year={2000}
    }
    """
    __name__ = 'RR'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=False,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
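A minimal sketch of Reciprocal Rank for one query under the same illustrative conventions:

def reciprocal_rank(ranking, qrels, rel=1, cutoff=None):
    # Reciprocal of the rank of the first document with relevance >= rel (0 if none found)
    depth = ranking[:cutoff] if cutoff else ranking
    for rank, doc in enumerate(depth, start=1):
        if qrels.get(doc, 0) >= rel:
            return 1.0 / rank
    return 0.0

# Example: the first relevant document appears at rank 2 -> RR = 0.5
print(reciprocal_rank(['d9', 'd1', 'd2'], {'d1': 1, 'd2': 1}))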
Example #15
class _INSQ(measures.Measure):
    """
    INSQ, a variant of INST

::

     @inproceedings{Moffat:2015:IAM:2838931.2838938,
       author = {Moffat, Alistair and Bailey, Peter and Scholer, Falk and Thomas, Paul},
       title = {INST: An Adaptive Metric for Information Retrieval Evaluation},
       booktitle = {Proceedings of the 20th Australasian Document Computing Symposium},
       year = {2015},
       url = {http://doi.acm.org/10.1145/2838931.2838938}
     }
    """
    __name__ = 'INSQ'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'T':
        measures.ParamInfo(dtype=float, default=1.0, desc='TODO'),
        'min_rel':
        measures.ParamInfo(dtype=int,
                           default=0,
                           desc='minimum relevance score'),
        'max_rel':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='maximum relevance score'),
    }
Example #16
class _RBP(measures.Measure):
    """
    The Rank-Biased Precision (RBP).

::

     @article{Moffat:2008:RPM:1416950.1416952,
       author = {Moffat, Alistair and Zobel, Justin},
       title = {Rank-biased Precision for Measurement of Retrieval Effectiveness},
       journal = {ACM Trans. Inf. Syst.},
       year = {2008},
       url = {http://doi.acm.org/10.1145/1416950.1416952}
     }
    """
    __name__ = 'RBP'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=False,
                           desc='ranking cutoff threshold'),
        'p':
        measures.ParamInfo(dtype=float, default=0.8, desc='persistence'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            required=False,
            desc=
            'minimum relevance score to be considered relevant (inclusive), or NOT_PROVIDED to use graded relevance'
        )
    }
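A sketch of the binary-relevance form of RBP (i.e., with rel provided); a graded variant would substitute gain values for the 0/1 indicator:

def rbp(ranking, qrels, p=0.8, rel=1):
    # Binary RBP: (1 - p) * sum of p^(rank-1) over the ranks of relevant documents
    return (1 - p) * sum(
        p ** (rank - 1)
        for rank, doc in enumerate(ranking, start=1)
        if qrels.get(doc, 0) >= rel
    )

# Example: relevant documents at ranks 1 and 3 with p = 0.8 -> 0.2 * (1 + 0.64) = 0.328
print(rbp(['d1', 'd4', 'd2'], {'d1': 1, 'd2': 1}))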
Example #17
class _SetF(measures.Measure):
    """
    The Set F measure (SetF); i.e., the harmonic mean of SetP and SetR
    """
    __name__ = 'SetF'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)'),
        'beta': measures.ParamInfo(dtype=float, default=1., desc='relative importance of R to P in the harmonic mean'),
    }
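The set-based measures (SetP, SetR, SetF) treat the result list as an unordered set. A sketch of SetF, assuming retrieved is the result set and qrels maps document ids to grades:

def set_f(retrieved, qrels, rel=1, beta=1.0):
    # Harmonic mean of set precision (SetP) and set recall (SetR), weighted by beta
    relevant = {doc for doc, grade in qrels.items() if grade >= rel}
    hits = len(set(retrieved) & relevant)
    if hits == 0:
        return 0.0
    prec = hits / len(retrieved)   # SetP
    rec = hits / len(relevant)     # SetR
    return (1 + beta ** 2) * prec * rec / (beta ** 2 * prec + rec)

# Example: 2 relevant of 3 retrieved, 4 relevant in total -> P = 2/3, R = 1/2, F1 = 0.571...
print(set_f(['d1', 'd2', 'd9'], {'d1': 1, 'd2': 1, 'd3': 1, 'd4': 1}))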
Example #18
class _SetP(measures.Measure):
    """
    The Set Precision (SetP); i.e., the number of relevant docs divided by the total number retrieved
    """
    __name__ = 'SetP'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)'),
        'relative': measures.ParamInfo(dtype=bool, default=False, desc='calculate the measure using the maximum possible SetP for the provided result size'),
    }
Example #19
class _StRecall(measures.BaseMeasure):
    """
    Subtopic recall (the fraction of subtopics covered by the top k docs)
    """
    __name__ = 'StRecall'
    NAME = __name__
    PRETTY_NAME = 'Subtopic Recall at k'
    SHORT_DESC = 'The percentage of subtopics covered by the top k documents.'
    SUPPORTED_PARAMS = {
        'cutoff': measures.ParamInfo(dtype=int, required=False, desc='ranking cutoff threshold'),
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)'),
    }
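A hedged sketch of subtopic recall, assuming per-subtopic judgments are available as a dict mapping each subtopic id to its own {doc: grade} dict (this data layout is an assumption for illustration):

def subtopic_recall(ranking, subtopic_qrels, k, rel=1):
    # Fraction of the query's subtopics covered by at least one relevant document in the top-k
    top_k = set(ranking[:k])
    covered = sum(
        1 for qrels in subtopic_qrels.values()
        if any(doc in top_k and grade >= rel for doc, grade in qrels.items())
    )
    return covered / len(subtopic_qrels) if subtopic_qrels else 0.0

# Example: the top-2 documents cover 1 of 2 subtopics -> StRecall@2 = 0.5
print(subtopic_recall(['d1', 'd9'], {'s1': {'d1': 1}, 's2': {'d3': 1}}, k=2))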
Example #20
class _AP_IA(measures.BaseMeasure):
    """
    Intent-aware (Mean) Average Precision
    """
    __name__ = 'AP_IA'
    NAME = __name__
    PRETTY_NAME = 'Intent-Aware (Mean) Average Precision'
    SHORT_DESC = 'A version of AP that accounts for multiple possible query intents.'
    SUPPORTED_PARAMS = {
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)'),
        'judged_only': measures.ParamInfo(dtype=bool, default=False, desc='calculate measure using only judged documents (i.e., discard unjudged documents)'),
    }
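Intent-aware measures are commonly computed by evaluating the base measure against each intent's judgments separately and averaging the results, weighted by intent probability. A generic sketch of that pattern (the uniform default weights and the intent_qrels layout are assumptions for illustration):

def intent_aware(base_measure, ranking, intent_qrels, weights=None):
    # base_measure(ranking, qrels) is any per-query measure, e.g. the average_precision sketch above;
    # intent_qrels maps intent id -> {doc: grade}; weights maps intent id -> probability
    intents = list(intent_qrels)
    weights = weights or {i: 1 / len(intents) for i in intents}  # uniform weights by default
    return sum(weights[i] * base_measure(ranking, intent_qrels[i]) for i in intents)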
Example #21
class _Rprec(measures.Measure):
    """
    The precision at R, where R is the number of relevant documents for a given query. Has the cute property that
    it is also the recall at R.

::

    @misc{Buckley2005RetrievalSE,
      title={Retrieval System Evaluation},
      author={Chris Buckley and Ellen M. Voorhees},
      annote={Chapter 3 in TREC: Experiment and Evaluation in Information Retrieval},
      howpublished={MIT Press},
      year={2005}
    }
    """
    __name__ = 'Rprec'
    NAME = __name__
    PRETTY_NAME = 'Precision at R'
    SHORT_DESC = 'Precision at R, where R is the number of relevant documents for a given query.'
    SUPPORTED_PARAMS = {
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
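A sketch of R-precision under the same illustrative conventions:

def r_precision(ranking, qrels, rel=1):
    # Precision at rank R, where R is the number of relevant documents for the query
    relevant = {doc for doc, grade in qrels.items() if grade >= rel}
    R = len(relevant)
    if R == 0:
        return 0.0
    hits = sum(1 for doc in ranking[:R] if doc in relevant)
    return hits / R  # equivalently, the recall at rank R

# Example: R = 2 relevant documents, 1 of them in the top 2 -> Rprec = 0.5
print(r_precision(['d1', 'd9', 'd2'], {'d1': 1, 'd2': 1}))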
Example #22
class _SetR(measures.Measure):
    """
    The Set Recall (SetR); i.e., the number of relevant docs divided by the total number of relevant documents
    """
    __name__ = 'SetR'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)')
    }
Example #23
class _SetAP(measures.Measure):
    """
    The unranked Set AP (SetAP); i.e., SetP * SetR
    """
    __name__ = 'SetAP'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel': measures.ParamInfo(dtype=int, default=1, desc='minimum relevance score to be considered relevant (inclusive)'),
    }
Example #24
class _IPrec(measures.Measure):
    """
    Interpolated Precision at a given recall cutoff. Used for building precision-recall graphs.
    Unlike most measures, where @ indicates an absolute cutoff threshold, here @ sets the recall
    cutoff.
    """
    __name__ = 'IPrec'
    NAME = __name__
    AT_PARAM = 'recall'
    SUPPORTED_PARAMS = {
        'recall':
        measures.ParamInfo(dtype=float, required=True,
                           desc='recall threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
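A sketch of interpolated precision at a recall level: the maximum precision observed at any rank whose recall meets or exceeds the requested level (illustrative, not the library's implementation):

def iprec_at_recall(ranking, qrels, recall, rel=1):
    # Max precision over all ranks where recall >= the requested recall level
    relevant = {doc for doc, grade in qrels.items() if grade >= rel}
    if not relevant:
        return 0.0
    best, hits = 0.0, 0
    for rank, doc in enumerate(ranking, start=1):
        if doc in relevant:
            hits += 1
        if hits / len(relevant) >= recall:
            best = max(best, hits / rank)
    return best

# Example: relevant documents at ranks 1 and 3; rank 1 already reaches recall 0.5 -> IPrec@0.5 = 1.0
print(iprec_at_recall(['d1', 'd9', 'd2'], {'d1': 1, 'd2': 1}, recall=0.5))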
Example #25
class _R(measures.Measure):
    """
    Recall@k (R@k). The fraction of relevant documents for a query that have been retrieved by rank k.

    NOTE: Some tasks define Recall@k as whether any relevant documents are found in the top k results.
    This software follows the TREC convention and refers to that measure as Success@k.
    """
    __name__ = 'R'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
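A sketch of Recall@k under the same illustrative conventions:

def recall_at_k(ranking, qrels, k, rel=1):
    # Fraction of all relevant documents that appear in the top-k results
    relevant = {doc for doc, grade in qrels.items() if grade >= rel}
    if not relevant:
        return 0.0
    hits = sum(1 for doc in ranking[:k] if doc in relevant)
    return hits / len(relevant)

# Example: 1 of 2 relevant documents retrieved in the top 2 -> R@2 = 0.5
print(recall_at_k(['d1', 'd9', 'd2'], {'d1': 1, 'd2': 1}, k=2))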
Example #26
class _Success(measures.Measure):
    """
    1 if a document with at least rel relevance is found in the first cutoff documents, else 0.

    NOTE: Some refer to this measure as Recall@k. This software follows the TREC convention, where
    Recall@k is defined as the proportion of known relevant documents retrieved in the top k results.
    """
    __name__ = 'Success'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='ranking cutoff threshold'),
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        )
    }
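A sketch of Success@k under the same illustrative conventions:

def success_at_k(ranking, qrels, k, rel=1):
    # 1.0 if any document in the top-k has relevance >= rel, else 0.0
    return 1.0 if any(qrels.get(doc, 0) >= rel for doc in ranking[:k]) else 0.0

# Example: the only relevant document is at rank 3 -> Success@2 = 0.0, Success@3 = 1.0
print(success_at_k(['d7', 'd9', 'd1'], {'d1': 1}, k=2))
print(success_at_k(['d7', 'd9', 'd1'], {'d1': 1}, k=3))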
Example #27
class _AP_IA(measures.BaseMeasure):
    """
    Intent-aware (Mean) Average Precision
    """
    __name__ = 'AP_IA'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'rel':
        measures.ParamInfo(
            dtype=int,
            default=1,
            desc='minimum relevance score to be considered relevant (inclusive)'
        ),
        'judged_only':
        measures.ParamInfo(
            dtype=bool,
            default=False,
            desc=
            'calculate measure using only judged documents (i.e., discard unjudged documents)'
        ),
    }
Example #28
class _Judged(measures.Measure):
    """
    Percentage of the top k (cutoff) results that have relevance judgments. Equivalent to P@k with
    a rel threshold lower than any judgment.
    """
    __name__ = 'Judged'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff':
        measures.ParamInfo(dtype=int,
                           required=True,
                           desc='ranking cutoff threshold'),
    }
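A sketch of Judged@k, which asks only whether a judgment exists for each of the top-k results, regardless of its grade:

def judged_at_k(ranking, qrels, k):
    # Fraction of the top-k results that have any relevance judgment (at any grade)
    return sum(1 for doc in ranking[:k] if doc in qrels) / k

# Example: 2 of the top 3 results are judged (one of them judged non-relevant) -> 0.667
print(judged_at_k(['d1', 'd9', 'd3'], {'d1': 1, 'd3': 0}, k=3))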
Example #29
class _ERR(measures.Measure):
    """
    The Expected Reciprocal Rank (ERR) is a precision-focused measure.
    In essence, it is an extension of reciprocal rank that encapsulates both graded relevance and
    a more realistic cascade-based user model of how users browse a ranking.
    """
    __name__ = 'ERR'
    NAME = __name__
    PRETTY_NAME = 'Expected Reciprocal Rank'
    SHORT_DESC = 'An extension of Reciprocal Rank that accounts for both graded relevance and a more realistic user model.'
    SUPPORTED_PARAMS = {
        'cutoff': measures.ParamInfo(dtype=int, required=False, desc='ranking cutoff threshold'),
    }
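A sketch of ERR following the cascade model of Chapelle et al. (2009): each document stops the user with probability (2**grade - 1) / 2**max_grade, and ERR is the expected reciprocal of the stopping rank. The max_rel argument here is an assumption used for gain normalization; it is not a parameter of this class.

def err(ranking, qrels, max_rel, cutoff=None):
    # Expected Reciprocal Rank under a cascade user model
    depth = ranking[:cutoff] if cutoff else ranking
    score, p_reach = 0.0, 1.0  # p_reach: probability the user reaches this rank unsatisfied
    for rank, doc in enumerate(depth, start=1):
        p_stop = (2 ** qrels.get(doc, 0) - 1) / 2 ** max_rel  # probability this document satisfies the user
        score += p_reach * p_stop / rank
        p_reach *= 1 - p_stop
    return score

# Example: grades on a 0..3 scale; a grade-3 document at rank 1 dominates the score.
print(err(['d1', 'd2'], {'d1': 3, 'd2': 1}, max_rel=3))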
Example #30
class _BPM(measures.Measure):
    """
    The Bejeweled Player Model (BPM).

::

     @inproceedings{Zhang:2017:EWS:3077136.3080841,
       author = {Zhang, Fan and Liu, Yiqun and Li, Xin and Zhang, Min and Xu, Yinghui and Ma, Shaoping},
       title = {Evaluating Web Search with a Bejeweled Player Model},
       booktitle = {SIGIR},
       year = {2017},
       url = {http://doi.acm.org/10.1145/3077136.3080841}
     }
    """
    __name__ = 'BPM'
    NAME = __name__
    SUPPORTED_PARAMS = {
        'cutoff': measures.ParamInfo(dtype=int, required=True, desc='ranking cutoff threshold'),
        'T': measures.ParamInfo(dtype=float, default=1., desc='total desired gain (normalized)'),
        'min_rel': measures.ParamInfo(dtype=int, default=0, desc='minimum relevance score'),
        'max_rel': measures.ParamInfo(dtype=int, required=True, desc='maximum relevance score'),
    }