Example #1
0
 def __init__(self, field='contents'):
     """Instantiate the Java ``io.anserini.ltr.feature.DocSize`` class.

     Parameters
     ----------
     field : str
         Passed unchanged to the Java constructor.
     """
     # Resolve the Java class and construct it in one step; the resulting
     # Java object is kept on ``self.extractor``.
     self.extractor = autoclass('io.anserini.ltr.feature.DocSize')(field)
Example #2
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.ProbalitySum`` class.

     Parameters
     ----------
     field : str
         First constructor argument, passed through unchanged.
     qfield : str
         Second constructor argument, passed through unchanged.
     """
     # The class name spelling is exactly what is looked up on the Java side.
     java_feature_cls = autoclass('io.anserini.ltr.feature.ProbalitySum')
     java_feature = java_feature_cls(field, qfield)
     self.extractor = java_feature
Example #3
0
 def __init__(self, path, field, tag, qfield):
     """Instantiate the Java ``io.anserini.ltr.feature.IbmModel1`` class.

     Parameters
     ----------
     path, field, tag, qfield
         Forwarded positionally, in this order, to the Java constructor.
     """
     ibm_model1_cls = autoclass('io.anserini.ltr.feature.IbmModel1')
     # Keep the created Java object so it can be handed on later.
     self.extractor = ibm_model1_cls(
         path,
         field,
         tag,
         qfield,
     )
Example #4
0
 def __init__(self, index_dir, worker_num=1):
     """Create the Java ``io.anserini.ltr.FeatureExtractorUtils`` helper.

     Parameters
     ----------
     index_dir
         First argument forwarded to the Java constructor.
     worker_num : int
         Second argument forwarded to the Java constructor.
     """
     utils_cls = autoclass('io.anserini.ltr.FeatureExtractorUtils')
     java_utils = utils_cls(index_dir, worker_num)
     self.utils = java_utils
     # Starts out empty; nothing is registered at construction time.
     self.feature_name = list()
Example #5
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.NormalizedTfIdf`` class.

     Parameters
     ----------
     field : str
         First constructor argument, passed through unchanged.
     qfield : str
         Second constructor argument, passed through unchanged.
     """
     self.extractor = autoclass('io.anserini.ltr.feature.NormalizedTfIdf')(
         field, qfield
     )
Example #6
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.QueryCoverageRatio`` class.

     Parameters
     ----------
     field : str
         First constructor argument, passed through unchanged.
     qfield : str
         Second constructor argument, passed through unchanged.
     """
     coverage_cls = autoclass('io.anserini.ltr.feature.QueryCoverageRatio')
     # Store the constructed Java object on the instance.
     self.extractor = coverage_cls(field, qfield)
Example #7
0
"""

import logging
from typing import Dict, List, Optional, Union

from ._base import Document, JQuery, JQueryGenerator
from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap, JString
from pyserini.trectools import TrecRun
from pyserini.fusion import FusionMethod, reciprocal_rank_fusion
from pyserini.util import download_prebuilt_index, get_sparse_indexes_info

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Wrappers around Anserini classes
# Both handles are looked up once, when this module is imported.
JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher')
# ``$Result`` is the Java binary name of the ``Result`` class nested in
# ``SimpleSearcher``.
JSimpleSearcherResult = autoclass('io.anserini.search.SimpleSearcher$Result')


class SimpleSearcher:
    """Python-side handle on Anserini's ``SimpleSearcher``.

    Parameters
    ----------
    index_dir : str
        Path to Lucene index directory.
    """

    def __init__(self, index_dir: str):
        # Remember the path as given before touching the Java side.
        self.index_dir = index_dir
        # The path is wrapped in ``JString`` before being handed to the
        # Java constructor; the resulting Java object lives on ``self.object``.
        java_index_dir = JString(index_dir)
        self.object = JSimpleSearcher(java_index_dir)
Example #8
0
 def __init__(self):
     """Instantiate the Java ``io.anserini.ltr.MaxMinRatioPooler`` class.

     The Java constructor is called without arguments and the resulting
     object is stored on ``self.extractor``.
     """
     self.extractor = autoclass('io.anserini.ltr.MaxMinRatioPooler')()
Example #9
0
 def __init__(self, sublinear, pooler, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.TfIdfStat`` class.

     Parameters
     ----------
     sublinear
         Wrapped in a ``java.lang.Boolean`` before being passed on.
     pooler
         Object whose ``extractor`` attribute is passed as the second
         constructor argument.
     field : str
         Third constructor argument, passed through unchanged.
     qfield : str
         Fourth constructor argument, passed through unchanged.
     """
     tfidf_stat_cls = autoclass('io.anserini.ltr.feature.TfIdfStat')
     java_boolean_cls = autoclass('java.lang.Boolean')
     # Box the Python value explicitly as a Java Boolean object.
     boxed_sublinear = java_boolean_cls(sublinear)
     self.extractor = tfidf_stat_cls(
         boxed_sublinear, pooler.extractor, field, qfield
     )
Example #10
0
 def __init__(self):
     """Instantiate the Java ``io.anserini.ltr.MedianPooler`` class.

     The Java constructor is called without arguments and the resulting
     object is stored on ``self.extractor``.
     """
     median_pooler_cls = autoclass('io.anserini.ltr.MedianPooler')
     java_pooler = median_pooler_cls()
     self.extractor = java_pooler
Example #11
0
 def __init__(self):
     """Instantiate the Java ``io.anserini.ltr.ConfidencePooler`` class.

     The Java constructor is called without arguments and the resulting
     object is stored on ``self.extractor``.
     """
     self.extractor = autoclass('io.anserini.ltr.ConfidencePooler')()
Example #12
0
 def __init__(self, gap=8, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.OrderedQueryPairs`` class.

     Parameters
     ----------
     gap : int
         First constructor argument, passed through unchanged.
     field : str
         Second constructor argument, passed through unchanged.
     qfield : str
         Third constructor argument, passed through unchanged.
     """
     ordered_pairs_cls = autoclass('io.anserini.ltr.feature.OrderedQueryPairs')
     java_feature = ordered_pairs_cls(gap, field, qfield)
     self.extractor = java_feature
Example #13
0
 def qld(mu=1000):
     """Return a new Lucene ``LMDirichletSimilarity`` constructed with ``mu``.

     Parameters
     ----------
     mu
         Sole argument passed to the Java constructor.
     """
     similarity_cls = autoclass(
         'org.apache.lucene.search.similarities.LMDirichletSimilarity'
     )
     return similarity_cls(mu)
Example #14
0
 def bm25(k1=0.9, b=0.4):
     """Return a new Lucene ``BM25Similarity`` constructed with ``k1`` and ``b``.

     Parameters
     ----------
     k1
         First argument passed to the Java constructor.
     b
         Second argument passed to the Java constructor.
     """
     similarity_cls = autoclass(
         'org.apache.lucene.search.similarities.BM25Similarity'
     )
     return similarity_cls(k1, b)
Example #15
0
 def __init__(self, qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.QueryLength`` class.

     Parameters
     ----------
     qfield : str
         Passed unchanged to the Java constructor.
     """
     self.extractor = autoclass('io.anserini.ltr.feature.QueryLength')(qfield)
Example #16
0
 def __init__(self, pooler, k1=0.9, b=0.4, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.BM25Stat`` class.

     Parameters
     ----------
     pooler
         Object whose ``extractor`` attribute is passed as the first
         constructor argument.
     k1, b
         Passed through unchanged as the second and third arguments.
     field : str
         Fourth constructor argument, passed through unchanged.
     qfield : str
         Fifth constructor argument, passed through unchanged.
     """
     bm25_stat_cls = autoclass('io.anserini.ltr.feature.BM25Stat')
     # The Java side receives the pooler's wrapped object, not the Python
     # wrapper itself.
     self.extractor = bm25_stat_cls(
         pooler.extractor, k1, b, field, qfield
     )
Example #17
0
 def __init__(self, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.SumMatchingTF`` class.

     Parameters
     ----------
     field : str
         First constructor argument, passed through unchanged.
     qfield : str
         Second constructor argument, passed through unchanged.
     """
     sum_matching_tf_cls = autoclass('io.anserini.ltr.feature.SumMatchingTF')
     java_feature = sum_matching_tf_cls(field, qfield)
     self.extractor = java_feature
Example #18
0
 def __init__(self, pooler, field='contents', qfield='analyzed'):
     """Instantiate the Java ``io.anserini.ltr.feature.DfrGl2Stat`` class.

     Parameters
     ----------
     pooler
         Object whose ``extractor`` attribute is passed as the first
         constructor argument.
     field : str
         Second constructor argument, passed through unchanged.
     qfield : str
         Third constructor argument, passed through unchanged.
     """
     dfr_gl2_stat_cls = autoclass('io.anserini.ltr.feature.DfrGl2Stat')
     self.extractor = dfr_gl2_stat_cls(pooler.extractor, field, qfield)
Example #19
0
 def __init__(self, filename, tag):
     """Instantiate the Java ``io.anserini.ltr.feature.RunList`` class.

     Parameters
     ----------
     filename
         First constructor argument, passed through unchanged.
     tag
         Second constructor argument, passed through unchanged.
     """
     run_list_cls = autoclass('io.anserini.ltr.feature.RunList')
     self.extractor = run_list_cls(filename, tag)
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import json
import math
import struct
import subprocess
import sys
sys.path.append('.')
from multiprocessing.pool import ThreadPool
from pyserini.pyclass import autoclass, JString
from typing import List, Set, Dict

# Handles to Java classes, looked up once via ``autoclass`` when this module
# is imported.
JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher')
JIndexReader = autoclass('io.anserini.index.IndexReaderUtils')
JTerm = autoclass('org.apache.lucene.index.Term')

# Module-level numeric constants. The code that reads them is not visible in
# this excerpt, so their exact roles are not documented here.
# NOTE(review): the names suggest translation-model smoothing/probability
# parameters — confirm against the consuming code.
SELF_TRAN = 0.35
MIN_PROB = 0.0025
LAMBDA_VALUE = 0.3
MIN_COLLECT_PROB = 1e-9


def normalize(scores: List[float]):
    low = min(scores)
    high = max(scores)
    width = high - low
    if width != 0:
        return [(s - low) / width for s in scores]