def __init__(self, field='contents'):
    """Create the wrapped Java ``DocSize`` feature extractor.

    Parameters
    ----------
    field
        Forwarded unchanged as the sole argument of the Java constructor.
    """
    doc_size_cls = autoclass('io.anserini.ltr.feature.DocSize')
    self.extractor = doc_size_cls(field)
def __init__(self, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``ProbalitySum`` feature extractor.

    Parameters
    ----------
    field
        First argument handed to the Java constructor.
    qfield
        Second argument handed to the Java constructor.
    """
    # The class name is spelled exactly as it is looked up on the Java side.
    self.extractor = autoclass('io.anserini.ltr.feature.ProbalitySum')(
        field, qfield
    )
def __init__(self, path, field, tag, qfield):
    """Create the wrapped Java ``IbmModel1`` feature extractor.

    Parameters
    ----------
    path, field, tag, qfield
        Passed positionally, in this order, to the Java constructor.
    """
    ibm_model1_cls = autoclass('io.anserini.ltr.feature.IbmModel1')
    constructor_args = (path, field, tag, qfield)
    self.extractor = ibm_model1_cls(*constructor_args)
def __init__(self, index_dir, worker_num=1):
    """Set up the Java ``FeatureExtractorUtils`` helper and an empty name list.

    Parameters
    ----------
    index_dir
        First argument handed to the Java ``FeatureExtractorUtils`` constructor.
    worker_num
        Second argument handed to the Java constructor (defaults to 1).
    """
    utils_cls = autoclass('io.anserini.ltr.FeatureExtractorUtils')
    self.utils = utils_cls(index_dir, worker_num)
    # Starts out empty; nothing in this constructor populates it.
    self.feature_name = list()
def __init__(self, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``NormalizedTfIdf`` feature extractor.

    Parameters
    ----------
    field
        First argument handed to the Java constructor.
    qfield
        Second argument handed to the Java constructor.
    """
    normalized_tfidf_cls = autoclass('io.anserini.ltr.feature.NormalizedTfIdf')
    self.extractor = normalized_tfidf_cls(field, qfield)
def __init__(self, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``QueryCoverageRatio`` feature extractor.

    Parameters
    ----------
    field
        First argument handed to the Java constructor.
    qfield
        Second argument handed to the Java constructor.
    """
    self.extractor = autoclass('io.anserini.ltr.feature.QueryCoverageRatio')(
        field, qfield
    )
""" import logging from typing import Dict, List, Optional, Union from ._base import Document, JQuery, JQueryGenerator from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap, JString from pyserini.trectools import TrecRun from pyserini.fusion import FusionMethod, reciprocal_rank_fusion from pyserini.util import download_prebuilt_index, get_sparse_indexes_info logger = logging.getLogger(__name__) # Wrappers around Anserini classes JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher') JSimpleSearcherResult = autoclass('io.anserini.search.SimpleSearcher$Result') class SimpleSearcher: """Wrapper class for ``SimpleSearcher`` in Anserini. Parameters ---------- index_dir : str Path to Lucene index directory. """ def __init__(self, index_dir: str): self.index_dir = index_dir self.object = JSimpleSearcher(JString(index_dir))
def __init__(self):
    """Instantiate the Java ``MaxMinRatioPooler`` with its no-arg constructor."""
    pooler_cls = autoclass('io.anserini.ltr.MaxMinRatioPooler')
    self.extractor = pooler_cls()
def __init__(self, sublinear, pooler, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``TfIdfStat`` feature extractor.

    Parameters
    ----------
    sublinear
        Wrapped in a ``java.lang.Boolean`` before being passed on.
    pooler
        Object whose ``extractor`` attribute is handed to the Java constructor.
    field
        Third argument handed to the Java constructor.
    qfield
        Fourth argument handed to the Java constructor.
    """
    tfidf_stat_cls = autoclass('io.anserini.ltr.feature.TfIdfStat')
    java_boolean_cls = autoclass('java.lang.Boolean')
    # Box the flag explicitly so the Java side receives a Boolean object.
    boxed_sublinear = java_boolean_cls(sublinear)
    self.extractor = tfidf_stat_cls(
        boxed_sublinear, pooler.extractor, field, qfield
    )
def __init__(self):
    """Instantiate the Java ``MedianPooler`` with its no-arg constructor."""
    self.extractor = autoclass('io.anserini.ltr.MedianPooler')()
def __init__(self):
    """Instantiate the Java ``ConfidencePooler`` with its no-arg constructor."""
    confidence_pooler_cls = autoclass('io.anserini.ltr.ConfidencePooler')
    self.extractor = confidence_pooler_cls()
def __init__(self, gap=8, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``OrderedQueryPairs`` feature extractor.

    Parameters
    ----------
    gap
        First argument handed to the Java constructor (defaults to 8).
    field
        Second argument handed to the Java constructor.
    qfield
        Third argument handed to the Java constructor.
    """
    ordered_pairs_cls = autoclass('io.anserini.ltr.feature.OrderedQueryPairs')
    self.extractor = ordered_pairs_cls(gap, field, qfield)
def qld(mu=1000):
    """Return a new Lucene ``LMDirichletSimilarity`` built with ``mu``.

    Parameters
    ----------
    mu
        Sole argument handed to the Java constructor (defaults to 1000).

    Returns
    -------
    The freshly constructed Java similarity object.
    """
    similarity_cls = autoclass(
        'org.apache.lucene.search.similarities.LMDirichletSimilarity'
    )
    similarity = similarity_cls(mu)
    return similarity
def bm25(k1=0.9, b=0.4):
    """Return a new Lucene ``BM25Similarity`` built with ``k1`` and ``b``.

    Parameters
    ----------
    k1
        First argument handed to the Java constructor (defaults to 0.9).
    b
        Second argument handed to the Java constructor (defaults to 0.4).

    Returns
    -------
    The freshly constructed Java similarity object.
    """
    similarity_cls = autoclass(
        'org.apache.lucene.search.similarities.BM25Similarity'
    )
    similarity = similarity_cls(k1, b)
    return similarity
def __init__(self, qfield='analyzed'):
    """Create the wrapped Java ``QueryLength`` feature extractor.

    Parameters
    ----------
    qfield
        Forwarded unchanged as the sole argument of the Java constructor.
    """
    self.extractor = autoclass('io.anserini.ltr.feature.QueryLength')(qfield)
def __init__(self, pooler, k1=0.9, b=0.4, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``BM25Stat`` feature extractor.

    Parameters
    ----------
    pooler
        Object whose ``extractor`` attribute is handed to the Java constructor.
    k1
        Second argument handed to the Java constructor (defaults to 0.9).
    b
        Third argument handed to the Java constructor (defaults to 0.4).
    field
        Fourth argument handed to the Java constructor.
    qfield
        Fifth argument handed to the Java constructor.
    """
    bm25_stat_cls = autoclass('io.anserini.ltr.feature.BM25Stat')
    self.extractor = bm25_stat_cls(
        pooler.extractor,
        k1,
        b,
        field,
        qfield,
    )
def __init__(self, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``SumMatchingTF`` feature extractor.

    Parameters
    ----------
    field
        First argument handed to the Java constructor.
    qfield
        Second argument handed to the Java constructor.
    """
    sum_matching_tf_cls = autoclass('io.anserini.ltr.feature.SumMatchingTF')
    self.extractor = sum_matching_tf_cls(field, qfield)
def __init__(self, pooler, field='contents', qfield='analyzed'):
    """Create the wrapped Java ``DfrGl2Stat`` feature extractor.

    Parameters
    ----------
    pooler
        Object whose ``extractor`` attribute is handed to the Java constructor.
    field
        Second argument handed to the Java constructor.
    qfield
        Third argument handed to the Java constructor.
    """
    self.extractor = autoclass('io.anserini.ltr.feature.DfrGl2Stat')(
        pooler.extractor, field, qfield
    )
def __init__(self, filename, tag):
    """Create the wrapped Java ``RunList`` feature extractor.

    Parameters
    ----------
    filename
        First argument handed to the Java constructor.
    tag
        Second argument handed to the Java constructor.
    """
    run_list_cls = autoclass('io.anserini.ltr.feature.RunList')
    self.extractor = run_list_cls(filename, tag)
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import argparse import json import math import struct import subprocess import sys sys.path.append('.') from multiprocessing.pool import ThreadPool from pyserini.pyclass import autoclass, JString from typing import List, Set, Dict JSimpleSearcher = autoclass('io.anserini.search.SimpleSearcher') JIndexReader = autoclass('io.anserini.index.IndexReaderUtils') JTerm = autoclass('org.apache.lucene.index.Term') SELF_TRAN = 0.35 MIN_PROB = 0.0025 LAMBDA_VALUE = 0.3 MIN_COLLECT_PROB = 1e-9 def normalize(scores: List[float]): low = min(scores) high = max(scores) width = high - low if width != 0: return [(s - low) / width for s in scores]