def __init__(self, ranker, prels, anserini, index, topn=10):
    """Set up this expander with the retrieval context it needs.

    Args:
        ranker: retrieval-model identifier used when searching.
        prels: presumably the path to a prior run's results/qrels file
            — confirm against callers.
        anserini: path to the Anserini toolkit installation.
        index: path to the document index to search against.
        topn: number of expansion terms to keep (default 10).
    """
    # This expander always appends (replace=False) by design.
    AbstractQExpander.__init__(self, replace=False, topn=topn)
    self.ranker = ranker
    self.prels = prels
    self.anserini = anserini
    self.index = index
    # Lazily populated elsewhere; starts out unset.
    self.f = None
def build(input, expanders, rankers, metrics, output):
    """Build a gold-standard dataset of winning ('star') expanders per query.

    For each query (row of the evaluation csv), an expander counts as a
    'star' model when it strictly beats the unexpanded baseline on EVERY
    (ranker, metric) pair. Star models are ranked by the sum of their
    squared metric values and written out as method.i / metric.i / query.i
    columns (metric.i holds the square root of that sum).

    Args:
        input: path to the aggregated evaluation csv (one row per query).
            NOTE: parameter name shadows the builtin; kept for interface
            compatibility with existing callers.
        expanders: expander objects, each providing get_model_name().
        rankers: ranker identifiers accepted by ef.get_ranker_name().
        metrics: metric column suffixes (e.g. 'map').
        output: path prefix for the dataset file to produce.

    Returns:
        The filename of the csv that was written.
    """
    base_model_name = AbstractQExpander().get_model_name()
    df = pd.read_csv(input)
    # Keep qid + original query + the baseline's (ranker x metric) columns.
    ds_df = df.iloc[:, :1 + 1 + len(rankers) * len(metrics)]
    ds_df['star_model_count'] = 0
    for idx, row in df.iterrows():
        star_models = dict()
        for model in expanders:
            model_name = model.get_model_name()
            if model_name == base_model_name:
                continue  # never compare the baseline against itself
            beats_baseline = True
            score = 0  # sum of squared metric values; ranks star models
            for ranker in rankers:
                for metric in metrics:
                    v = df.loc[idx, '{}.{}.{}'.format(
                        model_name, ef.get_ranker_name(ranker), metric)]
                    v = v if not pd.isna(v) else 0
                    v0 = df.loc[idx, '{}.{}.{}'.format(
                        base_model_name, ef.get_ranker_name(ranker), metric)]
                    v0 = v0 if not pd.isna(v0) else 0
                    if v <= v0:
                        beats_baseline = False
                        break
                    score += v ** 2
                if not beats_baseline:
                    # No point scanning remaining rankers once disqualified.
                    break
            if beats_baseline:
                star_models[model] = score
        if len(star_models) > 0:
            ds_df.loc[idx, 'star_model_count'] = len(star_models)
            # Best (highest-score) star model first.
            ranked = sorted(star_models.items(),
                            key=lambda item: item[1], reverse=True)
            for i, (star_model, score) in enumerate(ranked):
                ds_df.loc[idx, '{}.{}'.format('method', i + 1)] = \
                    star_model.get_model_name()
                ds_df.loc[idx, '{}.{}'.format('metric', i + 1)] = \
                    math.sqrt(score)
                ds_df.loc[idx, '{}.{}'.format('query', i + 1)] = df.loc[
                    idx, '{}'.format(star_model.get_model_name())]
        else:
            ds_df.loc[idx, 'star_model_count'] = 0
    filename = '{}.{}.{}.dataset.csv'.format(
        output,
        '.'.join([ef.get_ranker_name(r) for r in rankers]),
        '.'.join(metrics))
    ds_df.to_csv(filename, index=False)
    return filename
def get_nrf_expanders():
    """Return every non-relevance-feedback expander, in both modes.

    The result starts with the identity expander (AbstractQExpander),
    followed by each semantic expander in append mode, the same expanders
    with replace=True, and finally one Stem expander per stemmer.
    """
    append_mode = [
        Thesaurus(),
        Wordnet(),
        Word2Vec('../pre/wiki-news-300d-1M.vec'),
        Glove('../pre/glove.6B.300d'),
        Anchor(anchorfile='../pre/anchor_text_en.ttl',
               vectorfile='../pre/wiki-anchor-text-en-ttl-300d.vec'),
        Wiki('../pre/temp_model_Wiki'),
        Tagmee(),
        SenseDisambiguation(),
        Conceptnet(),
    ]
    replace_mode = [
        Thesaurus(replace=True),
        Wordnet(replace=True),
        Word2Vec('../pre/wiki-news-300d-1M.vec', replace=True),
        Glove('../pre/glove.6B.300d', replace=True),
        Anchor(anchorfile='../pre/anchor_text_en.ttl',
               vectorfile='../pre/wiki-anchor-text-en-ttl-300d.vec',
               replace=True),
        Wiki('../pre/temp_model_Wiki', replace=True),
        Tagmee(replace=True),
        SenseDisambiguation(replace=True),
        Conceptnet(replace=True),
    ]
    stemming = [
        Stem(KrovetzStemmer(jarfile='stemmers/kstem-3.4.jar')),
        Stem(LovinsStemmer()),
        Stem(PaiceHuskStemmer()),
        Stem(PorterStemmer()),
        Stem(Porter2Stemmer()),
        Stem(SRemovalStemmer()),
        Stem(Trunc4Stemmer()),
        Stem(Trunc5Stemmer()),
    ]
    # since RF needs index and search output which depends on ir method and topics database, we cannot add this here. Instead, we run it individually
    # RF assumes that there exist abstractqueryexpansion files
    return [AbstractQExpander()] + append_mode + replace_mode + stemming
def __init__(self, replace=False):
    """Initialize the expander by delegating to the base class.

    Args:
        replace: forwarded unchanged to AbstractQExpander.__init__.
    """
    AbstractQExpander.__init__(self, replace=replace)
def __init__(self, vectorfile, replace=False, topn=3):
    """Configure the Word2Vec expander.

    Args:
        vectorfile: path to the pretrained word-vector file.
        replace: forwarded to AbstractQExpander.__init__.
        topn: forwarded to AbstractQExpander.__init__ (default 3).
    """
    AbstractQExpander.__init__(self, replace, topn)
    # NOTE(review): these assign CLASS attributes, shared by every instance —
    # presumably a deliberate load-once cache so the heavy vector file is
    # loaded a single time; the model slot starts empty (None) and is
    # presumably populated lazily elsewhere. Confirm no two instances ever
    # need different vector files before refactoring to instance attributes.
    Word2Vec.vectorfile = vectorfile
    Word2Vec.word2vec = None
def __init__(self, vectorfile, replace=False, topn=3):
    """Configure the Glove expander.

    Args:
        vectorfile: path to the pretrained GloVe vector file.
        replace: forwarded to AbstractQExpander.__init__.
        topn: forwarded to AbstractQExpander.__init__ (default 3).
    """
    AbstractQExpander.__init__(self, replace, topn)
    # NOTE(review): these assign CLASS attributes, shared by every instance —
    # presumably a deliberate load-once cache for the vector file, with the
    # model slot (None) filled lazily elsewhere. Confirm no two instances
    # ever need different vector files before making these per-instance.
    Glove.vectorfile = vectorfile
    Glove.glove = None
def __init__(self, replace=False, topn=3):
    """Initialize the expander by delegating to the base class.

    Args:
        replace: forwarded unchanged to AbstractQExpander.__init__.
        topn: forwarded unchanged to AbstractQExpander.__init__.
    """
    AbstractQExpander.__init__(self, replace=replace, topn=topn)
def get_nrf_expanders():
    """Build the list of enabled non-relevance-feedback expanders.

    Each expander family is toggled by param.ReQue['expanders'][...].
    Enabled semantic expanders are added twice — append mode first, then
    replace=True — matching the original ordering. Imports stay lazy so a
    disabled family (and its heavy dependencies) is never loaded.

    Returns:
        A list of configured expander instances, starting with the identity
        expander AbstractQExpander().
    """
    expanders_list = [AbstractQExpander()]
    # Each family used to be guarded by two byte-identical `if`s (one per
    # mode) with a duplicated import; they are merged here, preserving the
    # exact append order of the original.
    if param.ReQue['expanders']['Thesaurus']:
        from expanders.thesaurus import Thesaurus
        expanders_list.append(Thesaurus())
        expanders_list.append(Thesaurus(replace=True))
    if param.ReQue['expanders']['Wordnet']:
        from expanders.wordnet import Wordnet
        expanders_list.append(Wordnet())
        expanders_list.append(Wordnet(replace=True))
    if param.ReQue['expanders']['Word2Vec']:
        from expanders.word2vec import Word2Vec
        expanders_list.append(Word2Vec('../pre/wiki-news-300d-1M.vec'))
        expanders_list.append(
            Word2Vec('../pre/wiki-news-300d-1M.vec', replace=True))
    if param.ReQue['expanders']['Glove']:
        from expanders.glove import Glove
        expanders_list.append(Glove('../pre/glove.6B.300d'))
        expanders_list.append(Glove('../pre/glove.6B.300d', replace=True))
    if param.ReQue['expanders']['Anchor']:
        from expanders.anchor import Anchor
        expanders_list.append(
            Anchor(anchorfile='../pre/anchor_text_en.ttl',
                   vectorfile='../pre/wiki-anchor-text-en-ttl-300d.vec'))
        expanders_list.append(
            Anchor(anchorfile='../pre/anchor_text_en.ttl',
                   vectorfile='../pre/wiki-anchor-text-en-ttl-300d.vec',
                   replace=True))
    if param.ReQue['expanders']['Wiki']:
        from expanders.wiki import Wiki
        expanders_list.append(Wiki('../pre/temp_model_Wiki'))
        expanders_list.append(Wiki('../pre/temp_model_Wiki', replace=True))
    if param.ReQue['expanders']['Tagmee']:
        from expanders.tagmee import Tagmee
        expanders_list.append(Tagmee())
        expanders_list.append(Tagmee(replace=True))
    if param.ReQue['expanders']['SenseDisambiguation']:
        from expanders.sensedisambiguation import SenseDisambiguation
        expanders_list.append(SenseDisambiguation())
        expanders_list.append(SenseDisambiguation(replace=True))
    if param.ReQue['expanders']['Conceptnet']:
        from expanders.conceptnet import Conceptnet
        expanders_list.append(Conceptnet())
        expanders_list.append(Conceptnet(replace=True))
    # Stemmer-based expanders have no replace variant.
    if param.ReQue['expanders']['KrovetzStemmer']:
        from stemmers.krovetz import KrovetzStemmer
        expanders_list.append(
            Stem(KrovetzStemmer(jarfile='stemmers/kstem-3.4.jar')))
    if param.ReQue['expanders']['LovinsStemmer']:
        from stemmers.lovins import LovinsStemmer
        expanders_list.append(Stem(LovinsStemmer()))
    if param.ReQue['expanders']['PaiceHuskStemmer']:
        from stemmers.paicehusk import PaiceHuskStemmer
        expanders_list.append(Stem(PaiceHuskStemmer()))
    if param.ReQue['expanders']['PorterStemmer']:
        from stemmers.porter import PorterStemmer
        expanders_list.append(Stem(PorterStemmer()))
    if param.ReQue['expanders']['Porter2Stemmer']:
        from stemmers.porter2 import Porter2Stemmer
        expanders_list.append(Stem(Porter2Stemmer()))
    if param.ReQue['expanders']['SRemovalStemmer']:
        from stemmers.sstemmer import SRemovalStemmer
        expanders_list.append(Stem(SRemovalStemmer()))
    if param.ReQue['expanders']['Trunc4Stemmer']:
        from stemmers.trunc4 import Trunc4Stemmer
        expanders_list.append(Stem(Trunc4Stemmer()))
    if param.ReQue['expanders']['Trunc5Stemmer']:
        from stemmers.trunc5 import Trunc5Stemmer
        expanders_list.append(Stem(Trunc5Stemmer()))
    # since RF needs index and search output which depends on ir method and topics corpora, we cannot add this here. Instead, we run it individually
    # RF assumes that there exist abstractqueryexpansion files
    return expanders_list