def initModels(I, modelType):
    """Init models of type modelType, or load them from disk if already computed."""
    model_file_name = modelType + '.p'
    sys.stdout.write("Creating models...")
    sys.stdout.flush()

    if os.path.isfile(model_file_name):
        models = pickle.load(open(model_file_name, "rb"))
    elif modelType == "Vectoriel":
        weighters = [Binary(I), TF(I), TF_IDF(I), Log(I), Log_plus(I)]
        models = [Vectoriel(I, True, w) for w in weighters]
        pickle.dump(models, open(model_file_name, "wb"))
    else:
        raise ValueError("Unknown model type: " + modelType)

    sys.stdout.write("Done!\n")
    return models
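# Usage sketch (illustrative only, not part of the original code): building the
# index with initIndex as done in the constructors below, then creating or
# loading the vector-space models. The index file path is a hypothetical
# placeholder.
#
#   I = initIndex("cacm.txt")
#   models = initModels(I, "Vectoriel")
#   print type(models[0])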
def __init__(self, N, index_file, query_file, relevance_file,
             model_type="Vectoriel", div_K=None, div_N=None, eval_N=20):
    """model_type = Vectoriel | Euclidian_model | Language | Okapi | PageRank |
    Hits | KMeans_diversity | Greedy_diversity | Greedy_diversity_euclidian | MetaModel"""
    self.N = eval_N
    self.Index = initIndex(index_file)

    if model_type == "Vectoriel":
        # Alternative: self.models = initModels(self.Index, model_type)
        self.models = [Vectoriel(self.Index, True, Log_plus(self.Index))]
    elif model_type == "Euclidian_model":
        self.models = [Euclidian_model(self.Index, Log_plus(self.Index))]
    elif model_type == "Language":
        print "Init of Language model"
        self.models = [LanguageModel(self.Index, 0.2)]
    elif model_type == "Okapi":
        self.models = [Okapi(self.Index)]
    elif model_type == "PageRank":
        self.models = [RankModel(self.Index)]
    elif model_type == "Hits":
        self.models = [HitsModel(self.Index)]
    elif model_type == "KMeans_diversity":
        self.models = [KMeans_diversity(self.Index, div_K, div_N)]
    elif model_type == "Greedy_diversity":
        self.models = [Greedy_diversity(self.Index, div_K, div_N)]
    elif model_type == "Greedy_diversity_euclidian":
        print "alpha, N:", div_K, div_N
        self.models = [Greedy_diversity_euclidian(self.Index, alpha=div_K, N=div_N)]
    elif model_type == "MetaModel":
        # Learn a linear combination of three base models
        I = self.Index
        model1 = Vectoriel(I, True, TF_IDF(I))
        model2 = Vectoriel(I, True, Log_plus(I))
        model3 = Okapi(I)
        f1 = FeaturerModel(I, model1)
        f2 = FeaturerModel(I, model2)
        f3 = FeaturerModel(I, model3)
        listFeaturers = FeaturerList([f1, f2, f3])
        metamodel = MetaModel(listFeaturers, I, query_file, relevance_file)
        metamodel.train()
        self.models = [metamodel]

    print type(self.models[0])
    self.query_file = query_file
    self.relevance_file = relevance_file
    self.query_parser = GroundTruthParser(self.query_file, self.relevance_file)
sys.stdout.write("Indexing database...") sys.stdout.flush() if os.path.isfile('Index.p'): I = pickle.load(open("Index.p", "rb")) else: parser = ParserCACM() textRepresenter = PorterStemmer() I = Index(parser, textRepresenter) I.indexation(fname) I.parser = None pickle.dump(I, open("Index.p", "wb")) sys.stdout.write("Done!\n") sys.stdout.flush() sys.stdout.write("Creating weighters...") sys.stdout.flush() if os.path.isfile('Vectoriel.p'): models = pickle.load(open("Models.p", "rb")) else: weighters = [Binary(I), Log_plus(I)] #, TF(I), TF_IDF(I), Log(I)] # Log_plus(I)] models = [Vectoriel(True, w) for w in weighters] pickle.dump(models, open("Models.p", "wb")) sys.stdout.write("Done!\n") queryExample = {'techniqu': 1, 'accelerat': 1} query_results = testQuery(queryExample, models)
def __init__(self, index_file, query_file, relevance_file, model_type="Vectoriel"):
    """model_type = Vectoriel | Language | Okapi | PageRank | Hits | MetaModel | Random"""
    self.Index = initIndex(index_file)

    if model_type == "Vectoriel":
        self.models = initModels(self.Index, model_type)
    elif model_type == "Language":
        print "Init of Language model"
        self.models = [LanguageModel(self.Index, 0.2)]
    elif model_type == "Okapi":
        self.models = [Okapi(self.Index)]
    elif model_type == "PageRank":
        self.models = [RankModel(self.Index)]
    elif model_type == "Hits":
        self.models = [HitsModel(self.Index)]
    elif model_type == "MetaModel":
        # Learn a linear combination of seven base models
        I = self.Index
        model1 = Vectoriel(I, True, TF_IDF(I))
        model2 = Vectoriel(I, True, Log_plus(I))
        model3 = Vectoriel(I, True, Binary(I))
        model4 = Vectoriel(I, True, TF(I))
        model5 = Okapi(I)
        model6 = LanguageModel(I, 0.2)
        model7 = RankModel(I, n=5, K=100, d=.85)
        f1 = FeaturerModel(I, model1)
        f2 = FeaturerModel(I, model2)
        f3 = FeaturerModel(I, model3)
        f4 = FeaturerModel(I, model4)
        f5 = FeaturerModel(I, model5)
        f6 = FeaturerModel(I, model6)
        f7 = FeaturerModel(I, model7)
        listFeaturers = FeaturerList([f1, f2, f3, f4, f5, f6, f7])
        metamodel = MetaModel(listFeaturers, I, query_file, relevance_file)
        metamodel.train()
        self.models = [metamodel]
    elif model_type == "Random":
        self.models = [RandomModel(self.Index)]
    else:
        raise ValueError("Unknown model type: " + model_type)

    print type(self.models[0])
    self.query_file = query_file
    self.relevance_file = relevance_file
    self.query_parser = QueryParser(self.query_file, self.relevance_file)
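# Usage sketch (illustrative only): the enclosing class name is not shown in
# this snippet, so "IRSystem" and the CACM-style file paths below are
# hypothetical placeholders.
#
#   system = IRSystem("cacm.txt", "cacm.qry", "cacm.rel", model_type="Okapi")
#   print type(system.models[0])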
sys.stdout.write("Indexing database...") sys.stdout.flush() if os.path.isfile('Index.p'): I = pickle.load(open("Index.p", "rb")) else: parser = ParserCACM() textRepresenter = PorterStemmer() I = Index(parser, textRepresenter) I.indexation(fname) I.parser = None pickle.dump(I, open("Index.p", "wb")) w1 = TF_IDF(I) model1 = Vectoriel(I, True, w1) w2 = Log_plus(I) model2 = Vectoriel(I, True, w2) w3 = Log(I) model3 = Vectoriel(I, True, w3) model4 = Okapi(I) queryExample = {'techniqu': 1, 'accelerat': 1} f1 = FeaturerModel(I, model1) print "\ndone building f1" f2 = FeaturerModel(I, model2) print "\ndone building f2" f3 = FeaturerModel(I, model3) print "\ndone building f3"
# mu = pr.get_mu()
#
# hts = Hits(N_pgs, N_iters=100)
# hts.randomWalk(A, P, Succ, Index_P, Counter_Index_P)
#
# print "MAX mu ", max(mu)
# print "MAX a ", max(hts.get_a())

#### Graph relevant to query
n = 500
K = 1000
q = None

#o = Okapi(I)
w = TF_IDF(I)
o = Vectoriel(I, True, w)

queryExample = {'techniqu': 1, 'accelerat': 1}
P, Succ, Index_P, Counter_Index_P, N_pgs = select_G_q(n, K, queryExample, o, I)

pr = PageRank(N_pgs, d)
A = get_A(P, Succ, N_pgs)
print P
pr.randomWalk(A)
mu = pr.get_result(Counter_Index_P)

print "MAX mu ", max(mu)
print "Npages", N_pgs, pr.N_pages

print "Random walk for HITS"
# hts = Hits(N_pgs, N_iters=100)
# hts.randomWalk(P, Succ, Index_P)
# a = hts.get_result(Counter_Index_P)
# print a