# ---- Embedded Query Expansion ----
# Hyperparameter grids. Both are currently collapsed to a single setting
# (m fixed at 4; alpha swept over [0, 1] in 0.1 steps — only 0.4 is used below).
m_list = np.linspace(4, 4, num=1)
m = 1
# NOTE(review): "aplpha" is a typo, but the name is kept unchanged in case
# later (unseen) code references it.
interpolated_aplpha_list = np.linspace(0, 1.0, num=11)

word2vec = word2vec_model.word2vec_model(word_emb_path)
embd = EmbeddedBased(query_wordcount, collection, word2vec)
evaluate_model = EvaluateModel(relevance_path)

EQE1 = []
EQE2 = []
# Parenthesized print works identically on Python 2 and 3 and matches the
# call form used elsewhere in this file.
print("Embedded...")

# Embedding-based expansion with fixed hyperparameters (alpha=0.4, m=4),
# then smoothing/interpolation against the background model.
tmp_eqe1 = embd.embedded_query_expansion_ci(0.4, 4)
tmp_eqe2 = embd.embedded_query_expansion_qi(0.4, 4)
tmp_eqe1 = ProcDoc.modeling(tmp_eqe1, background_model, query_lambda)
tmp_eqe2 = ProcDoc.modeling(tmp_eqe2, background_model, query_lambda)
EQE1.append([ProcDoc.dict2np(tmp_eqe1), tmp_eqe1])
EQE2.append([ProcDoc.dict2np(tmp_eqe2), tmp_eqe2])

# Persist the expanded query models. Use context managers so the file handles
# are closed deterministically — the original passed bare open() calls into
# Pickle.dump and leaked both handles.
with open("model/eqe1_10.pkl", "wb") as eqe1_file:
    Pickle.dump(EQE1, eqe1_file, True)
with open("model/eqe2_10.pkl", "wb") as eqe2_file:
    Pickle.dump(EQE2, eqe2_file, True)
# Alternate path: reload previously dumped models instead of recomputing.
'''
EQE1 = Pickle.load(open("model/eqe1_10.pkl", "rb"))
EQE2 = Pickle.load(open("model/eqe2_10.pkl", "rb"))
'''

# query process
print("query ...")
query_docs_point_fb = {}
query_model_fb = {}
mAP_list = []
# ---- Collection (general) language model ----
# Normalize raw collection term counts into a probability distribution.
collection_word_sum = 1.0 * ProcDoc.word_sum(collection)
general_model = {term: count / collection_word_sum
                 for term, count in collection.items()}

# HMMTraingSet
HMMTraingSetDict = ProcDoc.read_relevance_dict()

# ---- Query language model ----
# Read, preprocess, count per-query terms, then build a smoothed unigram model.
query = ProcDoc.query_preprocess(ProcDoc.read_file(query_path))
query_wordcount = {q_id: ProcDoc.word_count(content, {})
                   for q_id, content in query.items()}
query_unigram = ProcDoc.unigram(query_wordcount)
query_model = ProcDoc.modeling(query_unigram, background_model, query_lambda)
'''
for q, w_uni in query_model.items():
    if q in HMMTraingSetDict:
        continue
    else:
        query_model.pop(q, None)
print(len(query_model.keys()))
'''

# query process
print("query ...")
assessment = evaluate.evaluate_model(False)
query_docs_point_fb = {}
query_model_fb = {}
mAP_list = []