# Esempio n. 1 (Example 1)
# 0
# Embedded Query Expansion
# Hyperparameter grids: a single expansion size m (= 4) and eleven
# interpolation weights spanning 0.0 .. 1.0.
m_list = np.linspace(4, 4, num=1)
m = 1
# NOTE(review): "aplpha" is a typo for "alpha"; name kept unchanged in case
# later code references this module-level variable.
interpolated_aplpha_list = np.linspace(0, 1.0, num=11)
# Load pretrained word embeddings used by the expansion model.
word2vec = word2vec_model.word2vec_model(word_emb_path)

embd = EmbeddedBased(query_wordcount, collection, word2vec)
evaluate_model = EvaluateModel(relevance_path)
EQE1 = []
EQE2 = []
print("Embedded...")  # print() call form: works in Python 2 and 3, consistent with the rest of the file
# Embedding-based system (hyperparameter)
# Two expansion variants: candidate-independent (ci) and query-independent (qi),
# both with interpolation weight 0.4 and m = 4 expansion terms.
tmp_eqe1 = embd.embedded_query_expansion_ci(0.4, 4)
tmp_eqe2 = embd.embedded_query_expansion_qi(0.4, 4)
# Smooth the expanded query models against the background collection model.
tmp_eqe1 = ProcDoc.modeling(tmp_eqe1, background_model, query_lambda)
tmp_eqe2 = ProcDoc.modeling(tmp_eqe2, background_model, query_lambda)
# Keep both the dense (numpy) and dict representations of each model.
EQE1.append([ProcDoc.dict2np(tmp_eqe1), tmp_eqe1])
EQE2.append([ProcDoc.dict2np(tmp_eqe2), tmp_eqe2])

# Cache the expanded models to disk; the triple-quoted block right after
# is the matching reload path, kept disabled as a manual toggle.
Pickle.dump(EQE1, open("model/eqe1_10.pkl", "wb"), True)
Pickle.dump(EQE2, open("model/eqe2_10.pkl", "wb"), True)
'''
EQE1 = Pickle.load(open("model/eqe1_10.pkl", "rb"))
EQE2 = Pickle.load(open("model/eqe2_10.pkl", "rb"))
'''
# query process
print("query ...")  # print() call form for Python 2/3 consistency
query_docs_point_fb = {}
query_model_fb = {}
mAP_list = []
# Esempio n. 2 (Example 2)
# 0
# Background (general) language model over the whole collection:
# P(w) = count(w) / total word count.  The 1.0 factor forces float division
# (keeps the ratio correct under Python 2 integer division as well).
collection_word_sum = 1.0 * ProcDoc.word_sum(collection)
general_model = {k: v / collection_word_sum for k, v in collection.items()}

# HMMTraingSet
# Relevance judgments loaded as the HMM training set
# (presumably query id -> relevant document ids; verify in ProcDoc).
HMMTraingSetDict = ProcDoc.read_relevance_dict()

# query model
# Read and preprocess the raw queries, then build per-query word counts.
query = ProcDoc.read_file(query_path)
query = ProcDoc.query_preprocess(query)
query_wordcount = {}  # query id -> word-count dict (filled by the loop below)

for q, q_content in query.items():
    query_wordcount[q] = ProcDoc.word_count(q_content, {})

# Maximum-likelihood unigram model per query, smoothed with the background
# model via query_lambda interpolation.
query_unigram = ProcDoc.unigram(query_wordcount)
query_model = ProcDoc.modeling(query_unigram, background_model, query_lambda)
# NOTE(review): the disabled filter below pops entries from query_model while
# iterating query_model.items() — in Python 3 this raises RuntimeError; build a
# new dict instead if this is ever re-enabled.
'''
for q, w_uni in query_model.items():
	if q in HMMTraingSetDict:
		continue
	else:
		query_model.pop(q, None)

print(len(query_model.keys()))
'''
# query process
print("query ...")
# False presumably selects the non-training assessment split — confirm against
# the evaluate module.
assessment = evaluate.evaluate_model(False)
query_docs_point_fb = {}  # per-query document scores gathered during feedback
query_model_fb = {}       # per-query feedback (updated) language models
mAP_list = []             # mean-average-precision results per parameter setting