Code Example #1
# Required imports; ProcDoc and evaluate are project-local modules, and
# doc_lambda is a module-level smoothing constant (see Code Example #4).
import pickle as Pickle

import numpy as np
import theano
import theano.tensor as T

import ProcDoc
import evaluate


def calculate(pred_relevance, split_idx):
    rel_query_model = pred_relevance
    print(type(rel_query_model))
    print(rel_query_model.shape.eval())  # pred_relevance is a Theano variable
    with open("query_list.pkl", "rb") as fp:
        query_list = Pickle.load(fp)[:split_idx]
    with open("doc_model.pkl", "rb") as fp:
        doc_model = Pickle.load(fp)
    with open("doc_list.pkl", "rb") as fp:
        doc_list = Pickle.load(fp)
    #with open("relevance_model_RM.pkl", "rb") as fp: rel_query_model = Pickle.load(fp)
    #with open("query_relevance_model_RLE.pkl", "rb") as fp: rel_query_model = Pickle.load(fp)

    background_model = ProcDoc.read_background_dict()
    qry_eval = evaluate.evaluate_model(True)

    # Document smoothing: interpolate every document model with the background model.
    for doc_idx in range(doc_model.shape[0]):
        doc_vec = doc_model[doc_idx]
        doc_model[doc_idx] = (1 - doc_lambda) * doc_vec + doc_lambda * background_model

    mAP_list = []
    # Work in log space so query-likelihood ranking reduces to a dot product.
    doc_model = np.log(doc_model)

    for rel_qry_lambda in np.linspace(0., 1., num=11):
        # Query smoothing: reload the unsmoothed query model each iteration,
        # then interpolate it with the predicted relevance model.
        with open("query_model.pkl", "rb") as fp:
            query_model = Pickle.load(fp)[:split_idx]
        X = T.matrix()
        Y = (1 - rel_qry_lambda) * X + rel_qry_lambda * rel_query_model
        f = theano.function([X], Y)
        query_model = f(query_model)
        # Rank all documents for all queries at once (vectorized speedup).
        result = np.argsort(-np.dot(query_model, doc_model.T), axis=1)
        query_docs_ranking = {}
        for q_idx in range(len(query_list)):
            docs_ranking = [doc_list[doc_idx] for doc_idx in result[q_idx]]
            query_docs_ranking[query_list[q_idx]] = docs_ranking

        # Slower per-query alternative, kept for reference:
        # for query_key, query_vec in zip(query_list, query_model):
        #     query_result = np.argsort(-(query_vec * doc_model).sum(axis=1))
        #     query_docs_ranking[query_key] = [doc_list[doc_idx] for doc_idx in query_result]

        mAP = qry_eval.mean_average_precision(query_docs_ranking)
        mAP_list.append(mAP)
    return max(mAP_list)
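
For reference, a minimal NumPy-only sketch of the same interpolation sweep with toy stand-ins for the pickled models (the shapes and random data here are illustrative assumptions, not the project's data):

import numpy as np

rng = np.random.default_rng(0)
# Toy stand-ins: 4 queries and 6 documents over a 10-word vocabulary.
query_model = rng.dirichlet(np.ones(10), size=4)
rel_query_model = rng.dirichlet(np.ones(10), size=4)
log_doc_model = np.log(rng.dirichlet(np.ones(10), size=6))

for rel_qry_lambda in np.linspace(0., 1., num=11):
    # The same element-wise mix the Theano graph above computes.
    mixed = (1 - rel_qry_lambda) * query_model + rel_qry_lambda * rel_query_model
    # Query-likelihood ranking: dot the mixed query model with log p(w|d).
    ranking = np.argsort(-np.dot(mixed, log_doc_model.T), axis=1)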
Code Example #2
    def __init__(self, query_model):
        # Requires: import copy, pickle; ProcDoc is a project-local module.
        self.query_model = copy.deepcopy(query_model)
        self.vocabulary_size = 51253
        smoothing = 0.1
        with open("test_query_list.pkl", "rb") as fp:
            self.query_list = pickle.load(fp)
        with open("doc_list.pkl", "rb") as fp:
            self.doc_list = pickle.load(fp)

        with open("doc_model.pkl", "rb") as fp:
            doc_model = pickle.load(fp)

        self.background_model = ProcDoc.read_background_dict()
        # Smoothing: interpolate each document model with the background model.
        for d_idx, doc_vec in enumerate(doc_model):
            doc_model[d_idx] = ((1 - smoothing) * doc_vec
                                + smoothing * self.background_model)

        self.doc_model = doc_model
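
The per-row smoothing loop above can be collapsed into one vectorized assignment, assuming `doc_model` is a 2-D NumPy array and the background model a length-`vocabulary_size` vector (a sketch, not the project's code):

doc_model = (1 - smoothing) * doc_model + smoothing * self.background_model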
Code Example #3
    def __init__(self, query_model, isSpoken=False):
        # Requires: import copy, pickle; ProcDoc is a project-local module.
        smoothing = 0.0
        with open("test_query_list.pkl", "rb") as fp:
            self.query_list = pickle.load(fp)
        with open("doc_list.pkl", "rb") as fp:
            self.doc_list = pickle.load(fp)
        # Choose between the spoken (ASR) and written document models.
        if isSpoken:
            with open("doc_model_wc_s.pkl", "rb") as fp:
                doc_model = pickle.load(fp)
        else:
            with open("doc_model_wc.pkl", "rb") as fp:
                doc_model = pickle.load(fp)

        background_model = ProcDoc.read_background_dict()
        self.query_model = copy.deepcopy(query_model)
        self.vocabulary_size = 51253
        # Deep-copy so later mutation cannot leak back into the loaded model.
        self.doc_model = copy.deepcopy(doc_model)
        self.background_model = background_model
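
A brief usage sketch: assuming this `__init__` belongs to an evaluation class (the name `Evaluator` here is hypothetical) and the pickled models exist on disk, the flag switches between the written and spoken document collections:

text_run = Evaluator(query_model)                   # written-text documents
spoken_run = Evaluator(query_model, isSpoken=True)  # ASR (spoken) documents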
Code Example #4
doc_lambda = 0.9
#remove_list = ["update_embedded_query_expansion_ci.pkl", "update_embedded_query_expansion_qi.pkl", "collection_embedded.pkl", "query_embedded.pkl", "collection_total_similarity.pkl"]
remove_list = []

document_path = "../Corpus/TDT2/SPLIT_DOC_WDID_NEW"
query_path = "../Corpus/TDT2/QUERY_WDID_NEW_middle"
word_emb_path = "data/word2vec_dict.pkl"
relevance_path = "../Corpus/TDT2/AssessmentTrainSet/AssessmentTrainSet.txt"

# document model
data = ProcDoc.read_file(document_path)
doc_wordcount = ProcDoc.doc_preprocess(data)
doc_unigram = ProcDoc.unigram(dict(doc_wordcount))
doc_mdl, doc_IDs = ProcDoc.dict2np(doc_unigram)
# background model
background_model = ProcDoc.read_background_dict()
background_model_np = ProcDoc.read_background_np()

# document smoothing: interpolate each document model with the background model
for doc_idx in range(doc_mdl.shape[0]):
    doc_vec = doc_mdl[doc_idx]
    doc_mdl[doc_idx] = ((1 - doc_lambda) * doc_vec
                        + doc_lambda * background_model_np)

# general (collection) model: accumulate term counts over all documents
collection = {}
collection_total_similarity = {}
for key, value in doc_wordcount.items():
    for word, count in value.items():
        if word in collection:
            collection[word] += count
        else:
            collection[word] = count
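
The excerpt stops after accumulating raw counts; a hedged sketch of the usual next step, normalizing the counts into a collection-level unigram model (an assumption, since the continuation is not shown):

total_count = float(sum(collection.values()))
collection_model = {word: count / total_count for word, count in collection.items()}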
Code Example #5
import os
import pickle as Pickle

import numpy as np
from keras.models import load_model

import ProcDoc
import evaluate

print('Building a model whose optimizer=adadelta, activation function=softmax')

filepath = "Epochs/with_gamma_single_TF_Spk"
filename = []

#doc_nn.load_weights("NN_Model/DSSM_WEIGHTS_TD_54_double_gamma_shuffle.h5", by_name=True)

emb_model = load_model("NN_Model/RLE_SWLM_E_S.h5")
with open("model/UM/test_query_model_short.pkl", "rb") as fp:
    query_model = Pickle.load(fp)
with open("model/UM/test_query_list_short.pkl", "rb") as fp:
    query_list = Pickle.load(fp)
with open("model/test_query_model_short.pkl", "rb") as fp:
    query_TF = Pickle.load(fp)
with open("model/log_doc_model_s.pkl", "rb") as fp:
    doc_model = Pickle.load(fp)
with open("model/doc_list_s.pkl", "rb") as fp:
    doc_list = Pickle.load(fp)

bg_md = ProcDoc.read_background_dict()
# Mix the original query model with the embedding model's prediction.
query_model = 0.5 * query_model + 0.5 * np.array(emb_model.predict_on_batch(query_model))
# Rescale each query row by its term-frequency mass (scale factor 10).
query_model *= query_TF.sum(axis=1).reshape(-1, 1) * 10

max_mAP = ["", 0]
evl = evaluate.evaluate_model(False)

for dir_item in os.listdir(filepath):
    # join dir path and file name
    dir_item_path = os.path.join(filepath, dir_item)
    # check whether a file exists before reading it
    if os.path.isfile(dir_item_path):
        # NOTE: this overrides the loop variable to pin one specific checkpoint
        dir_item_path = "Epochs/with_gamma_single_TF_Spk/54_log_gamma_shuffle_spk_weights-30-0.11.hdf5"
        #if dir_item_path.find("log") == -1: continue
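        # The original loop body is truncated here. A hedged sketch of a
        # typical continuation (an assumption, not the project's exact code):
        # load the checkpoint, rebuild the query model, re-rank, and keep
        # the best-scoring checkpoint.
        emb_model.load_weights(dir_item_path)
        with open("model/UM/test_query_model_short.pkl", "rb") as fp:
            qry = Pickle.load(fp)
        qry = 0.5 * qry + 0.5 * np.array(emb_model.predict_on_batch(qry))
        ranking = np.argsort(-np.dot(qry, doc_model.T), axis=1)
        query_docs_ranking = {query_list[q]: [doc_list[d] for d in ranking[q]]
                              for q in range(len(query_list))}
        mAP = evl.mean_average_precision(query_docs_ranking)
        if mAP > max_mAP[1]:
            max_mAP = [dir_item_path, mAP]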