def __init__(self, p, d, q, qrels, out_dir, rank_dir, method, is_binary=True):
    """
    Initialise the hybrid optimisation thread for one query.

    Loads the first-round estimates and the rank matrix of the query, then
    pre-computes, per run, the RBP score and the per-rank lookup tables.

    :param p: rbp parameter (persistence)
    :param d: evaluation depth, e.g. 1000
    :param q: query id
    :param qrels: judgments of the query; used here as a mapping from
        document id to relevance grade
    :param out_dir: output dir holding the ``<q>-prob.txt`` estimates
    :param rank_dir: rank mat dir holding ``<q>-rank.txt``
    :param method: (opt_score or opt_doc, depth), forwarded to
        ``HybridOpt.get_dir_str``
    :param is_binary: when True, any positive grade counts as gain 1;
        otherwise the grade itself is used as the gain
    """
    threading.Thread.__init__(self)
    self._method = method
    self._k = d
    self._q = q
    self._qrel = qrels
    self._p = p
    self._binary = is_binary
    # Get the first round evaluation and the number of fitting methods
    # considered.
    self._estimate, self._mnum = futils.read_csv_to_dict(
        HybridOpt.get_dir_str(out_dir, method) + str(q) + "-prob.txt",
        is_prob=True)
    self._rank_mat, self._runnum = futils.read_csv_to_dict(
        rank_dir + str(q) + "-rank.txt", is_prob=False)
    self.res = None  # optimization results, weighting parameters
    self._mnum -= 1  # remove constant method
    self._doc_rel = np.zeros((self._k, self._runnum, self._mnum))
    self._rbp = np.zeros(self._runnum)
    self._bg_rbp = np.zeros((self._k, self._runnum))

    # Load the rank matrix. ``.items()`` is used instead of the
    # Python-2-only ``.iteritems()`` so the loop runs on Python 2 and 3.
    for doc_id, ranks in self._rank_mat.items():
        rank_vec = np.array(ranks)
        # NOTE(review): a document counts as judged as soon as it appears
        # in the qrels, relevant or not -- confirm this matches the
        # intended nesting of the original code.
        judged = doc_id in self._qrel
        gain = 0
        if judged and self._qrel[doc_id] > 0:
            gain = 1 if self._binary else self._qrel[doc_id]

        # Skip documents that no system retrieved.
        if not (min(rank_vec) < self._k and max(rank_vec) > -1):
            continue

        for run_idx, rank in enumerate(rank_vec):
            if not 0 <= rank < self._k:
                continue
            contribution = gain * np.power(self._p, rank)
            self._rbp[run_idx] += contribution
            self._bg_rbp[rank, run_idx] = contribution
            if judged:
                # The first column of the estimate row is dropped,
                # matching the ``self._mnum -= 1`` above.
                self._doc_rel[rank, run_idx, :] = np.array(
                    self._estimate[doc_id][1:])
def get_doc_prob(out_dir, res, q, depth):
    """
    Append the optimisation results of one query to the summary files.

    Three files in the directory ``HybridOpt.get_dir_str(out_dir, (depth, -1))``
    are opened in append mode:

    * ``rmse.txt``: one line ``q,fun_0,fun_1,...`` with ``res[i].fun``
    * ``param.txt``: one line ``q,w,...`` with every entry of every ``res[i].x``
    * ``<q>-prob.txt``: one line per document, ``doc, v_0, v_1, ...`` where
      ``v_i`` is the dot product of ``res[i].x`` with the document's
      estimates read from the ``(depth, i)`` directory (first column dropped)

    :param out_dir: output dir
    :param res: sequence of optimisation results exposing ``.fun`` and ``.x``
    :param q: query id
    :param depth: depth component of the directory key
    :return: None
    """
    summary_dir = HybridOpt.get_dir_str(out_dir, (depth, -1))

    with open(summary_dir + "rmse.txt", 'a') as fout:
        fields = [str(q)] + ["{:.4f}".format(r.fun) for r in res]
        fout.write(",".join(fields).strip() + "\n")

    with open(summary_dir + "param.txt", 'a') as fout:
        fields = [str(q)]
        for r in res:
            fields.extend("{:.4f}".format(w) for w in r.x)
        fout.write(",".join(fields).strip() + "\n")

    # defaultdict(list) already creates the empty list on first access.
    doc_prob = defaultdict(list)
    for i, r in enumerate(res):
        curr_dict, _ = futils.read_csv_to_dict(
            HybridOpt.get_dir_str(out_dir, (depth, i)) + str(q) + "-prob.txt",
            is_prob=True)
        # ``.items()`` instead of the Python-2-only ``.iteritems()``.
        for doc_id, row in curr_dict.items():
            doc_prob[doc_id].append(np.dot(r.x, np.array(row[1:])))

    with open(summary_dir + str(q) + "-prob.txt", "a") as fout:
        for doc_id, values in doc_prob.items():
            fields = [str(doc_id)] + ["{:.4f}".format(v) for v in values]
            fout.write(", ".join(fields).strip() + "\n")
def __init__(self, p, d, q, qrelname, fitted_vec, rank_dir, method, out_dir,
             is_binary=True):
    """
    Initialise the opt thread for one query and one fitting method.

    Loads the judgments and the rank matrix of the query, then pre-computes,
    per run, the RBP score and the per-rank lookup tables.

    :param p: persistence value
    :param d: considered pooling depth
    :param q: qid
    :param qrelname: qrel name, passed to ``Qrel``
    :param fitted_vec: fitted vector for the method; indexed with the rank
        array of each document (assumed to be a numpy array -- TODO confirm)
    :param rank_dir: dir of rank mat, holding ``<q>-rank.txt``
    :param method: method idx, used in the output file names
    :param out_dir: output dir
    :param is_binary: when True, any positive grade counts as gain 1;
        otherwise the grade itself is used as the gain
    """
    threading.Thread.__init__(self)
    self._outname = out_dir + "opt-weight-" + str(method) + ".txt"
    self._rmse = out_dir + "opt-rmse-" + str(method) + ".txt"
    self._k = d
    self._q = q
    self._qrel = Qrel(qrelname).get_rel_by_qid(q)
    self._p = p
    rank_mat, self._runnum = futils.read_csv_to_dict(
        rank_dir + str(q) + "-rank.txt", is_prob=False)
    self._rank_bg = fitted_vec
    self._rbp = np.zeros(self._runnum)
    self._bg_vectors = np.zeros((self._k, self._runnum, self._runnum))
    self._bg_rbp = np.zeros((self._k, self._runnum))
    self._binary = is_binary

    # Load the rank matrix. ``.items()`` is used instead of the
    # Python-2-only ``.iteritems()`` so the loop runs on Python 2 and 3.
    for doc_id, ranks in rank_mat.items():
        rank_vec = np.array(ranks)  # convert to np array for processing
        # NOTE(review): a document counts as judged as soon as it appears
        # in the qrels, relevant or not -- confirm this matches the
        # intended nesting of the original code.
        judged = doc_id in self._qrel
        gain = 0
        if judged and self._qrel[doc_id] > 0:
            gain = 1 if self._binary else self._qrel[doc_id]

        # Skip documents that no system retrieved.
        if not (min(rank_vec) < self._k and max(rank_vec) > -1):
            continue

        # Fitted values at the ranks this document got in every run.
        # NOTE(review): negative ranks index from the end of the fitted
        # vector here -- verify that is intended.
        fitted = self._rank_bg[rank_vec]
        for run_idx, rank in enumerate(rank_vec):
            if not 0 <= rank < self._k:
                continue
            contribution = gain * np.power(self._p, rank)
            self._rbp[run_idx] += contribution
            self._bg_rbp[rank, run_idx] = contribution
            if judged:
                # set the fitted vector to judged documents
                self._bg_vectors[rank, run_idx, :] = fitted
def get_doc_prob(qid, out_dir, rank_dir, fitted_dir, m=4):
    """
    Output the final estimation based on the weighting params.

    For every method ``i`` in ``1..m`` the weights are read from
    ``out_dir + "opt-weight-<i>.txt"`` (first column dropped). For every
    query and document the estimate of method ``i`` is
    ``exp(dot(weights, log(gain)))``, where ``gain`` is column ``i + 1`` of
    the query's fitted matrix looked up at the document's ranks (zeros are
    replaced by 1e-6 before the log). One line per document is appended to
    ``out_dir + "<qid>-prob.txt"``.

    :param qid: sequence of query ids; row ``j`` of each weight file is used
        for ``qid[j]`` (NOTE(review): assumes the rows are stored in the
        same order as ``qid`` -- verify against the writer)
    :param out_dir: output dir, same as the previous used one
    :param rank_dir: rank-mat dir
    :param fitted_dir: fitted vector dir
    :param m: number of methods
    :return: None
    """
    # Size the weight tensor from the files themselves rather than from a
    # hard-coded cap of 100 runs; all weight files are expected to have the
    # same shape.
    param_mat = None
    for i in range(m):
        # ndmin=2 keeps a single-row file (one query) two-dimensional.
        curr_mat = np.loadtxt(out_dir + "opt-weight-" + str(i + 1) + ".txt",
                              delimiter=",", dtype=float, ndmin=2)
        weights = curr_mat[:, 1:]
        if param_mat is None:
            param_mat = np.zeros((m, len(qid), weights.shape[1]))
        param_mat[i, :, :] = weights

    for q_idx, curr_qid in enumerate(qid):
        rank_mat, _ = futils.read_csv_to_dict(
            rank_dir + str(curr_qid) + "-rank.txt", is_prob=False)
        fit_mat = np.loadtxt(fitted_dir + str(curr_qid) + ".txt",
                             delimiter=" ", dtype=float)

        doc_prob = {}
        # ``.items()`` instead of the Python-2-only ``.iteritems()``.
        for doc, rank in rank_mat.items():
            rank_idx = np.array(rank)
            estimates = [0] * m
            for i in range(m):
                # Indexing with an array returns a copy, so fit_mat is not
                # modified by the zero replacement below.
                tmp_gain = fit_mat[:, i + 1][rank_idx]
                tmp_gain[tmp_gain == 0] = 10**-6  # avoid log(0)
                estimates[i] = np.exp(
                    np.dot(param_mat[i, q_idx, :], np.log(tmp_gain)))
            doc_prob[doc] = estimates

        with open(out_dir + str(curr_qid) + "-prob.txt", "a") as fout:
            for doc, estimates in doc_prob.items():
                fields = [str(doc)] + ["{:.4f}".format(p) for p in estimates]
                fout.write(", ".join(fields).strip() + "\n")