def __init__(self, p, d, q, qrelname, fitted_vec, rank_dir, method, out_dir,
             is_binary=True):
    """
    Initialise the optimisation thread for a single query.

    Reads the rank matrix ``<rank_dir><q>-rank.txt`` and pre-computes, for
    every run, the accumulated ``p ** rank`` gain together with per-rank
    gain and background-vector tables.

    :param p: persistence value; base of the per-rank weight ``p ** rank``.
    :param d: considered pooling depth; only ranks ``0 <= rank < d`` count.
    :param q: query id.
    :param qrelname: qrel name, handed to ``Qrel``.
    :param fitted_vec: fitted vector for the method; it is indexed with
        whole rank arrays, so it is assumed to be a numpy array.
    :param rank_dir: directory holding the rank matrices.
    :param method: method index, used only to build the output file names.
    :param out_dir: output directory prefix (concatenated as-is).
    :param is_binary: if True every positive relevance grade counts as 1,
        otherwise the grade stored in the qrel is used.
    """
    threading.Thread.__init__(self)
    self._outname = out_dir + "opt-weight-" + str(method) + ".txt"
    self._rmse = out_dir + "opt-rmse-" + str(method) + ".txt"
    self._k = d
    self._q = q
    self._qrel = Qrel(qrelname).get_rel_by_qid(q)
    self._p = p
    tmp_rank_mat, self._runnum = futils.read_csv_to_dict(
        rank_dir + str(q) + "-rank.txt", is_prob=False)
    self._rank_bg = fitted_vec
    self._rbp = np.zeros(self._runnum)
    self._bg_vectors = np.zeros((self._k, self._runnum, self._runnum))
    self._bg_rbp = np.zeros((self._k, self._runnum))
    self._binary = is_binary
    # load the rank matrix
    # ``items()`` (not ``iteritems()``) so this runs on Python 2 and 3.
    for doc_id, ranks in tmp_rank_mat.items():
        rank_vec = np.array(ranks)  # convert to np array for processing.
        is_judged = doc_id in self._qrel
        curr_rel = 0
        if is_judged and self._qrel[doc_id] > 0:
            curr_rel = 1 if self._binary else self._qrel[doc_id]
        # Skip documents that no system retrieved within the pooling depth.
        if not (rank_vec.min() < self._k and rank_vec.max() > -1):
            continue
        # NOTE(review): ranks of -1 appear to mean "not retrieved" (see the
        # ``> -1`` test above); numpy maps them to the last entry of the
        # fitted vector here -- confirm that this is intended.
        bg_row = self._rank_bg[rank_vec]
        for run_idx, rank in enumerate(rank_vec):
            if not 0 <= rank < self._k:
                continue
            gain = curr_rel * np.power(self._p, rank)
            self._rbp[run_idx] += gain
            self._bg_rbp[rank, run_idx] = gain
            if is_judged:
                # set the fitted vector to judged documents
                self._bg_vectors[rank, run_idx, :] = bg_row
def sep_stratum(first_stratum, fname):
    """
    Separate the second stratum out of a two-strata sampled qrel.

    For every query id of the first-stratum qrel, each document that is
    judged in the sampled qrel but absent from the first stratum is written
    to ``fname + "-2s"`` as ``<qid> 0 <doc> <rel>``, one per line.

    :param first_stratum: name of the first-stratum qrel (passed to ``Qrel``).
    :param fname: name of the two-strata sampled qrel; also the prefix of
        the output file.
    :return: None
    """
    first_qrel = Qrel(first_stratum)
    sampled_qrel = Qrel(fname)
    second_stratum = []
    for qid in first_qrel.get_qid():
        first_rel = first_qrel.get_rel_by_qid(qid)
        sampled_rel = sampled_qrel.get_rel_by_qid(qid)
        # ``items()`` (not ``iteritems()``) so this runs on Python 2 and 3.
        for doc, rel in sampled_rel.items():
            if doc not in first_rel:
                second_stratum.append(
                    str(qid) + " 0 " + doc + " " + str(rel))
    # Written only after both qrels were processed, so a failure above
    # never leaves a partial output file behind.
    with open(fname + "-2s", "w") as fout:
        fout.writelines(line + "\n" for line in second_stratum)
def main(argv):
    """
    Run the RBP weight optimisation for every query of a qrel file.

    Options:
      -j, --jfile <qrelfile>      qrel file
      -d, --depth <depth>         depth selecting the fit / output
                                  sub-directories (default 10)
      -c, --collection <name>     collection under ``testcase/``
                                  (default "rob04")
      -h, --help                  print usage and exit

    For each query id the fitted matrix ``<fit_dir><qid>.txt`` is loaded and
    one ``RBPOpt`` worker is started for each of the columns 1..4 (the
    column index doubles as the method index). Once every worker has been
    joined, ``get_doc_prob`` is called for the whole query list.

    :param argv: command-line arguments without the program name.
    """
    usage = '-j <qrelfile> -c <collection> -d <depth> -h help'
    qrelfile = ""
    depth = 10
    collection = "rob04"
    try:
        # A trailing "=" marks a long option that takes a value; without it
        # "--depth 5" would leave "5" as a stray positional argument.
        opts, _ = getopt.getopt(
            argv, "j:d:c:h", ["jfile=", "depth=", "collection=", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--jfile"):
            qrelfile = arg
        elif opt in ("-d", "--depth"):
            depth = int(arg)
        elif opt in ("-c", "--collection"):
            collection = arg
    # Legacy per-collection pooling-depth overrides (tb06 -> 50, tb04 -> 80)
    # were already disabled in this script and remain unused.
    prefix_dir = "testcase/"
    rank_dir = prefix_dir + collection + "/doc_rank/"
    fit_dir = (prefix_dir + collection + "/background_gain/fit/origin/"
               + str(depth) + "/")
    out_dir = (prefix_dir + collection + "/background_gain/opt_score/"
               + str(depth) + "/")
    curr_qrel = Qrel(qrelfile)
    qid = curr_qrel.get_qid()
    workers = []
    p = 0.95
    for q in qid:
        fit_mat = np.loadtxt(fit_dir + str(q) + ".txt",
                             delimiter=" ", dtype=float)
        for method in range(1, 5):
            curr_opt = RBPOpt(p, 1000, q, qrelfile, fit_mat[:, method],
                              rank_dir, method, out_dir)
            curr_opt.start()
            workers.append(curr_opt)
    # Every worker of every query must finish before the final pass.
    for worker in workers:
        worker.join()
    get_doc_prob(qid, out_dir, rank_dir, fit_dir)
def __init__(self, qrelname, d, gname, gidx):
    """
    Set up the Chao92 estimator.

    :param qrelname: qrel name, handed to ``Qrel``.
    :param d: considered pooling depth.
    :param gname: stored as ``self._gname`` when ``gidx`` is non-empty.
    :param gidx: sequence stored as a numpy array in ``self._gidx`` when it
        is non-empty; an empty sequence leaves ``self._isout`` False and
        neither ``_gname`` nor ``_gidx`` is set.
    """
    self._d = d
    qrel = Qrel(qrelname)
    self._qrel = qrel
    self._qid = qrel.get_qid()
    self._isout = False
    # Nothing more to record when no indices were supplied.
    if len(gidx) == 0:
        return
    self._gname = gname
    self._gidx = np.array(gidx)
    self._isout = True
def main(argv):
    """
    Run the hybrid optimisation for every query of a qrel file.

    Options:
      -j, --jfile <qrelfile>      qrel file
      -d, --depth <depth>         depth selecting the output sub-directory
                                  and passed on to the workers (default 10)
      -c, --collection <name>     collection under ``testcase/``
                                  (default "robust")
      -h, --help                  print usage and exit

    For each query id four ``HybridOpt`` workers (indices 0..3) are started
    and joined; their ``res`` attributes are then handed to
    ``get_doc_prob`` together with the query id and the depth.

    :param argv: command-line arguments without the program name.
    """
    usage = '-j <qrelfile> -c <collection> -d <depth> -h help'
    qrelfile = ""
    depth = 10
    collection = "robust"
    try:
        # A trailing "=" marks a long option that takes a value. "runf" was
        # accepted (and ignored) before, so it is kept for compatibility.
        opts, _ = getopt.getopt(
            argv, "j:d:hc:",
            ["runf", "jfile=", "depth=", "collection=", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--jfile"):
            qrelfile = arg
        elif opt in ("-d", "--depth"):
            depth = int(arg)
        elif opt in ("-c", "--collection"):
            collection = arg
    # Legacy per-collection pooling-depth overrides (tb06 -> 50, tb04 -> 80)
    # were already disabled in this script and remain unused.
    prefix_dir = "testcase/"
    rank_dir = prefix_dir + collection + "/doc_rank/"
    out_dir = (prefix_dir + collection
               + "/background_gain/sample_rbp/hybrid/" + str(depth) + "/")
    curr_qrel = Qrel(qrelfile)
    for q in curr_qrel.get_qid():
        # Fresh worker list per query: only this query's threads are joined.
        workers = []
        for i in range(4):
            worker = HybridOpt(0.95, 1000, q, curr_qrel.get_rel_by_qid(q),
                               out_dir, rank_dir, (depth, i))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        w_param = [worker.res for worker in workers]
        get_doc_prob(out_dir, w_param, q, depth)
def main(argv):
    """
    Dump per-document ranks (or rank/relevance data) for a list of runs.

    Options:
      -r, --runlist <file>        text file with one run name per line
      -c, --collection <dir>      directory of the runs; the run name is
                                  appended directly, so the value must end
                                  with a path separator
      -j, --qrelfile <qrelfile>   qrel file
      -d, --depth <depth>         pooling depth, used with -b (default 100)
      -b, --backgain              call ``dump_rel`` into "rank_rel/"
                                  instead of ``dump_rank`` into "doc_rank/"
      -h, --help                  print usage and exit

    :param argv: command-line arguments without the program name.
    """
    usage = ('-r <runlist> -c <collection> -j <qrelfile> '
             '-d <pooling depth> -b backgain -h help')
    runfile = ""
    qrelfile = ""
    collection = ""
    dump_rel = False
    pool_depth = 100
    try:
        # A trailing "=" marks a long option that takes a value.
        opts, _ = getopt.getopt(
            argv, "r:c:j:d:bh",
            ["runlist=", "collection=", "qrelfile=", "depth=",
             "backgain", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--qrelfile"):
            qrelfile = arg
        elif opt in ("-c", "--collection"):
            collection = arg
        elif opt in ("-r", "--runlist"):
            runfile = arg
        elif opt in ("-b", "--backgain"):
            dump_rel = True
        elif opt in ("-d", "--depth"):
            pool_depth = int(arg)
    # Without a run list there is nothing to dump; fail with the usage text
    # rather than with an error from open("").
    if not runfile:
        print(usage)
        sys.exit(2)
    run_dir = collection  # directory of runs
    qrels = Qrel(qrelfile)
    runlist = []
    # Text mode: on Python 3 the names must be str to be joined to run_dir.
    with open(runfile, 'r') as fin:
        for line in fin:
            rname = line.strip()
            if rname:  # ignore blank lines in the run list
                runlist.append(Qres(run_dir + rname))
    # start to dump rank
    dump_rank = DumpRank(runlist=runlist, qrels=qrels)
    if dump_rel:
        dump_rank.dump_rel("rank_rel/", pool_depth)
    else:
        dump_rank.dump_rank("doc_rank/")