Ejemplo n.º 1
0
 def __init__(self,
              p,
              d,
              q,
              qrelname,
              fitted_vec,
              rank_dir,
              method,
              out_dir,
              is_binary=True):
     """
     Set up the optimisation thread for a single query.

     Reads the judgments of query ``q`` and its rank matrix, then fills the
     per-run score vector and the per-rank background structures.

     :param p: persistence value; the gain at rank ``r`` is weighted by
         ``p ** r``.
     :param d: considered pooling depth; only ranks in ``[0, d)`` are used.
     :param q: qid.
     :param qrelname: qrel name, handed to ``Qrel``.
     :param fitted_vec: fitted vector for the method, indexed by rank.
     :param rank_dir: dir of the rank matrices; the file
         ``rank_dir + str(q) + "-rank.txt"`` is read.
     :param method: method idx, only used to build the output file names.
     :param out_dir: output dir, used as a plain string prefix.
     :param is_binary: if True every positively judged document has gain 1,
         otherwise the qrel value itself is used as the gain.
     """
     threading.Thread.__init__(self)
     self._outname = out_dir + "opt-weight-" + str(method) + ".txt"
     self._rmse = out_dir + "opt-rmse-" + str(method) + ".txt"
     self._k = d
     self._q = q
     self._qrel = Qrel(qrelname).get_rel_by_qid(q)
     self._p = p
     tmp_rank_mat, self._runnum = futils.read_csv_to_dict(
         rank_dir + str(q) + "-rank.txt", is_prob=False)
     self._rank_bg = fitted_vec
     self._rbp = np.zeros(self._runnum)
     self._bg_vectors = np.zeros((self._k, self._runnum, self._runnum))
     self._bg_rbp = np.zeros((self._k, self._runnum))
     self._binary = is_binary
     # load the rank matrix
     # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
     for doc_id, raw_ranks in tmp_rank_mat.items():
         ranks = np.array(raw_ranks)  # convert to np array for processing.
         is_judged = doc_id in self._qrel
         gain = 0
         if is_judged and self._qrel[doc_id] > 0:
             gain = 1 if self._binary else self._qrel[doc_id]
         # Only documents with some rank below the pooling depth and some
         # rank above -1 contribute (retrieved by one of the systems).
         if not (min(ranks) < self._k and max(ranks) > -1):
             continue
         # NOTE(review): negative ranks index fitted_vec from its end here;
         # presumably -1 marks "not retrieved" -- confirm this is intended.
         fitted_row = self._rank_bg[ranks]
         for run_idx, rank in enumerate(ranks):
             if not 0 <= rank < self._k:
                 continue
             weighted_gain = gain * np.power(self._p, rank)
             self._rbp[run_idx] += weighted_gain
             self._bg_rbp[rank, run_idx] = weighted_gain
             if is_judged:
                 # set the fitted vector to judged documents
                 self._bg_vectors[rank, run_idx, :] = fitted_row
Ejemplo n.º 2
0
def sep_stratum(first_stratum, fname):
    """
    Separate the second stratum out of a two-strata sampled qrels file.

    For every qid of ``first_stratum``, each judgment in ``fname`` whose
    document is not judged in ``first_stratum`` is kept.  The kept judgments
    are written to ``fname + "-2s"``, one ``<qid> 0 <doc> <rel>`` per line.

    :param first_stratum: qrel name of the first stratum.
    :param fname: qrel name of the two-strata sampled qrels; also the prefix
        of the output file name.
    :return: None
    """
    first_qrel = Qrel(first_stratum)
    sampled_qrel = Qrel(fname)
    qrel_str = []
    for qid in first_qrel.get_qid():
        curr_qrel = first_qrel.get_rel_by_qid(qid)
        curr_sampled_qrel = sampled_qrel.get_rel_by_qid(qid)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for doc, rel in curr_sampled_qrel.items():
            if doc not in curr_qrel:
                qrel_str.append(" ".join([str(qid), "0", doc, str(rel)]))
    with open(fname + "-2s", "w") as fout:
        fout.writelines(line + "\n" for line in qrel_str)
Ejemplo n.º 3
0
def main(argv):
    """
    Run the RBP optimisation for every query of a qrel file.

    Options:
      -j/--jfile <qrelfile>      qrel file to read
      -d/--depth <depth>         depth used in the fit/output dir names
                                 (default 10)
      -c/--collection <name>     collection dir under "testcase/"
                                 (default "rob04")
      -h/--help                  print usage and exit

    For each qid, four ``RBPOpt`` threads (method idx 1..4, each given the
    matching column of the fitted matrix) are started and joined; afterwards
    ``get_doc_prob`` is called once for all qids.

    :param argv: command-line arguments without the program name.
    """
    usage = '-j <qrelfile> -c <collection> -d <depth> -h help'
    qrelfile = ""
    depth = 10
    collection = "rob04"
    try:
        # A trailing "=" is what makes a long option accept a value.
        opts, _ = getopt.getopt(argv, "j:d:c:h",
                                ["jfile=", "depth=", "collection=", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--jfile"):
            qrelfile = arg
        elif opt in ("-d", "--depth"):
            depth = int(arg)
        elif opt in ("-c", "--collection"):
            collection = arg

    prifix_dir = "testcase/"
    rank_dir = prifix_dir + collection + "/doc_rank/"
    fit_dir = prifix_dir + collection + "/background_gain/fit/origin/" + str(
        depth) + "/"
    out_dir = prifix_dir + collection + "/background_gain/opt_score/" + str(
        depth) + "/"

    curr_qrel = Qrel(qrelfile)
    qid = curr_qrel.get_qid()
    p = 0.95
    for q in qid:
        fit_mat = np.loadtxt(fit_dir + str(q) + ".txt",
                             delimiter=" ",
                             dtype=float)
        # Fresh list per query so finished threads are not joined again.
        workers = []
        for i in range(1, 5):
            # The pooling depth handed to RBPOpt is fixed at 1000 here;
            # ``depth`` only selects the fit/output directories.
            curr_opt = RBPOpt(p, 1000, q, qrelfile, fit_mat[:, i], rank_dir, i,
                              out_dir)
            curr_opt.start()
            workers.append(curr_opt)
        for worker in workers:
            worker.join()
    get_doc_prob(qid, out_dir, rank_dir, fit_dir)
Ejemplo n.º 4
0
 def __init__(self, qrelname, d, gname, gidx):
     """
     Initialise the Chao92 estimator.

     :param qrelname: qrel name, handed to ``Qrel``.
     :param d: considered pooling depth.
     :param gname: stored as ``_gname`` when ``gidx`` is non-empty.
     :param gidx: sequence stored as a numpy array in ``_gidx`` when it is
         non-empty; an empty ``gidx`` leaves ``_isout`` False and sets
         neither ``_gname`` nor ``_gidx``.
     """
     self._d = d
     qrel = Qrel(qrelname)
     self._qrel = qrel
     self._qid = qrel.get_qid()
     self._isout = False
     if len(gidx) == 0:
         # Nothing to output: keep the flag off and skip the g* attributes.
         return
     self._gname = gname
     self._gidx = np.array(gidx)
     self._isout = True
Ejemplo n.º 5
0
def main(argv):
    """
    Run the hybrid optimisation for every query of a qrel file.

    Options:
      -j/--jfile <qrelfile>      qrel file to read
      -d/--depth <depth>         depth used in the output dir name and
                                 passed on to the workers (default 10)
      -c/--collection <name>     collection dir under "testcase/"
                                 (default "robust")
      -h/--help                  print usage and exit

    For each qid, four ``HybridOpt`` threads are started and joined, their
    ``res`` attributes are collected, and ``get_doc_prob`` is called with
    them.

    :param argv: command-line arguments without the program name.
    """
    usage = '-j <qrelfile> -c <collection> -d <depth> -h help'
    qrelfile = ""
    depth = 10
    collection = "robust"

    try:
        # A trailing "=" is what makes a long option accept a value.
        # "runf" stays accepted (and ignored) so existing invocations that
        # pass it keep working.
        opts, _ = getopt.getopt(
            argv, "j:d:hc:",
            ["runf", "jfile=", "depth=", "collection=", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--jfile"):
            qrelfile = arg
        elif opt in ("-d", "--depth"):
            depth = int(arg)
        elif opt in ("-c", "--collection"):
            collection = arg

    prifix_dir = "testcase/"
    rank_dir = prifix_dir + collection + "/doc_rank/"
    out_dir = prifix_dir + collection + "/background_gain/sample_rbp/hybrid/" + str(
        depth) + "/"

    curr_qrel = Qrel(qrelfile)
    for q in curr_qrel.get_qid():
        # Fresh list per query so finished threads are not joined again.
        workers = []
        for i in range(0, 4):
            worker = HybridOpt(0.95, 1000, q,
                               curr_qrel.get_rel_by_qid(q),
                               out_dir, rank_dir, (depth, i))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        # Results are read only after every worker of this query finished.
        w_param = [worker.res for worker in workers]
        get_doc_prob(out_dir, w_param, q, depth)
Ejemplo n.º 6
0
def main(argv):
    """
    Dump the rank (or rank/relevance) information of a list of runs.

    Options:
      -r/--runlist <file>        file with one run name per line
      -c/--collection <prefix>   string prepended to every run name
      -j/--qrelfile <file>       qrel file (``--jfile`` is accepted too)
      -d/--depth <depth>         pooling depth, default 100; only used
                                 with -b
      -b/--backgain              call ``dump_rel`` into "rank_rel/"
                                 instead of ``dump_rank`` into "doc_rank/"
      -h/--help                  print usage and exit

    :param argv: command-line arguments without the program name.
    """
    usage = ('-r <runlist> -c <collection> -j <qrelfile> '
             '-d <pooling depth> -b backgain -h help')
    runfile = ""
    qrelfile = ""
    collection = ""
    dump_rel = False
    pd = 100
    try:
        # A trailing "=" is what makes a long option accept a value; every
        # name needs its own list entry (adjacent literals are concatenated).
        opts, _ = getopt.getopt(
            argv, "r:c:j:d:bh",
            ["runlist=", "collection=", "qrelfile=", "jfile=", "depth=",
             "backgain", "help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-j", "--jfile", "--qrelfile"):
            qrelfile = arg
        elif opt in ("-c", "--collection"):
            collection = arg
        elif opt in ("-r", "--runlist"):
            runfile = arg
        elif opt in ("-b", "--backgain"):
            dump_rel = True
        elif opt in ("-d", "--depth"):
            pd = int(arg)

    dirstr = collection  # directory prefix of the runs

    qrels = Qrel(qrelfile)
    runlist = []
    # Text mode: the names are concatenated with a str prefix, which fails
    # for bytes lines on Python 3.
    with open(runfile, 'r') as fin:
        for line in fin:
            rname = line.strip()
            if not rname:
                # A blank line would otherwise load the bare prefix as a run.
                continue
            runlist.append(Qres(dirstr + rname))
    # start to dump rank
    dump_rank = DumpRank(runlist=runlist, qrels=qrels)
    if not dump_rel:
        out_pref = "doc_rank/"
        dump_rank.dump_rank(out_pref)
    else:
        out_pref = "rank_rel/"
        dump_rank.dump_rel(out_pref, pd)