Beispiel #1
0
def _run_exp_10():
    """Run experiment 10: disease name in meshheading.

    Creates a new result directory ``res-<timestamp>`` under
    ``cfg.PATHS['vardir']``, parses the topics file for target ``'a'``,
    runs the resulting queries through ``solr.run_queries`` and then calls
    ``utils.run_evaluators`` on the result directory.

    Raises:
        OSError: if the result directory cannot be created (``os.mkdir``
            fails, e.g. because it already exists).
    """
    target = 'a'
    # UTC month/day/hour/minute/second as the experiment id.  '%S' is the
    # standard seconds directive; lowercase '%s' is not documented by Python
    # and its result depends on the platform's C library.
    dt = strftime("%m%d%H%M%S", gmtime())
    resdir = os.path.join(cfg.PATHS['vardir'], 'res-' + dt)
    os.mkdir(resdir)

    # parse topics
    queries_articles = utils.parse_topics(cfg.PATHS['topics'], target)
    # queries_trials = utils.parse_topics(cfg.PATHS['topics'], 't')

    # run queries
    solr.run_queries(queries_articles, resdir, target=target)
    # solr.run_queries(queries_trials, resdir, target='t')

    utils.run_evaluators(resdir)
Beispiel #2
0
def _run_exp_13():
    """Run experiment 13: manually crafted queries.

    Query files are read from ``<vardir>/res-exp13-manual``:

    * ``a<N>.query``     -- plain query for topic N (N in 1..30); if one of
      the selected topics has no such file, an error is logged and the
      function returns without running anything.
    * ``a<N>-cjt.query`` -- optional conjunctive query for topic N; topics
      without this file are skipped.

    When ``cfg.topic`` is set, only that single topic is processed.  After
    both passes the two ranked lists are merged, and the evaluators run if
    ``cfg.evaluate`` is set.
    """
    target = 'a'
    resdir = os.path.join(cfg.PATHS['vardir'], 'res-exp13-manual')

    # Pass 1: plain queries -- every selected topic must have a file.
    plain_queries = []
    for topic_no in range(1, 31):
        selected = not cfg.topic or topic_no == int(cfg.topic)
        if not selected:
            continue
        path = os.path.join(resdir, "a{}.query".format(topic_no))
        if os.path.exists(path):
            with open(path) as handle:
                plain_queries.append({'query': handle.read()})
        else:
            logger.log('ERROR', "query #{} does not exist".format(topic_no),
                       printout=True)
            return
    solr.run_queries(plain_queries, resdir, target=target,
                     save_queries=False)

    # Pass 2: conjunctive ("cjt") queries -- missing files are tolerated,
    # so the topic numbers actually found are tracked alongside the queries.
    cfg.CONF_SOLR['enable_conj_uprank'] = True
    cjt_queries = []
    cjt_topic_nos = []
    for topic_no in range(1, 31):
        selected = not cfg.topic or topic_no == int(cfg.topic)
        if not selected:
            continue
        path = os.path.join(resdir, "a{}-cjt.query".format(topic_no))
        if os.path.exists(path):
            cjt_topic_nos.append(topic_no)
            with open(path) as handle:
                cjt_queries.append({'query': handle.read()})
    solr.run_queries(cjt_queries, resdir, target=target,
                     q_no=cjt_topic_nos, save_queries=False)

    # Merge the two ranked lists, up-ranking the conjunctive results.
    merge_ranked_list(resdir)

    # Evaluate only when requested and only for the article target.
    if cfg.evaluate and target == 'a':
        utils.run_evaluators(resdir)
Beispiel #3
0
def _run_exp_trial():
    """Temporary run producing results for the trials target (``'t'``).

    Creates a new result directory ``res-<timestamp>`` under
    ``cfg.PATHS['vardir']``, then runs the parsed topic queries twice:
    first with ``cfg.CONF_SOLR['enable_conj_uprank']`` disabled, then with
    it enabled.  The two ranked lists are merged afterwards.

    Raises:
        OSError: if the result directory cannot be created (``os.mkdir``
            fails, e.g. because it already exists).
    """
    target = 't'
    # UTC month/day/hour/minute/second as the experiment id.  '%S' is the
    # standard seconds directive; lowercase '%s' is not documented by Python
    # and its result depends on the platform's C library.
    dt = strftime("%m%d%H%M%S", gmtime())
    resdir = os.path.join(cfg.PATHS['vardir'], 'res-' + dt)
    os.mkdir(resdir)

    # run normal (non-conjunctive) queries
    cfg.CONF_SOLR['enable_conj_uprank'] = False
    queries_articles = utils.parse_topics(cfg.PATHS['topics'], target)
    solr.run_queries(queries_articles, resdir, target=target)

    # run conjunctive; topics are parsed again after the flag is flipped
    cfg.CONF_SOLR['enable_conj_uprank'] = True
    queries_articles = utils.parse_topics(cfg.PATHS['topics'], target)
    solr.run_queries(queries_articles, resdir, target=target)

    # merge two ranked lists: up-ranked the one of conjunctive
    merge_ranked_list(resdir, target=target)

    # run evaluators
    # NOTE: target is 't' in this run, so this guard never fires; it is kept
    # so the function mirrors the article runs if the target is switched.
    if cfg.evaluate and target == 'a':
        utils.run_evaluators(resdir)
Beispiel #4
0
def _run_exp_optimize_weights():
    """ ! do not delete this run
    Random search over the weights of the query clauses.

    we have 5 query clauses; disease, gene, variant, demographics, others
    We want to randomly select weights for each, while maintaining the
    priorities of the groups, such that
        mesh:disease, mesh:gene
            > disease, gene, mutation
            > mesh:other
            > other
            > mesh:demographic
    for now, as an intermediate step, the order is as below:
        disease
            > gene
            > mutation
            > mesh:demographic

    Each iteration derives a candidate weight vector from the best one seen
    so far, substitutes it (divided by 100) into the ``<WT1>``..``<WTn>``
    placeholders of the query templates in ``tmpl_dir``, and runs the
    queries.  When ``cfg.evaluate`` is set, the harmonic mean of the two
    values returned by ``utils.run_evaluators`` (infAP, infNDCG) is the
    score; a candidate that beats the best score becomes the new baseline.

    Raises:
        OSError: if the result directory cannot be created or a template
            file cannot be read.
    """
    target = 'a'
    tmpl_dir = 'var/q_tmpl-exp12'
    run = 100
    count_update = 0
    # UTC month/day/hour/minute/second as the experiment id.  '%S' is the
    # standard seconds directive; lowercase '%s' is not documented by Python
    # and its result depends on the platform's C library.
    dt = strftime("%m%d%H%M%S", gmtime())
    resdir = os.path.join(cfg.PATHS['vardir'], 'res-' + dt)
    os.mkdir(resdir)

    # History of starting points; only the LAST assignment takes effect.
    # prev_weight = random.sample(range(0, 300), 7)
    # below is for exp11, do not remove
    prev_weight = [61.7131052045164, 139.5433048356918, 177.81197097303223,
                   92.7144638040653, 95.5334, 224.83844597239275,
                   583.6807943285283, 24.860191103787024]
    # below is for exp12
    prev_weight = [60.711201641273405, 4.880055149158676, 279.26698225881864,
                   10.125666977818522, 192.75185006506513, 76.04244782933952,
                   253.47416863967885, 212.58225480704252]
    prev_weight = [23.948835757798317, 13.309514268605971, 284.15669468723354,
                   1.1630530198432623, 155.01554494126165, 37.0197502834617,
                   277.45222362827445, 227.85804114860733]
    # [-2.2757592772051787, 4.164624621483287, 99.31682681787032, 171.97563793280884, 155.45324625453765, 75.8805921891793, 357.13763872425716, 297.9524237215415]

    n_weights = len(prev_weight)
    curr_weight = deepcopy(prev_weight)
    variation = 5
    best_score = 0
    # topics whose templates are used for the optimization
    q_no = [4, 8, 12, 16, 20, 24, 28]

    for i in range(run):
        logger.log('INFO', 'opt running - #{}'.format(i+1), printout=True)
        if i == 0:
            pass  # first run scores the starting weights unchanged
        elif i % 10 == 1:  # occasionally generate totally random weights
            curr_weight = random.sample(range(0, 300), n_weights)
        elif i % 10 == 2:  # larger variation
            curr_weight = [max(0, random.gauss(wt, variation * 10))
                           for wt in prev_weight]
        elif i % 5 == 3:  # just change one weight
            curr_weight = deepcopy(prev_weight)
            idx = random.randint(0, len(curr_weight)-1)
            # clamp at 0 like every other branch so no weight goes negative
            curr_weight[idx] = max(0, random.gauss(curr_weight[idx],
                                                   variation))
        else:
            # randomize curr_weight
            curr_weight = [max(0, random.gauss(wt, variation))
                           for wt in prev_weight]

        # Fill the weight placeholders of each selected topic template.
        queries = []
        for topic_no in q_no:
            tmpl_path = os.path.join(tmpl_dir,
                                     'a{}.template'.format(topic_no))
            with open(tmpl_path) as f:
                q = f.read()
            for j, wt in enumerate(curr_weight):
                # 100.0 keeps true division for integer weights on Python 2
                q = q.replace('<WT{}>'.format(j+1),
                              str(round(wt / 100.0, 6)))
            queries.append({'query': q})
        # run queries
        solr.run_queries(queries, resdir, target=target, q_no=q_no)

        logger.log('INFO', "previous weight: {}".format(prev_weight),
                   printout=True)
        logger.log('INFO', "random weight: {}".format(curr_weight),
                   printout=True)
        # run evaluators
        if cfg.evaluate:
            infAP, infNDCG = utils.run_evaluators(resdir)
            # Harmonic mean of the two metrics; it is 0 by convention when
            # either metric is 0 (avoids a ZeroDivisionError).
            if infAP == 0 or infNDCG == 0:
                score = 0
            else:
                score = 2.0 / ((1.0 / infAP) + (1.0 / infNDCG))
            if best_score < score:
                count_update += 1
                logger.log('INFO', 'updating weights', printout=True)
                # update best_score and prev_weight
                prev_weight = list(curr_weight)
                best_score = score
    logger.log('INFO', "Optimization finished:", printout=True)
    logger.log('INFO',
               "{} times out of {} runs updated"
               "".format(count_update, run), printout=True)
    logger.log('INFO',
               "best_weights: {}"
               "".format(', '.join([str(x) for x in prev_weight])),
               printout=True)
    logger.log('INFO', "best_score: {}".format(best_score), printout=True)