Exemple #1
0
#   A list of training configurations. Each configuration is a tuple whose
#   first element is the feature name (load_feat_and_labels reads it as cfg[0]
#   and uses it as a sub-directory of data_cfg.feat_root_path).
#   NOTE(review): dotted names such as "MF-e.MF-i" appear to denote several
#   features combined into one configuration -- confirm.
#   NOTE(review): the second and third elements look like a query-list file
#   name and a split/tag name -- confirm against the code consuming train_cfgs.
#train_cfgs=[("MF-e.MF-i.bm25_scores.indri_scores",data_cfg.trn_list_fn,"train")]
#train_cfgs=[("bm25_scores.indri_scores",data_cfg.trn_list_fn,"train"),
#            ("MF-e.MF-i", data_cfg.trn_list_fn, "train")]
# The triple-quoted text below is a bare string expression, not an assignment:
# it keeps an alternative per-feature configuration around without binding
# train_cfgs.
'''
train_cfgs=[("bm25_scores",data_cfg.all_list_fn,"train"),
           ("indri_scores",data_cfg.all_list_fn,"train"),
           ("MF-e",data_cfg.all_list_fn,"train"),
           ("MF-i",data_cfg.all_list_fn,"train")]
'''

# Active configuration: a single entry for the "embed_sim" feature.
train_cfgs = [("embed_sim", data_cfg.trn_list_fn, "train")]

# Identifier of the model to train.
# NOTE(review): "gnb" presumably means Gaussian naive Bayes -- confirm where
# model_type is consumed.
model_type = "gnb"

# Candidate list loaded once, when this module is executed, from the file
# named by data_cfg.cand_lst_fn.
# NOTE(review): presumably one candidate per line -- confirm against
# gen_utils.read_lines_from_text_file.
cand_list = gen_utils.read_lines_from_text_file(data_cfg.cand_lst_fn)


def load_feat_and_labels(qid_list, cfg):
    feat_name = cfg[0]
    feat_root_path = os.path.join(data_cfg.feat_root_path, feat_name)
    ret_feats = []
    ret_labels = []
    for qid in qid_list:
        print "Loading feature: ", qid
        feat_fn = os.path.join(feat_root_path, str(qid) + ".npz")
        if not os.path.exists(feat_fn):
            print "Warning: feature not exist:", qid
            continue

        gt_token = data_utils.load_quaser_gt_by_id(qid)
Exemple #2
0
from boom.modules import Module
from multiprocessing import Pool
from nltk import word_tokenize
from nltk.corpus import stopwords
import utils.gen_utils as gen_utils
import numpy as np

# English stop words from the NLTK stopwords corpus, stored as a set.
stop_words = set(stopwords.words('english'))
# Candidate list loaded once when this module is executed.
# multi_process_helper indexes it with the first value returned by
# model.predict to obtain the predicted label.
candidate_list = gen_utils.read_lines_from_text_file("data/candidate.lst")
# Model object loaded once when this module is executed; multi_process_helper
# calls model.predict on it.
# NOTE(review): the path suggests a linear SVM trained on the MF-e, MF-i,
# bm25_scores and indri_scores features -- confirm.
# NOTE(review): if read_dict_from_pkl unpickles the file, it must only be
# pointed at trusted model files -- confirm the helper's implementation.
model = gen_utils.read_dict_from_pkl(
    "models/MF-e.MF-i.bm25_scores.indri_scores/train/linear_svm/model.pkl")


def multi_process_helper(args):
    q_and_context_list = args[0]
    ret_list = []

    print "In prediction: ", len(q_and_context_list), type(q_and_context_list)
    id = 0

    for q_and_context in q_and_context_list:
        id += 1
        print "In prediction, ind ", id, "/", len(q_and_context_list)

        feat = np.asarray(q_and_context['final_feat'])
        feat = np.expand_dims(feat, axis=0)
        pred_label = candidate_list[model.predict(feat)[0]]

        final_meta = {}
        final_meta['pred'] = pred_label
        final_meta['q_meta'] = q_and_context['question']