Beispiel #1
0
def load_questions(model, rel, hash, qid, dictionary):
    """Group per-document relevance examples by question id.

    Parameters
    ----------
    model : doc2vec-like model exposing ``model.docvecs[index]``.
    rel : sequence of relevance labels, parallel to ``hash`` and ``qid``.
    hash : sequence of document hashes (name kept for callers even though
        it shadows the builtin ``hash`` inside this function).
    qid : sequence of question ids, parallel to ``rel``.
    dictionary : hash -> document-index mapping consumed by
        ``translate_hash``.

    Returns
    -------
    dict mapping question id -> ``q`` instance whose ``a`` matrix gains one
    column and whose ``y`` vector gains one label per matched document.
    """
    questions = {}
    # Walk the three parallel sequences in lockstep instead of indexing.
    for r, h, qi in zip(rel, hash, qid):
        index = translate_hash(h, dictionary)
        # Encode the question id as a 20-bit binary vector repeated 5 times
        # to act as a fixed-size query representation.
        q_vec = [int(a) for a in '{:020b}'.format(int(qi))] * 5
        if index > -1:
            doc_vec = [float(a) for a in model.docvecs[index]]
            if qi in questions:  # direct membership test, no .keys() needed
                new = np.array([np.array(doc_vec)]).T
                questions[qi].a = np.hstack((new, questions[qi].a))
                # NOTE(review): labels are prepended here but appended in
                # load_qs — confirm which ordering is intended.
                questions[qi].y = np.append(r, questions[qi].y)
            else:
                questions[qi] = q(q_vec, doc_vec, r)
    return questions
Beispiel #2
0
def load_qs(model):
    """Parse feature lines and group document vectors by query id.

    Loads the hash -> index mapping and precomputed document vectors, then
    reads up to 1999 feature lines, accumulating per-query document vectors
    and relevance labels into ``q`` objects.

    Parameters
    ----------
    model : word-vector model supporting ``model[word]`` and ``model.vocab``.

    Returns
    -------
    dict mapping query id -> ``q`` instance.
    """
    dictionary = load_doc_hashes("temp_mapper.txt")
    questions = {}
    # Vector dimensionality from an arbitrary vocabulary entry;
    # next(iter(...)) works on both Python 2 and 3 dict views.
    dim = len(model[next(iter(model.vocab))])
    d = MySentences("new_averages.txt")  # precomputed document vectors
    features = MySentences("temp_features.rtData")
    length = len(d)
    queries = {}  # cache: query id -> averaged query vector
    for o, words in enumerate(features, start=1):
        if o == 2000:  # hard cap on the number of feature lines processed
            break
        # Expected token layout:
        #   <rel> qid:<id> ... # <query terms> <url> <hash> ...
        relevancy = int(words[0]) / 5
        qid = int(words[1].split(':')[1])
        i = 2
        while words[i] != "#":  # skip tokens up to the '#' delimiter
            i += 1
        i += 1
        query = []
        while words[i][0:4] != "http":  # query terms end at the URL token
            query.append(words[i])
            i += 1
        url = words[i]  # currently unused; kept for readability
        doc_hash = words[i + 1]  # renamed to avoid shadowing builtin hash()

        if qid in queries:  # direct membership test, no .keys() needed
            q_vec = queries[qid]
        else:
            q_vec = average_vec(query, model, dim)
            queries[qid] = q_vec
        index = translate_hash(doc_hash, dictionary)
        if -1 < index < length:  # chained comparison for the bounds check
            doc_vec = [float(a) for a in d[index]]
            if qid in questions:
                new = np.array([np.array(doc_vec)]).T
                questions[qid].a = np.hstack((new, questions[qid].a))
                questions[qid].y = np.append(questions[qid].y, relevancy)
            else:
                questions[qid] = q(q_vec, doc_vec, relevancy)
    return questions
Beispiel #3
0
def load_qs(model):
    """Parse feature lines and group document vectors by query id.

    Loads the hash -> index mapping and precomputed document vectors, then
    reads up to 1999 feature lines, accumulating per-query document vectors
    and relevance labels into ``q`` objects.

    Parameters
    ----------
    model : word-vector model supporting ``model[word]`` and ``model.vocab``.

    Returns
    -------
    dict mapping query id -> ``q`` instance.
    """
    dictionary = load_doc_hashes("temp_mapper.txt")
    questions = {}
    # Vector dimensionality from an arbitrary vocabulary entry;
    # next(iter(...)) works on both Python 2 and 3 dict views.
    dim = len(model[next(iter(model.vocab))])
    d = MySentences("new_averages.txt")  # precomputed document vectors
    features = MySentences("temp_features.rtData")
    length = len(d)
    queries = {}  # cache: query id -> averaged query vector
    for o, words in enumerate(features, start=1):
        if o == 2000:  # hard cap on the number of feature lines processed
            break
        # Expected token layout:
        #   <rel> qid:<id> ... # <query terms> <url> <hash> ...
        relevancy = int(words[0]) / 5
        qid = int(words[1].split(':')[1])
        i = 2
        while words[i] != "#":  # skip tokens up to the '#' delimiter
            i += 1
        i += 1
        query = []
        while words[i][0:4] != "http":  # query terms end at the URL token
            query.append(words[i])
            i += 1
        url = words[i]  # currently unused; kept for readability
        doc_hash = words[i + 1]  # renamed to avoid shadowing builtin hash()

        if qid in queries:  # direct membership test, no .keys() needed
            q_vec = queries[qid]
        else:
            q_vec = average_vec(query, model, dim)
            queries[qid] = q_vec
        index = translate_hash(doc_hash, dictionary)
        if -1 < index < length:  # chained comparison for the bounds check
            doc_vec = [float(a) for a in d[index]]
            if qid in questions:
                new = np.array([np.array(doc_vec)]).T
                questions[qid].a = np.hstack((new, questions[qid].a))
                questions[qid].y = np.append(questions[qid].y, relevancy)
            else:
                questions[qid] = q(q_vec, doc_vec, relevancy)
    return questions
Beispiel #4
0
def load_questions(model, rel, hash, qid, dictionary):
    """Group per-document relevance examples by question id.

    Parameters
    ----------
    model : doc2vec-like model exposing ``model.docvecs[index]``.
    rel : sequence of relevance labels, parallel to ``hash`` and ``qid``.
    hash : sequence of document hashes (name kept for callers even though
        it shadows the builtin ``hash`` inside this function).
    qid : sequence of question ids, parallel to ``rel``.
    dictionary : hash -> document-index mapping consumed by
        ``translate_hash``.

    Returns
    -------
    dict mapping question id -> ``q`` instance whose ``a`` matrix gains one
    column and whose ``y`` vector gains one label per matched document.
    """
    questions = {}
    # Walk the three parallel sequences in lockstep instead of indexing.
    for r, h, qi in zip(rel, hash, qid):
        index = translate_hash(h, dictionary)
        # Encode the question id as a 20-bit binary vector repeated 5 times
        # to act as a fixed-size query representation.
        q_vec = [int(a) for a in '{:020b}'.format(int(qi))] * 5
        if index > -1:
            doc_vec = [float(a) for a in model.docvecs[index]]
            if qi in questions:  # direct membership test, no .keys() needed
                new = np.array([np.array(doc_vec)]).T
                questions[qi].a = np.hstack((new, questions[qi].a))
                # NOTE(review): labels are prepended here but appended in
                # load_qs — confirm which ordering is intended.
                questions[qi].y = np.append(r, questions[qi].y)
            else:
                questions[qi] = q(q_vec, doc_vec, r)
    return questions