def load_questions(model, rel, hash, qid, dictionary):
    """Group per-document training data by query id.

    Parameters
    ----------
    model : doc2vec-style model exposing ``model.docvecs[index]``.
    rel : sequence of relevance labels, parallel to ``hash`` and ``qid``.
    hash : sequence of document hashes (parameter name kept for
        backward compatibility although it shadows the builtin).
    qid : sequence of query ids, parallel to ``rel``.
    dictionary : mapping consumed by ``translate_hash`` to resolve a
        document hash to a docvec index (-1 when unknown).

    Returns
    -------
    dict
        Maps query id -> ``q`` instance whose ``a`` matrix holds one
        document vector per column and whose ``y`` holds the labels in
        the matching (prepended) order.
    """
    questions = {}
    # Iterate the three parallel sequences together instead of indexing.
    for r, h, qi in zip(rel, hash, qid):
        index = translate_hash(h, dictionary)
        # Fixed 100-dim query encoding: the 20-bit binary form of the
        # query id, repeated 5 times.
        q_vec = [int(a) for a in '{:020b}'.format(int(qi))] * 5
        if index > -1:
            doc_vec = [float(a) for a in model.docvecs[index]]
            if qi in questions:
                # Prepend both the column and its label so they stay aligned.
                new = np.array([np.array(doc_vec)]).T
                questions[qi].a = np.hstack((new, questions[qi].a))
                questions[qi].y = np.append(r, questions[qi].y)
            else:
                questions[qi] = q(q_vec, doc_vec, r)
    return questions
def load_qs(model):
    """Load per-query training data from the feature files on disk.

    Reads ``temp_mapper.txt`` (hash -> document index),
    ``new_averages.txt`` (one precomputed document vector per line) and
    ``temp_features.rtData`` (ranking feature lines), then groups
    document vectors and relevance labels by query id. Only the first
    1999 feature lines are consumed.

    Parameters
    ----------
    model : word-vector model supporting ``model[word]`` and
        ``model.vocab``; used to average query-word vectors.

    Returns
    -------
    dict
        Maps query id -> ``q`` instance: ``a`` holds one document
        vector per column, ``y`` the matching relevance labels.
    """
    dictionary = load_doc_hashes("temp_mapper.txt")
    questions = {}
    # NOTE(review): ``model.vocab.keys()[0]`` only works on Python 2
    # (py3 dict views are not subscriptable) — this file appears to be
    # Python 2 era; confirm before porting.
    dim = len(model[model.vocab.keys()[0]])
    d = MySentences("new_averages.txt")
    features = MySentences("temp_features.rtData")
    length = len(d)
    queries = {}  # cache of averaged query vectors keyed by query id
    for line_no, words in enumerate(features, start=1):
        if line_no == 2000:  # preserve original cap: process 1999 lines
            break
        # NOTE(review): under Python 2 this is integer division — confirm
        # whether fractional labels are intended before porting.
        relevancy = int(words[0]) / 5
        qid = int(words[1].split(':')[1])
        # Skip tokens up to and including the "#" separator.
        i = 2
        while words[i] != "#":
            i += 1
        i += 1
        # Query terms run until the first token that looks like a URL.
        query = []
        while words[i][0:4] != "http":
            query.append(words[i])
            i += 1
        # words[i] is the URL (unused); the document hash follows it.
        doc_hash = words[i + 1]
        if qid in queries:
            q_vec = queries[qid]
        else:
            q_vec = average_vec(query, model, dim)
            queries[qid] = q_vec
        index = translate_hash(doc_hash, dictionary)
        if -1 < index < length:
            doc_vec = [float(a) for a in d[index]]
            if qid in questions:
                new = np.array([np.array(doc_vec)]).T
                questions[qid].a = np.hstack((new, questions[qid].a))
                # BUG FIX: the new column is PREPENDED to ``a``, so its
                # label must be prepended to ``y`` as well (matching
                # load_questions). Previously the label was appended,
                # misaligning columns and labels.
                questions[qid].y = np.append(relevancy, questions[qid].y)
            else:
                questions[qid] = q(q_vec, doc_vec, relevancy)
    return questions
def load_qs(model):
    """Build the query-id -> ``q`` training-data mapping from disk files.

    Sources: ``temp_mapper.txt`` (document hash -> index),
    ``new_averages.txt`` (per-document vectors), and
    ``temp_features.rtData`` (feature lines; only the first 1999 are
    read). Document vectors are accumulated as columns of ``a`` and
    relevance labels in ``y``, grouped by query id.

    Parameters
    ----------
    model : word-vector model supporting ``model[word]`` and
        ``model.vocab``; used by ``average_vec`` for query vectors.

    Returns
    -------
    dict mapping query id to a ``q`` instance.
    """
    dictionary = load_doc_hashes("temp_mapper.txt")
    questions = {}
    # NOTE(review): subscripting dict.keys() is Python-2-only — this
    # module appears to target Python 2; confirm before porting.
    dim = len(model[model.vocab.keys()[0]])
    d = MySentences("new_averages.txt")
    features = MySentences("temp_features.rtData")
    length = len(d)
    queries = {}  # averaged query vectors, cached per query id
    processed = 0
    for words in features:
        processed += 1
        if processed == 2000:  # original cap: stop after 1999 lines
            break
        # NOTE(review): integer division under Python 2 — verify intent.
        relevancy = int(words[0]) / 5
        qid = int(words[1].split(':')[1])
        # Advance past the "#" separator token.
        i = 2
        while words[i] != "#":
            i += 1
        i += 1
        # Collect query terms until the first URL-looking token.
        query = []
        while words[i][0:4] != "http":
            query.append(words[i])
            i += 1
        # words[i] is the URL (unused); the document hash follows.
        doc_hash = words[i + 1]
        if qid in queries:
            q_vec = queries[qid]
        else:
            q_vec = average_vec(query, model, dim)
            queries[qid] = q_vec
        index = translate_hash(doc_hash, dictionary)
        if -1 < index < length:
            doc_vec = [float(a) for a in d[index]]
            if qid in questions:
                new = np.array([np.array(doc_vec)]).T
                questions[qid].a = np.hstack((new, questions[qid].a))
                # BUG FIX: column is prepended to ``a``, so the label is
                # prepended to ``y`` too (as load_questions does);
                # appending it misaligned labels with columns.
                questions[qid].y = np.append(relevancy, questions[qid].y)
            else:
                questions[qid] = q(q_vec, doc_vec, relevancy)
    return questions
def load_questions(model, rel, hash, qid, dictionary):
    """Group document vectors and labels by query id.

    Parameters
    ----------
    model : doc2vec-style model exposing ``model.docvecs[index]``.
    rel : relevance labels, parallel to ``hash`` and ``qid``.
    hash : document hashes (name kept for interface compatibility
        despite shadowing the builtin).
    qid : query ids, parallel to ``rel``.
    dictionary : mapping used by ``translate_hash``; unresolved hashes
        yield index -1 and are skipped.

    Returns
    -------
    dict of query id -> ``q`` instance (``a``: one doc vector per
    column; ``y``: labels in the same, prepended, order).
    """
    questions = {}
    # Walk the parallel sequences together rather than by index.
    for r, h, qi in zip(rel, hash, qid):
        index = translate_hash(h, dictionary)
        # 100-dim query encoding: 20-bit binary of the id, tiled x5.
        q_vec = [int(a) for a in '{:020b}'.format(int(qi))] * 5
        if index > -1:
            doc_vec = [float(a) for a in model.docvecs[index]]
            if qi in questions:
                # Keep column and label aligned: prepend both.
                new = np.array([np.array(doc_vec)]).T
                questions[qi].a = np.hstack((new, questions[qi].a))
                questions[qi].y = np.append(r, questions[qi].y)
            else:
                questions[qi] = q(q_vec, doc_vec, r)
    return questions