def queryExpanEva(extend, documentAddress, queryAddress, resultAddress):
    """Evaluate BM25 retrieval after expanding each query with user-log terms.

    Every term produced by ``userLogExpan`` is folded into every query
    (its weight bumped by 1); then, for each index variant in ``extend``,
    the BM25 MRR of each expanded query is printed.

    :param extend: iterable of index variants to evaluate.
    :param documentAddress: path/location of the document collection.
    :param queryAddress: path/location of the query file.
    :param resultAddress: path/location of the relevance-judgement file.
    """
    expandedTerm = userLogExpan(extend, documentAddress, queryAddress)
    print(expandedTerm)
    query1 = calculate_query(get_query(queryAddress))
    result = readFile(resultAddress)
    # Add every expansion term to every query vector.
    for num in query1:
        for term in expandedTerm:
            query1[num][term] = query1[num].get(term, 0) + 1
    for e in extend:
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)
        for num in query1:
            simi2 = sum_keyword(document2, query1[num], 1000)
            print("BM25: ")
            print(MRR(result, simi2))
def send(self, event=None):
    """Handle a user message: echo it, fetch an answer, and display it.

    The answer source depends on the query-option toggle:
    "KB" matches the message against the knowledge base; anything else
    forwards the text to Edison (Kibana backend). If the chat view was
    scrolled to the bottom before sending, it is kept at the bottom.
    """
    # Capture scroll position before new text is appended.
    relative_position_of_scrollbar = self.scroll_bar.get()[1]
    user_text = self.user_msg.get()

    # Echo the user's message first.
    self.display_user_text(user_text)

    if self.query_option.get() == "KB":
        kb_query = Query()
        match_idx = kb_query.match_question(user_text)
        print("matched q: %s real q: %s" % (kb_query.get_question(match_idx), user_text))
        answer = kb_query.get_query(match_idx)
    else:
        answer = edison.get_response_kibana(user_text)

    self.display_bot_text(answer)
    self.msg_txtbox.delete("0", tk.END)

    # Stick to the bottom only if the user was already there.
    if relative_position_of_scrollbar == 1:
        self._textarea.yview_moveto(1)
def query_database(query_words: List[str]) -> str:
    """Dispatch a SQL-like command to the global ``query`` helper.

    :param query_words: the command split into words; the first word
        selects the operation.
    :returns: the stringified result for ``select``, a confirmation for
        ``delete``/``insert``/``update``, or an error message otherwise.
    """
    first_word = query_words[0].lower()
    if first_word == "select":
        return str(query.get_query(" ".join(query_words)))
    elif first_word in ("delete", "insert", "update"):
        query.set_query(" ".join(query_words))
        # Fixed typo: "succesfully" -> "successfully".
        return "Query successfully executed!"
    else:
        return f"This is not a valid query with first word {first_word}!"
def get_k(queryAddress, documentAddress, e, result_address=None):
    """Print the docids among the top-100 cosine hits that are judged relevant.

    :param queryAddress: path/location of the query file.
    :param documentAddress: path/location of the document collection.
    :param e: index variant passed through to ``make_invidx``.
    :param result_address: location of the relevance judgements. Defaults to
        the module-level ``resultAddress`` for backward compatibility — the
        original body referenced that name without it being a parameter.
    """
    document1, length_by_docid1 = make_invidx(documentAddress, e)
    query1 = calculate_query(get_query(queryAddress))
    simi1 = cosine_distance(document1, query1, 100)
    # Fall back to the module-level ``resultAddress`` (original behavior).
    result = readFile(result_address if result_address is not None else resultAddress)
    for num in simi1:
        for weight1, docid in simi1[num]:
            if docid in result:
                print(docid)
def print_results():
    """Run the query typed into the entry widget and show up to 10 result links.

    Each of the ten result labels gets the corresponding link text, or an
    empty string when there are fewer than ten results (this also clears
    leftovers from a previous search).
    """
    q = query.get()
    try:
        ql = get_query(q.lower().split())
    except Exception:
        # get_query may fail on unparsable input; show an empty result set.
        ql = []
    # The ten fixed result labels, replacing the original 20-branch if/elif chain.
    result_labels = [result1, result2, result3, result4, result5,
                     result6, result7, result8, result9, result10]
    for i, label in enumerate(result_labels):
        label.config(text=ql[i] if i < len(ql) else "")
    return
def get_test_feedback(userid=None, site_id=None, qid=None, qtype=None, runid=None):
    """Return feedback documents (those carrying a ``doclist``), optionally
    narrowed by user, site, query id, run id and query type.

    ``qtype == "test"`` keeps only feedback for test queries; any other
    non-None value keeps only feedback for non-test queries.
    """
    criteria = {"doclist": {"$exists": True}}
    if userid:
        criteria["userid"] = userid
    if site_id:
        criteria["site_id"] = site_id
    if qid and qid.lower() != "all":
        criteria["qid"] = qid
    if runid:
        criteria["runid"] = runid

    # Pick the most specific index hint the criteria allow.
    if "qid" in criteria and "site_id" in criteria and "userid" in criteria:
        feedbacks = db.feedback.find(criteria).hint([
            ("qid", pymongo.ASCENDING),
            ("site_id", pymongo.ASCENDING),
            ("userid", pymongo.ASCENDING),
        ])
    elif "site_id" in criteria and "userid" in criteria:
        feedbacks = db.feedback.find(criteria).hint([
            ("site_id", pymongo.ASCENDING),
            ("userid", pymongo.ASCENDING),
        ])
    else:
        feedbacks = db.feedback.find(criteria)

    if qtype is None:
        return [entry for entry in feedbacks]

    if qtype == "test":
        wanted_qids = {doc["_id"] for doc in query.get_query(site_id=site_id)
                       if "type" in doc and doc["type"] == "test"}
    else:
        wanted_qids = {doc["_id"] for doc in query.get_query(site_id=site_id)
                       if ("type" in doc and doc["type"] != "test") or "type" not in doc}
    return [entry for entry in feedbacks if entry["qid"] in wanted_qids]
def userLogExpan(extend, documentAddress, queryAddress):
    """Derive query-expansion terms from the user-feedback log.

    NOTE(review): ``selected`` is overwritten on every inner iteration, so
    only the top terms for the last query of the last ``extend`` variant are
    returned — confirm this is the intended behavior.
    """
    sessions = userFeedback("C://TweetStudy//12//12//userFeedback3.txt")
    selected = {}
    for variant in extend:
        index, _doc_lengths = make_invidx(documentAddress, variant)
        tf, atid = read_file(documentAddress, variant)
        queries = get_query(queryAddress)
        for qnum in queries:
            weights = coWeight(sessions, nltk.word_tokenize(queries[qnum]),
                               index, tf, atid)
            selected = selectTopTerm(weights, 10)
    return selected
def get_trec(site_id):
    """Build TREC run files and qrels for every finished test period.

    For each period that has already ended, collect each participant's
    latest run per test query (created before the period's end) and the
    feedback recorded during the period.

    :returns: ``(trec_runs, trec_qrels, trec_qrels_raw)`` lists.
    """
    trec_runs = []
    trec_qrels = []
    trec_qrels_raw = []
    queries = query.get_query(site_id)
    participants = user.get_participants()
    for period in config["TEST_PERIODS"]:
        # Only evaluate periods that are already over.
        if datetime.datetime.now() < period["END"]:
            continue
        for participant in participants:
            uid = participant["_id"]
            runs_by_qid = {}
            for qdoc in queries:
                if qdoc.get("type") != "test":
                    continue
                test_qid = qdoc["_id"]
                candidate_runs = db.run.find({"userid": uid, "qid": test_qid})
                if not candidate_runs:
                    continue
                # The most recent run created before the period's end wins.
                best_run = None
                best_date = datetime.datetime(2000, 1, 1)
                for candidate in candidate_runs:
                    if best_date < candidate["creation_time"] < period["END"]:
                        best_date = candidate["creation_time"]
                        best_run = candidate
                if not best_run:
                    continue
                runs_by_qid[test_qid] = best_run
            if runs_by_qid:
                trec_runs.append(get_trec_run(runs_by_qid, period["NAME"],
                                              participant["teamname"]))
        period_feedbacks = {}
        for qdoc in queries:
            if qdoc.get("type") != "test":
                continue
            test_qid = qdoc["_id"]
            fbs = feedback.get_test_feedback(site_id=site_id, qid=test_qid)
            period_feedbacks[test_qid] = [
                fb for fb in fbs
                if period["START"] < fb["modified_time"] < period["END"]
            ]
        trec_qrels.append(get_trec_qrel(period_feedbacks, period["NAME"]))
        trec_qrels_raw.append(get_trec_qrel(period_feedbacks, period["NAME"],
                                            rawcount=True))
    return trec_runs, trec_qrels, trec_qrels_raw
def getInput():
    """Look up the posting for the term typed into ``entry1`` and display it.

    A blank label is drawn first to wipe any previous result, then the
    (possibly empty) posting is rendered at the same canvas position.
    """
    term = entry1.get()
    x_pos = 650
    y_pos = 250
    # Overwrite the previous output with a blank label of the same size.
    blank = tk.Label(root, text=str((' ' * 230 + '\n') * 12), width=1300)
    canvas1.create_window(x_pos, y_pos, window=blank)
    query_input = query.get_query(term)
    posting = query.extract_posting(query_input)
    if posting is None:  # was `== None`; identity test is the correct idiom
        posting = 'No results found'
    result_label = tk.Label(root, text=str(posting), width=1300)
    canvas1.create_window(x_pos, y_pos, window=result_label)
    # Removed dead trailing `counter2 += 20` (local, never read afterwards).
def queryExpand1(document, extend, queryAddress):
    """Expand each query with its 10 highest-TSV terms and re-rank with BM25.

    NOTE(review): the return value holds only the ranking of the last query
    processed (it is overwritten every iteration) — same as the original.
    """
    invx, tf_by_docid, df, N = make_invidx_by_OkapiBM251(document, extend)
    queries = calculate_query(get_query(queryAddress))
    results = {}
    for qnum in queries:
        initial_hits = sum_keyword(invx, queries[qnum], 20)
        term_scores = TSV(tf_by_docid, df, N, initial_hits)
        expanded_index = make_invidx_by_OkapiBM25s(document, extend, term_scores)
        # Keep the ten best-scoring expansion terms.
        top_terms = sorted([(score, term) for term, score in term_scores.items()],
                           reverse=True)[:10]
        print(top_terms)
        for score, term in top_terms:
            queries[qnum][term] = score
        results = sum_keyword(expanded_index, queries[qnum], 1000)
    return results
def get_topK(queryAddress, documentAddress, extend):
    """Print the top-10 overlaps between cosine, BM25 and AIG rankings.

    NOTE(review): the rankings compared after the query loop are those of
    the *last* query processed — ``bm25_hits``/``aig_hits`` keep only the
    final iteration's values. Confirm this is intended.
    """
    for variant in extend:
        tfidf_index, _doc_lengths = make_invidx(documentAddress, variant)
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, variant)
        queries = calculate_query(get_query(queryAddress))
        for qnum in queries:
            aig_index = make_index_AIG(documentAddress, variant, queries[qnum])
            aig_hits = sum_score(aig_index, 10)
            bm25_hits = sum_keyword(bm25_index, queries[qnum], 10)
            cosine_hits = cosine_distance(tfidf_index, queries, 10)
        # Docids that the cosine ranking shares with BM25 / AIG.
        for qnum in cosine_hits:
            for _w1, docid in cosine_hits[qnum]:
                for w2, docid2 in bm25_hits:
                    if docid == docid2:
                        print(docid2, " : ", w2)
                for w3, docid3 in aig_hits:
                    if docid == docid3:
                        print(docid3, ":", w3)
        # Docids that BM25 shares with AIG.
        for w2, docid2 in bm25_hits:
            for w3, docid3 in aig_hits:
                if docid2 == docid3:
                    print(docid3, ":", w3)
def results(page_number):
    """Flask view: run the search from the form and render result page ``page_number``.

    Falls back to the previous ``current_search``/``links_`` globals when the
    request carries no new 'searchquery' field; an empty search redirects home.
    """
    from query import find_some, get_query, text_procces
    global current_search, links_
    # NOTE(review): removed an unused local DB connection string that embedded
    # credentials; connection handling does not happen in this view.
    if 'searchquery' in request.form:
        current_search = request.form['searchquery']
        processed_sentence = text_procces(current_search)
        urls = get_query(processed_sentence)
        urls_dict = find_some(urls, list(processed_sentence.keys()))
        # Rank URLs by descending score and keep only the URL itself.
        ranked = sorted(urls_dict.items(), key=lambda kv: kv[1], reverse=True)
        links_ = [url for url, _score in ranked]
    if current_search == "":
        return redirect('/')
    else:
        return render_template('results.html', query=current_search,
                               links=links_, page=page_number,
                               res_cnt=len(links_))
def evaluation(extend, documentAddress, queryAddress, resultAddress):
    """Print the BM25 MRR of every query against every index variant in ``extend``.

    :param extend: iterable of index variants to evaluate.
    :param documentAddress: path/location of the document collection.
    :param queryAddress: path/location of the query file.
    :param resultAddress: path/location of the relevance judgements.
    """
    result = readFile(resultAddress)
    # Queries do not depend on the variant; hoisted out of the loop.
    query1 = calculate_query(get_query(queryAddress))
    for e in extend:
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)
        for num in query1:
            simi2 = sum_keyword(document2, query1[num], 1000)
            print("BM25: ")
            print(MRR(result, simi2))
    # Removed unused locals `retrieval`/`relevance` and dead commented code.
######################################################
# Load-test driver: execute queries until QUERIES_TOTAL is reached
# (run forever when QUERIES_TOTAL is negative/unset) and report each
# query's wall-clock time to Application Insights when configured.
test_id = os.environ.get("TEST_ID", str(uuid.uuid4()))
instrumentation_key = os.environ.get("APPINSIGHTS_INSTRUMENTATIONKEY")
telemetry_client = TelemetryClient(instrumentation_key) if instrumentation_key else None

print("Test run for '{}' started.".format(test_id))

queries_total = int(os.environ.get("QUERIES_TOTAL", -1))
queries_executed = 0
while queries_total < 0 or queries_executed < queries_total:
    raw_query = get_query()
    print("\nTest '{}' executing #{}:\n{}\n".format(test_id, queries_executed, raw_query))
    # Time a single execution of the query against the shared cursor.
    t = timeit.Timer(functools.partial(cursor.execute, raw_query))
    query_time = t.timeit(number=1)
    print("Query took: {:.2f} seconds".format(query_time))
    queries_executed += 1
    if telemetry_client:
        telemetry_client.track_metric("query_time", query_time,
                                      properties={"test_id": test_id})
        telemetry_client.flush()
def query(self, query, index, document_type):
    """Build and execute *query* against ``index``/``document_type``.

    Returns the hits wrapped in a ``DataCollection``.
    """
    query.build_query()
    endpoint = '/%s/%s/_search/' % (index, document_type)
    resp, content = self.__request(endpoint, 'GET', query.get_query())
    return DataCollection(content)
def query(self, query, index, document_type):
    """Execute the prepared *query* and return the results as a ``DataCollection``."""
    query.build_query()
    _resp, content = self.__request(
        '/%s/%s/_search/' % (index, document_type),
        'GET',
        query.get_query(),
    )
    return DataCollection(content)