Esempio n. 1
0
def queryExpanEva(extend,documentAddress,queryAddress,resultAddress):
    """Expand the queries with user-log terms and print the BM25 MRR per query.

    The terms returned by ``userLogExpan`` are added (weight +1) to every
    query vector.  Then, for each element of ``extend``, an Okapi BM25 index
    is built and each expanded query is scored against it; the MRR of the
    top-1000 ranking against the content of ``resultAddress`` is printed.

    Args:
        extend: iterable whose elements are passed one by one to the index
            builder (presumably file extensions -- confirm with the caller).
        documentAddress: location of the document collection.
        queryAddress: location of the query file.
        resultAddress: location of the relevance judgements read by
            ``readFile``.
    """
    added_terms = userLogExpan(extend, documentAddress, queryAddress)
    print(added_terms)
    queries = calculate_query(get_query(queryAddress))
    judgements = readFile(resultAddress)

    # Every expanded term gets its weight bumped by one in every query.
    for query_id in queries:
        weights = queries[query_id]
        for term in added_terms:
            weights[term] = weights.get(term, 0) + 1

    for ext in extend:
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, ext)
        for query_id in queries:
            ranking = sum_keyword(bm25_index, queries[query_id], 1000)
            print("BM25: ")
            print(MRR(judgements, ranking))
Esempio n. 2
0
    def send(self, event=None):
        """Process the message typed by the user and display the answer.

        The text is read from ``self.user_msg`` and echoed with
        ``display_user_text``.  When the query option is "KB" the answer is
        looked up through a ``Query`` object; otherwise the text is handed to
        ``edison.get_response_kibana``.  The answer is shown with
        ``display_bot_text`` and the message box is cleared.

        ``event`` is accepted but never read (presumably so the method can be
        used as a Tk event callback -- confirm against the binding site).
        """
        # Sample the scrollbar before any text is added so we know whether the
        # view was already at the bottom.
        scrollbar_end = self.scroll_bar.get()[1]
        user_text = self.user_msg.get()

        # Echo what the user wrote.
        self.display_user_text(user_text)

        if self.query_option.get() != "KB":
            # Any option other than "KB": feed the text to Edison.
            reply = edison.get_response_kibana(user_text)
        else:
            # "KB": answer from the knowledge base.
            knowledge_base = Query()
            matched = knowledge_base.match_question(user_text)
            print("matched q: %s real q: %s"
                  % (knowledge_base.get_question(matched), user_text))
            reply = knowledge_base.get_query(matched)

        # Show the bot's answer and clear the message box.
        self.display_bot_text(reply)
        self.msg_txtbox.delete("0", tk.END)

        # Keep the view pinned to the bottom only if it was there before.
        if scrollbar_end == 1:
            self._textarea.yview_moveto(1)
Esempio n. 3
0
def query_database(query_words: List[str]):
    """Run a whitespace-tokenised SQL statement and return a text reply.

    The first word (case-insensitive) selects the action:

    * ``select`` -- the words are joined with spaces, passed to
      ``query.get_query`` and the result is returned as ``str``.
    * ``delete`` / ``insert`` / ``update`` -- the joined statement is passed
      to ``query.set_query`` and a confirmation message is returned.
    * anything else -- an error message naming the first word is returned.

    An empty ``query_words`` returns an error message instead of raising
    ``IndexError``.

    Args:
        query_words: the statement split into words.

    Returns:
        A human-readable string: the query result, a confirmation, or an
        error message.
    """
    # Guard the empty case: indexing [0] below would raise IndexError.
    if not query_words:
        return "This is not a valid query: no words were given!"

    first_word = query_words[0].lower()
    if first_word == "select":
        return str(query.get_query(" ".join(query_words)))
    if first_word in ("delete", "insert", "update"):
        query.set_query(" ".join(query_words))
        return "Query succesfully executed!"
    return f"This is not a valid query with first word {first_word}!"
Esempio n. 4
0
def get_k(queryAddress,documentAddress,e,resultAddress=None):
    """Print the ids of top-100 cosine-ranked documents found in the results.

    An inverted index is built for ``documentAddress`` / ``e``, each query
    from ``queryAddress`` is ranked with ``cosine_distance`` (top 100), and
    every ranked document id that is contained in the data read from
    ``resultAddress`` is printed.

    Args:
        queryAddress: location of the query file.
        documentAddress: location of the document collection.
        e: value forwarded to ``make_invidx`` (presumably a file extension
            -- confirm with the caller).
        resultAddress: location of the result file read by ``readFile``.
            The original code read this name without receiving it as a
            parameter; when omitted, the module-level ``resultAddress`` is
            used so existing three-argument calls keep working.

    Raises:
        NameError: if ``resultAddress`` is neither passed nor defined at
            module level.
    """
    if resultAddress is None:
        # Backward compatibility with callers that rely on a module global.
        try:
            resultAddress = globals()["resultAddress"]
        except KeyError:
            raise NameError(
                "get_k() needs resultAddress: pass it as an argument or "
                "define it at module level")

    # The per-document lengths returned alongside the index are not needed.
    document1, _length_by_docid = make_invidx(documentAddress, e)
    query1 = calculate_query(get_query(queryAddress))
    simi1 = cosine_distance(document1, query1, 100)
    result = readFile(resultAddress)
    for num in simi1:
        for _weight, docid in simi1[num]:
            if docid in result:
                print(docid)
Esempio n. 5
0
def print_results():
    """Show up to ten search results in the ``result1`` .. ``result10`` widgets.

    The text of the ``query`` widget is lower-cased, split on whitespace and
    passed to ``get_query``.  The first ten returned entries are written to
    the result widgets in order; widgets without a matching entry are
    cleared.  If the lookup fails, all ten widgets are cleared.
    """
    q = query.get()
    try:
        ql = get_query(q.lower().split())
    except Exception:
        # Best effort: a failed lookup is shown as "no results".  Only
        # ordinary errors are swallowed, so KeyboardInterrupt/SystemExit
        # still propagate.
        ql = []

    labels = (result1, result2, result3, result4, result5,
              result6, result7, result8, result9, result10)
    found = len(ql)
    for position, label in enumerate(labels):
        # Fill the slot with its result, or blank it when there is none.
        label.config(text=ql[position] if position < found else "")
Esempio n. 6
0
def get_test_feedback(userid=None, site_id=None, qid=None, qtype=None, runid=None):
    """Return feedback documents that have a ``doclist``, optionally filtered.

    Args:
        userid: if truthy, only feedback with this ``userid``.
        site_id: if truthy, only feedback with this ``site_id``.
        qid: if truthy and not "all" (case-insensitive), only feedback for
            this query id.
        qtype: if not None, keep only feedback whose ``qid`` belongs to a
            query of that kind: "test" selects queries whose ``type`` is
            "test"; any other value selects all remaining queries.
        runid: if truthy, only feedback with this ``runid``.

    Returns:
        A list of the matching feedback documents.
    """
    criteria = {"doclist": {"$exists": True}}
    if userid:
        criteria["userid"] = userid
    if site_id:
        criteria["site_id"] = site_id
    if qid and qid.lower() != "all":
        criteria["qid"] = qid
    if runid:
        criteria["runid"] = runid

    # An index hint is given only when both site_id and userid are part of
    # the filter; qid is prepended to the hint when it is filtered on too.
    hint_fields = []
    if "site_id" in criteria and "userid" in criteria:
        hint_fields = ["site_id", "userid"]
        if "qid" in criteria:
            hint_fields.insert(0, "qid")

    cursor = db.feedback.find(criteria)
    if hint_fields:
        cursor = cursor.hint([(name, pymongo.ASCENDING) for name in hint_fields])

    if qtype is None:
        return list(cursor)

    # Collect the ids of the site's queries of the requested kind.
    want_test = qtype == "test"
    allowed_qids = set()
    for query_doc in query.get_query(site_id=site_id):
        is_test = "type" in query_doc and query_doc["type"] == "test"
        if is_test == want_test:
            allowed_qids.add(query_doc["_id"])

    return [fb for fb in cursor if fb["qid"] in allowed_qids]
Esempio n. 7
0
def userLogExpan(extend,documentAddress,queryAddress):
    """Select expansion terms for the queries from a user-feedback log.

    For each element of ``extend`` an inverted index and the data returned
    by ``read_file`` are loaded; each query text is tokenised with NLTK and
    passed to ``coWeight`` together with the feedback sessions, and the top
    10 terms are chosen with ``selectTopTerm``.

    NOTE(review): the selection is overwritten on every iteration, so the
    value returned is the one computed for the last query of the last
    element of ``extend`` (or ``{}`` when nothing was iterated) -- confirm
    this is intended.

    Args:
        extend: iterable forwarded element-wise to the index builders.
        documentAddress: location of the document collection.
        queryAddress: location of the query file.

    Returns:
        Whatever ``selectTopTerm`` returned last, or an empty dict.
    """
    # The feedback log location is hard-coded.
    sessions = userFeedback("C://TweetStudy//12//12//userFeedback3.txt")
    top_terms = {}
    for ext in extend:
        index, _doc_lengths = make_invidx(documentAddress, ext)
        tf, atid = read_file(documentAddress, ext)
        raw_queries = get_query(queryAddress)
        for query_id in raw_queries:
            tokens = nltk.word_tokenize(raw_queries[query_id])
            co_weights = coWeight(sessions, tokens, index, tf, atid)
            top_terms = selectTopTerm(co_weights, 10)
    return top_terms
Esempio n. 8
0
def get_trec(site_id):
    """Build TREC-style runs and qrels for every finished test period.

    Test periods from ``config["TEST_PERIODS"]`` whose ``END`` lies in the
    future are skipped.  For each remaining period:

    * per participant, the most recent run per test query created before the
      period's ``END`` is collected and, if any exist, converted with
      ``get_trec_run``;
    * per test query, the feedback modified strictly between the period's
      ``START`` and ``END`` is collected and converted with
      ``get_trec_qrel`` twice (default and ``rawcount=True``).

    Args:
        site_id: site whose queries and feedback are used.

    Returns:
        A tuple ``(trec_runs, trec_qrels, trec_qrels_raw)`` of lists.
    """
    def is_test_query(doc):
        # Only queries explicitly typed "test" take part.
        return "type" in doc and doc["type"] == "test"

    run_files = []
    qrel_files = []
    raw_qrel_files = []
    queries = query.get_query(site_id)
    participants = user.get_participants()

    for period in config["TEST_PERIODS"]:
        # Periods that have not ended yet are ignored.
        if datetime.datetime.now() < period["END"]:
            continue

        for participant in participants:
            userid = participant["_id"]
            latest_by_qid = {}
            for q in queries:
                if not is_test_query(q):
                    continue
                qid = q["_id"]
                runs = db.run.find({"userid": userid,
                                    "qid": qid})
                if not runs:
                    continue
                # Pick the newest run created after 2000-01-01 and before
                # the end of the period.
                newest = None
                newest_time = datetime.datetime(2000, 1, 1)
                for candidate in runs:
                    if newest_time < candidate["creation_time"] < period["END"]:
                        newest_time = candidate["creation_time"]
                        newest = candidate
                if newest:
                    latest_by_qid[qid] = newest

            if latest_by_qid:
                run_files.append(get_trec_run(latest_by_qid,
                                              period["NAME"],
                                              participant["teamname"]))

        feedback_by_qid = {}
        for q in queries:
            if not is_test_query(q):
                continue
            qid = q["_id"]
            all_feedback = feedback.get_test_feedback(site_id=site_id, qid=qid)
            # Keep only feedback modified strictly inside the period.
            feedback_by_qid[qid] = [
                fb for fb in all_feedback
                if period["START"] < fb["modified_time"] < period["END"]
            ]

        qrel_files.append(get_trec_qrel(feedback_by_qid,
                                        period["NAME"]))
        raw_qrel_files.append(get_trec_qrel(feedback_by_qid,
                                            period["NAME"],
                                            rawcount=True))
    return run_files, qrel_files, raw_qrel_files
Esempio n. 9
0
def getInput():
    """Run the query typed into ``entry1`` and show the result on ``canvas1``.

    A label of blank lines is first placed at the output position so that a
    previously shown result is covered, then a label with the new result
    (or 'No results found' when ``query.extract_posting`` returns None) is
    placed at the same position.
    """
    x1 = entry1.get()
    # Canvas coordinates at which both labels are placed.
    x_pos = 650
    y_pos = 250

    # Cover whatever was displayed by an earlier call.
    blank_label = tk.Label(root, text=(' ' * 230 + '\n') * 12, width=1300)
    canvas1.create_window(x_pos, y_pos, window=blank_label)

    query_input = query.get_query(x1)

    query_ = query.extract_posting(query_input)
    # Identity test: None is a singleton, and "==" could be overridden by
    # the returned object.
    if query_ is None:
        query_ = 'No results found'
    result_label = tk.Label(root, text=str(query_), width=1300)
    canvas1.create_window(x_pos, y_pos, window=result_label)
Esempio n. 10
0
def queryExpand1(document,extend,queryAddress):
    """Expand each query with its top-10 selected terms and re-rank it.

    For every query: rank with ``sum_keyword`` (top 20), derive term scores
    with ``TSV``, build a second index with ``make_invidx_by_OkapiBM25s``,
    add the ten highest-scoring terms to the query (using their scores as
    weights), and rank again (top 1000).

    NOTE(review): the ranking is overwritten per query, so only the ranking
    of the last query is returned (``{}`` when there are no queries) --
    confirm this is intended.

    Args:
        document: document collection location passed to the index builders.
        extend: value forwarded to the index builders.
        queryAddress: location of the query file.

    Returns:
        The result of the last ``sum_keyword`` call, or an empty dict.
    """
    index, tf_by_docid, df, doc_count = make_invidx_by_OkapiBM251(document, extend)
    queries = calculate_query(get_query(queryAddress))
    final_ranking = {}
    for query_id in queries:
        first_pass = sum_keyword(index, queries[query_id], 20)
        term_scores = TSV(tf_by_docid, df, doc_count, first_pass)
        expanded_index = make_invidx_by_OkapiBM25s(document, extend, term_scores)

        # Ten best (score, term) pairs, highest score first.
        scored_terms = [(term_scores[term], term) for term in term_scores]
        scored_terms.sort(reverse=True)
        best = scored_terms[:10]
        print(best)

        for score, term in best:
            queries[query_id][term] = score
        final_ranking = sum_keyword(expanded_index, queries[query_id], 1000)
    return final_ranking
Esempio n. 11
0
def get_topK(queryAddress,documentAddress,extend):
    """Print scores of documents that appear in more than one top-10 ranking.

    For each element ``e`` of ``extend`` three rankings are computed:
    ``simi1`` (``cosine_distance`` over all queries), ``simi2``
    (``sum_keyword`` on the index built by ``make_invidx_by_OkapiBM25``) and
    ``simi3`` (``sum_score`` on the index built by ``make_index_AIG``).
    Documents of ``simi1`` that also occur in ``simi2`` / ``simi3`` are
    printed with the respective weight, followed by the documents shared by
    ``simi2`` and ``simi3``.

    NOTE(review): ``simi2`` and ``simi3`` are reassigned for every query in
    the first inner loop and read only after it, so the comparisons below
    use the values of the LAST query for every ``num``; they are undefined
    (NameError) if ``query1`` is empty.  Confirm whether per-query values
    were intended.

    Args:
        queryAddress: location of the query file.
        documentAddress: location of the document collection.
        extend: iterable forwarded element-wise to the index builders.
    """
    for e in extend:
        # length_by_docid1 is returned by make_invidx but not used here.
        document1, length_by_docid1 = make_invidx(documentAddress, e)
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)

        # Load and weight the queries.
        query1=calculate_query(get_query(queryAddress))
        for num in query1:
            # Overwritten on each pass; only the last query's values survive.
            document3= make_index_AIG(documentAddress,e,query1[num])
            simi3=sum_score(document3,10)
            simi2=sum_keyword(document2,query1[num],10)
        # Top-10 cosine ranking for all queries at once.
        simi1= cosine_distance(document1,query1,10)

        for num in simi1:
            for weight1,docid in simi1[num]:
                # Documents shared between the cosine ranking and simi2.
                for weight2,docid2 in simi2:
                    if docid==docid2:
                        print(docid2," : ",weight2)
                # Documents shared between the cosine ranking and simi3.
                for weight3,docid3 in simi3:
                    if docid==docid3:
                        print(docid3,":",weight3)
            # Documents shared between simi2 and simi3; this runs once per
            # query id of simi1 and prints the simi3 weight.
            for weight2,docid2 in simi2:
                for weight3,docid3 in simi3:
                    if docid2==docid3:
                        print(docid3,":",weight3)
Esempio n. 12
0
def results(page_number):
    """Render the search-results page for the current search.

    When the submitted form contains 'searchquery', the text is processed
    with ``text_procces``, matching URLs are fetched with ``get_query`` and
    scored with ``find_some``; the URLs, ordered by descending score, are
    stored in the module-level ``links_`` and the raw text in
    ``current_search``.  Otherwise the values from the previous search are
    reused.

    Args:
        page_number: page index forwarded to the template.

    Returns:
        A redirect to '/' when the current search is empty, otherwise the
        rendered 'results.html' template.
    """
    from query import find_some, get_query, text_procces
    global current_search, links_

    if 'searchquery' in request.form:
        current_search = request.form['searchquery']
        processed_sentence = text_procces(current_search)
        urls = get_query(processed_sentence)
        urls_dict = find_some(urls, list(processed_sentence.keys()))
        # Highest score first; sorted() is stable, so equal scores keep the
        # order in which find_some produced them.
        ranked = sorted(urls_dict.items(), key=lambda item: item[1], reverse=True)
        links_ = [url for url, _score in ranked]

    if current_search == "":
        return redirect('/')
    return render_template('results.html', query=current_search, links=links_, page=page_number,
                           res_cnt=len(links_))
Esempio n. 13
0
def evaluation(extend, documentAddress, queryAddress,resultAddress):
    """Print the BM25 mean reciprocal rank of every query.

    For each element of ``extend`` the queries are loaded, an Okapi BM25
    index is built, every query is ranked with ``sum_keyword`` (top 1000)
    and the ``MRR`` of that ranking against the data read from
    ``resultAddress`` is printed.

    Args:
        extend: iterable forwarded element-wise to the index builder.
        documentAddress: location of the document collection.
        queryAddress: location of the query file.
        resultAddress: location of the relevance judgements.
    """
    judgements = readFile(resultAddress)
    for ext in extend:
        # Queries are reloaded for every element, as in the original flow.
        queries = calculate_query(get_query(queryAddress))
        bm25_index = make_invidx_by_OkapiBM25(documentAddress, ext)
        for query_id in queries:
            ranking = sum_keyword(bm25_index, queries[query_id], 1000)
            print("BM25: ")
            print(MRR(judgements, ranking))
Esempio n. 14
0
######################################################

# Identifier attached to every log line and metric of this run; a random
# UUID is used unless TEST_ID is set in the environment.
test_id = os.environ.get("TEST_ID", str(uuid.uuid4()))

# Telemetry is optional: a client exists only when an instrumentation key
# is configured.
instrumentation_key = os.environ.get("APPINSIGHTS_INSTRUMENTATIONKEY")
telemetry_client = (
    TelemetryClient(instrumentation_key) if instrumentation_key else None
)

print("Test run for '{}' started.".format(test_id))

# A negative total (the default) means "run until interrupted".
queries_total = int(os.environ.get("QUERIES_TOTAL", -1))
queries_executed = 0

while queries_total < 0 or queries_executed < queries_total:
    raw_query = get_query()
    print("\nTest '{}' executing #{}:\n{}\n".format(
        test_id, queries_executed, raw_query))

    # Time exactly one execution of the query on the shared cursor.
    t = timeit.Timer(functools.partial(cursor.execute, raw_query))
    query_time = t.timeit(number=1)

    print("Query took: {:.2f} seconds".format(query_time))
    queries_executed += 1

    if telemetry_client:
        # Report the timing and flush after each metric.
        telemetry_client.track_metric(
            "query_time", query_time, properties={"test_id": test_id})
        telemetry_client.flush()
Esempio n. 15
0
 def query(self, query, index, document_type):
     """Run ``query`` against ``/<index>/<document_type>/_search/``.

     The query object is built first, then its ``get_query()`` value is sent
     with a GET request; the response content is wrapped in a
     ``DataCollection``.
     """
     query.build_query()
     search_path = '/%s/%s/_search/' % (index, document_type)
     # Only the content part of the (response, content) pair is used.
     _, content = self.__request(search_path, 'GET', query.get_query())
     return DataCollection(content)
Esempio n. 16
0
 def query(self, query, index, document_type):
     """Run ``query`` against ``/<index>/<document_type>/_search/``.

     The query object is built first, then its ``get_query()`` value is sent
     with a GET request; the response content is wrapped in a
     ``DataCollection``.
     """
     query.build_query()
     search_path = '/%s/%s/_search/' % (index, document_type)
     # Only the content part of the (response, content) pair is used.
     _, content = self.__request(search_path, 'GET', query.get_query())
     return DataCollection(content)
     return DataCollection(content)