# NOTE(review): this chunk starts mid-function -- the enclosing def (the
# single-query search helper, presumably GetLinksForQuery given the call
# below) begins before the visible source.  `res` looks like a Google
# custom-search JSON response; keep at most the first 10 result links.
return [item['link'] for item in res['items']][:10]


# Returns a list of links appearing in search for a given request.
def GetLinksForQueries(queries):
    # Collect links for every query, then deduplicate.
    # NOTE: set() does not preserve the original result order.
    links = []
    for query in queries:
        links += GetLinksForQuery(query)
    # remove duplicates
    return list(set(links))


# --- script body: fetch links for one CLI query and score each link's ---
# --- top passage, printing every scored passage as it is found.       ---
parser, st, stop = common_lib.Init()
query = sys.argv[1]
# The same argv text is used as both title and body of the query.
keyword_query = common_lib.buildFullQuery(query, query, parser, stop)
links_keywords = GetLinksForQueries([keyword_query])
sum_score = 0   # running total of passage scores
len_score = 0   # number of links that yielded a passage
for link in links_keywords:
    passage, _ = passage_retrieval.GetTopPassageFromLink(keyword_query, link)
    if passage:
        score = passage_retrieval.ScorePassage(keyword_query, passage)
        sum_score += score
        len_score += 1
        print "======================================================"
        print passage.encode('utf-8')
        print "score ----------> " + str(score)
# Debug pass over the parsed XML tree `e`: every document except #396 is
# skipped, so this scores passages for one hand-picked document only.
# `e`, `f`, `parser`, `stop` and the search helpers are defined earlier in
# the file, outside this visible chunk.
i = 1            # 1-based document counter across all <vespaadd> elements
all_scores = 0
top_scores = 0
for ves in e.findall('vespaadd'):
    for doc in ves.findall('document'):
        f.write("<doc number = \"" + str(i) + "\">\n")
        try:
            # adding links for keyword query
            # Skip every document except number 396 (single-doc debug run).
            if (i != 396):
                i += 1
                continue
            print "doc " + str(i)
            cont = ""
            # <content> may be absent; fall back to the empty string.
            # NOTE(review): even when the node exists, .text can still be
            # None -- confirm buildFullQuery tolerates that.
            if doc.find('content') != None:
                cont = doc.find('content').text
            keyword_query = common_lib.buildFullQuery(
                doc.find('subject').text, cont, parser, stop)
            links_bing = GetLinksForQueryBing(keyword_query)
            links_google = GetLinksForQueryGoogle(keyword_query)
            f.write("<keyword_query>" + escape(keyword_query) +
                    "</keyword_query>\n")
            f.write("<links>\n")
            sum_score = 0
            len_score = 0
            j = 1
            max_score = 0
            passages = []
            scores = {}
            # Strip synonym expansions before passage scoring.
            keyword_query = passage_retrieval.RemoveSynonymsFromKeywords(
                keyword_query)
            print keyword_query
            # NOTE(review): the chunk is cut off here -- this loop's body
            # and the except clause matching the try above continue past
            # the visible source.
            for link in links_bing:
# Returns a list of links appearing in search for a given request. def GetLinksForQueries(queries): links = [] for query in queries: links += GetLinksForQuery(query) # remove duplicates return list(set(links)) parser, st, stop = common_lib.Init() query = sys.argv[1] keyword_query = common_lib.buildFullQuery(query, query, parser, stop) links_keywords = GetLinksForQueries([keyword_query]) sum_score = 0 len_score = 0 for link in links_keywords: passage, _ = passage_retrieval.GetTopPassageFromLink(keyword_query, link) if passage: score = passage_retrieval.ScorePassage(keyword_query, passage) sum_score += score len_score += 1 print "======================================================" print passage.encode('utf-8') print "score ----------> " + str(score)
# Dump question docs from small_sample.xml to output_small.xml: for each
# <document> inside a <vespaadd>, write subject, content and the keyword
# query built from them (via common_lib) as one XML <doc> record.
import xml.etree.ElementTree
from xml.sax.saxutils import escape

import common_lib


def _text_or_empty(node):
    # A <subject>/<content> element may be missing, or present with empty
    # text; ElementTree then yields None, which would crash escape().
    # (The sibling script in this file guards the same case.)
    if node is None or node.text is None:
        return ""
    return node.text


parser, st, stop = common_lib.Init()

e = xml.etree.ElementTree.parse('small_sample.xml').getroot()
# 'with' guarantees the output file is closed even if a record fails.
with open("output_small.xml", "wb") as f:
    f.write("<data>\n")
    i = 1  # 1-based doc number, carried across all <vespaadd> elements
    for ves in e.findall('vespaadd'):
        for doc in ves.findall('document'):
            subject = _text_or_empty(doc.find('subject'))
            content = _text_or_empty(doc.find('content'))
            f.write("<doc number = \"" + str(i) + "\">\n")
            i += 1
            f.write("<subject>" + escape(subject) + "</subject>\n")
            f.write("<content>" + escape(content) + "</content>\n")
            #f.write("<category>" + escape(subject) + "</category>\n")
            f.write("<keywords>" +
                    escape(common_lib.buildFullQuery(subject, content,
                                                     parser, stop)) +
                    "</keywords>\n")
            f.write("</doc>\n")
    f.write("</data>\n")

#print buildFullQuery("long-distance trail throughout CA", parser)
# NOTE(review): chunk starts mid-function -- `.execute()` finishes a Google
# custom-search request begun outside the visible source; `res` is its
# JSON response.
).execute()
results = [item['link'] for item in res['items']]
results = results[:min(20, len(results))]
# Drop YouTube links (str.find == -1 means the substring is absent).
return [r for r in results if r.find("youtube") == -1]


# --- script body: gather Bing/Google links for one CLI question and ---
# --- extract ML-scored passages, under a hard wall-clock limit.     ---
parser, st, stop = common_lib.Init()
title = sys.argv[1]
body = sys.argv[1]  # NOTE(review): body reuses argv[1] (same as title) -- confirm intended
# Abort the whole run via SIGALRM if it takes too long; signal_handler is
# defined outside this chunk.
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(51)  # 51 seconds
try:
    keyword_query = common_lib.buildFullQuery(title, body, parser, stop)
    # Prefer up to two answers.yahoo.com hits, then the general Bing links.
    links_bing_yahoo = GetLinksForQueryBing("answers.yahoo.com " + keyword_query)[:2]
    links_bing = links_bing_yahoo + GetLinksForQueryBing(keyword_query)
    links_google = GetLinksForQueryGoogle(keyword_query)
    # Strip synonym expansions before passage scoring.
    keyword_query = passage_retrieval.RemoveSynonymsFromKeywords(keyword_query)
    sum_score = 0
    len_score = 0
    results = []
    rank = 1
    for link in links_bing:
        passage, score = passage_retrieval.GetTopPassageFromLinkWithML(keyword_query, link)
        if passage:
            # [link, also found by Google?, passage, placeholder score, bing rank]
            results.append([link, link in links_google, passage, 0, rank])
            # NOTE(review): chunk ends here -- the rank increment and the
            # except clause matching the try above lie past the visible
            # source.
# NOTE(review): chunk starts mid-function -- `res` is a search response
# produced before the visible source; keep at most 20 links.
results = [item['link'] for item in res['items']]
results = results[:min(20, len(results))]
# Drop YouTube links (str.find == -1 means the substring is absent).
return [r for r in results if r.find("youtube") == -1]


# --- script body: gather Bing/Google links for one CLI question and ---
# --- extract ML-scored passages, under a hard wall-clock limit.     ---
# NOTE(review): this duplicates the script on an earlier line of the file
# almost verbatim -- consider consolidating.
parser, st, stop = common_lib.Init()
title = sys.argv[1]
body = sys.argv[1]  # NOTE(review): body reuses argv[1] (same as title) -- confirm intended
# Abort the whole run via SIGALRM if it takes too long; signal_handler is
# defined outside this chunk.
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(51)  # 51 seconds
try:
    keyword_query = common_lib.buildFullQuery(title, body, parser, stop)
    # Prefer up to two answers.yahoo.com hits, then the general Bing links.
    links_bing_yahoo = GetLinksForQueryBing("answers.yahoo.com " + keyword_query)[:2]
    links_bing = links_bing_yahoo + GetLinksForQueryBing(keyword_query)
    links_google = GetLinksForQueryGoogle(keyword_query)
    # Strip synonym expansions before passage scoring.
    keyword_query = passage_retrieval.RemoveSynonymsFromKeywords(keyword_query)
    sum_score = 0
    len_score = 0
    results = []
    rank = 1
    for link in links_bing:
        passage, score = passage_retrieval.GetTopPassageFromLinkWithML(
            keyword_query, link)
        # NOTE(review): chunk ends here -- the loop body and the except
        # clause matching the try above lie past the visible source.
# NOTE(review): chunk starts mid-function -- `.execute()` finishes a Google
# custom-search request begun outside the visible source; this is the tail
# of the Google link helper (GetLinksForQueryGoogle, used below).
).execute()
return [item['link'] for item in res['items']][:20]


def GetLinksForQueryBing(query):
    # Return up to 20 result URLs from Bing for `query`.
    # SECURITY(review): the Bing API key is hard-coded in source; it should
    # be moved to configuration/environment and this key revoked.
    bing = PyBingSearch('3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY')
    result_list = bing.search_all(query, limit=20, format='json')
    return [result.url for result in result_list][:20]


# --- script body: collect Google and Bing links for every document in ---
# --- small_sample.xml and dump them as JSON.                          ---
parser, st, stop = common_lib.Init()
output = {}  # doc index -> [google_links, bing_links]
#f = open("links_results", "wb")
#f.write("<data>\n")
e = xml.etree.ElementTree.parse('small_sample.xml').getroot()
i = 0  # 0-based doc index across all <vespaadd> elements
for ves in e.findall('vespaadd'):
    for doc in ves.findall('document'):
        # adding links for keyword query
        print "next doc"
        # NOTE(review): .text may be None for empty <subject>/<content> --
        # confirm buildFullQuery tolerates that.
        keyword_query = common_lib.buildFullQuery(doc.find('subject').text,
                                                  doc.find('content').text,
                                                  parser, stop)
        google_links = GetLinksForQueryGoogle(keyword_query)
        bing_links = GetLinksForQueryBing(keyword_query)
        links = [google_links, bing_links]
        output[i] = links
        i += 1
with open('output_links.txt', 'w') as f:
    json.dump(output, f, ensure_ascii=False)