def search(self, query):
    augf = AnalyticUUIDGeneratorFactory()
    aug = augf.create()

    # Fan the raw query out into sub-queries and pool the sentence hits.
    results = []
    for subquery in return_search_results(query.rawQuery):
        query1 = SearchQuery(type=SearchType.SENTENCES,
                             terms=subquery.split(" "),
                             rawQuery=subquery,
                             k=500)
        result = self.other.search(query1)
        results.extend(result.searchResultItems)

    # Deduplicate pooled hits by sentence UUID.
    resultsDict = {}
    for result in results:
        resultsDict[result.sentenceId.uuidString] = result
    results = list(resultsDict.values())
    # results = results[:10]  # uncomment to truncate for quick test runs

    # Fetch the communications backing the hits and pair each fetched
    # text with the raw query for feature extraction.
    comm_ids_list, temp = get_comm_ids(results)
    dictUUID = fetch_dataset(comm_ids_list, temp)
    inv_map = {v: k for k, v in dictUUID.items()}
    toHannah = []
    for uuid in dictUUID:
        toHannah.append([query.rawQuery, dictUUID[uuid]])

    resultItemRet = SearchResult(uuid=aug.next(),
                                 searchQuery=query,
                                 searchResultItems=results,
                                 metadata=AnnotationMetadata(
                                     tool="search",
                                     timestamp=int(time.time())),
                                 lang="eng")

    # Score the pooled hits with the trained reranking model.
    with open("./trained_model.p", "rb") as model_file:
        model = pickle.load(model_file)
    pre = Preprocess()
    feature_matrix = pre.process_run(toHannah)
    dictRanks = pre_ranking(feature_matrix, model, toHannah, inv_map)
    results = rerank(dictRanks, resultItemRet)

    # Sort the reranked items by descending score.
    resultArr = sorted(results.searchResultItems,
                       key=lambda result: result.score,
                       reverse=True)
    for item in resultArr:
        logging.info(item.score)

    return SearchResult(uuid=aug.next(),
                        searchQuery=query,
                        searchResultItems=resultArr,
                        metadata=AnnotationMetadata(
                            tool="search",
                            timestamp=int(time.time())),
                        lang="eng")
def search_questions(host, port, question_texts, k):
    '''
    Given a hostname (or IP address) and port of a search service to
    connect to, a dictionary `question_texts` mapping question IDs to
    question texts (strings), and the maximum number of hits to
    retrieve, `k`, send each question text to the search service and
    retrieve the results as a SearchResult object.

    Before passing a question text to the search client (in a query),
    escape it using `escape_query`.

    Return a dictionary mapping question IDs to the respective
    SearchResult objects.

    Note: `k` is passed in the SearchQuery; it is also used to truncate
    SearchResult.searchResultItems afterward, because some services do
    not respect the `k` parameter. So the length of searchResultItems
    in the returned SearchResult objects is always at most `k`.
    '''
    results = dict()
    with SearchClientWrapper(host, port) as search_client:
        for (question_id, question_text) in question_texts.items():
            result = search_client.search(
                SearchQuery(type=SearchType.SENTENCES,
                            rawQuery=escape_query(question_text),
                            k=k))
            # Truncate explicitly in case the service ignored k.
            result.searchResultItems = result.searchResultItems[:k]
            results[question_id] = result
    return results
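# A minimal usage sketch for search_questions (not from the original
# code): the host, port, question IDs, and question texts below are
# illustrative assumptions, not values this codebase defines.
def example_search_questions_usage():
    questions = {
        'Q1': 'how are glacier caves formed',
        'Q2': 'how much is 1 tablespoon of water',
    }
    hits = search_questions('localhost', 9090, questions, k=10)
    for question_id, search_result in hits.items():
        # Each value is a SearchResult whose searchResultItems list
        # has already been truncated to at most k entries.
        print(question_id, len(search_result.searchResultItems))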
def test_search_communications():
    impl = FooSearch()
    host = 'localhost'
    port = find_port()
    timeout = 5
    terms = ['foo', 'bar']
    query = SearchQuery(type=SearchType.COMMUNICATIONS,
                        terms=list(terms))
    with SubprocessSearchServiceWrapper(impl, host, port, timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocol(transport)
        cli = Search.Client(protocol)

        transport.open()
        res = cli.search(query)
        transport.close()

        assert res.uuid.uuidString == '12345678-1234-5678-1234-567812345678'
        assert len(res.searchResults) == 2
        assert res.searchResults[0].communicationId == 'foo'
        assert res.searchResults[0].score == 42.
        assert res.searchResults[1].communicationId == 'bar'
        assert res.searchResults[1].score == 42.
        assert res.metadata.tool == 'Foo Search'
def kscore(s):
    # Load the gold labels: sentence UUID -> "1" if the sentence
    # answers its question, "0" otherwise.
    answer_labels = {}
    with open("dev-match.tsv") as match:
        reader = csv.reader(match, delimiter="\t", quotechar="'")
        for row in reader:
            answer_labels[row[3]] = row[4]

    with gzip.open("WikiQA-dev.tsv.gz", 'rt') as wiki:
        reader = csv.reader(wiki, delimiter="\t", quotechar="'")
        next(reader)  # skip the header row
        used = {}
        k_vals = [1, 10, 100, 1000]
        # k_val_dict[k] = [queries with a correct hit in the top k,
        #                  queries with any labeled hit in the top k]
        k_val_dict = {k: [0, 0] for k in k_vals}
        for row in reader:
            query = row[1]
            # Strip characters the search service cannot handle.
            query = query.replace('"', "")
            query = query.replace("/", " ")
            query = query.replace("?", "")
            if query in used:
                continue
            used[query] = 0
            terms = query.split(" ")
            for k_val in k_vals:
                query1 = SearchQuery(type=SearchType.SENTENCES,
                                     terms=terms,
                                     k=k_val,
                                     rawQuery=query)
                results = s.search(query1)
                atK = 0
                totCorrect = 0
                hasAnswerInMatch = False
                for result in results.searchResultItems:
                    if atK == k_val:
                        break
                    atK += 1
                    try:
                        totCorrect += int(
                            answer_labels[result.sentenceId.uuidString])
                        hasAnswerInMatch = True
                    except KeyError:
                        # Unlabeled sentence; do not count it toward k.
                        atK -= 1
                if totCorrect >= 1:
                    k_val_dict[k_val][0] += 1
                if hasAnswerInMatch:
                    k_val_dict[k_val][1] += 1

    print("Baseline success @k")
    for k_val in k_vals:
        print("{}: {}".format(k_val,
                              k_val_dict[k_val][0] / k_val_dict[k_val][1]))
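# Toy illustration (all data made up) of the success@k bookkeeping in
# kscore above: for a single query and a single k, it computes the same
# numerator/denominator increments that kscore accumulates.
def _success_at_k_toy_example():
    labels = {'s1': '0', 's2': '1'}          # sentence UUID -> "1" if correct
    top_k_hits = ['s1', 's2', 'unlabeled']   # retrieved sentence UUIDs
    labeled = [labels[h] for h in top_k_hits if h in labels]
    numerator = int(any(label == '1' for label in labeled))
    denominator = int(bool(labeled))
    print(numerator, denominator)  # 1 1 -> this query counts as a success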
def search(self, string):
    # Strip characters the search service cannot handle.
    string = string.replace('"', "")
    string = string.replace("/", " ")
    string = string.replace("?", "")
    terms = string.split(" ")
    with SearchClientWrapper("172.18.0.4", 9090) as search_client:
        handler = SearchHandler(search_client, "wikiQA", "", "")
        query1 = SearchQuery(type=SearchType.SENTENCES,
                             terms=terms,
                             k=500,
                             rawQuery=string)
        return handler.search(query1)
def main():
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        description='Interface with a Concrete Search service')
    parser.add_argument('host',
                        help='Hostname of search service to which to'
                             ' connect.')
    parser.add_argument('port', type=int,
                        help='Port of search service to which to connect.')
    parser.add_argument('--http-lookup-url', type=str,
                        help='Look up result communication text from HTTP '
                             'service via provided URL template, for '
                             'example, http://localhost:3000/comm/id/%%s')
    parser.add_argument('--user-id', type=str,
                        help='user id to send to search service')
    concrete.version.add_argparse_argument(parser)
    ns = parser.parse_args()

    out_f = codecs.getwriter('utf-8')(sys.stdout)
    with SearchClientWrapper(ns.host, ns.port) as client:
        while True:
            try:
                line = raw_input('> ').strip().decode('utf-8')
            except EOFError:
                print
                break
            if line:
                terms = line.split()
                query = SearchQuery(terms=terms,
                                    type=SearchType.COMMUNICATIONS,
                                    userId=ns.user_id)
                results = client.search(query)
                for result in results.searchResults:
                    if ns.http_lookup_url:
                        out_f.write(
                            requests.get(ns.http_lookup_url %
                                         result.communicationId).text +
                            u'\n')
                    else:
                        out_f.write(result.communicationId + u'\n')
def execute_search_query(search_client, terms, k):
    logging.debug(u"executing query '{}'".format(u' '.join(terms)))
    query = SearchQuery(type=SearchType.COMMUNICATIONS, terms=terms, k=k)
    result = search_client.search(query)
    return [(item.communicationId, item.score)
            for item in result.searchResultItems]
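# A hedged usage sketch for execute_search_query (not from the original
# code): the host, port, and query terms are illustrative assumptions.
def example_execute_search_query_usage():
    with SearchClientWrapper('localhost', 9090) as client:
        for comm_id, score in execute_search_query(client,
                                                   ['foo', 'bar'], k=10):
            print(comm_id, score)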