class IRSearch(object):
    """Build Elasticsearch query bodies from configured templates and run them.

    Configuration (index name, doc type, per-search-type query templates) is
    read through ``Conf``. Two clients are created: ``self.es`` (used by
    ``Query``) and ``self.conn`` (used by ``querySingle``).
    """

    def __init__(self):
        self.conf = Conf()
        self.xml_path = self.conf.getConfig('path', 'xml_path')
        self.index_name = self.conf.getConfig('search', 'index_name')
        self.doc_type = self.conf.getConfig('search', 'doc_type')
        self.es = Elasticsearch()
        # Most recently built query body (see makeQuery).
        self.search_body = {}
        self.search_type_support = [
            'match_all', 'term', 'terms', 'match', 'multi_match',
            'bool', 'range', 'prefix', 'wildcard',
        ]
        # Mapping of search type -> query body template.
        # NOTE(review): each template is presumably shaped like
        # {"query": {<searchtype>: {...}}} -- confirm against Conf.
        self.search_type_model = self.conf.getSeachModel()
        # NOTE(review): ES/TermQuery look like a second client library
        # (possibly pyes) -- confirm.
        self.conn = ES('127.0.0.1:9200')
        self.search_result = None
        self.conn.default_indices = [self.index_name]

    def makeQuery(self, searchtype, searchfield, keyword, is_sort=False,
                  is_aggs=False, is_multi_match=False, use_bool=""):
        """Build the query body for one search and return it.

        Args:
            searchtype: One of ``self.search_type_support``.
            searchfield: Field name to search; for a multi-match query this
                is passed through as the ``fields`` value.
            keyword: Value to search for.
            is_sort: Accepted for interface compatibility; currently unused.
            is_aggs: Accepted for interface compatibility; currently unused.
            is_multi_match: When true, build a ``query``/``fields`` clause.
            use_bool: When non-empty (and ``is_multi_match`` is false), the
                key under which a single ``term`` clause list is stored,
                e.g. ``"must"``.

        Returns:
            The query body dict (also stored in ``self.search_body``), or
            ``None`` when ``searchtype`` is not supported.
        """
        import copy

        if searchtype not in self.search_type_support:
            print('Ops, your search type is not supported')
            print('Supported search types:\n')
            print(self.search_type_support)
            return None

        # Work on a private copy: mutating the template in place would leak
        # this query's fields into every later query of the same type.
        self.search_body = copy.deepcopy(self.search_type_model[searchtype])

        if is_multi_match:
            self.search_body["query"][searchtype] = {
                "query": keyword,
                "fields": searchfield,
            }
        elif use_bool:
            self.search_body["query"][searchtype][use_bool] = [
                {"term": {searchfield: keyword}},
            ]
        else:
            self.search_body["query"][searchtype][searchfield] = keyword

        print(self.search_body)
        return self.search_body

    def Query(self, searchtype, searchfield, keyword, is_sort=False,
              is_aggs=False, is_multi_match=False, use_bool=""):
        """Build a query with ``makeQuery`` and execute it via ``self.es``.

        Parameters are forwarded unchanged to ``makeQuery``.

        Returns:
            The response of ``self.es.search``, or ``None`` when the search
            type is unsupported (no request is sent in that case).
        """
        query_body = self.makeQuery(
            searchtype, searchfield, keyword,
            is_sort, is_aggs, is_multi_match, use_bool)
        if query_body is None:
            # makeQuery already reported the problem; sending body=None
            # would run an unintended search instead of failing.
            return None
        result = self.es.search(index=self.index_name,
                                doc_type=self.doc_type,
                                body=query_body)
        return result

    def querySingle(self, searchfield, keyword):
        """Run a single term query via ``self.conn``.

        The result is stored in ``self.search_result``; nothing is returned.
        """
        q = TermQuery(searchfield, keyword)
        self.search_result = self.conn.search(query=q)
class Test(object):
    """Ad-hoc helpers for poking at the configured Elasticsearch index."""

    def __init__(self):
        self.conf = Conf()
        self.xml_path = self.conf.getConfig("path", "xml_path")
        self.index_name = self.conf.getConfig("search", "index_name")
        self.doc_type = self.conf.getConfig("search", "doc_type")
        self.es = Elasticsearch(timeout=30, max_retries=10,
                                retry_on_timeout=True)
        self.search_body = {}
        self.search_type_support = [
            "match_all",
            "term",
            "terms",
            "match",
            "multi_match",
            "bool",
            "range",
            "prefix",
            "wildcard",
        ]
        self.search_type_model = self.conf.getSeachModel()

    def getCount(self):
        """Print the result of counting documents in the configured index."""
        print(self.es.count(index=self.index_name, doc_type=self.doc_type))

    def searchSingle(self, field="id_info", keyword="NCT02065063",
                     out_path="carcinoma", size=10000):
        """Run one ``match`` query, print each hit and log its ``id_info``.

        The defaults reproduce the original hard-coded lookup. Other
        queries previously tried by editing the code can now be passed in,
        e.g. ``searchSingle("detailed_description", "carcinoma")`` or
        ``searchSingle("id_info", "NCT00001431")``.

        Args:
            field: Document field to match on.
            keyword: Value to match.
            out_path: File to which each hit's ``id_info`` is appended, one
                per line. Not created when there are no hits.
            size: Maximum number of hits requested.
        """
        res = self.es.search(
            index=self.index_name,
            doc_type=self.doc_type,
            body={
                "query": {"match": {field: keyword}},
                "size": size,
            },
        )
        hits = res["hits"]["hits"]
        if not hits:
            return
        # Open the output once rather than re-opening it for every hit.
        with open(out_path, 'a') as f:
            for r in hits:
                print(r["_source"])
                f.write("{}\n".format(r["_source"]["id_info"]))

    def getPickles(self, pickle_path):
        """Load and return the object pickled at ``pickle_path``.

        NOTE(security): ``pickle.load`` can execute arbitrary code while
        deserializing; only call this on files from a trusted source.
        """
        with open(pickle_path, 'rb') as pf:
            data = pickle.load(pf)
        return data