def search_candidate(token):
    """Return candidate entities for *token*, memoizing Elasticsearch lookups.

    On a cache hit the stored result is returned directly; otherwise the
    token is queried against Elasticsearch and the result is cached.

    Args:
        token: the surface form to look up.

    Returns:
        The cached or freshly fetched candidate-entity items.
    """
    # Idiomatic membership test instead of calling __contains__ directly;
    # also drops the redundant `entities = None` pre-initialization.
    if token in entity_dict:
        return entity_dict[token]
    entities = search(ELASTICSEARCH, token).items()
    entity_dict[token] = entities  # memoize for subsequent lookups
    return entities
def search_candidate(token):
    """Return candidate entities for *token* (entity, id, score, label).

    Cached results are reused only when the token is English; any other
    token — or a cache miss — triggers a fresh Elasticsearch lookup whose
    result overwrites the cache entry.

    NOTE(review): non-English tokens bypass the cache *read* but still
    perform the cache *write* — confirm this asymmetry is intended.

    Args:
        token: the surface form to look up.

    Returns:
        The cached or freshly fetched candidate-entity items.
    """
    # Idiomatic membership test instead of calling __contains__ directly;
    # also drops the redundant `entities = None` pre-initialization.
    if token in entity_dict and isEnglish(token):
        return entity_dict[token]
    entities = search(ELASTICSEARCH, token).items()
    entity_dict[token] = entities
    return entities  # entity, id, score, label
def search(key, term):
    """Simple Elasticsearch Query: match *term* on field *key*.

    Fix: the original executed the query and printed it, but returned
    None (the response was discarded). The response is now returned so
    callers can inspect the hits; the print is kept for compatibility.

    Args:
        key: document field name to match on.
        term: value to match against that field.

    Returns:
        The raw Elasticsearch response dict from ``es.search``.
    """
    query = json.dumps({
        "query": {
            "match": {
                key: term
            }
        }
    })
    response = es.search(index='elements', body=query)
    print(formatprint(response))
    return response
def search(user, query, topic, target, tweet_date, tweet_date_end, page_number, page_size, bow, country='US'):
    """Search the 'twitter' index and return (total_page, page_number, hits).

    A query wrapped in double quotes is run as an exact phrase match on the
    tweet text; otherwise a boolean query is assembled from the query string
    plus optional topic / target-user / date-range filters, scored with a
    gauss decay that favours recent tweets. Known users and bag-of-words
    terms contribute soft (``should``) boosts.

    ``country`` is accepted for interface compatibility but unused here.
    """
    # Zero-based offset for the requested page.
    offset = (page_number - 1) * page_size

    american_users = ('PaulNicklen', 'KevinHart4real', 'ProfBrianCox',
                      'elonmusk', 'Snowden', 'jeffjarvis', 'JimCameron')
    english_users = ('iamjohnoliver', 'JeremyClarkson', 'JimWhite')

    # Soft preferences: a location boost for known users, plus one text
    # match per bag-of-words term.
    should = []
    if user in american_users:
        should.append({"term": {"location": 'American'}})
    if user in english_users:
        should.append({"term": {"location": 'English'}})
    should.extend({"match": {"text": word}} for word in bow)

    must = []
    is_phrase = query.startswith('"') and query.endswith('"')
    if is_phrase:
        # Exact phrase search: strip the quotes and match the phrase as-is.
        phrase = query.replace('"', '')
        query_body = {
            "size": page_size,
            "from": offset,
            "query": {
                "function_score": {
                    "query": {
                        "match_phrase": {
                            "text": phrase
                        }
                    }
                }
            }
        }
    else:
        must.append({"query_string": {
            "query": query,
            "default_field": "text"
        }})
        if topic != "All":
            must.append({"term": {"topic": topic}})
        if target != "":
            must.append({"term": {"user": target}})
        if tweet_date != "":
            must.append({
                "range": {
                    "date": {
                        "gte": tweet_date,
                        "lte": tweet_date_end
                    }
                }
            })
        query_body = {
            "size": page_size,
            "from": offset,
            "query": {
                "function_score": {
                    "query": {
                        "bool": {
                            "should": should,
                            "must": must,
                        }
                    },
                    # Recency bias: score decays with distance from origin.
                    "gauss": {
                        "date": {
                            "origin": "2020-01-01 00:00:00",
                            "scale": "30d",
                            "offset": "1d",
                            "decay": 0.3
                        }
                    }
                }
            }
        }

    res = es.search(index='twitter', body=query_body)
    total_page = math.ceil(res['hits']['total']['value'] / page_size)
    return total_page, page_number, res['hits']['hits']
'lte': 'now', 'time_zone': '-05:00' } } }] } }, 'sort': [{ 'timestamp': 'asc' }, { '_id': 'asc' }] } http_entries = {} for record in elasticsearch.search(query, start): flow_id = record['_source']['flow_id'] if 'hostname' not in record['_source']['http']: logging.warning( 'http record with http.hostname missing (flow_id = %d)', flow_id) continue hostname = record['_source']['http']['hostname'] if flow_id in http_entries: assert http_entries[flow_id]['hostname'] == hostname else: flow_record = elasticsearch.flow(flow_id, start) if flow_record is None: logging.warning('flow record with flow_id = %d missing', flow_id)