Exemple #1
0
def search_candidate(token):
    """Return the candidate entities for *token*, using ``entity_dict`` as a cache.

    On a cache hit the stored value is returned as-is.  On a miss the function
    calls ``search(ELASTICSEARCH, token)``, takes ``.items()`` of the result,
    stores that in ``entity_dict[token]`` and returns it.

    Args:
        token: Key to look up; must be usable as a key of ``entity_dict``.

    Returns:
        The cached value for *token*, or the freshly computed ``.items()``
        view of the search result.
    """
    # Use the membership operator rather than calling __contains__ directly.
    if token in entity_dict:
        return entity_dict[token]

    entities = search(ELASTICSEARCH, token).items()
    entity_dict[token] = entities  # remember the result for later calls
    return entities
Exemple #2
0
def search_candidate(token):
    """Return the candidate entities for *token*, using ``entity_dict`` as a cache.

    The cached value is used only when *token* is already in ``entity_dict``
    AND ``isEnglish(token)`` is truthy.  In every other case the function
    calls ``search(ELASTICSEARCH, token)``, takes ``.items()`` of the result,
    stores it in ``entity_dict[token]`` (overwriting any previous entry) and
    returns it.

    Args:
        token: Key to look up; must be usable as a key of ``entity_dict``.

    Returns:
        The cached value for *token*, or the freshly computed ``.items()``
        view of the search result.
    """
    # Membership is tested first so isEnglish() is only called for cached
    # tokens, exactly as before; `in` replaces the direct __contains__ call.
    if token in entity_dict and isEnglish(token):
        return entity_dict[token]

    entities = search(ELASTICSEARCH, token).items()
    entity_dict[token] = entities
    # NOTE(review): the original author's note lists the result fields as
    # "entity, id, score, label" -- not verifiable from this block; confirm.
    return entities
Exemple #3
0
def search(key, term):
    """Run a one-field ``match`` query on the 'elements' index and print it.

    The request body ``{"query": {"match": {key: term}}}`` is serialised to a
    JSON string and passed to ``es.search``.  The response is run through
    ``formatprint`` and the result is printed.

    Args:
        key: Field name used as the key of the ``match`` clause.
        term: Value to match against that field.

    Returns:
        None -- the function only prints; it has no ``return`` statement.
    """
    match_clause = {key: term}
    payload = {"query": {"match": match_clause}}
    # The body is sent as a JSON string, not as a dict.
    response = es.search(index='elements', body=json.dumps(payload))
    print(formatprint(response))
Exemple #4
0
def search(user,
           query,
           topic,
           target,
           tweet_date,
           tweet_date_end,
           page_number,
           page_size,
           bow,
           country='US'):
    """Search the 'twitter' index and return one page of hits.

    Two request shapes are produced:

    * If *query* both starts and ends with a double quote, every double quote
      is removed and a ``match_phrase`` query on ``text`` is sent.  In this
      branch *topic*, *target*, the date range and the ``should`` clauses are
      not part of the request.
    * Otherwise a ``bool`` query is sent: a ``query_string`` on ``text`` plus
      the optional filters below go into ``must``, the location/bag-of-words
      clauses go into ``should``, and a ``gauss`` decay on ``date`` is added.

    Args:
        user: Compared against two hard-coded account lists; a match adds a
            ``location`` term ('American' / 'English') to ``should``.
        query: Search text (see the two shapes above).
        topic: Added as a ``topic`` term filter unless equal to ``"All"``.
        target: Added as a ``user`` term filter unless equal to ``""``.
        tweet_date: Lower bound of the ``date`` range; ``""`` disables the
            range filter entirely.
        tweet_date_end: Upper bound of the ``date`` range; only used when
            *tweet_date* is not ``""``.
        page_number: Page to fetch; page 1 maps to offset 0.
        page_size: Number of hits per page.
        bow: Iterable of terms; each adds a ``match`` on ``text`` to ``should``.
        country: Not used by this function.

    Returns:
        A tuple ``(total_page, page_number, hits)`` where ``total_page`` is
        ``ceil(total hit count / page_size)`` and ``hits`` is
        ``res['hits']['hits']`` from the response.
    """
    # Offset of the first hit on the requested page.
    offset = page_size * page_number - page_size

    # Accounts that trigger a location clause, in the order they are checked.
    accounts_by_location = (
        ('American', [
            'PaulNicklen', 'KevinHart4real', 'ProfBrianCox', 'elonmusk',
            'Snowden', 'jeffjarvis', 'JimCameron'
        ]),
        ('English', ['iamjohnoliver', 'JeremyClarkson', 'JimWhite']),
    )

    # Optional clauses: location terms first, then one match per bow term.
    optional_clauses = [
        {"term": {"location": location}}
        for location, accounts in accounts_by_location
        if user in accounts
    ]
    optional_clauses.extend({"match": {"text": word}} for word in bow)

    quoted = query.startswith('"') and query.endswith('"')
    if quoted:
        # Exact-phrase search: strip every double quote and match the phrase.
        phrase = query.replace('"', '')
        scoring = {"query": {"match_phrase": {"text": phrase}}}
    else:
        required_clauses = [{
            "query_string": {
                "query": query,
                "default_field": "text"
            }
        }]
        if topic != "All":
            required_clauses.append({"term": {"topic": topic}})
        if target != "":
            required_clauses.append({"term": {"user": target}})
        if tweet_date != "":
            date_window = {"gte": tweet_date, "lte": tweet_date_end}
            required_clauses.append({"range": {"date": date_window}})

        scoring = {
            "query": {
                "bool": {
                    "should": optional_clauses,
                    "must": required_clauses,
                }
            },
            # Decay function on the date field with a fixed origin.
            "gauss": {
                "date": {
                    "origin": "2020-01-01 00:00:00",
                    "scale": "30d",
                    "offset": "1d",
                    "decay": 0.3
                }
            }
        }

    query_body = {
        "size": page_size,
        "from": offset,
        "query": {
            "function_score": scoring
        }
    }

    res = es.search(index='twitter', body=query_body)
    hits = res['hits']
    total_page = math.ceil(hits['total']['value'] / page_size)

    return total_page, page_number, hits['hits']
Exemple #5
0
                            'lte': 'now',
                            'time_zone': '-05:00'
                        }
                    }
                }]
            }
        },
        'sort': [{
            'timestamp': 'asc'
        }, {
            '_id': 'asc'
        }]
    }

    http_entries = {}
    for record in elasticsearch.search(query, start):
        flow_id = record['_source']['flow_id']
        if 'hostname' not in record['_source']['http']:
            logging.warning(
                'http record with http.hostname missing (flow_id = %d)',
                flow_id)
            continue
        hostname = record['_source']['http']['hostname']

        if flow_id in http_entries:
            assert http_entries[flow_id]['hostname'] == hostname
        else:
            flow_record = elasticsearch.flow(flow_id, start)
            if flow_record is None:
                logging.warning('flow record with flow_id = %d missing',
                                flow_id)