def wordCloud():
    """Return word-cloud data for the tweets matching the request filters.

    Reads the filter payload from ``request.json``, builds the shared bool
    query, then runs a sampled significant-text aggregation over the
    ``full_text_processed`` field of the ``twitter`` index.

    Returns:
        Tuple of ``({'wordCloud': [{'text', 'value'}, ...]}, 200)``.
    """
    print(request.json)
    sys.stdout.flush()

    query = createQueryFilters(request.json)
    # Sample up to 50k docs per shard, then keep the 100 most significant terms.
    search_body = {
        'query': query,
        "aggregations": {
            "my_sample": {
                "sampler": {"shard_size": 50000},
                "aggregations": {
                    "wordCloud": {
                        "significant_text": {
                            "size": 100,
                            "field": "full_text_processed",
                        }
                    }
                }
            }
        }
    }

    res = es.search(index='twitter', body=search_body)
    buckets = res['aggregations']['my_sample']['wordCloud']['buckets']
    cloud = [{'text': bucket['key'], 'value': bucket['doc_count']}
             for bucket in buckets]
    return {'wordCloud': cloud}, 200
def createTableRows(filters):
    """Build and run the elasticsearch query that backs the tweet table.

    Args:
        filters: dict from the frontend (more fields used in
            helpers/filters.py). Fields read here:
            sort - field the table is sorted on ('id' disables sorting)
            direction - 'asc' or 'desc' for the sort field
            size - page size of the elasticsearch query
            from - offset document in the db; used when scrolling to load
                more documents
            search - optional search text; enables highlighting

    Returns:
        The 'hits' section of the elasticsearch response.
    """
    query = createQueryFilters(filters)
    body = {'query': query}

    if filters['sort'] != 'id':
        body['sort'] = [
            {filters['sort']: {"order": filters["direction"]}},
            "_score",
        ]

    # NOTE(review): fetches double the requested page size — presumably so the
    # frontend has spare rows after client-side filtering; confirm with caller.
    body['size'] = filters['size'] * 2
    body['from'] = filters['from']

    if filters.get('search'):
        # Exclude retweets from search results and build the highlight section
        # so matched text comes back wrapped in <mark><b> tags.
        query['bool']['must'].append(
            {"match": {"is_retweet": {"query": False}}})
        body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": {
                        "bool": {
                            "must": [{
                                "match": {
                                    "full_text_trans": {
                                        "query": filters['search']
                                    }
                                }
                            }]
                        }
                    }
                }
            }
        }

    sys.stdout.flush()
    # Actually search the database.
    rows = es.search(index='twitter', body=body)
    return rows['hits']
def terms():
    """Return the tag term buckets of the 'artesanato' product index as JSON."""
    agg_body = {
        'size': 0,  # aggregation only — no document hits needed
        'aggregations': {
            'titles': {
                'terms': {'field': 'tags'}
            }
        }
    }
    response = es.search(index='artesanato', doc_type='product', body=agg_body)
    return json.dumps(response['aggregations']['titles']['buckets'])
def createTableRows(filters):
    """Run the table query against the 'twitter' index and return its hits.

    Args:
        filters: frontend filter dict (see helpers/filters.py); uses
            'sort', 'direction', 'size', 'from' and optionally 'search'.

    Returns:
        The 'hits' section of the elasticsearch response.
    """
    query = createQueryFilters(filters)

    sort_clause = None
    if filters['sort'] != 'id':
        sort_clause = [
            {filters['sort']: {"order": filters["direction"]}},
            "_score",
        ]

    body = {'query': query}
    if sort_clause is not None:
        body['sort'] = sort_clause
    body['size'] = filters['size'] * 2
    body['from'] = filters['from']

    search_text = filters['search'] if 'search' in filters else None
    if search_text:
        # Drop retweets when searching, and highlight matches in the
        # translated text with <mark><b> tags.
        query['bool']['must'].append(
            {"match": {"is_retweet": {"query": False}}})
        highlight_query = {
            "bool": {
                "must": [{
                    "match": {
                        "full_text_trans": {"query": search_text}
                    }
                }]
            }
        }
        body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": highlight_query
                }
            }
        }

    sys.stdout.flush()
    return es.search(index='twitter', body=body)['hits']
def resolve_products(self, args, context, info):
    """GraphQL resolver: return products, optionally filtered by a query string.

    Reads an optional 'filter' argument from the request context and passes
    it through as a lucene query string ('q') to elasticsearch.
    """
    params = {
        'index': 'artesanato',
        'doc_type': 'product',
    }
    query_string = context.args.get('filter', '')
    if query_string:
        params['q'] = query_string

    hits = es.search(**params)['hits']['hits']
    return [formatProduct(hit) for hit in hits]
def wordCloud():
    """Takes in filters and returns data for the word-cloud visualization.

    POST Body: check helpers/filters for all the filters used in the code.

    Yields:
        wordCloud - data in the format expected by the word-cloud component.
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)

    # Sampler caps the docs considered per shard; significant_text then picks
    # the 100 terms most characteristic of the sampled set.
    sampler_agg = {
        "sampler": {"shard_size": 50000},
        "aggregations": {
            "wordCloud": {
                "significant_text": {
                    "size": 100,
                    "field": "full_text_processed",
                }
            }
        }
    }
    res = es.search(
        index='twitter',
        body={'query': query, "aggregations": {"my_sample": sampler_agg}},
    )

    return {
        'wordCloud': [
            {'text': item['key'], 'value': item['doc_count']}
            for item in res['aggregations']['my_sample']['wordCloud']['buckets']
        ]
    }, 200
def dashboard():
    """Takes in filters and returns data for the dashboard visualizations.

    POST Body: check helpers/filters for all the filters used in the code.

    Yields:
        langPie - format of data for language pie chart
        povPie - format of data for point-of-view pie chart
        topicsBar - bar chart of top topics
        topicLine - line chart of the topics for each day
        avgSent - card for average sentiment
        uniqueUsers - number of unique users
        tweetCount - total tweet count
        rewtweetCount - total retweets
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {"terms": {"field": "lang.keyword"}},
            "povPie": {"terms": {"field": "pov.keyword"}},
            "topicsBar": {"terms": {"field": "topics.keyword"}},
            "avgSent": {"avg": {"field": "sentiment"}},
            "uniqueUsers": {"cardinality": {"field": "user_id"}},
            "rewtweetCount": {"terms": {"field": "is_retweet"}},
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # Bucket width in ms: 86400000 = 1 day
                            # (259200000 = 3 days, 604800000 = 1 week).
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }
    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})

    # One line per topic; histogram keys are epoch millis, shown as mm/dd/yyyy.
    topicLine = []
    for topic in res['aggregations']['topics']['buckets']:
        topicLine.append({
            "id": topic['key'],
            'data': [{
                'x': datetime.fromtimestamp(date['key'] / 1000)
                             .strftime('%m/%d/%Y'),
                'y': date['doc_count']
            } for date in topic['dates']['buckets']]
        })

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in res['aggregations']['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in res['aggregations']['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in res['aggregations']['topicsBar']['buckets']]

    # The is_retweet terms agg yields at most two buckets (keys 0 and 1).
    # FIX: check for an empty bucket list first — previously buckets[0] was
    # indexed unconditionally and raised IndexError when no documents matched.
    buckets = res['aggregations']['rewtweetCount']['buckets']
    if not buckets:
        retweetCount = 0
    elif buckets[0]['key'] == 1:
        retweetCount = buckets[0]['doc_count']
    elif len(buckets) == 1:
        # Only the non-retweet bucket exists.
        retweetCount = 0
    else:
        retweetCount = buckets[1]['doc_count']

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': res['aggregations']['avgSent']['value'],
        'uniqueUsers': res['aggregations']['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200
def createNetwork(userID):
    """Build the ego network (nodes and links) around a user.

    Args:
        userID: id of the centre node in the 'users' index.

    Returns:
        Tuple of (nodes, links) lists formatted for the frontend graph
        component: centre node at depth 0, its neighbours at depth 1, and
        their neighbours at depth 2.
    """
    res = es.get(index="users", id=userID)
    ids = []
    links = []
    results = res['_source']['edges']
    # Limit the amount of level-1 nodes, otherwise this can crash browsers.
    if len(results) > 300:
        results = results[:300]

    # Level 1: create a link from the centre to each neighbour.
    for edge in results:
        ids.append(edge["target"])
        links.append({
            'source': userID,
            'target': edge['target'],
            'distance': edge['weighted_dist']
        })

    # FIX: track seen ids in a set — the original used a list, making every
    # membership test below O(n) and the loops quadratic overall.
    seen = {userID}
    nodes = [{
        'id': userID,
        'name': res['_source']['name'],
        "radius": 12,
        "depth": 0,
        "color": "rgb(244, 117, 96)"
    }]

    # Fetch all level-1 users in a single query.
    body = {
        'query': {
            'ids': {
                'type': '_doc',
                'values': ids
            }
        },
        'size': len(ids)
    }
    res = es.search(index='users', body=body)

    edges = []
    for user in res['hits']['hits']:
        uid = int(user['_id'])
        if uid not in seen:
            nodes.append({
                'id': uid,
                'name': user['_source']['name'],
                'radius': 8,
                'depth': 1,
                'color': 'rgb(97, 205, 187)'
            })
            seen.add(uid)
            edges = edges + user['_source']['edges']

    # Level 2: add each neighbour-of-neighbour node and its link once.
    for edge in edges:
        if edge['target'] not in seen:
            nodes.append({
                'id': edge['target'],
                'name': '',
                'radius': 4,
                'depth': 2,
                'color': 'rgb(232, 193, 160)'
            })
            seen.add(edge['target'])
            links.append({
                'source': edge['source'],
                'target': edge['target'],
                'distance': edge['weighted_dist']
            })

    # Diagnostic: node count vs unique node count (duplicates indicate a bug).
    nodeID = [n['id'] for n in nodes]
    print(len(nodeID), len(list(set(nodeID))))
    sys.stdout.flush()
    return nodes, links
def dashboard():
    """Run the dashboard aggregations and return chart-ready data.

    Reads the filter payload from ``request.json``.

    Returns:
        Tuple of (dict, 200) with keys: langPie, povPie, topicsBar,
        topicLine, avgSent, uniqueUsers, rewtweetCount, tweetCount.
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {"terms": {"field": "lang.keyword"}},
            "povPie": {"terms": {"field": "pov.keyword"}},
            "topicsBar": {"terms": {"field": "topics.keyword"}},
            "avgSent": {"avg": {"field": "sentiment"}},
            "uniqueUsers": {"cardinality": {"field": "user_id"}},
            "rewtweetCount": {"terms": {"field": "is_retweet"}},
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # Histogram interval in ms: 86400000 = one day
                            # (259200000 = 3 days, 604800000 = one week).
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }
    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})

    # One series per topic; histogram keys are epoch millis.
    topicLine = []
    for topic in res['aggregations']['topics']['buckets']:
        topicLine.append({
            "id": topic['key'],
            'data': [{
                'x': datetime.fromtimestamp(date['key'] / 1000)
                             .strftime('%m/%d/%Y'),
                'y': date['doc_count']
            } for date in topic['dates']['buckets']]
        })

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in res['aggregations']['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in res['aggregations']['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in res['aggregations']['topicsBar']['buckets']]

    # The is_retweet terms agg yields at most two buckets (keys 0 and 1).
    # FIX: guard against an empty bucket list — previously buckets[0] was
    # indexed unconditionally and raised IndexError when no documents matched.
    buckets = res['aggregations']['rewtweetCount']['buckets']
    if not buckets:
        retweetCount = 0
    elif buckets[0]['key'] == 1:
        retweetCount = buckets[0]['doc_count']
    elif len(buckets) == 1:
        # Only the non-retweet bucket exists.
        retweetCount = 0
    else:
        retweetCount = buckets[1]['doc_count']

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': res['aggregations']['avgSent']['value'],
        'uniqueUsers': res['aggregations']['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200
def createNetwork(userID):
    """Creates the nodes and links from the users database.

    Args:
        userID: userid of the center node.

    Returns:
        Tuple of (nodes, links) for the frontend graph: center at depth 0,
        direct neighbours at depth 1, their neighbours at depth 2.
    """
    # Gets the user of the center.
    res = es.get(index="users", id=userID)
    ids = []
    links = []
    results = res['_source']['edges']
    # Limit the amount of nodes otherwise this can crash user browsers.
    if len(results) > 300:
        results = results[0:300]

    # Go through all the 1st level nodes and create links while keeping track
    # of users seen.
    for edge in results:
        ids.append(edge["target"])
        links.append({
            'source': userID,
            'target': edge['target'],
            'distance': edge['weighted_dist']
        })

    # FIX: a set replaces the original list of seen ids — list membership is
    # O(n) per check, which made both loops below quadratic.
    nodeIDs = {userID}
    nodes = [{
        'id': userID,
        'name': res['_source']['name'],
        "radius": 12,
        "depth": 0,
        "color": "rgb(244, 117, 96)"
    }]

    # Query the database now for information on all level 1 nodes.
    body = {
        'query': {
            'ids': {
                'type': '_doc',
                'values': ids
            }
        },
        'size': len(ids)
    }
    res = es.search(index='users', body=body)

    edges = []
    # Go through and add data for all level 1 nodes.
    for user in res['hits']['hits']:
        user_id = int(user['_id'])
        if user_id not in nodeIDs:
            nodes.append({
                'id': user_id,
                'name': user['_source']['name'],
                'radius': 8,
                'depth': 1,
                'color': 'rgb(97, 205, 187)'
            })
            nodeIDs.add(user_id)
            edges = edges + user['_source']['edges']

    # Go through now and for each level 2 node create the nodes and the links
    # if it's not already added.
    for edge in edges:
        if edge['target'] not in nodeIDs:
            nodes.append({
                'id': edge['target'],
                'name': '',
                'radius': 4,
                'depth': 2,
                'color': 'rgb(232, 193, 160)'
            })
            nodeIDs.add(edge['target'])
            links.append({
                'source': edge['source'],
                'target': edge['target'],
                'distance': edge['weighted_dist']
            })

    # Diagnostic: compare total vs unique node ids to catch duplicate nodes.
    nodeID = [n['id'] for n in nodes]
    print(len(nodeID), len(list(set(nodeID))))
    sys.stdout.flush()
    return nodes, links