Esempio n. 1
0
def createTableRows(filters):
    """
    Builds the elasticsearch request for the tweet table from the frontend
    filters, runs it against the 'twitter' index and returns the hits

    Args: filters with the fields below (the whole object is also handed to
    createQueryFilters, which builds the query part)
    sort - what field the table should be sorted on, 'id' means no sort clause
    direction - sort order that is put in the sort clause
    size - page size, twice this amount is requested from elasticsearch
    from - what document to start with, used when scrolling to load more rows
    search - optional text, when set retweets are left out and the matching
             text in full_text_trans is highlighted
    Returns:
        The 'hits' part of the elasticsearch response
    """
    search_body = {'query': createQueryFilters(filters)}

    sort_field = filters['sort']
    if sort_field != 'id':
        # requested field first, then _score
        ordering = {sort_field: {"order": filters["direction"]}}
        search_body['sort'] = [ordering, "_score"]

    search_body['size'] = filters['size'] * 2
    search_body['from'] = filters['from']

    search_text = filters.get('search')
    if search_text:
        # a text search also requires is_retweet to match False
        search_body['query']['bool']['must'].append(
            {"match": {"is_retweet": {"query": False}}})

        # highlight only the searched text, wrapped in the tags below
        highlight_match = {
            "match": {
                "full_text_trans": {
                    "query": search_text
                }
            }
        }
        search_body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": {
                        "bool": {
                            "must": [highlight_match]
                        }
                    }
                }
            }
        }

    sys.stdout.flush()
    # actually search the database
    response = es.search(index='twitter', body=search_body)
    return response['hits']
Esempio n. 2
0
def wordCloud():
    """
    Takes the filters from the request body and returns the word cloud data

    POST Body
        filters, passed as-is to createQueryFilters

    Returns
        ({'wordCloud': [{'text': term, 'value': doc_count}, ...]}, 200)
    """
    filters = request.json
    print(filters)
    sys.stdout.flush()

    # up to 100 significant terms of full_text_processed
    significant_text_agg = {
        "significant_text": {
            "size": 100,
            "field": "full_text_processed",
        }
    }
    # the terms are computed inside a sampler aggregation with shard_size 50000
    sampled_agg = {
        "sampler": {
            "shard_size": 50000
        },
        "aggregations": {
            "wordCloud": significant_text_agg
        }
    }
    search_body = {
        'query': createQueryFilters(filters),
        "aggregations": {
            "my_sample": sampled_agg
        }
    }
    response = es.search(index='twitter', body=search_body)

    words = []
    for bucket in response['aggregations']['my_sample']['wordCloud']['buckets']:
        words.append({'text': bucket['key'], 'value': bucket['doc_count']})

    return {'wordCloud': words}, 200
Esempio n. 3
0
def createTableRows(filters):
    """
    Builds the elasticsearch request for the tweet table from the frontend
    filters, runs it against the 'twitter' index and returns the hits

    Args: filters with the fields below (the whole object is also handed to
    createQueryFilters, which builds the query part)
    sort - what field the table should be sorted on, 'id' means no sort clause
    direction - sort order that is put in the sort clause
    size - page size, twice this amount is requested from elasticsearch
    from - what document to start with
    search - optional text, when set retweets are left out and the matching
             text in full_text_trans is highlighted
    Returns:
        The 'hits' part of the elasticsearch response
    """
    es_query = createQueryFilters(filters)
    request_body = {'query': es_query}

    sort_on = filters['sort']
    if sort_on != 'id':
        # requested field first, then _score
        request_body['sort'] = [
            {sort_on: {"order": filters["direction"]}},
            "_score",
        ]

    request_body['size'] = filters['size'] * 2
    request_body['from'] = filters['from']

    search_text = filters.get('search')
    if search_text:
        # a text search also requires is_retweet to match False
        retweet_clause = {"match": {"is_retweet": {"query": False}}}
        es_query['bool']['must'].append(retweet_clause)

        # highlight only the searched text, wrapped in the tags below
        text_clause = {"match": {"full_text_trans": {"query": search_text}}}
        request_body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": {
                        "bool": {
                            "must": [text_clause]
                        }
                    }
                }
            }
        }

    sys.stdout.flush()
    return es.search(index='twitter', body=request_body)['hits']
Esempio n. 4
0
def wordCloud():
    """
    Takes in filters and returns data for visualization of the word cloud

    POST Body
        check helpers/filters for all the filters used in the code

    Returns
        wordCloud - list of {'text': term, 'value': doc_count} entries,
                    together with status code 200
    """
    posted_filters = request.json
    print(posted_filters)
    sys.stdout.flush()

    search_body = {'query': createQueryFilters(posted_filters)}
    # significant_text (up to 100 terms of full_text_processed) is nested in a
    # sampler aggregation with shard_size 50000
    search_body["aggregations"] = {
        "my_sample": {
            "sampler": {
                "shard_size": 50000
            },
            "aggregations": {
                "wordCloud": {
                    "significant_text": {
                        "size": 100,
                        "field": "full_text_processed",
                    }
                }
            }
        }
    }
    response = es.search(index='twitter', body=search_body)

    term_buckets = response['aggregations']['my_sample']['wordCloud']['buckets']
    cloud = []
    for bucket in term_buckets:
        cloud.append({'text': bucket['key'], 'value': bucket['doc_count']})

    return {'wordCloud': cloud}, 200
Esempio n. 5
0
def dashboard():
    """
    Takes in filters and returns data for visualizations

    POST Body
        check helpers/filters for all the filters used in the code

    Returns
        A (payload, 200) tuple, the payload has:
        langPie - language pie chart data: id, label (upper-cased key), value
        povPie - point of view pie chart data: id, label (capitalized key), value
        topicsBar - bar chart of top topics: topic, value
        topicLine - line chart data, for each of the top 10 topics a list of
                    {'x': 'MM/DD/YYYY', 'y': doc_count} points, one per day bucket

        avgSent - average of the sentiment field
        uniqueUsers - cardinality aggregation on user_id
        tweetCount - total count of documents matching the filters
        rewtweetCount - count of documents whose is_retweet is true, 0 if none
    """
    print(request.json)
    sys.stdout.flush()

    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {
                "terms": {
                    "field": "lang.keyword"
                }
            },
            "povPie": {
                "terms": {
                    "field": "pov.keyword"
                }
            },
            "topicsBar": {
                "terms": {
                    "field": "topics.keyword"
                }
            },
            "avgSent": {
                "avg": {
                    "field": "sentiment"
                }
            },
            "uniqueUsers": {
                "cardinality": {
                    "field": "user_id"
                }
            },
            "rewtweetCount": {
                "terms": {
                    "field": "is_retweet"
                }
            },
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # one day in ms (3 days = 259200000, a week = 604800000)
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }

    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})
    aggs = res['aggregations']

    # NOTE(review): fromtimestamp uses the server's local timezone; the bucket
    # keys are divided by 1000 here, so they look like epoch milliseconds.
    # Confirm the server runs in UTC, otherwise the day labels may shift.
    topicLine = []
    for topic in aggs['topics']['buckets']:
        points = [{
            'x': datetime.fromtimestamp(date['key'] / 1000).strftime('%m/%d/%Y'),
            'y': date['doc_count']
        } for date in topic['dates']['buckets']]
        topicLine.append({"id": topic['key'], 'data': points})

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in aggs['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in aggs['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in aggs['topicsBar']['buckets']]

    # Take the bucket whose key is 1 (is_retweet true) wherever it sits and
    # default to 0. Indexing buckets[0] directly raised IndexError when the
    # aggregation came back with no buckets (e.g. nothing matched the filters).
    retweetCount = next(
        (bucket['doc_count']
         for bucket in aggs['rewtweetCount']['buckets']
         if bucket['key'] == 1),
        0)

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': aggs['avgSent']['value'],
        'uniqueUsers': aggs['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200
Esempio n. 6
0
def dashboard():
    """
    Takes the filters from the request body and returns the dashboard data

    POST Body
        filters, passed as-is to createQueryFilters

    Returns
        A (payload, 200) tuple, the payload has:
        langPie - language pie chart data: id, label (upper-cased key), value
        povPie - point of view pie chart data: id, label (capitalized key), value
        topicsBar - bar chart of top topics: topic, value
        topicLine - line chart data, for each of the top 10 topics a list of
                    {'x': 'MM/DD/YYYY', 'y': doc_count} points, one per day bucket
        avgSent - average of the sentiment field
        uniqueUsers - cardinality aggregation on user_id
        rewtweetCount - count of documents whose is_retweet is true, 0 if none
        tweetCount - total count of documents matching the filters
    """
    print(request.json)
    sys.stdout.flush()

    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {
                "terms": {
                    "field": "lang.keyword"
                }
            },
            "povPie": {
                "terms": {
                    "field": "pov.keyword"
                }
            },
            "topicsBar": {
                "terms": {
                    "field": "topics.keyword"
                }
            },
            "avgSent": {
                "avg": {
                    "field": "sentiment"
                }
            },
            "uniqueUsers": {
                "cardinality": {
                    "field": "user_id"
                }
            },
            "rewtweetCount": {
                "terms": {
                    "field": "is_retweet"
                }
            },
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # one day in ms (3 days = 259200000, a week = 604800000)
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }

    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})
    aggregations = res['aggregations']

    # NOTE(review): fromtimestamp uses the server's local timezone; the bucket
    # keys are divided by 1000 here, so they look like epoch milliseconds.
    # Confirm the server runs in UTC, otherwise the day labels may shift.
    topicLine = []
    for topic in aggregations['topics']['buckets']:
        daily_points = [{
            'x': datetime.fromtimestamp(date['key'] / 1000).strftime('%m/%d/%Y'),
            'y': date['doc_count']
        } for date in topic['dates']['buckets']]
        topicLine.append({"id": topic['key'], 'data': daily_points})

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in aggregations['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in aggregations['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in aggregations['topicsBar']['buckets']]

    # Look for the bucket with key 1 (is_retweet true) instead of indexing
    # buckets[0]/[1]: the old indexing raised IndexError when the aggregation
    # had no buckets at all (e.g. nothing matched the filters). Without a
    # key-1 bucket the retweet count is 0.
    retweetCount = 0
    for bucket in aggregations['rewtweetCount']['buckets']:
        if bucket['key'] == 1:
            retweetCount = bucket['doc_count']
            break

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': aggregations['avgSent']['value'],
        'uniqueUsers': aggregations['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200