def wordCloud():
    """Return word-cloud data for the tweets matching the request filters.

    Reads the filter payload from ``request.json``, builds the shared bool
    query, then runs a sampled significant-text aggregation over the
    ``full_text_processed`` field of the ``twitter`` index.

    Returns:
        Tuple of ``({'wordCloud': [{'text', 'value'}, ...]}, 200)``.
    """
    print(request.json)
    sys.stdout.flush()

    query = createQueryFilters(request.json)
    # Sample up to 50k docs per shard, then keep the 100 most significant terms.
    search_body = {
        'query': query,
        "aggregations": {
            "my_sample": {
                "sampler": {"shard_size": 50000},
                "aggregations": {
                    "wordCloud": {
                        "significant_text": {
                            "size": 100,
                            "field": "full_text_processed",
                        }
                    }
                }
            }
        }
    }

    res = es.search(index='twitter', body=search_body)
    buckets = res['aggregations']['my_sample']['wordCloud']['buckets']
    cloud = [{'text': bucket['key'], 'value': bucket['doc_count']}
             for bucket in buckets]
    return {'wordCloud': cloud}, 200
def createTableRows(filters):
    """Build and run the elasticsearch query that backs the tweet table.

    Args:
        filters: dict from the frontend (more fields used in
            helpers/filters.py). Fields read here:
            sort - field the table is sorted on ('id' disables sorting)
            direction - 'asc' or 'desc' for the sort field
            size - page size of the elasticsearch query
            from - offset document in the db; used when scrolling to load
                more documents
            search - optional search text; enables highlighting

    Returns:
        The 'hits' section of the elasticsearch response.
    """
    query = createQueryFilters(filters)
    body = {'query': query}

    if filters['sort'] != 'id':
        body['sort'] = [
            {filters['sort']: {"order": filters["direction"]}},
            "_score",
        ]

    # NOTE(review): fetches double the requested page size — presumably so the
    # frontend has spare rows after client-side filtering; confirm with caller.
    body['size'] = filters['size'] * 2
    body['from'] = filters['from']

    if filters.get('search'):
        # Exclude retweets from search results and build the highlight section
        # so matched text comes back wrapped in <mark><b> tags.
        query['bool']['must'].append(
            {"match": {"is_retweet": {"query": False}}})
        body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": {
                        "bool": {
                            "must": [{
                                "match": {
                                    "full_text_trans": {
                                        "query": filters['search']
                                    }
                                }
                            }]
                        }
                    }
                }
            }
        }

    sys.stdout.flush()
    # Actually search the database.
    rows = es.search(index='twitter', body=body)
    return rows['hits']
def terms():
    """Return the tag term buckets of the 'artesanato' product index as JSON."""
    agg_body = {
        'size': 0,  # aggregation only — no document hits needed
        'aggregations': {
            'titles': {
                'terms': {'field': 'tags'}
            }
        }
    }
    response = es.search(index='artesanato', doc_type='product', body=agg_body)
    return json.dumps(response['aggregations']['titles']['buckets'])
def createTableRows(filters):
    """Run the table query against the 'twitter' index and return its hits.

    Args:
        filters: frontend filter dict (see helpers/filters.py); uses
            'sort', 'direction', 'size', 'from' and optionally 'search'.

    Returns:
        The 'hits' section of the elasticsearch response.
    """
    query = createQueryFilters(filters)

    sort_clause = None
    if filters['sort'] != 'id':
        sort_clause = [
            {filters['sort']: {"order": filters["direction"]}},
            "_score",
        ]

    body = {'query': query}
    if sort_clause is not None:
        body['sort'] = sort_clause
    body['size'] = filters['size'] * 2
    body['from'] = filters['from']

    search_text = filters['search'] if 'search' in filters else None
    if search_text:
        # Drop retweets when searching, and highlight matches in the
        # translated text with <mark><b> tags.
        query['bool']['must'].append(
            {"match": {"is_retweet": {"query": False}}})
        highlight_query = {
            "bool": {
                "must": [{
                    "match": {
                        "full_text_trans": {"query": search_text}
                    }
                }]
            }
        }
        body['highlight'] = {
            "pre_tags": ["<mark><b>"],
            "post_tags": ["</b></mark>"],
            "fragment_size": 500,
            "fields": {
                "full_text_trans": {
                    "highlight_query": highlight_query
                }
            }
        }

    sys.stdout.flush()
    return es.search(index='twitter', body=body)['hits']
def resolve_products(self, args, context, info):
    """GraphQL resolver: return products, optionally filtered by a query string.

    Reads an optional 'filter' argument from the request context and passes
    it through as a lucene query string ('q') to elasticsearch.
    """
    params = {
        'index': 'artesanato',
        'doc_type': 'product',
    }
    query_string = context.args.get('filter', '')
    if query_string:
        params['q'] = query_string

    hits = es.search(**params)['hits']['hits']
    return [formatProduct(hit) for hit in hits]
def wordCloud():
    """Takes in filters and returns data for the word-cloud visualization.

    POST Body: check helpers/filters for all the filters used in the code.

    Yields:
        wordCloud - data in the format expected by the word-cloud component.
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)

    # Sampler caps the docs considered per shard; significant_text then picks
    # the 100 terms most characteristic of the sampled set.
    sampler_agg = {
        "sampler": {"shard_size": 50000},
        "aggregations": {
            "wordCloud": {
                "significant_text": {
                    "size": 100,
                    "field": "full_text_processed",
                }
            }
        }
    }
    res = es.search(
        index='twitter',
        body={'query': query, "aggregations": {"my_sample": sampler_agg}},
    )

    return {
        'wordCloud': [
            {'text': item['key'], 'value': item['doc_count']}
            for item in res['aggregations']['my_sample']['wordCloud']['buckets']
        ]
    }, 200
def dashboard():
    """Takes in filters and returns data for the dashboard visualizations.

    POST Body: check helpers/filters for all the filters used in the code.

    Yields:
        langPie - format of data for language pie chart
        povPie - format of data for point-of-view pie chart
        topicsBar - bar chart of top topics
        topicLine - line chart of the topics for each day
        avgSent - card for average sentiment
        uniqueUsers - number of unique users
        tweetCount - total tweet count
        rewtweetCount - total retweets
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {"terms": {"field": "lang.keyword"}},
            "povPie": {"terms": {"field": "pov.keyword"}},
            "topicsBar": {"terms": {"field": "topics.keyword"}},
            "avgSent": {"avg": {"field": "sentiment"}},
            "uniqueUsers": {"cardinality": {"field": "user_id"}},
            "rewtweetCount": {"terms": {"field": "is_retweet"}},
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # Bucket width in ms: 86400000 = 1 day
                            # (259200000 = 3 days, 604800000 = 1 week).
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }
    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})

    # One line per topic; histogram keys are epoch millis, shown as mm/dd/yyyy.
    topicLine = []
    for topic in res['aggregations']['topics']['buckets']:
        topicLine.append({
            "id": topic['key'],
            'data': [{
                'x': datetime.fromtimestamp(date['key'] / 1000)
                             .strftime('%m/%d/%Y'),
                'y': date['doc_count']
            } for date in topic['dates']['buckets']]
        })

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in res['aggregations']['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in res['aggregations']['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in res['aggregations']['topicsBar']['buckets']]

    # The is_retweet terms agg yields at most two buckets (keys 0 and 1).
    # FIX: check for an empty bucket list first — previously buckets[0] was
    # indexed unconditionally and raised IndexError when no documents matched.
    buckets = res['aggregations']['rewtweetCount']['buckets']
    if not buckets:
        retweetCount = 0
    elif buckets[0]['key'] == 1:
        retweetCount = buckets[0]['doc_count']
    elif len(buckets) == 1:
        # Only the non-retweet bucket exists.
        retweetCount = 0
    else:
        retweetCount = buckets[1]['doc_count']

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': res['aggregations']['avgSent']['value'],
        'uniqueUsers': res['aggregations']['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200
def createNetwork(userID):
    """Build the ego network (nodes and links) around a user.

    Args:
        userID: id of the centre node in the 'users' index.

    Returns:
        Tuple of (nodes, links) lists formatted for the frontend graph
        component: centre node at depth 0, its neighbours at depth 1, and
        their neighbours at depth 2.
    """
    res = es.get(index="users", id=userID)
    ids = []
    links = []
    results = res['_source']['edges']
    # Limit the amount of level-1 nodes, otherwise this can crash browsers.
    if len(results) > 300:
        results = results[:300]

    # Level 1: create a link from the centre to each neighbour.
    for edge in results:
        ids.append(edge["target"])
        links.append({
            'source': userID,
            'target': edge['target'],
            'distance': edge['weighted_dist']
        })

    # FIX: track seen ids in a set — the original used a list, making every
    # membership test below O(n) and the loops quadratic overall.
    seen = {userID}
    nodes = [{
        'id': userID,
        'name': res['_source']['name'],
        "radius": 12,
        "depth": 0,
        "color": "rgb(244, 117, 96)"
    }]

    # Fetch all level-1 users in a single query.
    body = {
        'query': {
            'ids': {
                'type': '_doc',
                'values': ids
            }
        },
        'size': len(ids)
    }
    res = es.search(index='users', body=body)

    edges = []
    for user in res['hits']['hits']:
        uid = int(user['_id'])
        if uid not in seen:
            nodes.append({
                'id': uid,
                'name': user['_source']['name'],
                'radius': 8,
                'depth': 1,
                'color': 'rgb(97, 205, 187)'
            })
            seen.add(uid)
            edges = edges + user['_source']['edges']

    # Level 2: add each neighbour-of-neighbour node and its link once.
    for edge in edges:
        if edge['target'] not in seen:
            nodes.append({
                'id': edge['target'],
                'name': '',
                'radius': 4,
                'depth': 2,
                'color': 'rgb(232, 193, 160)'
            })
            seen.add(edge['target'])
            links.append({
                'source': edge['source'],
                'target': edge['target'],
                'distance': edge['weighted_dist']
            })

    # Diagnostic: node count vs unique node count (duplicates indicate a bug).
    nodeID = [n['id'] for n in nodes]
    print(len(nodeID), len(list(set(nodeID))))
    sys.stdout.flush()
    return nodes, links
def dashboard():
    """Run the dashboard aggregations and return chart-ready data.

    Reads the filter payload from ``request.json``.

    Returns:
        Tuple of (dict, 200) with keys: langPie, povPie, topicsBar,
        topicLine, avgSent, uniqueUsers, rewtweetCount, tweetCount.
    """
    print(request.json)
    sys.stdout.flush()
    query = createQueryFilters(request.json)
    body = {
        'query': query,
        "aggregations": {
            "langPie": {"terms": {"field": "lang.keyword"}},
            "povPie": {"terms": {"field": "pov.keyword"}},
            "topicsBar": {"terms": {"field": "topics.keyword"}},
            "avgSent": {"avg": {"field": "sentiment"}},
            "uniqueUsers": {"cardinality": {"field": "user_id"}},
            "rewtweetCount": {"terms": {"field": "is_retweet"}},
            "topics": {
                "terms": {
                    "field": "topics.keyword",
                    "size": 10
                },
                "aggs": {
                    "dates": {
                        "histogram": {
                            "field": "tweet_created_at",
                            # Histogram interval in ms: 86400000 = one day
                            # (259200000 = 3 days, 604800000 = one week).
                            "interval": 86400000
                        }
                    }
                }
            }
        }
    }
    res = es.search(index='twitter', body=body)
    count = es.count(index='twitter', body={'query': query})

    # One series per topic; histogram keys are epoch millis.
    topicLine = []
    for topic in res['aggregations']['topics']['buckets']:
        topicLine.append({
            "id": topic['key'],
            'data': [{
                'x': datetime.fromtimestamp(date['key'] / 1000)
                             .strftime('%m/%d/%Y'),
                'y': date['doc_count']
            } for date in topic['dates']['buckets']]
        })

    povPie = [{
        'id': x['key'],
        'label': x['key'].capitalize(),
        'value': x['doc_count']
    } for x in res['aggregations']['povPie']['buckets']]
    langPie = [{
        'id': x['key'],
        'label': x['key'].upper(),
        'value': x['doc_count']
    } for x in res['aggregations']['langPie']['buckets']]
    topicsBar = [{
        'topic': x['key'],
        'value': x['doc_count']
    } for x in res['aggregations']['topicsBar']['buckets']]

    # The is_retweet terms agg yields at most two buckets (keys 0 and 1).
    # FIX: guard against an empty bucket list — previously buckets[0] was
    # indexed unconditionally and raised IndexError when no documents matched.
    buckets = res['aggregations']['rewtweetCount']['buckets']
    if not buckets:
        retweetCount = 0
    elif buckets[0]['key'] == 1:
        retweetCount = buckets[0]['doc_count']
    elif len(buckets) == 1:
        # Only the non-retweet bucket exists.
        retweetCount = 0
    else:
        retweetCount = buckets[1]['doc_count']

    return {
        'povPie': povPie,
        'topicLine': topicLine,
        'langPie': langPie,
        'topicsBar': topicsBar,
        'avgSent': res['aggregations']['avgSent']['value'],
        'uniqueUsers': res['aggregations']['uniqueUsers']['value'],
        'rewtweetCount': retweetCount,
        'tweetCount': count['count']
    }, 200
def createNetwork(userID):
    """Creates the nodes and links from the users database.

    Args:
        userID: userid of the center node.

    Returns:
        Tuple of (nodes, links) for the frontend graph: center at depth 0,
        direct neighbours at depth 1, their neighbours at depth 2.
    """
    # Gets the user of the center.
    res = es.get(index="users", id=userID)
    ids = []
    links = []
    results = res['_source']['edges']
    # Limit the amount of nodes otherwise this can crash user browsers.
    if len(results) > 300:
        results = results[0:300]

    # Go through all the 1st level nodes and create links while keeping track
    # of users seen.
    for edge in results:
        ids.append(edge["target"])
        links.append({
            'source': userID,
            'target': edge['target'],
            'distance': edge['weighted_dist']
        })

    # FIX: a set replaces the original list of seen ids — list membership is
    # O(n) per check, which made both loops below quadratic.
    nodeIDs = {userID}
    nodes = [{
        'id': userID,
        'name': res['_source']['name'],
        "radius": 12,
        "depth": 0,
        "color": "rgb(244, 117, 96)"
    }]

    # Query the database now for information on all level 1 nodes.
    body = {
        'query': {
            'ids': {
                'type': '_doc',
                'values': ids
            }
        },
        'size': len(ids)
    }
    res = es.search(index='users', body=body)

    edges = []
    # Go through and add data for all level 1 nodes.
    for user in res['hits']['hits']:
        user_id = int(user['_id'])
        if user_id not in nodeIDs:
            nodes.append({
                'id': user_id,
                'name': user['_source']['name'],
                'radius': 8,
                'depth': 1,
                'color': 'rgb(97, 205, 187)'
            })
            nodeIDs.add(user_id)
            edges = edges + user['_source']['edges']

    # Go through now and for each level 2 node create the nodes and the links
    # if it's not already added.
    for edge in edges:
        if edge['target'] not in nodeIDs:
            nodes.append({
                'id': edge['target'],
                'name': '',
                'radius': 4,
                'depth': 2,
                'color': 'rgb(232, 193, 160)'
            })
            nodeIDs.add(edge['target'])
            links.append({
                'source': edge['source'],
                'target': edge['target'],
                'distance': edge['weighted_dist']
            })

    # Diagnostic: compare total vs unique node ids to catch duplicate nodes.
    nodeID = [n['id'] for n in nodes]
    print(len(nodeID), len(list(set(nodeID))))
    sys.stdout.flush()
    return nodes, links