def test_search_index(self):
    """A document added to an index should be retrievable via search."""
    # Create the index and add a single known document.
    payload = {"name": "test"}
    es.create_index("searching")
    es.add_entry(index_name="searching", id=1, body=payload)
    # Give Elasticsearch a moment to make the new document searchable.
    time.sleep(1)
    result = es.search_index(index_name="searching")
    print(result['hits']['hits'][0]['_source'])
    self.assertIn('test', result['hits']['hits'][0]['_source']['name'])
    # Clean up so the test leaves no index behind.
    es.delete_index("searching")
def word_cloud(id, topic):
    """The word cloud task creates a word cloud from the data.

    For every Twitter category belonging to user *id*, the category name
    is preprocessed/stemmed and used as a prefix query against *topic*;
    the resulting hit totals are accumulated per term. Progress is
    pushed through ``current_task.update_state`` after each category.

    Returns a tuple ``(terms, json_counts)`` where *terms* is the list of
    processed category names and *json_counts* a JSON object mapping each
    term to its hit total.
    """
    counts = {}
    terms = []
    for record in TwitterCat.objects.filter(user_id=id):
        # Normalise the category name into a bare search term.
        term = preprocessor.preprocess(record.category_name)
        term = preprocessor.porter_stemming(term)
        term = ''.join(ch for ch in term if ch not in '[]\'')
        # Prefix query on the tweet text field for this term.
        hits = elastic_utils.search_index(
            topic,
            query='{"query":{"query_string":{"fields":["text"],"query":"%s*"}}}'
            % str(term))
        counts[term] = hits['hits']['total']
        terms.append(term)
        current_task.update_state(state='PROGRESS',
                                  meta={
                                      'current_categories': terms,
                                      'current_results': counts
                                  })
    return (terms, json.dumps(counts))
def elastic_info(index_list):
    """Display statistics gathered from the topic indices.

    For every index name in *index_list*, three related indices are
    queried: the index itself (ten most recent daily stats documents),
    ``<name>-median`` (collection medians) and ``<name>-latest`` (five
    most recent tweets). Progress is reported via
    ``current_task.update_state`` as entries are processed.

    Args:
        index_list: sized iterable of Elasticsearch index names
            (``len`` is used for the progress percentage).

    Returns:
        A list with one dict per index; each dict holds the ``current``,
        ``median`` and ``latest`` result arrays under its ``"name"`` key
        and the index name under ``"current_entry"``.
    """
    final_res = []
    current_entry = 0
    for entry in index_list:
        index_dict = {}
        index_dict["name"] = {}
        index_dict["current_entry"] = entry
        # Report 0% before the first (potentially slow) round of queries.
        # BUG FIX: the original used ``is 0`` / ``is not 0`` — identity
        # comparison with an int literal, which only works by accident of
        # CPython's small-int caching. Use ``==`` / ``!=``.
        if current_entry == 0:
            current_task.update_state(state='PROGRESS',
                                      meta={
                                          'current_percentage': 0,
                                          "current_entry": entry
                                      })
        # Ten most recent daily statistics documents for this topic.
        res = elastic_utils.search_index(entry,
                                         query={
                                             "query": {
                                                 "match_all": {}
                                             },
                                             "sort": [{
                                                 "date": {
                                                     "order": "desc"
                                                 }
                                             }],
                                             "size": 10
                                         })
        current_array = []
        for current in res["hits"]["hits"]:
            current_array.append({
                "date": current["_source"]["date"],
                "total": current["_source"]["total"],
                "last_collected": current["_source"]["last_time"],
            })
        index_dict["name"]["current"] = current_array
        # Median collection statistics (per day / hour / minute).
        median_array = []
        for median in elastic_utils.iterate_search(entry + "-median"):
            median_array.append({
                "day_median": median["_source"]["day_median"],
                "hour_median": median["_source"]["hour_median"],
                "minute_median": median["_source"]["minute_median"],
            })
        index_dict["name"]["median"] = median_array
        # Five most recently created tweets for this topic.
        res_latest = elastic_utils.search_index(entry + "-latest",
                                                query={
                                                    "query": {
                                                        "match_all": {}
                                                    },
                                                    "sort": [{
                                                        "created.keyword": {
                                                            "order": "desc"
                                                        }
                                                    }],
                                                    "size": 5
                                                })
        latest_array = []
        for item in res_latest["hits"]["hits"]:
            latest_array.append({
                "created": item["_source"]["created"],
                "text": item["_source"]["text"],
                "image": item["_source"]["profile_picture"],
                "name": item["_source"]["name"],
            })
        index_dict["name"]["latest"] = latest_array
        # After the first index, include percentage and partial results.
        # (Removed the original's ``all_entries`` list — it was built but
        # never read or returned.)
        if current_entry != 0:
            current_task.update_state(
                state='PROGRESS',
                meta={
                    'current_percentage':
                    (current_entry / len(index_list)) * 100,
                    'current_entry': entry,
                    'final_res': final_res
                })
        current_entry += 1
        final_res.append(index_dict)
    print(len(final_res))
    return final_res
#Import the necessary methods from tweetpy library import tensorflow as tf import tweepy import json from textblob import TextBlob from nltk.tokenize import word_tokenize import re import preprocessor import fyp_webapp.config as cfg from fyp_webapp.ElasticSearch import elastic_utils as es res = es.search_index(cfg.twitter_credentials['topic']) if (res['hits']['total'] is None): id = 0 else: id = res['hits']['total'] class StreamListener(tweepy.StreamListener): def on_status(self, status): if hasattr(status, 'retweeted_status'): return #this filters out retweets else: global id id += 1 dict = { "description": str(status.user.description), "loc": str(status.user.location), "text": str(status.text), "coords": str(status.coordinates),
def _first_category_timeline(categories):
    """Fetch timeline documents and median stats for the FIRST category.

    Mirrors the original view's loop-then-break behaviour: only the first
    category in the queryset is consulted.

    Args:
        categories: iterable of TwitterCat model instances.

    Returns:
        ``(res, med, name)`` — the iterate_search results, the median
        search result and the category name. Returns ``(None, None, "")``
        when *categories* is empty; callers will then fail when indexing
        the results, matching the original's crash-on-empty behaviour.
    """
    for mod in categories:
        res = elastic_utils.iterate_search(index_name=mod.category_name,
                                           query={
                                               "size": 20,
                                               "query": {
                                                   "match_all": {}
                                               },
                                               "sort": [{
                                                   "date": {
                                                       "order": "desc"
                                                   }
                                               }],
                                           })
        med = elastic_utils.search_index(index_name=mod.category_name +
                                         "-median")
        return res, med, mod.category_name
    return None, None, ""


def timeline(request):
    """Render the timeline page for a chosen (or the user's first) topic.

    POST selects a category via the ``dropdown`` field; otherwise the
    first of the requesting user's categories is used. Builds an
    hour-by-hour breakdown for up to 20 recent days plus median-based
    activity thresholds for the template.
    """
    # FIX: the two branches previously duplicated the exact same query
    # loop; both now delegate to _first_category_timeline().
    if request.POST:
        print(request.POST)
        answer = request.POST['dropdown']
        selected = TwitterCat.objects.filter(category_name=answer)
    else:
        selected = TwitterCat.objects.filter(user=request.user)
    # NOTE(review): if `selected` is empty this still crashes below (as
    # the original did via UnboundLocalError) — confirm whether an empty
    # category set is reachable and needs a friendly page.
    res, med, name = _first_category_timeline(selected)
    cat = TwitterCat.objects.filter(user=request.user)
    # Map date -> {hour: count} for at most 20 entries.
    data = {}
    i = 0
    for entry in res:
        temp_data = {}
        for hour in entry["_source"]["hour_breakdown"]:
            temp_data[int(hour)] = (entry["_source"]["hour_breakdown"][hour])
        data[entry["_source"]["date"]] = temp_data
        i += 1
        if i == 20:
            break
    day_median = med["hits"]["hits"][0]["_source"]["day_median"]
    hour_median = med["hits"]["hits"][0]["_source"]["hour_median"]
    minute_median = med["hits"]["hits"][0]["_source"]["minute_median"]
    # Activity thresholds: 2x the hour/minute medians, 1.5x the day median.
    hour_med_tresh = round(hour_median * 2, 2)
    minute_med_tresh = round(minute_median * 2, 2)
    day_med_tresh = round(day_median * 1.5, 2)
    print(hour_med_tresh)
    return render(
        request, "fyp/timeline/index.html", {
            "data": data,
            "name": name,
            "cats": cat,
            "hour_med_tresh": hour_med_tresh,
            "minute_med_tresh": minute_med_tresh,
            "day_med_tresh": day_med_tresh
        })