def collect_todays_tweets(entry):
    """Roll the tweets in ``<entry>-latest`` up into one summary document.

    Every document returned from the ``<entry>-latest`` index is read once to
    build a per-hour tweet count and a word-frequency table. The summary is
    then written to the ``entry`` index via ``elastic_utils.add_entry`` (with
    ``last_id(entry) + 1`` as its second argument, presumably the document
    id), after which ``<entry>-latest`` is deleted and re-created.

    Nothing happens when ``entry`` contains ``"-latest"`` or ``"median"``, or
    when the ``<entry>-latest`` index does not exist.

    Args:
        entry: Name of the topic index to summarise.
    """
    # Only plain topic names are processed; helper indices are skipped.
    if "-latest" in entry or "median" in entry:
        return

    latest_index = entry + "-latest"
    entry_total = elastic_utils.last_id(entry)
    if elastic_utils.check_index_exists(latest_index) is not True:
        return

    total = elastic_utils.last_id(latest_index)
    day_res = elastic_utils.iterate_search(
        latest_index,
        query={
            "query": {
                "match_all": {}
            },
            "sort": [{
                "last_time": {
                    "order": "desc"
                }
            }]
        })

    word_counter = Counter()
    hour_break_dict = {}
    # Both stay None when the search yields no tweets at all.
    dateobj = None
    created_at = None

    for tweet in day_res:
        source = tweet["_source"]
        tweeted_at = datetime.strptime(source["created"], '%Y-%m-%d %H:%M:%S')
        # The summary carries the date/time of the last tweet iterated.
        dateobj = tweeted_at.strftime("%Y-%m-%d")
        created_at = tweeted_at.strftime("%Y-%m-%dT%H:%M:%S")

        hour = str(tweeted_at.hour)
        hour_break_dict[hour] = hour_break_dict.get(hour, 0) + 1

        words = preprocessor.filter_multiple(str(source["text"]),
                                             ats=True,
                                             hashtags=True,
                                             stopwords=True,
                                             stemming=False,
                                             urls=True,
                                             singles=True)
        word_counter.update(list(words))

    # Write the summary and drop the source index once, after all tweets have
    # been read. Doing this per tweet rewrote the same document repeatedly and
    # deleted the index while it was still being iterated.
    if created_at is not None:
        freq_obj = {
            "hour_breakdown": hour_break_dict,
            "words": json.dumps(word_counter.most_common(400)),
            "total": total,
            "date": dateobj,
            "last_time": created_at
        }
        elastic_utils.add_entry(entry, entry_total + 1, freq_obj)
        elastic_utils.delete_index(latest_index)

    try:
        elastic_utils.create_index(latest_index)
    except Exception:
        # Best effort: the index is still present when no tweets were found.
        print(
            "Todays index already exists! This is an exception, but it's probably ok"
        )
Esempio n. 2
0
 def test_list_all_index(self):
     """Check that the index listing mentions ``'test'`` and ``'list_all'``.

     The ``test`` index is not created here; presumably a fixture outside
     this method provides it.
     """
     es.create_index("list_all")
     listing = es.list_all_indexes()
     for quoted_name in ("'test'", "'list_all'"):
         self.assertIn(quoted_name, listing)
     es.delete_index("list_all")
     es.delete_index("test")
     # NOTE(review): this inspects the listing captured *before* the deletes,
     # not a fresh one - confirm that is the intent.
     self.assertIn("{}", listing)
 def test_list_all_index(self):
     """Check that both ``test`` and ``list_all`` appear in the index listing.

     The ``test`` index is not created here; presumably a fixture outside
     this method provides it.
     """
     es.create_index("list_all")
     listing = es.list_all_indexes()
     # Pause before asserting and again after cleanup (presumably to let the
     # backend settle - confirm).
     time.sleep(1)
     for index_name in ('test', 'list_all'):
         self.assertIn(index_name, listing)
     for index_name in ("list_all", "test"):
         es.delete_index(index_name)
     time.sleep(1)
 def test_search_index(self):
     """Store one document in a fresh index and check a search returns it."""
     index = "searching"
     es.create_index(index)
     es.add_entry(index_name=index, id=1, body={"name": "test"})
     # Pause before searching (presumably so the new document is visible).
     time.sleep(1)
     res = es.search_index(index_name=index)
     first_source = res['hits']['hits'][0]['_source']
     print(first_source)
     self.assertIn('test', first_source['name'])
     es.delete_index(index)
Esempio n. 5
0
def twittercat_delete(
        request,
        pk,
        template_name='fyp/Category/twittercat_confirm_delete.html'):
    """Show a delete confirmation for a ``TwitterCat`` and delete it on POST.

    Args:
        request: The incoming request; only its ``method`` is inspected here.
        pk: Primary key used to look up the ``TwitterCat`` (404 if missing).
        template_name: Template rendered for any non-POST request.

    Returns:
        A redirect to ``fyp_webapp:twittercat_list`` after a POST deletion,
        otherwise the rendered confirmation page with the category exposed
        to the template as ``object``.
    """
    category = get_object_or_404(TwitterCat, pk=pk)
    if request.method != 'POST':
        return render(request, template_name, {'object': category})

    # Capture the index name before the row is deleted.
    latest_index = category.category_name + "-latest"
    category.delete()
    elastic_utils.delete_index(latest_index)
    return redirect('fyp_webapp:twittercat_list')
 def tearDown(self):
     """Delete the ``test`` index."""
     index_name = "test"
     es.delete_index(index_name)
 def tearDown(self):
     """Wait one second, then delete the ``test`` index."""
     index_name = "test"
     time.sleep(1)
     es.delete_index(index_name)
 def test_delete_index(self):
     """Deleting the ``test`` index should report ``'acknowledged': True``."""
     response = es.delete_index("test")
     self.assertIn("'acknowledged': True", response)
 def test_create_index(self):
     """Creating ``testcase`` should report that index name in the response."""
     index_name = "testcase"
     response = es.create_index(index_name)  # An index is made
     self.assertIn(" 'index': 'testcase'", response)
     # Pause before cleaning up the index created above.
     time.sleep(1)
     es.delete_index(index_name)