Esempio n. 1
0
 def getSimilarities(cls, start_date, end_date):
     """ Find the similarities stored between two dates.

     Both bounds are passed through DateUtils.date_at_first_hour before the
     lookup. When end_date is falsy, datetime.today() is used in its place.
     Returns whatever SimilarityDAO().find yields for the two bounds.
     """
     lower_bound = DateUtils.date_at_first_hour(start_date)
     # Fall back to the current date when the caller gave no end date
     upper_bound = DateUtils.date_at_first_hour(end_date or datetime.today())
     dao = SimilarityDAO()
     return dao.find(lower_bound, upper_bound)
Esempio n. 2
0
    def get_init_and_end_dates(date):
        """ Return the (start, end) bounds of the window ending the day before `date`.

        The start is `date` minus 11 days passed through
        DateUtils.date_at_first_hour; the end is `date` minus 1 day passed
        through DateUtils.date_at_last_hour.

        NOTE(review): the offset is 11 days, not 10 - confirm this is the
        intended window length.
        """
        window_first_day = date - datetime.timedelta(days=11)
        window_start = DateUtils().date_at_first_hour(window_first_day)

        day_before = date - datetime.timedelta(days=1)
        window_end = DateUtils().date_at_last_hour(day_before)

        return window_start, window_end
Esempio n. 3
0
 def analyze(cls, last_day=None):
     """ Run cooccurrence analysis for every configured window ending on last_day.

     last_day is expected to be the wanted day at 00:00:00; when it is falsy,
     yesterday at midnight is used. One analysis is run per entry of the
     'cooccurrence_deltas' configuration list, each covering that many days
     back from 23:59:59 of last_day. Afterwards the hashtag usage calculation
     is launched with the caller's original argument, and the user-topic
     process is started with DateUtils.today() (no argument given) or with
     the day after the given last_day.
     """
     # Keep the caller's value untouched: the follow-up services receive it as given
     requested_day = last_day
     if not last_day:
         yesterday = (datetime.now() - timedelta(days=1)).date()
         last_day = datetime.combine(yesterday, datetime.min.time())
     # Move from 00:00:00 to 23:59:59 of the same day
     window_end = last_day + timedelta(days=1) - timedelta(seconds=1)
     for delta in ConfigurationManager().get_list('cooccurrence_deltas'):
         # The window starts at midnight, `delta` days before its end
         first_day = (window_end - timedelta(days=int(delta))).date()
         window_start = datetime.combine(first_day, datetime.min.time())
         cls.get_logger().info(
             f'Starting cooccurrence analysis for last {delta} days.')
         cls.analyze_cooccurrence_for_window(window_start, window_end)
         cls.get_logger().info(
             f'Cooccurrence analysis for last {delta} days done.')
     # Usage analysis is launched right after the cooccurrence windows are done
     HashtagUsageService.calculate_topics_hashtag_usage(requested_day)
     user_topic_service = UserTopicService()
     if requested_day:
         process_date = requested_day + timedelta(days=1)
     else:
         process_date = DateUtils.today()
     user_topic_service.init_process_with_date(process_date)
Esempio n. 4
0
 def test_get_all_increases(self):
     # Arrange: store increases of both candidates for two different dates
     first_date = datetime.strptime("1996-03-15", CSVUtils.DATE_FORMAT)
     self.target.put_increase_for_candidate('test1', 1000, first_date)
     self.target.put_increase_for_candidate('test2', 2400, first_date)
     second_date = datetime.strptime("1901-05-25", CSVUtils.DATE_FORMAT)
     self.target.put_increase_for_candidate('test1', 3000, second_date)
     self.target.put_increase_for_candidate('test2', 4000, second_date)
     # Act
     all_increases = self.target.get_all_increases()
     # Assert: one entry per date, each holding the date plus both candidates
     assert len(all_increases) == 2
     first_entry = all_increases[0]
     assert len(first_entry) == 3
     assert first_entry['date'] == DateUtils.date_to_timestamp(first_date.date())
     assert first_entry['test1'] == 1000
     assert first_entry['test2'] == 2400
     second_entry = all_increases[1]
     assert len(second_entry) == 3
     assert second_entry['date'] == DateUtils.date_to_timestamp(second_date.date())
     assert second_entry['test1'] == 3000
     assert second_entry['test2'] == 4000
Esempio n. 5
0
 def get_for_follower_updating(self):
     """ Polls a candidate for updating its follower list.

     Returns the first candidate in self.candidates that is not already in
     self.updating_followers and whose last_updated_followers is not today
     (per DateUtils.is_today). The returned candidate is added to
     self.updating_followers before the lock is released.

     Raises FollowerUpdatingNotNecessaryError when no candidate qualifies.
     """
     # Lock to avoid concurrency issues when retrieving candidates across threads
     ConcurrencyUtils().acquire_lock('candidate_for_update')
     try:
         for candidate in self.candidates:
             # Skip candidates that are being updated right now
             if candidate in self.updating_followers:
                 continue
             # Skip candidates that were already updated today
             if DateUtils.is_today(candidate.last_updated_followers):
                 continue
             self.logger.info(
                 f'Returning candidate {candidate.screen_name} for follower retrieval.'
             )
             self.updating_followers.add(candidate)
             return candidate
     finally:
         # Always unlock, even if something above raised; otherwise every
         # later caller would block on a lock that is never released
         ConcurrencyUtils().release_lock('candidate_for_update')
     raise FollowerUpdatingNotNecessaryError()
Esempio n. 6
0
 def calculate_topics_hashtag_usage(cls, end_date=None):
     """ Calculate hashtag usage and then topic usage for every showable window.

     The reference date is DateUtils.today() when end_date is falsy, otherwise
     the day after end_date; every window ends one second before it. One
     window is processed per entry of the 'showable_cooccurrence_deltas'
     configuration list, first for hashtags and then for topics.
     """
     supporters = cls.__generate_supporters_map()
     if end_date:
         reference_date = end_date + timedelta(days=1)
     else:
         reference_date = DateUtils.today()
     # Every window ends one second before the reference date
     window_end = reference_date - timedelta(seconds=1)

     def window_start(days):
         # Midnight of the day that lies `days` days before the window end
         first_day = (window_end - timedelta(days=int(days))).date()
         return datetime.combine(first_day, datetime.min.time())

     # Hashtag usage, one pass per configured window
     hashtag_deltas = ConfigurationManager().get_list(
         'showable_cooccurrence_deltas')
     for delta in hashtag_deltas:
         start_date = window_start(delta)
         cls.get_logger().info(
             f'Starting hashtag usage calculation for {delta} days window.')
         cls.calculate_hashtag_usage(start_date,
                                     window_end,
                                     interval='days',
                                     supporters=supporters)
         # Logged so the duration of each window can be checked
         cls.get_logger().info(
             f'Hashtag usage calculation finished for {delta} days window.')
     cls.get_logger().info('Hashtag usage calculation finished.')
     # Topic usage builds on the hashtag results, so it runs afterwards
     cls.get_logger().info('Starting topic usage calculation.')
     topic_deltas = ConfigurationManager().get_list(
         'showable_cooccurrence_deltas')
     for delta in topic_deltas:
         start_date = window_start(delta)
         cls.get_logger().info(
             f'Starting topic usage calculation for {delta} days window.')
         cls.calculate_topic_usage(start_date, window_end, interval='days')
         # Logged so the duration of each window can be checked
         cls.get_logger().info(
             f'Topic usage calculation finished for {delta} days window.')
     cls.get_logger().info('Topic usage calculation finished.')
Esempio n. 7
0
 def find_topic(cls, topic_id, start_date, end_date=None):
     """ Find showable graph for time window and topic_id.

     end_date defaults to start_date when falsy and is passed through
     DateUtils.last_second_of_day before querying ShowableGraphDAO. The
     'size' of every node in graph['nodes'] is rescaled in place so that the
     smallest node gets cls.MIN_SIZE and the largest gets cls.MAX_SIZE.

     Degenerate graphs are handled instead of raising: a graph without nodes
     is returned untouched, and when every node has the same size all of them
     get cls.MAX_SIZE.
     """
     # Parse end date to match database values
     end_date = DateUtils.last_second_of_day(end_date if end_date else start_date)
     # Retrieve topic graph
     graph = ShowableGraphDAO().find(topic_id, start_date, end_date)
     nodes = graph['nodes']
     if not nodes:
         # Nothing to normalize; max() and min() would raise on an empty list
         return graph
     # Normalize to a (0,1] vector; skipped when the maximum is 0 to avoid
     # dividing by zero (the second step rescales the values anyway)
     largest = max(node['size'] for node in nodes)
     if largest:
         for node in nodes:
             node['size'] = node['size'] / largest
     # Subtract minimum value to get an effective [0,1] vector and transform
     # it to the wanted interval
     scaled_sizes = [node['size'] for node in nodes]
     smallest = min(scaled_sizes)
     spread = max(scaled_sizes) - smallest
     target_range = cls.MAX_SIZE - cls.MIN_SIZE
     for node in nodes:
         if spread:
             node['size'] = ((node['size'] - smallest) / spread) * target_range + cls.MIN_SIZE
         else:
             # All nodes are equally sized (e.g. a single node), so there is
             # no spread to divide by: each one is the largest node
             node['size'] = cls.MAX_SIZE
     return graph
Esempio n. 8
0
 def __make_date_dict(key, value):
     """ Build a flat dictionary for one date.

     The first entry is {'date': DateUtils.date_to_timestamp(key)}; every
     (screen_name, count) pair of the map `value` is then added next to it.
     """
     flattened = {'date': DateUtils.date_to_timestamp(key)}
     flattened.update(value.items())
     return flattened
Esempio n. 9
0
 def store(self, data):
     """ Stamp `data` with DateUtils.today() under 'date' and insert it into the collection.

     Note that `data` itself is modified: the 'date' entry is written on the
     object received from the caller.
     """
     insertion_date = DateUtils.today()
     data['date'] = insertion_date
     self.collection.insert_one(data)
Esempio n. 10
0
 def find_topic(cls, topic_id, start_date, end_date):
     """ Find the usage of a topic in a time window.

     A falsy end_date is replaced by start_date; the chosen date is passed
     through DateUtils.last_second_of_day before querying TopicUsageDAO. The
     document found is mapped together with the id returned by
     HashtagDAO().first_known_usage_tweet_id for topic_id.
     """
     # Parse end date to match database values
     window_end = DateUtils.last_second_of_day(end_date or start_date)
     usage_document = TopicUsageDAO().find(topic_id, start_date, window_end)
     first_tweet_id = HashtagDAO().first_known_usage_tweet_id(topic_id)
     return HashtagUsageResponseMapper.map_one(usage_document, first_tweet_id)
Esempio n. 11
0
 def find_hashtag(cls, hashtag_name, start_date, end_date):
     """ Find the usage of a hashtag in a time window.

     A falsy end_date is replaced by start_date; the chosen date is passed
     through DateUtils.last_second_of_day before querying HashtagUsageDAO.
     The document found is mapped together with the id returned by
     HashtagDAO().first_known_usage_tweet_id for hashtag_name.
     """
     # The end of the window must match the values stored in the database
     window_end = DateUtils.last_second_of_day(end_date or start_date)
     usage_document = HashtagUsageDAO().find(hashtag_name, start_date, window_end)
     first_tweet_id = HashtagDAO().first_known_usage_tweet_id(hashtag_name)
     return HashtagUsageResponseMapper.map_one(usage_document, first_tweet_id)