def getSimilarities(cls, start_date, end_date):
    """ Get similarities between dates.

    Both boundaries are truncated to 00:00 of their day before querying;
    a falsy end_date falls back to the current day.
    """
    # Default the end of the window to "now" when no end date was given
    effective_end = end_date if end_date else datetime.today()
    window_start = DateUtils.date_at_first_hour(start_date)
    window_end = DateUtils.date_at_first_hour(effective_end)
    return SimilarityDAO().find(window_start, window_end)
def get_init_and_end_dates(date):
    """ Return the boundaries of the analysis window relative to *date*.

    Returns a tuple (window start at 00:00, day-before-*date* at 23:59).

    NOTE(review): the original docstring said "10 days ago at 00:00", but
    the code subtracts 11 days, so the window actually starts 11 days
    before *date* -- confirm which of the two is the intended contract.
    """
    # Window start: 11 days before *date*, truncated to 00:00
    init_date = date - datetime.timedelta(days=11)
    init_first_hour = DateUtils().date_at_first_hour(init_date)
    # Window end: the day before *date*, at its last second (23:59)
    yesterday = date - datetime.timedelta(days=1)
    yesterday_last_hour = DateUtils().date_at_last_hour(yesterday)
    return init_first_hour, yesterday_last_hour
def analyze(cls, last_day=None):
    """ Run cooccurrence analysis for the last day with all its intervals.

    :param last_day: datetime at 00:00:00 of the day to analyze; when
        None, the previous calendar day is used.
    """
    # Keep the caller-supplied value untouched: the services invoked at the
    # end distinguish between an explicit date and the "use today" default.
    param_last_day = last_day
    # Run for previous day
    if not last_day:
        # Parameter last_day should be the required day at 00:00:00
        last_day = datetime.combine(
            (datetime.now() - timedelta(days=1)).date(),
            datetime.min.time())
    # Get last day at 23:59:59. Safe to rebind here because datetime
    # arithmetic returns a new object; param_last_day is unaffected.
    last_day = last_day + timedelta(days=1) - timedelta(
        seconds=1)
    # Run for last N days.
    for delta in ConfigurationManager().get_list('cooccurrence_deltas'):
        # Calculate start date from delta (delta days before last_day, at 00:00:00)
        start_date = datetime.combine(
            (last_day - timedelta(days=int(delta))).date(),
            datetime.min.time())
        # Run cooccurrence analysis
        cls.get_logger().info(
            f'Starting cooccurrence analysis for last {delta} days.')
        cls.analyze_cooccurrence_for_window(start_date, last_day)
        cls.get_logger().info(
            f'Cooccurrence analysis for last {delta} days done.')
    # Run usage analysis as soon as possible
    HashtagUsageService.calculate_topics_hashtag_usage(param_last_day)
    # When a day was given explicitly, the user-topic process starts the
    # day after it; otherwise it starts today.
    UserTopicService().init_process_with_date(
        DateUtils.today() if not param_last_day else param_last_day +
        timedelta(days=1))
def test_get_all_increases(self):
    # Fixture: two snapshots of candidate increases on two distinct dates.
    # NOTE(review): the 'new' snapshot intentionally (?) uses a date that is
    # chronologically EARLIER than the 'old' one -- the assertions below rely
    # on the DAO preserving this ordering; confirm that is the contract.
    old_date = datetime.strptime("1996-03-15", CSVUtils.DATE_FORMAT)
    new_date = datetime.strptime("1901-05-25", CSVUtils.DATE_FORMAT)
    for candidate, count in (('test1', 1000), ('test2', 2400)):
        self.target.put_increase_for_candidate(candidate, count, old_date)
    for candidate, count in (('test1', 3000), ('test2', 4000)):
        self.target.put_increase_for_candidate(candidate, count, new_date)

    # Exercise
    increases = self.target.get_all_increases()

    # Verify: one entry per stored date, each flattened as
    # {'date': timestamp, '<candidate>': count, ...}
    assert len(increases) == 2
    first_entry, second_entry = increases
    assert len(first_entry) == 3
    assert first_entry['date'] == DateUtils.date_to_timestamp(old_date.date())
    assert first_entry['test1'] == 1000
    assert first_entry['test2'] == 2400
    assert len(second_entry) == 3
    assert second_entry['date'] == DateUtils.date_to_timestamp(new_date.date())
    assert second_entry['test1'] == 3000
    assert second_entry['test2'] == 4000
def get_for_follower_updating(self):
    """ Polls a candidate for updating its follower list.

    Returns the first candidate that was not updated today and is not
    currently being updated, after marking it as in-progress.

    :raises FollowerUpdatingNotNecessaryError: when no candidate needs updating.
    """
    # Lock to avoid concurrency issues when retrieving candidates across threads
    ConcurrencyUtils().acquire_lock('candidate_for_update')
    try:
        for candidate in self.candidates:
            # We will only return a candidate if it was not updated today
            # and is not being currently updated
            if candidate not in self.updating_followers and not DateUtils.is_today(
                    candidate.last_updated_followers):
                self.logger.info(
                    f'Returning candidate {candidate.screen_name} for follower retrieval.'
                )
                self.updating_followers.add(candidate)
                return candidate
        raise FollowerUpdatingNotNecessaryError()
    finally:
        # try/finally guarantees the lock is released even if the date check
        # or logging raises mid-loop; the original could leak the lock and
        # deadlock every other thread polling for candidates.
        ConcurrencyUtils().release_lock('candidate_for_update')
def calculate_topics_hashtag_usage(cls, end_date=None):
    """ Calculate the usage of all hashtags in today showable topics and the
    total usage of the topic itself.

    :param end_date: last day to include (at 00:00); when None, windows end
        yesterday at 23:59:59.
    """
    supporters = cls.__generate_supporters_map()
    date = DateUtils.today() if not end_date else end_date + timedelta(
        days=1)
    # End time is yesterday at 23:59:59
    end_time = date - timedelta(seconds=1)
    # The delta list is invariant across both passes: fetch it once instead
    # of re-reading the configuration for each loop.
    deltas = ConfigurationManager().get_list('showable_cooccurrence_deltas')

    def _window_start(delta):
        # Start of the day `delta` days before end_time, at 00:00:00.
        return datetime.combine(
            (end_time - timedelta(days=int(delta))).date(),
            datetime.min.time())

    # Run hashtag usage for the different windows
    for delta in deltas:
        cls.get_logger().info(
            f'Starting hashtag usage calculation for {delta} days window.')
        cls.calculate_hashtag_usage(_window_start(delta),
                                    end_time,
                                    interval='days',
                                    supporters=supporters)
        # Log finish for time checking
        cls.get_logger().info(
            f'Hashtag usage calculation finished for {delta} days window.')
    cls.get_logger().info('Hashtag usage calculation finished.')

    # Once we've analyzed hashtags, topic usage calculations are just additions
    cls.get_logger().info('Starting topic usage calculation.')
    for delta in deltas:
        cls.get_logger().info(
            f'Starting topic usage calculation for {delta} days window.')
        cls.calculate_topic_usage(_window_start(delta), end_time, interval='days')
        # Log finish for time checking
        cls.get_logger().info(
            f'Topic usage calculation finished for {delta} days window.')
    cls.get_logger().info('Topic usage calculation finished.')
def find_topic(cls, topic_id, start_date, end_date=None):
    """ Find showable graph for time window and topic_id.

    Node sizes are normalized onto the [MIN_SIZE, MAX_SIZE] display range.

    :param end_date: optional; defaults to start_date (single-day window).
    """
    # Parse end date to match database values
    end_date = DateUtils.last_second_of_day(end_date if end_date else start_date)
    # Retrieve topic graph
    graph = ShowableGraphDAO().find(topic_id, start_date, end_date)
    nodes = graph['nodes']
    if not nodes:
        # Nothing to normalize; max() on an empty sequence would raise.
        return graph
    # Normalize to a (0, 1] vector
    max_size = max(node['size'] for node in nodes)
    for node in nodes:
        node['size'] = node['size'] / max_size
    # Subtract the minimum to get an effective [0, 1) vector, then map it
    # affinely onto the wanted [MIN_SIZE, MAX_SIZE] interval.
    sizes = [node['size'] for node in nodes]
    min_size = min(sizes)
    size_range = max(sizes) - min_size
    if size_range == 0:
        # All nodes share the same size (e.g. a single-node graph): the
        # affine rescaling would divide by zero. Every node is the maximum,
        # so render them all at MAX_SIZE.
        for node in nodes:
            node['size'] = cls.MAX_SIZE
    else:
        for node in nodes:
            node['size'] = ((node['size'] - min_size) / size_range) * (
                cls.MAX_SIZE - cls.MIN_SIZE) + cls.MIN_SIZE
    return graph
def __make_date_dict(key, value):
    """ Create a dictionary adding first entry {'date': key} and flatten value, which is a map. """
    # 'date' is inserted first so it leads the flattened mapping
    flattened = {'date': DateUtils.date_to_timestamp(key)}
    flattened.update(value)
    return flattened
def store(self, data):
    """ Persist *data* in the collection, stamped with today's date.

    NOTE: mutates the caller's dict in place by adding/overwriting the
    'date' key before inserting.
    """
    # Generate document
    data['date'] = DateUtils.today()
    # Store document
    self.collection.insert_one(data)
def find_topic(cls, topic_id, start_date, end_date=None):
    """ Find topic usage for the given time window.

    :param end_date: optional; defaults to start_date (single-day window).
        The body already handled a falsy end_date, so the default makes the
        signature consistent with the sibling find_* services.
    """
    # Parse end date to match database values (inclusive up to 23:59:59)
    end_date = DateUtils.last_second_of_day(end_date if end_date else start_date)
    document = TopicUsageDAO().find(topic_id, start_date, end_date)
    # Tweet id of the first known usage, used for linking in the response
    tweet_id = HashtagDAO().first_known_usage_tweet_id(topic_id)
    return HashtagUsageResponseMapper.map_one(document, tweet_id)
def find_hashtag(cls, hashtag_name, start_date, end_date=None):
    """ Find hashtag usage for the given time window.

    :param end_date: optional; defaults to start_date (single-day window).
        The body already handled a falsy end_date, so the default makes the
        signature consistent with the sibling find_* services.
    """
    # Parse end date to match database values (inclusive up to 23:59:59)
    end_date = DateUtils.last_second_of_day(end_date if end_date else start_date)
    document = HashtagUsageDAO().find(hashtag_name, start_date, end_date)
    # Tweet id of the first known usage, used for linking in the response
    tweet_id = HashtagDAO().first_known_usage_tweet_id(hashtag_name)
    return HashtagUsageResponseMapper.map_one(document, tweet_id)