def analyze(cls, last_day=None):
    """
    Run the cooccurrence analysis over every configured window of days.

    :param last_day: day the windows end on; the arithmetic below assumes
        it is a datetime at 00:00:00 (TODO confirm with callers). When it
        is falsy, the day before ``datetime.now()`` is used instead.

    Each entry of the 'cooccurrence_deltas' configuration list is read as
    a number of days and yields one window that runs from midnight of
    ``end - delta days`` up to the last second of the final day. Once all
    windows are processed, the hashtag usage calculation and the user
    topic process are launched with the caller's original argument.
    """
    # Keep the caller's raw argument: the follow-up services receive it
    # untouched, not the resolved end of the window.
    requested_day = last_day

    if last_day:
        window_end = last_day
    else:
        # Default to yesterday at 00:00:00.
        yesterday = (datetime.now() - timedelta(days=1)).date()
        window_end = datetime.combine(yesterday, datetime.min.time())

    # Move to the last second of that day (23:59:59 for a midnight input).
    # datetime arithmetic returns a new object, so the value passed in by
    # the caller is never modified.
    window_end = window_end + timedelta(days=1) - timedelta(seconds=1)

    # One analysis run per configured window length.
    for delta in ConfigurationManager().get_list('cooccurrence_deltas'):
        # The window opens at midnight, `delta` days before its end.
        first_day = (window_end - timedelta(days=int(delta))).date()
        window_start = datetime.combine(first_day, datetime.min.time())

        cls.get_logger().info(
            f'Starting cooccurrence analysis for last {delta} days.')
        cls.analyze_cooccurrence_for_window(window_start, window_end)
        cls.get_logger().info(
            f'Cooccurrence analysis for last {delta} days done.')

    # Usage analysis follows immediately after the cooccurrence runs.
    HashtagUsageService.calculate_topics_hashtag_usage(requested_day)

    # The topic process gets the day after the requested one, or
    # DateUtils.today() when no day was requested.
    launch_topic_process = UserTopicService().init_process_with_date
    if not requested_day:
        process_date = DateUtils.today()
    else:
        process_date = requested_day + timedelta(days=1)
    launch_topic_process(process_date)
def calculate_topics_hashtag_usage(cls, end_date=None):
    """
    Compute hashtag usage and then topic usage for each configured window.

    :param end_date: last day to include; when falsy, the windows end one
        second before ``DateUtils.today()`` (presumably yesterday at
        23:59:59 -- verify what ``DateUtils.today()`` returns).

    Every entry of the 'showable_cooccurrence_deltas' configuration list
    is read as a number of days. Hashtag usage is calculated for all
    windows first; topic usage is calculated for the same windows
    afterwards.
    """
    supporters = cls.__generate_supporters_map()

    # Exclusive upper bound: the day after `end_date`, or today.
    if not end_date:
        upper_bound = DateUtils.today()
    else:
        upper_bound = end_date + timedelta(days=1)
    # The windows close one second before that bound.
    end_time = upper_bound - timedelta(seconds=1)

    def window_start(days):
        """Return midnight of the day `days` days before `end_time`."""
        first_day = (end_time - timedelta(days=int(days))).date()
        return datetime.combine(first_day, datetime.min.time())

    # First pass: hashtag usage for each window length.
    for delta in ConfigurationManager().get_list(
            'showable_cooccurrence_deltas'):
        start_date = window_start(delta)
        cls.get_logger().info(
            f'Starting hashtag usage calculation for {delta} days window.')
        cls.calculate_hashtag_usage(
            start_date, end_time, interval='days', supporters=supporters)
        # Logged so the duration of each window can be checked.
        cls.get_logger().info(
            f'Hashtag usage calculation finished for {delta} days window.')
    cls.get_logger().info('Hashtag usage calculation finished.')

    # Second pass: topic usage, run only after all hashtag windows are done.
    cls.get_logger().info('Starting topic usage calculation.')
    for delta in ConfigurationManager().get_list(
            'showable_cooccurrence_deltas'):
        start_date = window_start(delta)
        cls.get_logger().info(
            f'Starting topic usage calculation for {delta} days window.')
        cls.calculate_topic_usage(start_date, end_time, interval='days')
        # Logged so the duration of each window can be checked.
        cls.get_logger().info(
            f'Topic usage calculation finished for {delta} days window.')
    cls.get_logger().info('Topic usage calculation finished.')
def store(self, data):
    """
    Stamp a document with today's date and insert it into the collection.

    :param data: mutable mapping to persist. It is modified in place: its
        'date' key is set to ``DateUtils.today()`` before the insert.
    """
    # Tag the caller's document with the current date.
    stamp = DateUtils.today()
    data['date'] = stamp

    # Persist it; `self.collection` is presumably a MongoDB collection
    # (TODO confirm).
    collection = self.collection
    collection.insert_one(data)