def remove_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Remove the activities from the feed

    functions used in tasks need to be at the main level of the module
    '''
    t = timer()
    msg_format = 'running %s.remove_many operation for %s activities batch interface %s'
    logger.debug(msg_format, feed, len(activities), batch_interface)
    feed.remove_many(activities, trim=trim, batch_interface=batch_interface)
    logger.debug('remove many operation took %s seconds', t.next())
def add_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Add the activities to the feed

    functions used in tasks need to be at the main level of the module
    '''
    t = timer()
    msg_format = 'running %s.add_many operation for %s activities batch interface %s and trim %s'
    logger.debug(msg_format, feed, len(activities), batch_interface, trim)
    feed.add_many(activities, batch_interface=batch_interface, trim=trim)
    logger.debug('add many operation took %s seconds', t.next())
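# The timer() helper used throughout this module is not defined in this
# section. A minimal sketch of what the call sites assume (a generator
# yielding the seconds elapsed since it was last advanced, hence the
# Python 2 style t.next() reads); this is an assumption, not the actual
# implementation, and it requires `import time`:
def timer():
    # hypothetical sketch: yield seconds elapsed since the previous call
    last = time.time()
    while True:
        now = time.time()
        yield now - last
        last = now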
def add_many(self, activities, trim=True, current_activities=None, *args, **kwargs):
    '''
    Adds many activities to the feed

    Unfortunately we can't support the batch interface.
    The writes depend on the reads.
    Also subsequent writes will depend on these writes.
    So no batching is possible at all.

    :param activities: the list of activities
    '''
    validate_list_of_strict(activities, (self.activity_class, FakeActivity))
    # start by getting the aggregator
    aggregator = self.get_aggregator()

    t = timer()
    # get the current aggregated activities
    if current_activities is None:
        current_activities = self[:self.merge_max_length]
    msg_format = 'reading %s items took %s'
    logger.debug(msg_format, self.merge_max_length, t.next())

    # merge the current activities with the new ones
    new, changed, deleted = aggregator.merge(current_activities, activities)
    logger.debug('merge took %s', t.next())

    # new ones we insert, changed we do a delete and insert
    new_aggregated = self._update_from_diff(new, changed, deleted)
    new_aggregated = aggregator.rank(new_aggregated)

    # trim every now and then
    if trim and random.random() <= self.trim_chance:
        self.timeline_storage.trim(self.key, self.max_length)

    return new_aggregated
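# Hedged usage sketch (not from the source): based on the test below, feeds
# are obtained through the manager and activities are merged in one at a
# time with add_many. The user_id and activity arguments are hypothetical.
def example_add_many(user_id, activity):
    # fetch the aggregated feed for this user via the manager
    aggregated = manager.get_feeds(user_id)['aggregated']
    # merge the new activity into the current aggregated activities
    return aggregated.add_many([activity], trim=False)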
def test_aggregated_add_many(self):
    # setup the pins and activity chunk
    t = timer()
    admin_user_id = 1
    aggregated = manager.get_feeds(admin_user_id)['aggregated']
    pins = list(Pin.objects.filter(user=admin_user_id)[:3])
    activities = []
    base_activity = pins[0].create_activity()
    sample_size = 1000
    for x in range(1, sample_size):
        activity = copy.deepcopy(base_activity)
        activity.actor_id = x
        activity.object_id = x
        activities.append(activity)
    aggregated.insert_activities(activities)
    for activity in activities:
        aggregated.add_many([activity], trim=False)
    add_many_time = t.next()
    print 'add many ran %s times, took %s' % (len(activities), add_many_time)
    popular_user_time = 100000. / sample_size * add_many_time
    print 'popular user fanout would take %s seconds' % popular_user_time
def run_benchmark(benchmark, network_size, max_network_size, multiplier, duration):
    logger.info('Starting the benchmark! Exciting.... :)')
    if benchmark is None:
        benchmark_class = get_benchmark('stream_bench_custom')
        benchmark = benchmark_class(network_size, max_network_size, multiplier, duration)
    else:
        benchmark_class = get_benchmark(benchmark)
        benchmark = benchmark_class()

    logger.info('Running benchmark %s', benchmark.name)
    logger.info('Network size starting at %s will grow to %s',
                benchmark.network_size, benchmark.max_network_size)
    logger.info('Multiplier is set to %s and duration %s',
                benchmark.multiplier, benchmark.duration)

    metrics_instance = get_metrics_instance()
    social_model = benchmark.get_social_model()
    days = 0

    while True:
        logger.info('Simulating a social network with network size %s',
                    social_model.network_size)
        object_id = 1
        for x in range(benchmark.duration):
            days += 1
            social_model.day = days
            daily_tasks = collections.defaultdict(list)
            t = timer()
            metrics_instance.on_day_change(days)
            logger.debug('Day %s for our network', days)

            # create load based on the current model
            active_users = social_model.active_users
            for user_id in active_users:
                # follow other users, note that we don't actually store the
                # follower lists for this benchmark
                for target_user_id in social_model.get_new_follows(user_id):
                    daily_tasks['follow_users'].append([user_id, target_user_id])
                # create activities
                for x in range(social_model.get_user_activity(user_id)):
                    activity = create_activity(user_id, object_id)
                    object_id += 1
                    daily_tasks['add_activities'].append([user_id, activity])
                # read a few pages of data
                daily_tasks['read_feed_pages'].append([user_id])
            logger.debug('%s seconds spent creating the model', t.next())

            # send the daily tasks to celery
            batch_tasks = []
            for task_name, task_args in daily_tasks.items():
                task = getattr(tasks, task_name)
                for task_arg_chunk in chunks(task_args, 100):
                    task_signature = task.s(social_model, task_arg_chunk)
                    batch_tasks.append(task_signature)
            for signature in batch_tasks:
                signature.apply_async()
            logger.debug('%s seconds spent sending %s tasks',
                         t.next(), len(batch_tasks))

            # wait
            # while True:
            #     if result.ready():
            #         break
            #     time.sleep(1)
            #     logger.debug('Waiting for day %s to finish' % days)
            logger.debug('Day %s finished', days)
            time.sleep(1)

        # grow the network
        logger.info('Growing the social network.....')
        social_model.network_size = social_model.network_size * benchmark.multiplier
        metrics_instance.on_network_size_change(social_model.network_size)
        if social_model.network_size >= benchmark.max_network_size:
            logger.info('Reached the max users, we\'re done with our benchmark!')
            break
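# chunks() is used above but not defined in this section. A minimal sketch
# of the contract run_benchmark relies on (an assumption, not the actual
# implementation): split a list into successive pieces of at most
# chunk_size items.
def chunks(items, chunk_size):
    # hypothetical sketch: yield consecutive slices of chunk_size items
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]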
def benchmark_aggregated_feed():
    t = timer()
    manager.feed_classes = {'aggregated': AggregatedFeed}
    manager.add_entry(1, 1)
    print 'Benchmarking aggregated feed took: %0.2fs' % t.next()
def benchmark_flat_feed():
    t = timer()
    manager.feed_classes = {'flat': FashiolistaFeed}
    manager.add_entry(1, 1)
    print 'Benchmarking flat feed took: %0.2fs' % t.next()