def test_aggregated_add_many(self):
    # setup the pins and activity chunk
    t = timer()
    admin_user_id = 1
    aggregated = feedly.get_feeds(admin_user_id)['aggregated']
    pins = list(Pin.objects.filter(user=admin_user_id)[:3])
    activities = []
    base_activity = pins[0].create_activity()
    sample_size = 1000
    # build sample_size - 1 copies of the base activity, each with a unique
    # actor and object so the aggregator treats them as distinct
    for x in range(1, sample_size):
        activity = copy.deepcopy(base_activity)
        activity.actor_id = x
        activity.object_id = x
        activities.append(activity)
    print 'running on %s' % settings.FEEDLY_CASSANDRA_HOSTS
    print 'inserting the many'
    aggregated.insert_activities(activities)
    print 'done, took %s' % t.next()

    for activity in activities:
        aggregated.add_many([activity], trim=False)
    add_many_time = t.next()
    print 'add_many ran %s times, took %s' % (len(activities), add_many_time)
    # extrapolate from the sample to a fanout of 100.000 followers
    popular_user_time = 100000. / sample_size * add_many_time
    print 'popular user fanout would take %s seconds' % popular_user_time
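# Every snippet in this module times itself by calling timer() once and then
# reading elapsed intervals through t.next(). The helper below is a minimal
# generator-based sketch of such a timer; the actual feedly utility may be
# implemented differently.
import time

def timer():
    '''
    Yields the seconds elapsed since the previous next() call,
    or since creation for the first call.
    '''
    last = time.time()
    while True:
        now = time.time()
        yield now - last
        last = now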
def remove_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Remove the activities from the feed

    Functions used in tasks need to be at the main level of the module
    '''
    t = timer()
    msg_format = 'running %s.remove_many operation for %s activities batch interface %s'
    logger.debug(msg_format, feed, len(activities), batch_interface)
    feed.remove_many(activities, trim=trim, batch_interface=batch_interface)
    logger.debug('remove many operation took %s seconds', t.next())
def add_operation(feed, activities, trim=True, batch_interface=None):
    '''
    Add the activities to the feed

    Functions used in tasks need to be at the main level of the module
    '''
    t = timer()
    msg_format = 'running %s.add_many operation for %s activities batch interface %s and trim %s'
    logger.debug(msg_format, feed, len(activities), batch_interface, trim)
    feed.add_many(activities, batch_interface=batch_interface, trim=trim)
    logger.debug('add many operation took %s seconds', t.next())
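# Both operations live at module level because task queues such as celery
# resolve a function by its import path when a task is deserialized; nested or
# instance-bound callables cannot be pickled. The fanout task below is a
# hypothetical illustration of how they could be dispatched; the task name and
# signature are assumptions, not this codebase's API.
from celery import shared_task

@shared_task
def fanout_task(feed_class, user_ids, operation, operation_kwargs):
    # operation is add_operation or remove_operation; passing a function as a
    # task argument requires a serializer that can handle it, such as pickle
    for user_id in user_ids:
        feed = feed_class(user_id)
        operation(feed, **operation_kwargs)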
def add_many(self, activities, trim=True, current_activities=None, *args, **kwargs):
    '''
    Adds many activities to the feed

    Unfortunately we can't support the batch interface.
    The writes depend on the reads.
    Also subsequent writes will depend on these writes.
    So no batching is possible at all.

    :param activities: the list of activities
    '''
    validate_list_of_strict(activities, (self.activity_class, FakeActivity))
    # start by getting the aggregator
    aggregator = self.get_aggregator()

    t = timer()
    # get the current aggregated activities
    if current_activities is None:
        current_activities = self[:self.merge_max_length]
    msg_format = 'reading %s items took %s'
    logger.debug(msg_format, self.merge_max_length, t.next())

    # merge the current activities with the new ones
    new, changed, deleted = aggregator.merge(current_activities, activities)
    logger.debug('merge took %s', t.next())

    # new ones we insert, changed we do a delete and insert
    new_aggregated = self._update_from_diff(new, changed, deleted)
    new_aggregated = aggregator.rank(new_aggregated)

    # trim every now and then
    if trim and random.random() <= self.trim_chance:
        self.timeline_storage.trim(self.key, self.max_length)

    return new_aggregated
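# A short usage sketch for add_many above. FakeActivity and LoveVerb follow
# the fixtures used in the tests in this module; AggregatedFeed(13) assumes
# the feed constructor takes a user id, as the benchmarks below do.
feed = AggregatedFeed(13)
activity = FakeActivity(13, LoveVerb, 1, 1, datetime.datetime.now())
# the return value holds the aggregated activities that were newly inserted
# or replaced as part of the merge diff, in ranked order
new_aggregated = feed.add_many([activity])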
def test_large_remove_activity(self):
    # first build a large feed
    self.test_feed.max_length = 3600
    activities = []
    choices = [LoveVerb, AddVerb]
    # alternate the verbs and spread the activities one day apart
    for i in range(1, 3600):
        verb = choices[i % 2]
        activity = FakeActivity(
            i, verb, i, i, datetime.datetime.now() - datetime.timedelta(days=i))
        activities.append(activity)
    self.test_feed.insert_activities(activities)
    self.test_feed.add_many(activities)

    to_remove = activities[200:700]
    remove_count = len(to_remove)
    feed_count = self.test_feed.count()
    t = timer()
    self.test_feed.remove_many(to_remove)
    msg_format = 'removing %s items from a feed of %s took %s seconds'
    print msg_format % (remove_count, feed_count, t.next())
def add_many(self, activities, trim=True, current_activities=None, *args, **kwargs):
    '''
    Adds many activities to the feed

    Unfortunately we can't support the batch interface.
    The writes depend on the reads.
    Also subsequent writes will depend on these writes.
    So no batching is possible at all.

    :param activities: the list of activities
    '''
    if activities and not isinstance(activities[0], Activity):
        raise ValueError('Expecting Activity not %s' % type(activities[0]))
    # start by getting the aggregator
    aggregator = self.get_aggregator()

    t = timer()
    # get the current aggregated activities
    if current_activities is None:
        current_activities = self[:self.merge_max_length]
    msg_format = 'reading %s items took %s'
    logger.info(msg_format, self.merge_max_length, t.next())

    # merge the current activities with the new ones
    new, changed, deleted = aggregator.merge(current_activities, activities)
    logger.info('merge took %s', t.next())

    # new ones we insert, changed we do a delete and insert
    new_aggregated = self._update_from_diff(new, changed, deleted)
    new_aggregated = aggregator.rank(new_aggregated)

    # trim every now and then
    if trim and random.random() <= self.trim_chance:
        self.timeline_storage.trim(self.key, self.max_length)

    return new_aggregated
def benchmark_flat_feed():
    t = timer()
    manager.feed_classes = {'flat': FashiolistaFeed}
    manager.add_entry(1, 1)
    print "Benchmarking flat feed took: %0.2fs" % t.next()
def benchmark_aggregated_feed():
    t = timer()
    manager.feed_classes = {'aggregated': AggregatedFeed}
    manager.add_entry(1, 1)
    print "Benchmarking aggregated feed took: %0.2fs" % t.next()
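# Minimal driver for the two benchmarks above, assuming the module-level
# manager is already configured for the target backend.
if __name__ == '__main__':
    benchmark_flat_feed()
    benchmark_aggregated_feed()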