def dumps(self, aggregated):
    """Serialize an aggregated activity into one string.

    Layout: identifier + parts joined by ';;', where the member
    activities are themselves joined by ';'. Both separators are
    therefore reserved and checked against the serialized pieces.
    """
    # the group goes first and must not contain the part separator
    check_reserved(aggregated.group, [';;'])
    pieces = [aggregated.group]
    # each configured date field is stored as an epoch, -1 meaning unset
    for field_name in self.date_fields:
        field_value = getattr(aggregated, field_name)
        pieces.append(-1 if field_value is None else datetime_to_epoch(field_value))
    # serialize the contained activities and join them with ';'
    activity_strings = []
    for activity in aggregated.activities:
        activity_string = LoveActivitySerializer.dumps(self, activity)
        check_reserved(activity_string, [';', ';;'])
        activity_strings.append(activity_string)
    pieces.append(';'.join(activity_strings))
    # the count of minimized (dropped) activities comes last
    pieces.append(aggregated.minimized_activities)
    return '%s%s' % (self.identifier, ';;'.join(map(str, pieces)))
def serialization_id(self):
    """Stable, unique, time-ordered id for this activity.

    Concatenated digits: creation time as epoch milliseconds, then
    object_id zero-padded to 10 digits, then verb id zero-padded to
    3 digits. It never changes for a given activity, so it is safe
    to use as a redis sorted-set score or cassandra column name.

    :returns: int -- the serialization id
    :raises TypeError: when object_id/verb id are too wide or time is unset
    """
    if self.object_id >= 10 ** 10 or self.verb.id >= 10 ** 3:
        raise TypeError('Fatal: object_id / verb have too many digits !')
    if not self.time:
        raise TypeError('Cant serialize activities without a time')
    epoch_ms = str(int(datetime_to_epoch(self.time) * 1000))
    return int('%s%0.10d%0.3d' % (epoch_ms, self.object_id, self.verb.id))
def serialization_id(self):
    """Return the unique id used to sort and address this activity.

    Layout: <epoch milliseconds><object_id padded to 10><verb id padded
    to 3>. Because it only depends on time, object and verb, it is
    unique and stable over the lifetime of the activity.

    :returns: int -- the serialization id
    :raises TypeError: on over-wide object_id/verb id or missing time
    """
    if self.object_id >= 10 ** 10 or self.verb.id >= 10 ** 3:
        raise TypeError('Fatal: object_id / verb have too many digits !')
    if not self.time:
        raise TypeError('Cant serialize activities without a time')
    milliseconds = int(datetime_to_epoch(self.time) * 1000)
    id_string = '%s%0.10d%0.3d' % (milliseconds, self.object_id, self.verb.id)
    return int(id_string)
def get_activity_score(self, aggregated_activity):
    """Build a unique float score for the aggregated activity.

    The updated_at epoch alone can collide, so every verb id is
    appended as extra digits to keep scores distinguishable.
    """
    verb_suffix = ''.join(str(verb.id) for verb in aggregated_activity.verbs)
    epoch = datetime_to_epoch(aggregated_activity.updated_at)
    return float(unicode(epoch) + verb_suffix)
def get_activity_score(self, aggregated_activity):
    """Score for sorting: updated_at epoch with the verb ids appended.

    Appending the verb ids keeps two aggregates with the same
    timestamp from sharing a score.
    """
    verb_ids = [str(v.id) for v in aggregated_activity.verbs]
    epoch_part = unicode(datetime_to_epoch(aggregated_activity.updated_at))
    return float(epoch_part + ''.join(verb_ids))
def dumps(self, activity):
    """Serialize *activity* into a '|'-separated string.

    Field order: actor_id|verb_id|object_id|target_id|time|pickle.
    A falsy target_id is stored as 0; extra_context is pickled only
    when non-empty, otherwise the last field is the empty string.

    :raises: whatever check_type raises for a wrong activity type
    """
    self.check_type(activity)
    activity_time = datetime_to_epoch(activity.time)
    parts = [activity.actor_id, activity.verb.id,
             activity.object_id, activity.target_id or 0]
    # snapshot the dict so the value we test is the value we store;
    # previously the copy was made but the original dict was pickled
    extra_context = activity.extra_context.copy()
    pickle_string = ''
    if extra_context:
        # NOTE(review): pickle bytes may contain '|'; assumes the matching
        # loads() splits with a bounded maxsplit -- confirm against it
        pickle_string = pickle.dumps(extra_context)
    parts += [activity_time, pickle_string]
    return '|'.join(map(str, parts))
def dumps(self, activity):
    """Serialize *activity* into a ','-separated string.

    Field order: actor_id,verb_id,object_id,target_id,time,pickle.
    A falsy target_id is stored as 0; extra_context is pickled only
    when non-empty, otherwise the last field is the empty string.

    :raises: whatever check_type raises for a wrong activity type
    """
    self.check_type(activity)
    activity_time = datetime_to_epoch(activity.time)
    parts = [activity.actor_id, activity.verb.id,
             activity.object_id, activity.target_id or 0]
    # snapshot the dict so the value we test is the value we store;
    # previously the copy was made but the original dict was pickled
    extra_context = activity.extra_context.copy()
    pickle_string = ''
    if extra_context:
        # NOTE(review): pickle bytes may contain ','; assumes the matching
        # loads() splits with a bounded maxsplit -- confirm against it
        pickle_string = pickle.dumps(extra_context)
    parts += [activity_time, pickle_string]
    return ','.join(map(str, parts))
def serialization_id(self):
    """
    Shorter serialization id than used by default
    """
    if self.object_id >= 10 ** 10 or self.verb.id >= 10 ** 3:
        raise TypeError("Fatal: object_id / verb have too many digits !")
    if not self.time:
        raise TypeError("Cant serialize activities without a time")
    epoch_ms = int(datetime_to_epoch(self.time) * 1000)
    # only the last two digits of object_id are kept in the short form
    # NOTE(review): verb.id is validated up to 999 but padded to just 2
    # digits, so a 3-digit verb widens the id -- confirm that is intended
    compact = "%s%0.2d%0.2d" % (epoch_ms, self.object_id % 100, self.verb.id)
    return int(compact)
def dumps(self, activity):
    """Serialize *activity*, honoring objects with their own serialize().

    Markers such as FeedEndMarker serialize themselves; regular
    activities become a ','-joined string:
    actor_id,verb_id,object_id,target_id,entity_id,time,pickle.
    """
    # objects like the FeedEndMarker carry their own serialization
    if hasattr(activity, 'serialize'):
        return activity.serialize()
    activity_time = datetime_to_epoch(activity.time)
    parts = [activity.actor_id, activity.verb.id,
             activity.object_id, activity.target_id or 0]
    extra_context = activity.extra_context.copy()
    # the entity id gets its own column, so strip it before pickling
    entity_id = extra_context.pop('entity_id', 0)
    pickle_string = ''
    if extra_context:
        # fix: pickle the stripped copy -- the old code pickled the full
        # activity.extra_context, storing entity_id twice and defeating
        # the "store the entity id more efficiently" optimization
        pickle_string = pickle.dumps(extra_context)
    parts += [entity_id, activity_time, pickle_string]
    return ','.join(map(str, parts))
def serialization_id(self):
    '''
    Shorter serialization id than used by default
    '''
    # reject ids that would not fit the padded layout
    if not (self.object_id < 10 ** 10 and self.verb.id < 10 ** 3):
        raise TypeError('Fatal: object_id / verb have too many digits !')
    if not self.time:
        raise TypeError('Cant serialize activities without a time')
    milliseconds = str(int(datetime_to_epoch(self.time) * 1000))
    # compact layout: epoch ms + last 2 digits of object_id + 2-digit verb
    short_id = '%s%0.2d%0.2d' % (milliseconds, self.object_id % 100, self.verb.id)
    return int(short_id)
def dumps(self, activity):
    """Serialize *activity*, honoring objects with their own serialize().

    Markers such as FeedEndMarker serialize themselves; regular
    activities become a ','-joined string:
    actor_id,verb_id,object_id,target_id,entity_id,time,pickle.

    :raises: whatever check_type raises for a wrong activity type
    """
    self.check_type(activity)
    # objects like the FeedEndMarker carry their own serialization
    if hasattr(activity, 'serialize'):
        return activity.serialize()
    activity_time = datetime_to_epoch(activity.time)
    parts = [activity.actor_id, activity.verb.id,
             activity.object_id, activity.target_id or 0]
    extra_context = activity.extra_context.copy()
    # the entity id gets its own column, so strip it before pickling
    entity_id = extra_context.pop('entity_id', 0)
    pickle_string = ''
    if extra_context:
        # fix: pickle the stripped copy -- the old code pickled the full
        # activity.extra_context, storing entity_id twice and defeating
        # the "store the entity id more efficiently" optimization
        pickle_string = pickle.dumps(extra_context)
    parts += [entity_id, activity_time, pickle_string]
    return ','.join(map(str, parts))
def serialization_id(self):
    '''
    Simplified serialization id for aggregated activities.

    Unlike the Activity version (epoch ms + padded object_id + verb id),
    this is just the updated_at time as whole epoch seconds, returned as
    a string. The old docstring was copy-pasted from the Activity
    version and described a layout this method never produced.

    :returns: str -- the epoch seconds of updated_at as a string
    '''
    # renamed from `milliseconds`: there is no * 1000 here, the value
    # is truncated epoch seconds
    seconds = str(int(datetime_to_epoch(self.updated_at)))
    return seconds
def dumps(self, activity):
    """Serialize *activity* into a ','-joined string.

    Field order: actor_id,verb_id,object_id,target_id,time,pickle.
    The extra_context dict is re-ordered before pickling in an attempt
    to make equal dicts produce identical pickle bytes.
    """
    self.check_type(activity)
    activity_time = datetime_to_epoch(activity.time)
    parts = [activity.actor_id, activity.verb.id,
             activity.object_id, activity.target_id or 0]
    extra_context = activity.extra_context.copy()
    pickle_string = ''
    # Attempt to make the pickled payload deterministic, since the
    # serialized string is compared/used as a key elsewhere and dict
    # iteration order is not stable.
    # NOTE(review): sorting by *value* and rebuilding a dict does not pin
    # iteration order on CPython 2 hash-based dicts -- confirm this
    # actually stabilizes pickle output for the dicts used here; the
    # original author already flagged it as a temporary solution.
    import operator
    extra_context = dict(sorted(extra_context.iteritems(), key=operator.itemgetter(1)))
    if extra_context:
        pickle_string = pickle.dumps(extra_context)
    parts += [activity_time, pickle_string]
    serialized_activity = ','.join(map(str, parts))
    return serialized_activity
def dumps(self, aggregated):
    """Serialize an aggregated activity using tab separators.

    Parts are joined with a double tab and the member activities with
    ';'; both sequences are reserved and checked for. When dehydration
    is enabled only the activity ids are stored.
    """
    self.check_type(aggregated)
    activity_serializer = self.activity_serializer_class()
    # the group leads and must not contain the part separator
    check_reserved(aggregated.group, ['\t\t'])
    pieces = [aggregated.group]
    # each configured date field is stored as an epoch, -1 meaning unset
    for field_name in self.date_fields:
        field_value = getattr(aggregated, field_name)
        pieces.append(-1 if field_value is None else datetime_to_epoch(field_value))
    if self.dehydrate:
        # dehydrated form only keeps the activity ids
        if not aggregated.dehydrated:
            aggregated = aggregated.get_dehydrated()
        activity_parts = [str(activity_id) for activity_id in aggregated._activity_ids]
    else:
        activity_parts = []
        for activity in aggregated.activities:
            serialized_activity = activity_serializer.dumps(activity)
            check_reserved(serialized_activity, ['\t', '\t\t'])
            activity_parts.append(serialized_activity)
    pieces.append(';'.join(activity_parts))
    # the count of minimized (dropped) activities comes last
    pieces.append(aggregated.minimized_activities)
    return '%s%s' % (self.identifier, '\t\t'.join(map(str, pieces)))
def dumps(self, aggregated):
    """Serialize an aggregated activity into one string.

    Layout: identifier + fields joined by ';;', with the member
    activities joined by ';'; both separators are reserved. When
    dehydration is enabled only the activity ids are stored.
    """
    self.check_type(aggregated)
    activity_serializer = self.activity_serializer_class()
    # the group leads and must not contain the field separator
    check_reserved(aggregated.group, [';;'])
    fields = [aggregated.group]
    # date fields become epochs, with -1 marking an unset date
    for date_field in self.date_fields:
        date_value = getattr(aggregated, date_field)
        fields.append(datetime_to_epoch(date_value) if date_value is not None else -1)
    if self.dehydrate:
        # store just the ids when working with a dehydrated aggregate
        if not aggregated.dehydrated:
            aggregated = aggregated.get_dehydrated()
        serialized_list = [str(activity_id) for activity_id in aggregated._activity_ids]
    else:
        serialized_list = []
        for activity in aggregated.activities:
            dumped = activity_serializer.dumps(activity)
            check_reserved(dumped, [';', ';;'])
            serialized_list.append(dumped)
    fields.append(';'.join(serialized_list))
    # the count of minimized (dropped) activities comes last
    fields.append(aggregated.minimized_activities)
    return '%s%s' % (self.identifier, ';;'.join(map(str, fields)))
def pack(intval):
    # Convert a value to its epoch representation as a string.
    # NOTE(review): despite the name, `intval` is handed straight to
    # datetime_to_epoch, so it is presumably a datetime -- confirm
    # against callers before renaming.
    return str(datetime_to_epoch(intval))
def super_column(self):
    """Column value: updated_at epoch with every verb id appended.

    Appending the verb ids keeps columns unique when two aggregates
    share the same updated_at timestamp.
    """
    verb_suffix = ''.join(str(verb.id) for verb in self.aggregated_activities.verbs)
    updated_epoch = datetime_to_epoch(self.updated_at)
    return long(unicode(updated_epoch) + verb_suffix)