class GlobalPMI(BaseRedisModel):
    """
    Redis-backed counters used to compute the PMI over a whole corpus.

    corpus = name of the corpus (also used as pk)
    ngrams = count for each referenced ngram in the corpus
    ncount = count for each length of ngram in the corpus
    """
    corpus = fields.PKField()
    ngrams = fields.SortedSetField()
    ncount = fields.SortedSetField()

    MAX_LENGTH = 15

    def stemm_list_to_string(self, stemms):
        """
        Build the string used as sorted-set member for a sequence of stemms.
        """
        # Each stemm.id is a (lemme, POS_tag) tuple, rendered "lemme/POS_tag".
        parts = ["%s/%s" % stemm.id for stemm in stemms]
        return " ".join(parts)

    def add_ngram(self, ngram, amount):
        """
        Increment by `amount` both the counter of this ngram and the counter
        of ngrams of the same length.

        Ngram is expected to be a list of Stemm instances or a KeyEntity
        instance.
        """
        key = self.stemm_list_to_string(ngram)
        self.ngrams.zincrby(key, amount=amount)
        self.ncount.zincrby(len(ngram), amount)

    def global_probability(self, ngram):
        """
        Return the count of the ngram divided by the count of all ngrams of
        the same length. A missing (or zero) count is replaced by 1.

        Ngram is expected to be a list of Stemm instances or a KeyEntity
        instance.
        """
        key = self.stemm_list_to_string(ngram)
        occurrences = self.ngrams.zscore(key) or 1
        same_length_total = self.ncount.zscore(len(ngram)) or 1
        return 1.0 * occurrences / same_length_total

    def global_pmi(self, ngram):
        """
        Return the log of the ngram probability over the product of the
        probabilities of its members that have a meaning alone.
        """
        joint = self.global_probability(ngram)
        meaningful = (s for s in ngram if s.has_meaning_alone())
        # global_probability expects an iterable, even for a single element.
        independent = product(
            self.global_probability([s]) for s in meaningful)
        return math.log(joint / independent)
class PersonTest(TestRedisModel):
    """
    Test model in the "related-name-ns" namespace holding two string
    foreign keys, neither of which sets an explicit related_name.
    """
    namespace = "related-name-ns"

    name = fields.PKField()

    # Target given with a "related-tests:" prefix
    # (presumably a namespace-qualified model name; verify against limpyd).
    first_group = FKStringField("related-tests:Group")
    # Target given by bare model name.
    second_group = FKStringField('Group')
class Group(TestRedisModel):
    """
    Minimal test model in the "related-name-ns" namespace: only a pk.
    """
    namespace = "related-name-ns"

    name = fields.PKField()
class Group(TestRedisModel):
    """
    Test model declaring one self-referencing foreign key and three
    many-to-many relations to Person (set, list and sorted set).
    """
    name = fields.PKField()

    # Self-referencing foreign key.
    parent = related.FKInstanceHashField(
        'self',
        related_name='children',
    )

    # Many-to-many relations to Person, one per container type.
    members = related.M2MSetField(
        Person,
        related_name='membership',
    )
    lmembers = related.M2MListField(
        Person,
        related_name='lmembership',
    )
    zmembers = related.M2MSortedSetField(
        Person,
        related_name='zmembership',
    )
class Group(TestRedisModel):
    """
    Test model with an indexable status, an owner, a self-referencing
    parent and a set of members.
    """
    name = fields.PKField()
    status = fields.StringField(indexable=True)

    # Foreign keys: one to Person, one to this same model.
    owner = FKInstanceHashField(
        Person,
        related_name='owned_groups',
    )
    parent = FKStringField(
        'self',
        related_name='children',
    )

    # Many-to-many relation to Person stored in a set field.
    members = M2MSetField(
        Person,
        related_name='membership',
    )
class Group3(TestRedisModel):
    """
    Test model whose members are stored in a M2MSortedSetField.
    """
    name = fields.PKField()
    members = M2MSortedSetField(
        Person,
        related_name='members_set3',
    )
class Movie(TestRedisModelWithDynamicField):
    """
    Test model with a dynamic many-to-many sorted-set relation to Tag.
    """
    namespace = 'test_dynamic_related_field_should_work_with_m2msortedsetfield'

    name = limpyd_fields.PKField()
    personal_tags = dyn_related.DynamicM2MSortedSetField(
        Tag,
        related_name='movies_for_people',
    )
class Person(TestRedisModel):
    """
    Test model with an indexable age, a preferred group and a
    self-referencing many-to-many relation.
    """
    name = fields.PKField()
    age = fields.StringField(indexable=True)

    # Foreign key to the 'Group' model, referenced by name.
    prefered_group = FKStringField('Group')

    # Self-referencing relation; the related name is 'followers'.
    following = M2MSetField(
        'self',
        related_name='followers',
    )
class Tag(TestRedisModel):
    """
    Minimal test model: only a `slug` pk.
    """
    slug = limpyd_fields.PKField()
class Tag(TestRedisModel):
    """
    Minimal test model (only a `slug` pk) in the namespace used for the
    dynamic M2M sorted-set field test.
    """
    namespace = 'test_dynamic_related_field_should_work_with_m2msortedsetfield'

    slug = limpyd_fields.PKField()
class Book(TestRedisModelWithDynamicField):
    """
    Test model with a dynamic foreign key (instance-hash flavour) to Tag.
    """
    namespace = 'test_dynamic_related_field_should_work_with_fkinstancehashfield'

    name = limpyd_fields.PKField()
    personal_main_tag = dyn_related.DynamicFKInstanceHashField(
        Tag,
        related_name='books',
    )
class Tag(TestRedisModel):
    """
    Minimal test model (only a `slug` pk) in the namespace used for the
    dynamic FK instance-hash field test.
    """
    namespace = 'test_dynamic_related_field_should_work_with_fkinstancehashfield'

    slug = limpyd_fields.PKField()
class TriggerToDescriptor(BaseRedisModel):
    """
    Helper to manage the trigger to descriptor relation.

    pk = "<trigger_id>|<descriptor_id>" when created by `get_or_connect`
    trigger_id = pk of the related Trigger
    descriptor_id = pk of the related Descriptor
    weight = weight of the relation (defaults to 1)
    """
    pk = fields.PKField()
    trigger_id = fields.HashableField(indexable=True)
    descriptor_id = fields.HashableField(indexable=True)
    weight = fields.HashableField(default=1)

    def __repr__(self):
        return "<TriggerToDescriptor %s>" % self.__str__()

    def __unicode__(self):
        return "%s=[%s]>%s" % (unicode(
            self.trigger), self.weight.hget(), unicode(self.descriptor))

    def __str__(self):
        return "%s=[%s]>%s" % (str(
            self.trigger), self.weight.hget(), str(self.descriptor))

    @property
    def trigger(self):
        """
        Returns the trigger instance corresponding to the pk stored.

        The instance is cached on `self._trigger` and rebuilt when its pk
        no longer matches the stored `trigger_id`.
        """
        if not hasattr(self, "_trigger") \
                or self._trigger.pk.get() != self.trigger_id.hget():
            # Fetch or refetch it
            self._trigger = Trigger(self.trigger_id.hget())
        return self._trigger

    @property
    def descriptor(self):
        """
        Return the descriptor instance corresponding to the pk stored.

        The instance is cached on `self._descriptor` and rebuilt when its pk
        no longer matches the stored `descriptor_id`.
        """
        if not hasattr(self, "_descriptor") \
                or self._descriptor.pk.get() != self.descriptor_id.hget():
            # Fetch or refetch it
            self._descriptor = Descriptor(self.descriptor_id.hget())
        return self._descriptor

    def post_command(self, sender, name, result, args, kwargs):
        """
        Keep the max_weight of the trigger and of the descriptor up to date.

        When a modifier command has been run on the `weight` field, raise
        the `max_weight` of the related trigger and descriptor if the new
        weight is greater. Always returns `result` unchanged.
        """
        if (isinstance(sender, fields.RedisField)
                and sender.name == "weight"
                and name in sender.available_modifiers
                and self.trigger_id.hget() is not None):
            # Means instantiation is done
            if int(self.weight.hget()) > int(self.trigger.max_weight.hget()):
                self.trigger.max_weight.hset(self.weight.hget())
            if int(self.weight.hget()) > int(
                    self.descriptor.max_weight.hget()):
                self.descriptor.max_weight.hset(self.weight.hget())
        return result

    @property
    def pondered_weight(self):
        """
        Give the weight of the relation, relative to the max weight of the
        trigger and the max weight of the descriptor.
        """
        # current weight relative to trigger max weight
        weight = float(self.weight.hget()) / float(
            self.trigger.max_weight.hget())
        # current weight relative to descriptor max weight
        weight *= float(self.weight.hget()) / float(
            self.descriptor.max_weight.hget())
        return weight

    @classmethod
    def get_or_connect(cls, trigger_id, descriptor_id):
        """
        Get instances by pk to prevent from creating several times the same
        relation.

        Returns a tuple (instance, created). When the instance has just been
        created, its `trigger_id` and `descriptor_id` fields are filled.
        """
        pk = "%s|%s" % (trigger_id, descriptor_id)
        # super() takes (class, bound object): the arguments were reversed,
        # which only worked when `cls` was exactly TriggerToDescriptor and
        # raised TypeError for any subclass.
        inst, created = super(TriggerToDescriptor, cls).get_or_connect(pk=pk)
        if created:
            # update the fields
            inst.trigger_id.hset(trigger_id)
            inst.descriptor_id.hset(descriptor_id)
        return inst, created

    @classmethod
    def remove_unique_connections(cls):
        """
        Delete all the connections which occurred only once during training,
        and those without any weight.

        First loop on all the descriptors to consume less RAM.
        """
        for descriptor_id in Descriptor.collection():
            instances = cls.instances(descriptor_id=descriptor_id)
            for inst in instances:
                try:
                    weight = int(inst.weight.hget())
                except TypeError:
                    # No weight stored (int(None) raises TypeError).
                    sulci_logger.info(
                        "Removing TriggerToDescriptor %s without weight, between Trigger %s and Descriptor %s"
                        % (inst.pk.get(), inst.trigger_id.hget(),
                           inst.descriptor_id.hget()), "RED")
                    inst.delete()
                    continue
                if weight <= 1:
                    sulci_logger.info(
                        "Removing TriggerToDescriptor %s, between Trigger %s and Descriptor %s"
                        % (inst.pk.get(), inst.trigger_id.hget(),
                           inst.descriptor_id.hget()))
                    inst.delete()

    @classmethod
    def remove_useless_connections(cls, min=0.01):
        """
        Remove all the connections where pondered_weight is lower than `min`
        (by default 0.01).

        The work is delegated to each descriptor's own
        `remove_useless_connections`.
        """
        for descriptor in Descriptor.instances().sort():
            descriptor.remove_useless_connections(min)
class Base(TestRedisModel):
    """
    Abstract test model in the 'related-name-sub' namespace, with a
    foreign key whose related_name is a %-style template.
    """
    abstract = True
    namespace = 'related-name-sub'

    name = fields.PKField()

    # The related name holds %(namespace)s and %(model)s placeholders.
    a_field = FKStringField(
        'Other',
        related_name='%(namespace)s_%(model)s_related',
    )
class Person(TestRedisModel):
    """
    Minimal test model: only a `name` pk.
    """
    name = limpyd_fields.PKField()
class Other(TestRedisModel):
    """
    Minimal test model in the 'related-name-sub' namespace: only a pk.
    """
    namespace = 'related-name-sub'

    name = fields.PKField()
class Movie(TestRedisModelWithDynamicField):
    """
    Test model with both a regular and a dynamic many-to-many relation
    to Tag.
    """
    name = limpyd_fields.PKField()

    # global public tags
    tags = related.M2MSetField(
        Tag,
        related_name='movies',
    )

    # private tags for each person
    personal_tags = dyn_related.DynamicM2MSetField(
        Tag,
        related_name='movies_for_people',
    )
class GroupAsList(TestRedisModel):
    """
    Test model whose members are stored in a M2MListField.
    """
    name = fields.PKField()
    members = M2MListField(
        Person,
        related_name='members_list',
    )
class NotAutoPkModel(TestRedisModel):
    """
    Test model declaring its `pk` explicitly as a plain PKField, plus an
    indexable name.
    """
    pk = fields.PKField()
    name = fields.StringField(indexable=True)
class Group2(TestRedisModel):
    """
    Test model whose members are stored in a M2MListField, with the
    related name 'members_set2'.
    """
    name = fields.PKField()
    members = M2MListField(
        Person,
        related_name='members_set2',
    )
class RedefinedNotAutoPkField(AutoPkModel):
    """
    Subclass of AutoPkModel declaring an `id` field as a plain PKField.
    """
    id = fields.PKField()
class Person(TestRedisModel):
    """
    Test model with a string foreign key to the 'Group' model.
    """
    name = fields.PKField()
    prefered_group = related.FKStringField(
        'Group',
        related_name='prefered_for',
    )
class PersonTest(TestRedisModel):
    """
    Test model in the 'related-name' namespace, with a foreign key whose
    related_name is a %-style template.
    """
    namespace = 'related-name'

    name = fields.PKField()

    # The related name holds %(namespace)s and %(model)s placeholders.
    most_hated_group = FKStringField(
        'related-tests:Group',
        related_name='%(namespace)s_%(model)s_set',
    )
class Plane(TestRedisModel):
    """
    Test model with a custom-named pk and two instance-hash fields, one of
    them indexable.
    """
    my_pk = fields.PKField()

    name = fields.InstanceHashField()
    is_first = fields.InstanceHashField(indexable=True)
class Plane2(TestRedisModel):
    """
    Test model with a custom-named pk and two instance-hash fields, using
    ExtendedCollectionManager as its collection manager.
    """
    my_pk = fields.PKField()

    name = fields.InstanceHashField()
    is_first = fields.InstanceHashField(indexable=True)

    collection_manager = ExtendedCollectionManager