Example #1
0
class GlobalPMI(BaseRedisModel):
    """
    Redis-backed store of the PMI statistics of a whole corpus.

    Fields:
        corpus -- name of the corpus; doubles as the primary key
        ngrams -- sorted set holding a count for each referenced ngram
        ncount -- sorted set holding a count for each ngram length
    """
    corpus = fields.PKField()
    ngrams = fields.SortedSetField()
    ncount = fields.SortedSetField()

    # Not referenced by any method of this class.
    MAX_LENGTH = 15

    def stemm_list_to_string(self, stemms):
        """
        Build the sorted-set member name of an ngram.

        Every stemm.id is a (lemme, POS_tag) tuple; each one is rendered as
        "lemme/POS_tag" and the pieces are joined with single spaces.
        """
        tokens = ["%s/%s" % stemm.id for stemm in stemms]
        return " ".join(tokens)

    def add_ngram(self, ngram, amount):
        """
        Increment by `amount` both the counter of `ngram` and the counter of
        its length.

        `ngram` is expected to be a list of Stemm instances or a KeyEntity
        instance.
        """
        member = self.stemm_list_to_string(ngram)
        self.ngrams.zincrby(member, amount=amount)
        self.ncount.zincrby(len(ngram), amount)

    def global_probability(self, ngram):
        """
        Return the count of `ngram` divided by the count recorded for ngrams
        of the same length.

        A missing (or zero) score on either side is replaced by 1.
        `ngram` is expected to be a list of Stemm instances or a KeyEntity
        instance.
        """
        member = self.stemm_list_to_string(ngram)

        occurrences = self.ngrams.zscore(member)
        if not occurrences:
            occurrences = 1

        same_length_total = self.ncount.zscore(len(ngram))
        if not same_length_total:
            same_length_total = 1

        return 1.0 * occurrences / same_length_total

    def global_pmi(self, ngram):
        """
        Return the log of the ngram probability over the product of the
        probabilities of its members that have a meaning alone.
        """
        joint_probability = self.global_probability(ngram)
        meaningful_members = (s for s in ngram if s.has_meaning_alone())
        # global_probability expects an iterable, even for a single member
        independent_probability = product(
            self.global_probability([member])
            for member in meaningful_members)
        return math.log(joint_probability / independent_probability)
Example #2
0
 class PersonTest(TestRedisModel):
     """
     Test model declaring two string foreign keys that both target a model
     named Group, each referenced with a different string form.
     """
     namespace = "related-name-ns"
     name = fields.PKField()
     # Target written as "related-tests:Group" -- presumably
     # "<namespace>:<model name>"; confirm against the related-field resolver.
     first_group = FKStringField("related-tests:Group")
     # Target written as a bare model name, with no namespace prefix.
     second_group = FKStringField('Group')
Example #3
0
 class Group(TestRedisModel):
     """
     Minimal test model in the "related-name-ns" namespace, with `name` as
     its primary key field.
     """
     namespace = "related-name-ns"
     name = fields.PKField()
class Group(TestRedisModel):
    """
    Test model combining a self-referencing foreign key with three
    many-to-many fields (set, list and sorted set) all targeting Person.
    """
    name = fields.PKField()
    # 'self' makes the foreign key target this same model; the reverse side
    # is exposed under the name 'children'.
    parent = related.FKInstanceHashField('self', related_name='children')
    # Same target model for the three fields below, each with its own
    # related_name.
    members = related.M2MSetField(Person, related_name='membership')
    lmembers = related.M2MListField(Person, related_name='lmembership')
    zmembers = related.M2MSortedSetField(Person, related_name='zmembership')
Example #5
0
class Group(TestRedisModel):
    """
    Test model with an indexable status, an owner foreign key to Person, a
    self-referencing parent foreign key and a set of Person members.
    """
    name = fields.PKField()
    status = fields.StringField(indexable=True)
    # Foreign key to Person; reverse side named 'owned_groups'.
    owner = FKInstanceHashField(Person, related_name='owned_groups')
    # 'self' makes the foreign key target this same model; reverse side
    # named 'children'.
    parent = FKStringField('self', related_name='children')
    # Many-to-many to Person; reverse side named 'membership'.
    members = M2MSetField(Person, related_name='membership')
Example #6
0
 class Group3(TestRedisModel):
     """
     Test model whose members are stored in a sorted-set many-to-many field
     targeting Person.
     """
     name = fields.PKField()
     # Reverse side named 'members_set3'.
     members = M2MSortedSetField(Person, related_name='members_set3')
Example #7
0
 class Movie(TestRedisModelWithDynamicField):
     """
     Test model declaring a dynamic sorted-set many-to-many field to Tag.
     """
     namespace = 'test_dynamic_related_field_should_work_with_m2msortedsetfield'
     name = limpyd_fields.PKField()
     # Dynamic many-to-many (sorted set) to Tag; reverse side named
     # 'movies_for_people'.
     personal_tags = dyn_related.DynamicM2MSortedSetField(
         Tag, related_name='movies_for_people')
Example #8
0
class Person(TestRedisModel):
    """
    Test model with an indexable age, a string foreign key to a model named
    Group and a self-referencing many-to-many field.
    """
    name = fields.PKField()
    age = fields.StringField(indexable=True)
    # Target referenced by name ('Group') rather than by class object.
    prefered_group = FKStringField('Group')
    # 'self' makes the field target this same model; reverse side named
    # 'followers'.
    following = M2MSetField('self', related_name='followers')
Example #9
0
class Tag(TestRedisModel):
    """Minimal test model whose only field is its primary key, `slug`."""
    slug = limpyd_fields.PKField()
Example #10
0
 class Tag(TestRedisModel):
     """
     Minimal test model with `slug` as primary key, declared in the
     'test_dynamic_related_field_should_work_with_m2msortedsetfield'
     namespace.
     """
     namespace = 'test_dynamic_related_field_should_work_with_m2msortedsetfield'
     slug = limpyd_fields.PKField()
Example #11
0
 class Book(TestRedisModelWithDynamicField):
     """
     Test model declaring a dynamic instance-hash foreign key to Tag.
     """
     namespace = 'test_dynamic_related_field_should_work_with_fkinstancehashfield'
     name = limpyd_fields.PKField()
     # Dynamic foreign key to Tag; reverse side named 'books'.
     personal_main_tag = dyn_related.DynamicFKInstanceHashField(
         Tag, related_name='books')
Example #12
0
 class Tag(TestRedisModel):
     """
     Minimal test model with `slug` as primary key, declared in the
     'test_dynamic_related_field_should_work_with_fkinstancehashfield'
     namespace.
     """
     namespace = 'test_dynamic_related_field_should_work_with_fkinstancehashfield'
     slug = limpyd_fields.PKField()
Example #13
0
class TriggerToDescriptor(BaseRedisModel):
    """
    Helper to manage the trigger to descriptor relation.

    Fields:
        pk            -- primary key; `get_or_connect` builds it as
                         "<trigger_id>|<descriptor_id>"
        trigger_id    -- pk of the related Trigger (indexable)
        descriptor_id -- pk of the related Descriptor (indexable)
        weight        -- weight of the relation (defaults to 1)
    """
    pk = fields.PKField()
    trigger_id = fields.HashableField(indexable=True)
    descriptor_id = fields.HashableField(indexable=True)
    weight = fields.HashableField(default=1)

    def __repr__(self):
        return "<TriggerToDescriptor %s>" % self.__str__()

    def __unicode__(self):
        return "%s=[%s]>%s" % (unicode(
            self.trigger), self.weight.hget(), unicode(self.descriptor))

    def __str__(self):
        return "%s=[%s]>%s" % (str(
            self.trigger), self.weight.hget(), str(self.descriptor))

    @property
    def trigger(self):
        """
        Returns the trigger instance corresponding to the pk stored.

        The instance is cached on `self._trigger` and rebuilt whenever the
        stored `trigger_id` no longer matches the cached instance's pk.
        """
        # Read the stored id once; it serves both the staleness check and
        # the (re)fetch.
        trigger_id = self.trigger_id.hget()
        if not hasattr(self, "_trigger") \
                or self._trigger.pk.get() != trigger_id:
            # Fetch or refetch it
            self._trigger = Trigger(trigger_id)
        return self._trigger

    @property
    def descriptor(self):
        """
        Return the descriptor instance corresponding to the pk stored.

        The instance is cached on `self._descriptor` and rebuilt whenever
        the stored `descriptor_id` no longer matches the cached instance's
        pk.
        """
        # Read the stored id once; it serves both the staleness check and
        # the (re)fetch.
        descriptor_id = self.descriptor_id.hget()
        if not hasattr(self, "_descriptor") \
                or self._descriptor.pk.get() != descriptor_id:
            # Fetch or refetch it
            self._descriptor = Descriptor(descriptor_id)
        return self._descriptor

    def post_command(self, sender, name, result, args, kwargs):
        """
        After a modifier command on the `weight` field, raise the
        `max_weight` of the related trigger and descriptor when the new
        weight exceeds them.

        Always returns `result` unchanged.
        """
        if (isinstance(sender, fields.RedisField) and sender.name == "weight"
                and name in sender.available_modifiers
                and self.trigger_id.hget()
                is not None):  # Means instantiation is done
            # One read of the weight is enough for both comparisons and
            # both updates.
            raw_weight = self.weight.hget()
            weight = int(raw_weight)

            trigger = self.trigger
            if weight > int(trigger.max_weight.hget()):
                trigger.max_weight.hset(raw_weight)

            descriptor = self.descriptor
            if weight > int(descriptor.max_weight.hget()):
                descriptor.max_weight.hset(raw_weight)
        return result

    @property
    def pondered_weight(self):
        """
        Give the weight of the relation, relative to the max weight of the
        trigger and the max weight of the descriptor.

        Computed as (weight / trigger max weight) *
        (weight / descriptor max weight).
        """
        current = float(self.weight.hget())
        # current weight relative to trigger max weight
        weight = current / float(self.trigger.max_weight.hget())
        # current weight relative to descriptor max weight
        weight *= current / float(self.descriptor.max_weight.hget())
        return weight

    @classmethod
    def get_or_connect(cls, trigger_id, descriptor_id):
        """
        Get instances by pk to prevent from creating several times the same
        relation.

        Returns an (instance, created) tuple; when the relation has just
        been created, its `trigger_id` and `descriptor_id` fields are set.
        """
        pk = "%s|%s" % (trigger_id, descriptor_id)
        # super() takes (class, object-or-type): the defining class comes
        # first so the parent classmethod is bound to `cls`, which keeps
        # this method usable from subclasses.
        inst, created = super(TriggerToDescriptor, cls).get_or_connect(pk=pk)
        if created:
            # update the fields
            inst.trigger_id.hset(trigger_id)
            inst.descriptor_id.hset(descriptor_id)
        return inst, created

    @classmethod
    def remove_unique_connections(cls):
        """
        Delete all the connections which occurred only once during training
        (weight <= 1), as well as those without any weight.

        First loop on all the descriptors to consume less RAM.
        """
        for descriptor_id in Descriptor.collection():
            instances = cls.instances(descriptor_id=descriptor_id)
            for inst in instances:
                try:
                    weight = int(inst.weight.hget())
                except TypeError:
                    # int(None): the relation has no stored weight
                    sulci_logger.info(
                        "Removing TriggerToDescriptor %s without weight, between Trigger %s and Descriptor %s"
                        % (inst.pk.get(), inst.trigger_id.hget(),
                           inst.descriptor_id.hget()), "RED")
                    inst.delete()
                    continue
                if weight <= 1:
                    sulci_logger.info(
                        "Removing TriggerToDescriptor %s, between Trigger %s and Descriptor %s"
                        % (inst.pk.get(), inst.trigger_id.hget(),
                           inst.descriptor_id.hget()))
                    inst.delete()

    @classmethod
    def remove_useless_connections(cls, min=0.01):
        """
        Remove all the connections where pondered_weight is lower than
        `min` (by default 0.01)

        The work is delegated to each descriptor's own
        `remove_useless_connections`.
        """
        for descriptor in Descriptor.instances().sort():
            descriptor.remove_useless_connections(min)
Example #14
0
 class Base(TestRedisModel):
     """
     Abstract test model in the 'related-name-sub' namespace, declaring a
     string foreign key to a model named Other.
     """
     abstract = True
     namespace = 'related-name-sub'
     name = fields.PKField()
     # The related_name is a template with %(namespace)s and %(model)s
     # placeholders -- presumably filled in per concrete subclass; confirm
     # against the related-field implementation.
     a_field = FKStringField(
         'Other', related_name='%(namespace)s_%(model)s_related')
Example #15
0
class Person(TestRedisModel):
    """Minimal test model whose only field is its primary key, `name`."""
    name = limpyd_fields.PKField()
Example #16
0
 class Other(TestRedisModel):
     """
     Minimal test model in the 'related-name-sub' namespace, with `name` as
     its primary key field.
     """
     namespace = 'related-name-sub'
     name = fields.PKField()
Example #17
0
class Movie(TestRedisModelWithDynamicField):
    """
    Test model with two many-to-many fields to Tag: a regular one and a
    dynamic one.
    """
    name = limpyd_fields.PKField()
    tags = related.M2MSetField(Tag,
                               related_name='movies')  # global public tags
    personal_tags = dyn_related.DynamicM2MSetField(
        Tag, related_name='movies_for_people')  # private tags for each person
Example #18
0
 class GroupAsList(TestRedisModel):
     """
     Test model whose members are stored in a list many-to-many field
     targeting Person.
     """
     name = fields.PKField()
     # Reverse side named 'members_list'.
     members = M2MListField(Person, related_name='members_list')
Example #19
0
 class NotAutoPkModel(TestRedisModel):
     """
     Test model declaring its primary key explicitly as a plain PKField
     named `pk`, alongside an indexable name.
     """
     pk = fields.PKField()
     name = fields.StringField(indexable=True)
Example #20
0
 class Group2(TestRedisModel):
     """
     Test model whose members are stored in a list many-to-many field
     targeting Person.
     """
     name = fields.PKField()
     # Reverse side named 'members_set2' (the field itself is a list field).
     members = M2MListField(Person, related_name='members_set2')
Example #21
0
 class RedefinedNotAutoPkField(AutoPkModel):
     """
     Subclass of AutoPkModel that declares an `id` field as a plain PKField.
     """
     # Presumably replaces a pk field inherited from AutoPkModel -- that
     # parent is not visible here; confirm.
     id = fields.PKField()
Example #22
0
class Person(TestRedisModel):
    """
    Test model with a string foreign key to a model named Group.
    """
    name = fields.PKField()
    # Target referenced by name ('Group'); reverse side named 'prefered_for'.
    prefered_group = related.FKStringField('Group',
                                           related_name='prefered_for')
Example #23
0
 class PersonTest(TestRedisModel):
     """
     Test model in the 'related-name' namespace, declaring a string foreign
     key whose related_name is a template.
     """
     namespace = 'related-name'
     name = fields.PKField()
     # Target written as 'related-tests:Group' -- presumably
     # "<namespace>:<model name>"; confirm. The related_name carries
     # %(namespace)s and %(model)s placeholders, presumably interpolated by
     # the related-field machinery.
     most_hated_group = FKStringField(
         'related-tests:Group',
         related_name='%(namespace)s_%(model)s_set')
Example #24
0
 class Plane(TestRedisModel):
     """
     Test model with `my_pk` as primary key and two instance-hash fields,
     only one of which (`is_first`) is indexable.
     """
     my_pk = fields.PKField()
     name = fields.InstanceHashField()
     is_first = fields.InstanceHashField(indexable=True)
Example #25
0
        class Plane2(TestRedisModel):
            """
            Test model with `my_pk` as primary key and two instance-hash
            fields, using ExtendedCollectionManager as collection manager.
            """
            my_pk = fields.PKField()
            name = fields.InstanceHashField()
            is_first = fields.InstanceHashField(indexable=True)

            # Collection manager class assigned to this model.
            collection_manager = ExtendedCollectionManager