# Example #1
class GlobalPMI(BaseRedisModel):
    """
    Store in redis the PMI for the whole corpus.

    corpus = name of the corpus (used as pk also)
    ngrams = count for each referenced ngram in the corpus
    ncount = count for each length of ngram in the corpus
    """
    corpus = fields.PKField()
    ngrams = fields.SortedSetField()
    ncount = fields.SortedSetField()

    # Not referenced inside this class; presumably the maximum ngram length
    # considered by callers -- TODO confirm.
    MAX_LENGTH = 15

    def stemm_list_to_string(self, stemms):
        """
        Build the string used as member name in the `ngrams` sorted set.

        Each stemm is rendered as "lemme/POS_tag" and the parts are joined
        with single spaces.
        """
        # stemm.id is a tuple (lemme, POS_tag)
        return " ".join("%s/%s" % stemm.id for stemm in stemms)

    def add_ngram(self, ngram, amount):
        """
        Increment the counters of `ngram` by `amount`.

        Ngram is expected to be a list of Stemm instances or a KeyEntity
        instance. Both the per-ngram score (in `ngrams`) and the score of
        its length (in `ncount`) are incremented.
        """
        ngram_key = self.stemm_list_to_string(ngram)
        self.ngrams.zincrby(ngram_key, amount=amount)
        self.ncount.zincrby(len(ngram), amount)

    def global_probability(self, ngram):
        """
        Return the ratio between the score of `ngram` and the score stored
        for ngrams of the same length.

        Ngram is expected to be a list of Stemm instances or a KeyEntity
        instance. A missing (or zero) score is replaced by 1 on both sides,
        so the division never fails and the result is never 0.
        """
        ngram_key = self.stemm_list_to_string(ngram)
        ngram_score = self.ngrams.zscore(ngram_key) or 1
        ngram_length_score = self.ncount.zscore(len(ngram)) or 1
        # `1.0 *` forces a float division whatever the score types are
        return 1.0 * ngram_score / ngram_length_score

    def global_pmi(self, ngram):
        """
        Return log(P(ngram) / product of P(member)), where only the members
        for which `has_meaning_alone()` is true are taken into account.
        """
        ngram_probability = self.global_probability(ngram)
        # use iterable also for one element
        # NOTE(review): `product` is defined outside this class; as its result
        # is used as a divisor it presumably multiplies the values together
        # (i.e. it is not itertools.product) -- confirm.
        members_probability = product(
            self.global_probability([s]) for s in ngram
            if s.has_meaning_alone())
        return math.log(ngram_probability / members_probability)
# Example #2
class GroupsContainer(TestRedisModel):
    """Model declaring one set field, one list field and one sorted-set field."""
    # presumably the prefix used for the redis keys of this model -- TODO confirm
    namespace = 'contrib-collection'
    # one field per multi-valued field type
    groups_set = fields.SetField()
    groups_list = fields.ListField()
    groups_sortedset = fields.SortedSetField()
class SortedSetModel(TestRedisModel):
    """Model with a single sorted-set field declared with `indexable=True`."""
    field = fields.SortedSetField(indexable=True)
class Student(TestRedisModel):
    """Model with a single sorted-set field declared with `unique=True`."""
    exams = fields.SortedSetField(unique=True)
# Example #5
# NOTE(review): this class was recovered from a listing in which decorators,
# docstring quotes, `try:`/`else:` headers and a few statements were missing.
# Statements that could not be derived from the surrounding code are flagged
# with "NOTE(review)" below and must be confirmed before use.
class Queue(BaseJobsModel):
    """
    A named queue of jobs with a priority.

    waiting = list of idents of jobs ready to be executed
    success / errors = lists declared for finished jobs (not used by the
                       methods of this class)
    delayed = sorted set of idents of delayed jobs, scored by the timestamp
              at which they become ready
    """
    name = fields.InstanceHashField(indexable=True)
    priority = fields.InstanceHashField(indexable=True, default=0)  # the higher, the better
    waiting = fields.ListField()
    success = fields.ListField()
    errors = fields.ListField()
    delayed = fields.SortedSetField()

    @classmethod
    def get_queue(cls, name, priority=0, **fields_if_new):
        """
        Get, or create, and return the wanted queue.
        If the queue is created, fields in fields_if_new will be set for the new
        queue.

        Up to 10 attempts are made.
        NOTE(review): if every attempt fails, `queue` and `created` are never
        bound and the code after the loop raises UnboundLocalError.
        """
        queue_kwargs = {'name': name, 'priority': priority}
        retries = 0
        while retries < 10:
            retries += 1
            try:
                queue, created = cls.get_or_connect(**queue_kwargs)
            except IndexError:
                # Failure during the retrieval https://friendpaste.com/5U63a8aFuV44SEgQckgMP
                # => retry
                continue
            except ValueError:
                # more than one (race condition https://github.com/yohanboniface/redis-limpyd/issues/82 ?)
                try:
                    queue = cls.collection(**queue_kwargs).instances()[0]
                except IndexError:
                    # but no more now ?!
                    # => retry
                    continue
                else:
                    created = False

            # ok we have our queue, stop now
            break

        if created and fields_if_new:
            # NOTE(review): reconstructed call; `set_fields` is expected to be
            # provided by BaseJobsModel -- confirm.
            queue.set_fields(**fields_if_new)

        return queue

    def delay_job(self, job, delayed_until):
        """
        Add the job to the delayed list (zset) of the queue.

        The score of the entry is `delayed_until` converted by
        `datetime_to_score`.
        """
        timestamp = datetime_to_score(delayed_until)
        self.delayed.zadd({job.ident: timestamp})

    def enqueue_job(self, job, prepend=False):
        """
        Add the job to the waiting list, at the end (it's a fifo list). If
        `prepend` is True, add it at the beginning of the list.
        """
        push_method = getattr(self.waiting, 'lpush' if prepend else 'rpush')
        # NOTE(review): reconstructed call; the ident is what `delay_job`
        # stores for a job, so the same value is pushed here -- confirm.
        push_method(job.ident)

    @staticmethod
    def _get_iterable_for_names(names):
        """
        Ensure that we have an iterable list of names, even if we have a single
        name.
        """
        # `basestring` must be provided by the module (python 2 builtin or a
        # compatibility import)
        if isinstance(names, basestring):
            names = (names, )
        return names

    @classmethod
    def get_all(cls, names):
        """
        Return all queues for the given names (for all available priorities)
        """
        names = cls._get_iterable_for_names(names)

        queues = []
        for queue_name in names:
            # NOTE(review): reconstructed line, following the
            # `cls.collection(...).instances()` pattern used in `get_queue`.
            queues.extend(cls.collection(name=queue_name).instances())

        return queues

    @classmethod
    def get_all_by_priority(cls, names):
        """
        Return all the queues with the given names, sorted by priorities (higher
        priority first). The sort is stable, so queues sharing a priority keep
        the order in which `get_all` returned them.
        """
        names = cls._get_iterable_for_names(names)

        queues = cls.get_all(names)

        # sort all queues by priority (a missing priority counts as 0)
        queues.sort(key=lambda q: int(q.priority.hget() or 0), reverse=True)

        return queues

    @classmethod
    def get_waiting_keys(cls, names):
        """
        Return a list of all queue waiting keys, to use with blpop
        """
        return [queue.waiting.key for queue in cls.get_all_by_priority(names)]

    @classmethod
    def count_waiting_jobs(cls, names):
        """
        Return the number of all jobs waiting in queues with the given names
        """
        return sum(queue.waiting.llen() for queue in cls.get_all(names))

    @classmethod
    def count_delayed_jobs(cls, names):
        """
        Return the number of all delayed jobs in queues with the given names
        """
        return sum(queue.delayed.zcard() for queue in cls.get_all(names))

    @property
    def first_delayed(self):
        """
        Return the first entry in the delayed zset (a tuple with the job's
        ident and the score of the zset, which is its delayed time as a
        timestamp)
        Returns None if no delayed jobs
        """
        entries = self.delayed.zrange(0, 0, withscores=True)
        return entries[0] if entries else None

    @property
    def first_delayed_time(self):
        """
        Get the timestamp representation of the first delayed job to be ready.
        Returns None if no delayed jobs
        """
        # get the first job which will be ready
        first_entry = self.first_delayed

        return first_entry[1] if first_entry else None

    def requeue_delayed_jobs(self):
        """
        Put all delayed jobs that are now ready, back in the queue waiting list
        Return a list of failures, as tuples (job ident, error message)
        """
        # NOTE(review): the arguments of this call were missing and are
        # reconstructed -- confirm the exact key parts.
        lock_key = self.make_key(
            self._name,
            self.pk.get(),
            "requeue_all_delayed_ready_jobs",
        )
        connection = self.get_connection()

        if connection.exists(lock_key):
            # if locked, a worker is already on it, don't wait and exit
            return []

        with Lock(connection, lock_key, timeout=60):

            # stop here if we know we have nothing
            first_delayed_time = self.first_delayed_time
            if not first_delayed_time:
                return []

            # get when we are :)
            now_timestamp = datetime_to_score(datetime.utcnow())

            # the first job will be ready later, and so the other ones too, then
            # abort
            if float(first_delayed_time) > now_timestamp:
                return []

            failures = []
            while True:
                # get the first entry
                first_entry = self.first_delayed

                # no first entry, another worker took all from us !
                if not first_entry:
                    break

                # split into vars for readability
                job_ident, delayed_until = first_entry

                # if the date of the job is in the future, another work took the
                # job we wanted, so we let this job here and stop the loop as we
                # know (its a zset sorted by date) that no other jobs are ready
                if delayed_until > now_timestamp:
                    break

                # remove the entry we just got from the delayed ones
                # NOTE(review): reconstructed line.
                self.delayed.zrem(job_ident)

                # and add it to the waiting queue
                try:
                    job = Job.get_from_ident(job_ident)
                    if job.status.hget() == STATUSES.DELAYED:
                        # NOTE(review): reconstructed line; the name of the
                        # "waiting" status is assumed -- confirm.
                        job.status.hset(STATUSES.WAITING)
                    # NOTE(review): reconstructed line.
                    self.enqueue_job(job)
                except Exception as e:
                    # best effort: record the failure and go on with next job
                    failures.append((job_ident, '%s' % e))

            return failures