class GlobalPMI(BaseRedisModel):
    """
    Store in redis the PMI for the whole corpus.

    corpus = name of the corpus (used as pk also)
    ngrams = count for each referenced ngram in the corpus
    ncount = count for each length of ngram in the corpus
    """
    corpus = fields.PKField()
    ngrams = fields.SortedSetField()
    ncount = fields.SortedSetField()

    MAX_LENGTH = 15

    def stemm_list_to_string(self, stemms):
        # each stemm.id is a tuple (lemme, POS_tag), rendered as "lemme/POS"
        parts = ("%s/%s" % stemm.id for stemm in stemms)
        return " ".join(parts)

    def add_ngram(self, ngram, amount):
        """
        Increment the counters of the given ngram by `amount`.

        Ngram is expected to be a list of Stemm instances or a
        KeyEntity instances.
        """
        key = self.stemm_list_to_string(ngram)
        # bump both the ngram's own counter and the total for its length
        self.ngrams.zincrby(key, amount=amount)
        self.ncount.zincrby(len(ngram), amount)

    def global_probability(self, ngram):
        """
        Return the ngram's count divided by the total count of ngrams
        of the same length (missing counts default to 1).

        Ngram is expected to be a list of Stemm instances or a
        KeyEntity instances.
        """
        key = self.stemm_list_to_string(ngram)
        occurrences = self.ngrams.zscore(key) or 1
        length_total = self.ncount.zscore(len(ngram)) or 1
        return 1.0 * occurrences / length_total

    def global_pmi(self, ngram):
        """
        Return the pointwise mutual information of the ngram: log of its
        probability over the product of its members' probabilities.
        """
        joint_probability = self.global_probability(ngram)
        # use iterable also for one element
        meaningful = (s for s in ngram if s.has_meaning_alone())
        members_probability = product(
            self.global_probability([s]) for s in meaningful)
        return math.log(joint_probability / members_probability)
class GroupsContainer(TestRedisModel):
    # Test model exposing one field of each multi-value redis type
    # (set, list, sorted set) — presumably used to exercise the
    # contrib collection features; namespace isolates its redis keys.
    namespace = 'contrib-collection'
    groups_set = fields.SetField()
    groups_list = fields.ListField()
    groups_sortedset = fields.SortedSetField()
class SortedSetModel(TestRedisModel):
    # Minimal test model with a single indexable sorted-set field.
    field = fields.SortedSetField(indexable=True)
class Student(TestRedisModel):
    # Test model with a sorted-set field declared unique — presumably to
    # exercise the unique-constraint behavior on SortedSetField.
    exams = fields.SortedSetField(unique=True)
class Queue(BaseJobsModel):
    """
    A named, prioritized queue of jobs stored in redis.

    Jobs wait in the `waiting` list (a fifo) and are moved to `success` or
    `errors`; jobs postponed to a later time are held in the `delayed`
    sorted set, scored by the timestamp at which they become ready.
    """
    name = fields.InstanceHashField(indexable=True)
    priority = fields.InstanceHashField(indexable=True, default=0)  # the higher, the better
    waiting = fields.ListField()
    success = fields.ListField()
    errors = fields.ListField()
    delayed = fields.SortedSetField()

    @classmethod
    def get_queue(cls, name, priority=0, **fields_if_new):
        """
        Get, or create, and return the wanted queue.
        If the queue is created, fields in fields_if_new will be
        set for the new queue.
        """
        queue_kwargs = {'name': name, 'priority': priority}

        # get_or_connect can fail transiently under concurrency, so retry
        # up to 10 times before giving up
        retries = 0
        while retries < 10:
            retries += 1
            try:
                queue, created = cls.get_or_connect(**queue_kwargs)
            except IndexError:
                # Failure during the retrieval https://friendpaste.com/5U63a8aFuV44SEgQckgMP
                # => retry
                continue
            except ValueError:
                # more than one (race condition https://github.com/yohanboniface/redis-limpyd/issues/82 ?)
                try:
                    # duplicates exist: arbitrarily keep the first one
                    queue = cls.collection(**queue_kwargs).instances()[0]
                except IndexError:
                    # but no more now ?!
                    # => retry
                    continue
                else:
                    created = False

            # ok we have our queue, stop now
            break

        # NOTE(review): if all 10 attempts hit a `continue`, `queue` and
        # `created` are unbound here and this raises NameError — confirm
        # whether that situation can actually occur.
        if created and fields_if_new:
            queue.set_fields(**fields_if_new)

        return queue

    def delay_job(self, job, delayed_until):
        """
        Add the job to the delayed list (zset) of the queue, scored by the
        `delayed_until` datetime converted to a timestamp.
        """
        timestamp = datetime_to_score(delayed_until)
        self.delayed.zadd({job.ident: timestamp})

    def enqueue_job(self, job, prepend=False):
        """
        Add the job to the waiting list, at the end (it's a fifo list).
        If `prepend` is True, add it at the beginning of the list.
        """
        # rpush appends (normal fifo case), lpush prepends
        push_method = getattr(self.waiting, 'lpush' if prepend else 'rpush')
        push_method(job.ident)

    @staticmethod
    def _get_iterable_for_names(names):
        """
        Ensure that we have an iterable list of names, even if we
        have a single name
        """
        # python 2: wrap a single (byte or unicode) string in a tuple
        if isinstance(names, basestring):
            names = (names, )
        return names

    @classmethod
    def get_all(cls, names):
        """
        Return all queues for the given names (for all available priorities)
        """
        names = cls._get_iterable_for_names(names)
        queues = []
        for queue_name in names:
            queues.extend(list(cls.collection(name=queue_name).instances()))
        return queues

    @classmethod
    def get_all_by_priority(cls, names):
        """
        Return all the queues with the given names, sorted by priorities
        (higher priority first), then by name
        """
        names = cls._get_iterable_for_names(names)

        queues = cls.get_all(names)

        # sort all queues by priority (an unset priority counts as 0)
        queues.sort(key=lambda q: int(q.priority.hget() or 0), reverse=True)

        return queues

    @classmethod
    def get_waiting_keys(cls, names):
        """
        Return a list of all queue waiting keys, to use with blpop
        """
        return [queue.waiting.key for queue in cls.get_all_by_priority(names)]

    @classmethod
    def count_waiting_jobs(cls, names):
        """
        Return the number of all jobs waiting in queues with the given names
        """
        return sum([queue.waiting.llen() for queue in cls.get_all(names)])

    @classmethod
    def count_delayed_jobs(cls, names):
        """
        Return the number of all delayed jobs in queues with the given names
        """
        return sum([queue.delayed.zcard() for queue in cls.get_all(names)])

    @property
    def first_delayed(self):
        """
        Return the first entry in the delayed zset (a tuple with the job's pk
        and the score of the zset, which it's delayed time as a timestamp)
        Returns None if no delayed jobs
        """
        entries = self.delayed.zrange(0, 0, withscores=True)
        return entries[0] if entries else None

    @property
    def first_delayed_time(self):
        """
        Get the timestamp representation of the first delayed job to be ready.
        Returns None when there is no delayed job.
        """
        # get the first job which will be ready
        first_entry = self.first_delayed
        # keep only the score, i.e. the "ready at" timestamp
        return first_entry[1] if first_entry else None

    def requeue_delayed_jobs(self):
        """
        Put all delayed jobs that are now ready, back in the queue waiting list
        Return a list of failures
        """
        # a redis key used as a lock so only one worker requeues at a time
        lock_key = self.make_key(
            self._name,
            self.pk.get(),
            "requeue_all_delayed_ready_jobs",
        )

        connection = self.get_connection()

        if connection.exists(lock_key):
            # if locked, a worker is already on it, don't wait and exit
            return []

        with Lock(connection, lock_key, timeout=60):

            # stop here if we know we have nothing
            first_delayed_time = self.first_delayed_time
            if not first_delayed_time:
                return []

            # get when we are :)
            now_timestamp = datetime_to_score(datetime.utcnow())

            # the first job will be ready later, and so the other ones too, then
            # abort
            if float(first_delayed_time) > now_timestamp:
                return []

            failures = []

            while True:
                # get the first entry
                first_entry = self.first_delayed

                # no first entry, another worker took all from us !
                if not first_entry:
                    break

                # split into vars for readability
                job_ident, delayed_until = first_entry

                # if the date of the job is in the future, another work took the
                # job we wanted, so we let this job here and stop the loop as we
                # know (its a zset sorted by date) that no other jobs are ready
                if delayed_until > now_timestamp:
                    break

                # remove the entry we just got from the delayed ones
                self.delayed.zrem(job_ident)

                # and add it to the waiting queue
                try:
                    job = Job.get_from_ident(job_ident)
                    # only flip status if the job is still marked as delayed
                    if job.status.hget() == STATUSES.DELAYED:
                        job.status.hset(STATUSES.WAITING)
                    self.enqueue_job(job)
                except Exception as e:
                    # record the failure but keep draining the ready jobs
                    failures.append((job_ident, '%s' % e))

            return failures