def triggers(self):
    """
    Return the set of ``(trigger, score)`` pairs matching the keyentities.

    The set is computed on first access and stored in ``self._triggers``;
    later calls return the stored set untouched. Each keyentity is looked
    up through ``Trigger.get`` using its unicode form, and keyentities for
    which no trigger exists are left out of the result.
    """
    # Already computed: hand back the cached set.
    if self._triggers is not None:
        return self._triggers

    # Bind the cache before filling it, so both names share one set object.
    matched = set()
    self._triggers = matched
    for keyentity in self.keyentities:
        try:
            trigger = Trigger.get(original=unicode(keyentity))
            matched.add((trigger, keyentity.trigger_score))
        except Trigger.DoesNotExist:
            # No trigger is stored for this keyentity: nothing to add.
            continue
    return self._triggers
def train(self, inst):
    """
    Learn trigger/descriptor relations from one content item.

    ``inst`` is either a content instance or, when it is an ``int`` or a
    ``str``, a value handed to ``config.content_model_getter`` to fetch the
    instance. The text is read from the attribute named by
    ``config.SULCI_CONTENT_PROPERTY`` and the human defined descriptors
    come from ``config.descriptors_getter``.

    Returns ``None`` in every case. Nothing is learnt when the item has no
    text or no descriptors, and the relations are not created when
    ``SemanticalTagger`` raises ``ValueError``.
    """
    if isinstance(inst, (int, str)):
        # Not an instance: treat the value as a primary key.
        inst = config.content_model_getter(inst)
    text = getattr(inst, config.SULCI_CONTENT_PROPERTY)
    descriptors = config.descriptors_getter(inst)
    if not (descriptors and text):
        sulci_logger.info(u"Skipping item without data")
        return

    # Step 1: fetch (or create) one Descriptor per non-empty name.
    # Descriptors missing from the thesaurus are created anyway, because
    # article and thesaurus descriptors do not always match.
    learnt_descriptors = set()
    for name in descriptors:
        if name:
            descriptor, is_new = Descriptor.get_or_connect(name=name)
            descriptor.count.hincrby(1)
            learnt_descriptors.add(descriptor)
            if is_new:
                message = u"Lairning descriptor not in thesaurus : %s" % unicode(descriptor)
                sulci_logger.info(message, "RED")

    # Step 2: extract the keyentities of the text.
    try:
        tagger = SemanticalTagger(
            text,
            thesaurus=self.thesaurus,
            pos_tagger=self.pos_tagger,
            lexicon=self.pos_tagger.lexicon
        )
        # While learning, drop the duplicated keyentities.
        tagger.deduplicate_keyentities()
    except ValueError:
        # Raised by SemanticalTagger when the text is empty.
        return

    # Step 3: fetch (or create) one Trigger per keyentity and count it.
    learnt_triggers = set()
    for keyentity in tagger.keyentities:
        trigger, _ = Trigger.get_or_connect(original=unicode(keyentity))
        learnt_triggers.add(trigger)
        trigger.count.hincrby(1)

    # Step 4: for now, relate every trigger to every descriptor.
    for descriptor in learnt_descriptors:
        for trigger in learnt_triggers:
            trigger.connect(descriptor, 1)