class SuggestCategoriesView(formbase.PageForm):
    """Page form that proposes categories for the context and stores the picks.

    The proposals come from the ``IContentClassifier`` utility; the
    categories chosen by the user are appended to those already present on
    the context's ``IClassifiable`` adapter.
    """

    implements(IPlonePageForm)
    label = _(u"Suggested categories")
    description = _(
        u"Choose among the proposed subjects. Clicking on apply "
        "will add the chosen categories to the existing ones.")

    def getSuggestedSubjects(self):
        """Return the classifier's ``probabilityClassify`` result.

        The context is adapted to ``IClassifiable`` and its ``UID``
        attribute is handed to the classifier as-is.
        """
        classifier = getUtility(IContentClassifier)
        classifiable = IClassifiable(self.context)
        return classifier.probabilityClassify(classifiable.UID)

    @property
    def form_fields(self):
        """Build the form fields, offering one choice per suggested subject.

        When the classifier yields nothing, an error status message is
        added, the response is redirected to the context URL and an empty
        list is returned in place of the fields.  Otherwise the value type
        of the ``suggestions`` field becomes a choice over the suggested
        subjects, most probable first, each titled with its probability
        expressed as a percentage.
        """
        fields = form.Fields(ISuggestCategories)
        distribution = self.getSuggestedSubjects()
        if not distribution:
            context_url = getMultiAdapter(
                (self.context, self.request), name='absolute_url')()
            messages = IStatusMessage(self.request)
            messages.addStatusMessage(
                _(u"Classifier has not been trained or has "
                  "not sufficient information."),
                type="error")
            self.request.response.redirect(context_url)
            return []

        # Reverse-sorting (probability, subject) pairs puts the most
        # probable subjects first.
        ranked = sorted(
            [(distribution.prob(name), name)
             for name in distribution.samples()],
            reverse=True)
        terms = [
            SimpleTerm(title="%s %2.1f%%" % (name, likelihood * 100),
                       value=name,
                       token=b64encode(name))
            for likelihood, name in ranked]
        # NOTE(review): value_type is assigned on the field object obtained
        # through form.Fields(ISuggestCategories) -- confirm whether that
        # object is shared between requests.
        fields['suggestions'].field.value_type = schema.Choice(
            vocabulary=SimpleVocabulary(terms))
        return fields

    @form.action(_(u"Apply"))
    def action_submit(self, action, data):
        """Append the chosen suggestions to the context's categories.

        Suggestions already present are skipped.  Afterwards an info status
        message is added and the response is redirected to the context URL.
        Returns an empty string.
        """
        classifiable = IClassifiable(self.context)
        current = classifiable.categories
        for chosen in data['suggestions']:
            if chosen in current:
                continue
            current.append(chosen)
        classifiable.categories = current

        url_view = getMultiAdapter(
            (self.context, self.request), name='absolute_url')
        context_url = url_view()
        messages = IStatusMessage(self.request)
        messages.addStatusMessage(_(u"Categories saved."), type="info")
        self.request.response.redirect(context_url)
        return ''
class ISuggestCategories(Interface):
    """Schema of the category suggestion form: a single list field."""

    suggestions = schema.List(
        title=_(u"Suggestions"),
        description=_(u""),
        default=[],
    )
class IClassifierSettingsSchema(Interface):
    """Schema of the classifier settings: a single boolean option."""

    train_after_update = schema.Bool(
        title=_(u"Train after update"),
        description=_(
            u"Enabling this will trigger training the classifier "
            "every time tagged content is added, modified or deleted. "
            "Disabling it means you will have to periodically manually "
            "retrain the classifier."),
    )
 def form_fields(self):
     """Return the form fields with a vocabulary of suggested subjects.

     If ``getSuggestedSubjects`` yields nothing, an error status message
     is added, the response is redirected to the context URL and an empty
     list is returned.  Otherwise the ``suggestions`` field gets a choice
     value type whose terms are the suggested subjects, most probable
     first, each titled "<subject> <probability>%".
     """
     fields = form.Fields(ISuggestCategories)
     distribution = self.getSuggestedSubjects()
     if not distribution:
         target = getMultiAdapter(
             (self.context, self.request), name='absolute_url')()
         IStatusMessage(self.request).addStatusMessage(
             _(u"Classifier has not been trained or has "
               "not sufficient information."),
             type="error")
         self.request.response.redirect(target)
         return []

     # (probability, subject) pairs, highest probability first.
     ranked = sorted(
         ((distribution.prob(name), name)
          for name in distribution.samples()),
         reverse=True)
     terms = [
         SimpleTerm(title="%s %2.1f%%" % (name, weight * 100),
                    value=name,
                    token=b64encode(name))
         for weight, name in ranked]
     fields['suggestions'].field.value_type = schema.Choice(
         vocabulary=SimpleVocabulary(terms))
     return fields
 def form_fields(self):
     """Assemble the suggestion form fields from the classifier output.

     With suggestions available, the ``suggestions`` field is given a
     choice value type listing every suggested subject in descending
     order of probability; each term's title shows the subject followed
     by its probability as a percentage.  Without suggestions, an error
     status message is added, the response is redirected to the context
     URL and ``[]`` is returned.
     """
     fieldset = form.Fields(ISuggestCategories)
     classified = self.getSuggestedSubjects()
     if classified:
         scored = [(classified.prob(category), category)
                   for category in classified.samples()]
         scored.sort(reverse=True)

         terms = []
         for score, category in scored:
             caption = "%s %2.1f%%" % (category, score * 100)
             term = SimpleTerm(value=category,
                               token=b64encode(category),
                               title=caption)
             terms.append(term)

         vocabulary = SimpleVocabulary(terms)
         value_type = schema.Choice(vocabulary=vocabulary)
         fieldset['suggestions'].field.value_type = value_type
         return fieldset

     # Nothing to offer: report it and send the user back to the context.
     context_url = getMultiAdapter((self.context, self.request),
                                   name='absolute_url')()
     status = IStatusMessage(self.request)
     status.addStatusMessage(
         _(u"Classifier has not been trained or has "
           "not sufficient information."),
         type="error")
     self.request.response.redirect(context_url)
     return []
class ClassifierSettings(ControlPanelForm):
    """Control panel form built on ``IClassifierSettingsSchema``.

    Besides saving the schema fields it offers buttons to retrain the
    classifier, to open the statistics view and to cancel.
    """

    form_fields = form.FormFields(IClassifierSettingsSchema)
    label = _("Classification settings")
    description = _("Settings for collective.classification.")
    form_name = _("Classification settings")

    @form.action(_(u"Save"))
    def save_action(self, action, data):
        """Apply the submitted data to the context and report success."""
        context = self.context
        fields = self.form_fields
        form.applyChanges(context, fields, data, self.adapters)
        self.status = _(u"Changes saved.")

    @form.action(_(u"Retrain classifier"))
    def retrain_classifier_action(self, action, data):
        """Call ``train()`` on the classifier utility and report success."""
        getUtility(IContentClassifier).train()
        self.status = _(u"Classifier trained.")

    @form.action(_(u"Statistics"), validator=null_validator)
    def stats_action(self, action, data):
        """Redirect to ``@@classification-stats`` on the context.

        Form validation is bypassed through ``null_validator``.  Returns an
        empty string.
        """
        url_view = getMultiAdapter(
            (self.context, self.request), name='absolute_url')
        stats_url = url_view() + '/@@classification-stats'
        self.request.response.redirect(stats_url)
        return ''

    @form.action(_(u"Cancel"), validator=null_validator)
    def cancel_action(self, action, data):
        """Set a cancellation status and redirect to ``plone_control_panel``.

        Form validation is bypassed through ``null_validator``.  Returns an
        empty string.
        """
        self.status = _(u"Changes cancelled.")
        url_view = getMultiAdapter(
            (self.context, self.request), name='absolute_url')
        panel_url = url_view() + '/plone_control_panel'
        self.request.response.redirect(panel_url)
        return ''
 def retrain_classifier_action(self, action, data):
     """Rebuild the classifier's training set from the catalog and train it.

     The classifier is cleared, then every catalog result of the searched
     portal types that has a non-empty ``Subject`` is added as a training
     document (UID plus subjects) before ``train()`` is called.  The
     portal types come from the noun-phrase storage's ``friendlyTypes``,
     falling back to ``self._friendlyContentTypes()`` when that is empty.
     """
     np_storage = getUtility(INounPhraseStorage)
     classifier = getUtility(IContentClassifier)
     classifier.clear()

     portal_catalog = getToolByName(self.context, "portal_catalog")
     portal_types = np_storage.friendlyTypes or self._friendlyContentTypes()
     for brain in portal_catalog.searchResults(portal_type=portal_types):
         if not brain.Subject:
             # Content without subjects gives the classifier nothing to learn.
             continue
         classifier.addTrainingDocument(brain["UID"], brain["Subject"])

     classifier.train()
     self.status = _(u"Classifier trained.")
 def action_submit(self, action, data):
     """Add the selected suggestions to the context's categories.

     Entries of ``data["suggestions"]`` that are already among the
     categories are skipped.  An info status message is then added and
     the response is redirected to the context URL.  Returns an empty
     string.
     """
     classifiable = IClassifiable(self.context)
     categories = classifiable.categories
     for candidate in data["suggestions"]:
         if candidate in categories:
             continue
         categories.append(candidate)
     classifiable.categories = categories

     url_view = getMultiAdapter((self.context, self.request), name="absolute_url")
     context_url = url_view()
     status = IStatusMessage(self.request)
     status.addStatusMessage(_(u"Categories saved."), type="info")
     self.request.response.redirect(context_url)
     return ""
 def action_submit(self, action, data):
     """Store the user's picks on the context and go back to it.

     Each suggestion not yet among the context's categories is appended
     to them, the categories are written back through the
     ``IClassifiable`` adapter, an info status message is added and the
     response is redirected to the context URL.  Returns an empty string.
     """
     adapted = IClassifiable(self.context)
     existing = adapted.categories
     for picked in data['suggestions']:
         is_new = picked not in existing
         if is_new:
             existing.append(picked)
     adapted.categories = existing

     destination = getMultiAdapter(
         (self.context, self.request), name='absolute_url')()
     notifier = IStatusMessage(self.request)
     notifier.addStatusMessage(_(u"Categories saved."), type="info")
     self.request.response.redirect(destination)
     return ''
    def retrain_termextractor_action(self, action, data):
        """Clear the noun-phrase storage and refill it from the catalog.

        Every catalog result of the searched portal types is woken up, its
        ``SearchableText()`` is converted to plain text and stored under
        the object's UID.  The portal types come from the storage's
        ``friendlyTypes``, falling back to ``self._friendlyContentTypes()``
        when that is empty.
        """
        np_storage = getUtility(INounPhraseStorage)
        np_storage.clear()

        portal_catalog = getToolByName(self.context, "portal_catalog")
        portal_types = np_storage.friendlyTypes or self._friendlyContentTypes()
        for brain in portal_catalog.searchResults(portal_type=portal_types):
            # NOTE: The full object is loaded because SearchableText could
            # not be obtained from the catalog brain. Open question from the
            # original author: is it too big to be returned in brains?
            content = brain.getObject()
            content_uid = content.UID()
            plain_text = convertHtmlToWebIntelligentPlainText(
                content.SearchableText())
            np_storage.addDocument(content_uid, plain_text)

        self.status = _(
            u"Term extractor trained and NP storage updated."
            " You will need to re-train the classifier as well."
        )
    def save_action(self, action, data):
        """Save the form data and swap the extractor's tagger if it changed.

        After applying the changes, the submitted ``tagger_type`` and
        ``brown_categories`` are compared with the term extractor's
        ``tagger_metadata``.  When either differs, a new tagger is
        installed: an N-Gram tagger trained on the Brown sentences of the
        chosen categories, or otherwise the Penn Treebank tagger.
        """
        form.applyChanges(self.context, self.form_fields, data, self.adapters)
        extractor = getUtility(ITermExtractor)

        chosen_type = data["tagger_type"]
        chosen_categories = data["brown_categories"]
        metadata = extractor.tagger_metadata
        tagger_changed = (
            metadata["type"] != chosen_type
            or metadata["categories"] != chosen_categories
        )
        if tagger_changed:
            if chosen_type != "N-Gram":
                tagger = getUtility(
                    IPOSTagger,
                    name="collective.classification.taggers.PennTreebankTagger",
                )
                extractor.setTagger(
                    tagger, {"type": "Pen TreeBank", "categories": []})
            else:
                training_sents = brown.tagged_sents(categories=chosen_categories)
                tagger = getUtility(
                    IPOSTagger,
                    name="collective.classification.taggers.NgramTagger",
                )
                tagger.train(training_sents)
                extractor.setTagger(
                    tagger,
                    {"type": "N-Gram", "categories": chosen_categories},
                )

        self.status = _(
            u"Changes saved. You will need to reparse the "
            "content and then retrain the classifier."
        )
# Example no. 12
# 0
class ClassificationStatsView(formbase.PageForm):
    """Page form showing classifier statistics via ``classificationstats.pt``.

    The instance exposes ``informativeFeatures`` (as returned by the
    classifier utility) and ``parsedDocs`` (the size of the ``noun_terms``
    catalog index's ``_unindex`` mapping).
    """

    form_fields = form.Fields(IStats)
    template = ViewPageTemplateFile('classificationstats.pt')

    def __init__(self, *args, **kwargs):
        """Initialise the form and collect the statistics to display."""
        super(ClassificationStatsView, self).__init__(*args, **kwargs)
        portal_catalog = getToolByName(self.context, 'portal_catalog')
        self.classifier = getUtility(IContentClassifier)
        self.informativeFeatures = self.classifier.informativeFeatures()
        # Reaches into catalog internals to count the entries of the
        # 'noun_terms' index.
        noun_index = portal_catalog._catalog.getIndex('noun_terms')
        self.parsedDocs = len(noun_index._unindex)

    @form.action(_(u"Apply"))
    def action_apply(self, action, data):
        """Refresh ``informativeFeatures`` using the requested number."""
        feature_count = data['no_features']
        self.informativeFeatures = self.classifier.informativeFeatures(
            feature_count)
class ClusterizeView(formbase.PageForm):
    """Page form that clusters content and exposes the result to the template.

    After the "Clusterize" action has run, ``self.clusters`` holds one list
    per cluster; each entry is a ``(url, title, description)`` tuple taken
    from the catalog record of a clustered UID.
    """

    form_fields = form.Fields(IClusterize)
    template = ViewPageTemplateFile('clusterize.pt')

    @form.action(_(u"Clusterize"))
    def action_clusterize(self, action, data):
        """Run the clusterer and resolve every clustered UID via the catalog.

        ``data`` supplies ``no_clusters``, ``no_noun_ranks`` and ``repeats``,
        which are passed on to ``KMeans.clusterize``.  UIDs for which the
        catalog returns no record are skipped instead of raising
        ``IndexError``, so one unresolvable UID does not break the whole
        view.  A cluster whose UIDs all fail to resolve still contributes
        an (empty) list.
        """
        catalog = getToolByName(self.context, 'portal_catalog')
        clusterer = KMeans()
        clusters = clusterer.clusterize(data['no_clusters'],
                                        data['no_noun_ranks'],
                                        repeats=data['repeats'])
        result = []
        for cluster in clusters.values():
            clusterlist = []
            for uid in cluster:
                brains = catalog.unrestrictedSearchResults(UID=uid)
                if not brains:
                    # No catalog record for this UID: nothing to display.
                    continue
                item = brains[0]
                clusterlist.append(
                    (item.getURL(), item.Title, item.Description))
            result.append(clusterlist)
        self.clusters = result
class IClusterize(Interface):
    """Schema of the clustering form: the parameters of a clustering run."""

    no_clusters = schema.Int(
        title=_(u"Number of clusters"),
        description=_(u""),
        required=True,
    )

    no_noun_ranks = schema.Int(
        title=_(u"Important nouns to keep"),
        # The first literal ends with a space so the joined text reads
        # "building the list" rather than "building thelist".
        # NOTE(review): this changes the message id; translation catalogs
        # keyed on the old text need updating.
        description=_(u"Indicates how many nouns to keep when building the "
            "list of most frequent nouns in the text."),
        default=20,
        required=True)

    repeats = schema.Int(
        title=_(u"Number of runs"),
        description=_(u""),
        default=10,
        required=True,
    )
 def save_action(self, action, data):
     """Apply the submitted data to the context and report success."""
     target = self.context
     fields = self.form_fields
     form.applyChanges(target, fields, data, self.adapters)
     self.status = _(u"Changes saved.")
# Example no. 16
# 0
class IStats(Interface):
    """Schema with one integer: how many informative features to show."""

    no_features = schema.Int(
        title=_(u"Number of informative features to show"),
        default=10,
        required=True,
    )
 def retrain_classifier_action(self, action, data):
     """Call ``train()`` on the classifier utility and report success."""
     getUtility(IContentClassifier).train()
     self.status = _(u"Classifier trained.")
 def cancel_action(self, action, data):
     """Set a cancellation status and redirect to ``plone_control_panel``.

     The target is the context URL with ``/plone_control_panel`` appended.
     Returns an empty string.
     """
     self.status = _(u"Changes cancelled.")
     url_view = getMultiAdapter(
         (self.context, self.request), name='absolute_url')
     panel_url = url_view() + '/plone_control_panel'
     self.request.response.redirect(panel_url)
     return ''