예제 #1
0
    def classify_proba(self, form, threshold=0.0):
        """
        Return dict with probabilities of ``form`` and its fields belonging
        to various form and field classes::

            {
                'form': {'type1': prob1, 'type2': prob2, ...},
                'fields': {
                    'name': {'type1': prob1, 'type2': prob2, ...},
                    ...
                }
            }

        ``form`` should be an lxml HTML <form> element.
        Only classes with probability >= ``threshold`` are preserved.

        Field features are computed for the most probable form type. When
        ``threshold`` is strict enough that no form type is preserved, the
        most probable type is looked up from the unthresholded probabilities,
        so ``'form'`` is returned empty instead of ``ValueError`` being
        raised by ``max()`` on an empty dict.
        """
        form_types_proba = self.form_classifier.classify_proba(form, threshold)

        # Given the ">= threshold" filtering documented above, the top class
        # is dropped only when every class is dropped, so the unfiltered
        # probabilities are needed just for that case.
        ranking = form_types_proba
        if not ranking:
            ranking = self.form_classifier.classify_proba(form, 0.0)
        form_type = max(ranking, key=ranking.get)

        field_elems = get_fields_to_annotate(form)
        xseq = fieldtype_model.get_form_features(form, form_type, field_elems)
        # One marginal-probability mapping per field element, in order.
        yseq = self._field_model.predict_marginals_single(xseq)

        return {
            'form': form_types_proba,
            'fields': {
                elem.name: thresholded(probs, threshold)
                for elem, probs in zip(field_elems, yseq)
            },
        }
예제 #2
0
 def classify(self, form):
     """
     Classify ``form`` and its fields.

     The result is a ``{'form': 'type', 'fields': {'name': 'type', ...}}``
     dict holding the form type and the types of its visible submittable
     fields, keyed by field name.
     """
     predicted_form = self.form_classifier.classify(form)
     elements = get_fields_to_annotate(form)
     features = fieldtype_model.get_form_features(
         form, predicted_form, elements
     )
     labels = self._field_model.predict_single(features)

     # Pair every annotated element with its predicted label.
     field_types = {}
     for element, label in zip(elements, labels):
         field_types[element.name] = label

     return {'form': predicted_form, 'fields': field_types}
예제 #3
0
    def classify(self, form, fields=True):
        """
        Classify ``form`` and, optionally, its fields.

        By default the result is a
        ``{'form': 'type', 'fields': {'name': 'type', ...}}`` dict with the
        form type and the types of its visible submittable fields.

        Pass a false ``fields`` value to skip field classification; the
        result is then just ``{'form': 'type'}``.
        """
        predicted_form = self.form_classifier.classify(form)
        if not fields:
            return {'form': predicted_form}

        elements = get_fields_to_annotate(form)
        features = fieldtype_model.get_form_features(
            form, predicted_form, elements
        )
        labels = self._field_model.predict_single(features)

        # Pair every annotated element with its predicted label.
        field_types = {}
        for element, label in zip(elements, labels):
            field_types[element.name] = label

        return {'form': predicted_form, 'fields': field_types}
예제 #4
0
    def classify_proba(self, form, threshold=0.0, fields=True):
        """
        Return dict with probabilities of ``form`` and its fields belonging
        to various form and field classes::

            {
                'form': {'type1': prob1, 'type2': prob2, ...},
                'fields': {
                    'name': {'type1': prob1, 'type2': prob2, ...},
                    ...
                }
            }

        ``form`` should be an lxml HTML <form> element.
        Only classes with probability >= ``threshold`` are preserved.

        If ``fields`` is False, only information about the form is returned::

            {
                'form': {'type1': prob1, 'type2': prob2, ...}
            }

        Field features are computed for the most probable form type. When
        ``threshold`` is strict enough that no form type is preserved, the
        most probable type is looked up from the unthresholded probabilities,
        so ``'form'`` is returned empty instead of ``ValueError`` being
        raised by ``max()`` on an empty dict.
        """
        form_types_proba = self.form_classifier.classify_proba(form, threshold)
        res = {'form': form_types_proba}

        if fields:
            # Given the ">= threshold" filtering documented above, the top
            # class is dropped only when every class is dropped, so the
            # unfiltered probabilities are needed just for that case.
            ranking = form_types_proba
            if not ranking:
                ranking = self.form_classifier.classify_proba(form, 0.0)
            form_type = max(ranking, key=ranking.get)

            field_elems = get_fields_to_annotate(form)
            xseq = fieldtype_model.get_form_features(form, form_type,
                                                     field_elems)
            # One marginal-probability mapping per field element, in order.
            yseq = self._field_model.predict_marginals_single(xseq)
            res['fields'] = {
                elem.name: thresholded(probs, threshold)
                for elem, probs in zip(field_elems, yseq)
            }

        return res