def classify_proba(self, form, threshold=0.0):
    """
    Return dict with probabilities of ``form`` and its fields belonging
    to various form and field classes::

        {
            'form': {'type1': prob1, 'type2': prob2, ...},
            'fields': {
                'name': {'type1': prob1, 'type2': prob2, ...},
                ...
            }
        }

    ``form`` should be an lxml HTML <form> element.
    Only classes with probability >= ``threshold`` are preserved.

    If the thresholded form probabilities come back empty (presumably
    because no class reached ``threshold``), the form type used to build
    field features is taken from ``self.form_classifier.classify(form)``
    instead of failing with ``ValueError``; ``'form'`` is then ``{}``.
    """
    form_types_proba = self.form_classifier.classify_proba(form, threshold)

    if form_types_proba:
        # Most probable of the classes that survived thresholding.
        form_type = max(form_types_proba, key=form_types_proba.get)
    else:
        # max() of an empty dict raises ValueError; fall back to the
        # classifier's own single-label prediction.
        form_type = self.form_classifier.classify(form)

    field_elems = get_fields_to_annotate(form)
    xseq = fieldtype_model.get_form_features(form, form_type, field_elems)
    yseq = self._field_model.predict_marginals_single(xseq)

    return {
        'form': form_types_proba,
        # NOTE: keyed by element name, so elements sharing a name collapse
        # into one entry (the last one wins).
        'fields': {
            elem.name: thresholded(probs, threshold)
            for elem, probs in zip(field_elems, yseq)
        },
    }
def classify(self, form):
    """
    Classify ``form`` and its fields.

    Returns a ``{'form': 'type', 'fields': {'name': 'type', ...}}`` dict:
    the form type reported by ``self.form_classifier`` plus one predicted
    type per element returned by ``get_fields_to_annotate(form)``, keyed
    by the element's ``name``.
    """
    predicted_form_type = self.form_classifier.classify(form)

    elements = get_fields_to_annotate(form)
    features = fieldtype_model.get_form_features(
        form, predicted_form_type, elements
    )
    labels = self._field_model.predict_single(features)

    # Pair every annotated element with its predicted label.
    field_types = {}
    for element, label in zip(elements, labels):
        field_types[element.name] = label

    return {'form': predicted_form_type, 'fields': field_types}
def classify(self, form, fields=True):
    """
    Classify ``form`` and, optionally, its fields.

    By default the result is
    ``{'form': 'type', 'fields': {'name': 'type', ...}}``: the form type
    reported by ``self.form_classifier`` plus one predicted type per
    element returned by ``get_fields_to_annotate(form)``, keyed by the
    element's ``name``.

    When ``fields`` is false the field model is not run and only
    ``{'form': 'type'}`` is returned.
    """
    predicted_form_type = self.form_classifier.classify(form)

    # Guard clause: caller asked for the form type only.
    if not fields:
        return {'form': predicted_form_type}

    elements = get_fields_to_annotate(form)
    features = fieldtype_model.get_form_features(
        form, predicted_form_type, elements
    )
    labels = self._field_model.predict_single(features)

    field_types = {}
    for element, label in zip(elements, labels):
        field_types[element.name] = label

    return {'form': predicted_form_type, 'fields': field_types}
def classify_proba(self, form, threshold=0.0, fields=True):
    """
    Return dict with probabilities of ``form`` and its fields belonging
    to various form and field classes::

        {
            'form': {'type1': prob1, 'type2': prob2, ...},
            'fields': {
                'name': {'type1': prob1, 'type2': prob2, ...},
                ...
            }
        }

    ``form`` should be an lxml HTML <form> element.
    Only classes with probability >= ``threshold`` are preserved.

    If ``fields`` is False, only information about the form is returned::

        {
            'form': {'type1': prob1, 'type2': prob2, ...}
        }

    If the thresholded form probabilities come back empty (presumably
    because no class reached ``threshold``) and ``fields`` is true, the
    form type used to build field features is taken from
    ``self.form_classifier.classify(form)`` instead of failing with
    ``ValueError``; ``'form'`` is then ``{}``.
    """
    form_types_proba = self.form_classifier.classify_proba(form, threshold)
    res = {'form': form_types_proba}

    if fields:
        if form_types_proba:
            # Most probable of the classes that survived thresholding.
            form_type = max(form_types_proba, key=form_types_proba.get)
        else:
            # max() of an empty dict raises ValueError; fall back to the
            # classifier's own single-label prediction.
            form_type = self.form_classifier.classify(form)

        field_elems = get_fields_to_annotate(form)
        xseq = fieldtype_model.get_form_features(form, form_type, field_elems)
        yseq = self._field_model.predict_marginals_single(xseq)
        # NOTE: keyed by element name, so elements sharing a name collapse
        # into one entry (the last one wins).
        res['fields'] = {
            elem.name: thresholded(probs, threshold)
            for elem, probs in zip(field_elems, yseq)
        }

    return res