def test_get_fields_to_annotate():
    """Check which fields of the first FORM1 form are selected for annotation."""
    first_form = get_forms(load_html(FORM1))[0]
    fields = get_fields_to_annotate(first_form)

    # Every selected element must expose a truthy ``name`` attribute.
    assert all(getattr(field, 'name', None) for field in fields)

    field_names = get_field_names(fields)
    assert field_names == ['foo', 'bar', 'ch', 'baz', 'go', 'cancel']

    # The reported names must be exactly the names carried by the elements.
    assert set(field_names) == {field.name for field in fields}
def FormAnnotator(ann, annotate_fields=True, annotate_types=True, max_fields=80):
    """
    Build the widget used to annotate one HTML form.

    The result is a vertical box holding, in order: a form type selector
    (only when ``annotate_types`` is true), an HTML header showing the form
    type label, URL, key and index, and then either

    * one tab per field name (field type selector + HTML view), or
    * a "Too many fields" notice when there are more than ``max_fields``
      names, or
    * a plain HTML view of the form when ``annotate_fields`` is false.

    ``ann`` must provide the attributes read here: ``form_schema``, ``url``,
    ``index``, ``key``, ``type`` and ``form``.

    At least one of ``annotate_fields`` / ``annotate_types`` must be true.
    """
    assert annotate_fields or annotate_types
    type_labels = ann.form_schema.types_inv

    parts = []
    if annotate_types:
        parts.append(FormTypeSelect(ann))

    header_tpl = """ <h4> {tp} <a href='{url}'>{url}</a> <small>{key} #{index}</small> </h4> """
    header_html = header_tpl.format(
        url=ann.url,
        index=ann.index,
        key=ann.key,
        # Unknown form types are rendered as '?'.
        tp=type_labels.get(ann.type, '?'),
    )
    parts.append(widgets.HTML(header_html))

    # Type-only annotation: show the form itself and stop.
    if not annotate_fields:
        parts.append(HtmlView(ann.form))
        return widgets.VBox(parts, padding=8)

    names = get_field_names(get_fields_to_annotate(ann.form))

    # Too many fields: show a notice instead of building the tabs.
    if len(names) > max_fields:
        notice = "<h4>Too many fields ({})</h4>".format(len(names))
        parts.append(widgets.HTML(notice))
        return widgets.VBox(parts, padding=8)

    pages = [
        widgets.Box(children=[FieldTypeSelect(ann, name), HtmlView(ann.form, name)])
        for name in names
    ]
    tabs = widgets.Tab(children=pages, padding=4)
    for position, name in enumerate(names):
        tabs.set_title(position, name)
    parts.append(tabs)

    return widgets.VBox(parts, padding=8)
def FormAnnotator(ann, annotate_fields=True, annotate_types=True, max_fields=80):
    """
    Widget for annotating a single HTML form.

    Stacks the following widgets vertically:

    1. a form type selector, if ``annotate_types`` is true;
    2. a header with the form type label, a link to ``ann.url``, and the
       annotation key and index;
    3. a body: per-field tabs when ``annotate_fields`` is true (replaced by
       a "Too many fields" message if the form has more than ``max_fields``
       field names), otherwise a plain HTML view of the form.

    Disabling both ``annotate_fields`` and ``annotate_types`` is rejected by
    an assertion.
    """
    assert annotate_fields or annotate_types
    form_types_inv = ann.form_schema.types_inv

    column = []
    if annotate_types:
        column.append(FormTypeSelect(ann))

    header_markup = """ <h4> {tp} <a href='{url}'>{url}</a> <small>{key} #{index}</small> </h4> """.format(
        url=ann.url,
        index=ann.index,
        key=ann.key,
        tp=form_types_inv.get(ann.type, '?'),  # '?' marks an unknown type
    )
    column.append(widgets.HTML(header_markup))

    # Pick exactly one body widget, then append it after the header.
    if annotate_fields:
        field_names = get_field_names(get_fields_to_annotate(ann.form))
        total = len(field_names)
        if total > max_fields:
            body = widgets.HTML("<h4>Too many fields ({})</h4>".format(total))
        else:
            pages = []
            for field_name in field_names:
                selector = FieldTypeSelect(ann, field_name)
                preview = HtmlView(ann.form, field_name)
                pages.append(widgets.Box(children=[selector, preview]))

            body = widgets.Tab(children=pages, padding=4)
            # Tab titles are set by position; each tab is labelled with its field name.
            for tab_no, field_name in enumerate(field_names):
                body.set_title(tab_no, field_name)
    else:
        body = HtmlView(ann.form)

    column.append(body)
    return widgets.VBox(column, padding=8)
def add_result(self, html, url, form_answers=None, visible_html_fields=None,
               index=None, add_empty=True):
    """
    Store a page's HTML on disk and register its forms in the index.

    :param html: page source, as text or bytes; text is encoded as UTF-8
        before being written.
    :param url: page URL; used to generate the file name and stored in the
        index entry.
    :param form_answers: one form type per ``<form>`` in the page. When
        omitted, every form gets the form schema's ``na_value``.
    :param visible_html_fields: one ``{field name: field type}`` dict per
        form. When omitted, every field to annotate gets the field schema's
        ``na_value``.
    :param index: index mapping to update; loaded with ``self.get_index()``
        when omitted.
    :param add_empty: when false, pages with no forms, or whose forms have
        no fields to annotate, are skipped.
    :return: the stored file's path relative to ``self.folder``, or ``None``
        if the page was skipped.
    """
    forms = get_forms(load_html(html))

    if not add_empty:
        # Also covers a page without any forms: any() of nothing is False.
        has_annotatable_fields = any(
            len(get_fields_to_annotate(form)) != 0 for form in forms
        )
        if not has_annotatable_fields:
            return

    if form_answers is not None:
        assert len(form_answers) == len(forms)
    else:
        form_schema = self.get_form_schema()
        form_answers = [form_schema.na_value for _ in forms]

    if visible_html_fields is None:
        field_schema = self.get_field_schema()
        visible_html_fields = []
        for form in forms:
            field_names = get_field_names(get_fields_to_annotate(form))
            visible_html_fields.append(
                {name: field_schema.na_value for name in field_names}
            )

    filename = self.generate_filename(url)
    path = os.path.relpath(filename, self.folder)

    if index is None:
        index = self.get_index()
    entry = {
        "url": url,
        "forms": form_answers,
        "visible_html_fields": visible_html_fields,
    }
    index[path] = entry

    with open(filename, 'wb') as out:
        payload = html if isinstance(html, bytes) else html.encode('utf8')
        out.write(payload)

    self.write_index(index)
    return path