Ejemplo n.º 1
0
def test_get_fields_to_annotate():
    """Fields of the first FORM1 form are all named and listed as expected."""
    first_form = get_forms(load_html(FORM1))[0]
    fields = get_fields_to_annotate(first_form)

    # Every returned element must expose a truthy ``name`` attribute.
    for field in fields:
        assert getattr(field, 'name', None)

    expected_names = ['foo', 'bar', 'ch', 'baz', 'go', 'cancel']
    field_names = get_field_names(fields)
    assert field_names == expected_names

    # The reported names are exactly the names carried by the elements.
    assert set(field_names) == set(field.name for field in fields)
Ejemplo n.º 2
0
def test_get_fields_to_annotate():
    # Fields are taken from the first form found in FORM1.
    elems = get_fields_to_annotate(get_forms(load_html(FORM1))[0])

    # No element may lack a name (attribute missing, None or empty).
    nameless = [el for el in elems if not getattr(el, 'name', None)]
    assert not nameless

    names = get_field_names(elems)
    assert names == ['foo', 'bar', 'ch', 'baz', 'go', 'cancel']

    # The names reported match the elements' own ``name`` attributes.
    assert {el.name for el in elems} == set(names)
Ejemplo n.º 3
0
def FormAnnotator(ann,
                  annotate_fields=True,
                  annotate_types=True,
                  max_fields=80):
    """
    Build the widget used to annotate one HTML form.

    The returned ``widgets.VBox`` contains, in order:

    * a ``FormTypeSelect`` for ``ann`` (only when ``annotate_types`` is true);
    * an HTML header showing the form type name, URL, key and index;
    * one of:
      - an ``HtmlView`` of the whole form, when ``annotate_fields`` is false;
      - a "Too many fields" notice, when the form has more than
        ``max_fields`` field names;
      - a ``widgets.Tab`` with one page per field name, each page holding
        a ``FieldTypeSelect`` and an ``HtmlView`` for that field.

    At least one of ``annotate_fields`` / ``annotate_types`` must be true
    (checked with ``assert``).
    """
    assert annotate_fields or annotate_types
    type_names = ann.form_schema.types_inv

    parts = [FormTypeSelect(ann)] if annotate_types else []

    header_tpl = """
    <h4>
        {tp} <a href='{url}'>{url}</a>
        <small>{key} #{index}</small>
    </h4>
    """
    header_html = header_tpl.format(
        url=ann.url,
        index=ann.index,
        key=ann.key,
        # Unknown form types are displayed as '?'.
        tp=type_names.get(ann.type, '?'),
    )
    parts.append(widgets.HTML(header_html))

    # Form-level annotation only: show the whole form and finish.
    if not annotate_fields:
        parts.append(HtmlView(ann.form))
        return widgets.VBox(parts, padding=8)

    field_names = get_field_names(get_fields_to_annotate(ann.form))

    # Forms with too many fields get a notice instead of per-field tabs.
    if len(field_names) > max_fields:
        notice = "<h4>Too many fields ({})</h4>".format(len(field_names))
        parts.append(widgets.HTML(notice))
        return widgets.VBox(parts, padding=8)

    tab_pages = [
        widgets.Box(children=[FieldTypeSelect(ann, field_name),
                              HtmlView(ann.form, field_name)])
        for field_name in field_names
    ]
    tabs = widgets.Tab(children=tab_pages, padding=4)
    for position, field_name in enumerate(field_names):
        tabs.set_title(position, field_name)
    parts.append(tabs)

    return widgets.VBox(parts, padding=8)
Ejemplo n.º 4
0
def _field_tabs_widget(ann, names):
    """Return a ``widgets.Tab`` with one page per name in ``names``."""
    pages = []
    for name in names:
        selector = FieldTypeSelect(ann, name)
        preview = HtmlView(ann.form, name)
        pages.append(widgets.Box(children=[selector, preview]))

    tabs = widgets.Tab(children=pages, padding=4)
    # Each tab is titled with the name of the field it shows.
    for idx, name in enumerate(names):
        tabs.set_title(idx, name)
    return tabs


def FormAnnotator(ann, annotate_fields=True, annotate_types=True, max_fields=80):
    """
    Widget for annotating a single HTML form.

    Stacks, inside a ``widgets.VBox``: an optional ``FormTypeSelect``
    (``annotate_types``), an HTML header with the form type name, URL,
    key and index, and a body. The body is per-field tabs when
    ``annotate_fields`` is true and the form has at most ``max_fields``
    field names, a "Too many fields" notice when it has more, and an
    ``HtmlView`` of the whole form when ``annotate_fields`` is false.

    Asserts that at least one of the two ``annotate_*`` flags is true.
    """
    assert annotate_fields or annotate_types
    form_types_inv = ann.form_schema.types_inv

    children = []
    if annotate_types:
        children.append(FormTypeSelect(ann))

    tpl = """
    <h4>
        {tp} <a href='{url}'>{url}</a>
        <small>{key} #{index}</small>
    </h4>
    """
    header_values = dict(
        url=ann.url,
        index=ann.index,
        key=ann.key,
        tp=form_types_inv.get(ann.type, '?'),  # '?' for an unknown type
    )
    children.append(widgets.HTML(tpl.format(**header_values)))

    if annotate_fields:
        names = get_field_names(get_fields_to_annotate(ann.form))
        field_count = len(names)
        if field_count > max_fields:
            body = widgets.HTML(
                "<h4>Too many fields ({})</h4>".format(field_count))
        else:
            body = _field_tabs_widget(ann, names)
    else:
        body = HtmlView(ann.form)
    children.append(body)

    return widgets.VBox(children, padding=8)
Ejemplo n.º 5
0
    def add_result(self,
                   html,
                   url,
                   form_answers=None,
                   visible_html_fields=None,
                   index=None,
                   add_empty=True):
        """
        Save HTML source and its <form> and form field types.

        The page is written to the file named by
        ``self.generate_filename(url)``; an entry keyed by that file's path
        relative to ``self.folder`` is then added to the index, and the
        index is saved with ``self.write_index``.

        :param html: page source, as text or bytes; text is stored
            UTF-8 encoded.
        :param url: URL recorded in the index entry and used to generate
            the file name.
        :param form_answers: one value per ``<form>`` found in ``html``.
            When None, the form schema's ``na_value`` is used for every
            form. A length mismatch trips an ``assert``.
        :param visible_html_fields: one ``{field name: type}`` mapping per
            form. When None, every field name to annotate is mapped to
            the field schema's ``na_value``.
        :param index: index mapping to update (modified in place). When
            None, it is obtained from ``self.get_index()``.
        :param add_empty: when false, nothing is saved for pages that have
            no forms, or whose forms all have no fields to annotate.
        :return: the saved file's path relative to ``self.folder``, or
            None when the page was skipped.
        """
        forms = get_forms(load_html(html))
        if not add_empty:
            if not len(forms):
                return

            if all(len(get_fields_to_annotate(form)) == 0 for form in forms):
                return

        if form_answers is None:
            form_schema = self.get_form_schema()
            form_answers = [form_schema.na_value for _ in forms]
        else:
            assert len(form_answers) == len(forms)

        if visible_html_fields is None:
            field_schema = self.get_field_schema()
            visible_html_fields = [{
                name: field_schema.na_value
                for name in get_field_names(get_fields_to_annotate(form))
            } for form in forms]

        filename = self.generate_filename(url)
        path = os.path.relpath(filename, self.folder)
        if index is None:
            index = self.get_index()

        # Encode before opening the file: opening in 'wb' mode creates or
        # truncates it, so an encoding error must surface first to avoid
        # leaving an empty file behind.
        if not isinstance(html, bytes):
            html = html.encode('utf8')
        with open(filename, 'wb') as f:
            f.write(html)

        # Only touch the index once the page is on disk, so a failed
        # write cannot leave a dangling entry in a caller-supplied index.
        index[path] = {
            "url": url,
            "forms": form_answers,
            "visible_html_fields": visible_html_fields,
        }
        self.write_index(index)
        return path
Ejemplo n.º 6
0
    def add_result(self, html, url, form_answers=None,
                   visible_html_fields=None, index=None,
                   add_empty=True):
        """
        Save HTML source and its <form> and form field types.

        ``html`` (text or bytes; text is encoded as UTF-8) is written to
        the file given by ``self.generate_filename(url)``. An index entry
        with the URL, the per-form answers and the per-form field types is
        stored under that file's path relative to ``self.folder``, and the
        index is persisted through ``self.write_index``.

        Defaults:

        * ``form_answers`` - the form schema's ``na_value`` for each form;
          when given, its length must equal the number of forms (asserted).
        * ``visible_html_fields`` - for each form, every field name to
          annotate mapped to the field schema's ``na_value``.
        * ``index`` - the result of ``self.get_index()``; a mapping passed
          in is updated in place.

        With ``add_empty=False`` pages without forms, or whose forms have
        no fields to annotate, are skipped and None is returned; otherwise
        the relative path of the saved file is returned.
        """
        forms = get_forms(load_html(html))
        if not add_empty:
            if not len(forms):
                return

            if all(len(get_fields_to_annotate(form)) == 0 for form in forms):
                return

        if form_answers is None:
            form_schema = self.get_form_schema()
            form_answers = [form_schema.na_value for _ in forms]
        else:
            assert len(form_answers) == len(forms)

        if visible_html_fields is None:
            field_schema = self.get_field_schema()
            visible_html_fields = [{
                name: field_schema.na_value
                for name in get_field_names(get_fields_to_annotate(form))
            } for form in forms]

        filename = self.generate_filename(url)
        path = os.path.relpath(filename, self.folder)
        if index is None:
            index = self.get_index()

        # Prepare the bytes up front; opening with 'wb' truncates/creates
        # the file, so a failing encode must not happen after that point.
        payload = html if isinstance(html, bytes) else html.encode('utf8')
        with open(filename, 'wb') as f:
            f.write(payload)

        # The index entry is added after the write succeeds, so the index
        # never refers to a page that was not actually saved.
        index[path] = {
            "url": url,
            "forms": form_answers,
            "visible_html_fields": visible_html_fields,
        }
        self.write_index(index)
        return path