예제 #1
0
 def validate(self):
     if not is_valid_xml_tag(self.name):
         invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name)
         msg = ("The name '{}' is an invalid XML tag, it contains an "
                "invalid character '{}'. Names must begin with a letter, "
                "colon, or underscore, subsequent characters can include "
                "numbers, dashes, and periods".format(
                    self.name, invalid_char.group(0)))
         raise PyXFormError(msg)
예제 #2
0
 def validate(self):
     if not is_valid_xml_tag(self.name):
         invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name)
         msg = (
             "The name '{}' is an invalid XML tag, it contains an "
             "invalid character '{}'. Names must begin with a letter, "
             "colon, or underscore, subsequent characters can include "
             "numbers, dashes, and periods".format(self.name, invalid_char.group(0))
         )
         raise PyXFormError(msg)
예제 #3
0
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    """
    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.
    """
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    workbook_dict = {x.lower(): y for x,y in workbook_dict.items()}
    for row in workbook_dict.get(constants.SURVEY, []):
        is_valid = 'type' in [z.lower() for z in row]
        if is_valid:
            break
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
    replace_smart_quotes_in_dict(settings)

    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    for choice_item in choices_sheet:
        replace_smart_quotes_in_dict(choice_item)

    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. " +
                    info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): " +
            constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = dealias_and_group_headers(workbook_dict.get(constants.OSM, []),
                                              aliases.list_header,
                                              True)
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(aliases.cascading.keys()) +
        r")) (?P<cascading_level>\S+)?$")
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
        ')) (?P<list_name>\S+)')

    # Rows from the survey sheet that should be nested in meta
    survey_meta = []

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
                    str(row))
                continue
            raise PyXFormError(
                row_format_string % row_number +
                " Question with no type.\n" + str(row))

        # Pull out questions that will go in meta block
        if question_type == 'audit':
            # Force audit name to always be "audit" to follow XForms spec
            if 'name' in row and row['name'] not in [None, '', 'audit']:
                raise PyXFormError(row_format_string % row_number +
                    " Audits must always be named 'audit.'" +
                    " The name column should be left blank.")

            row['name'] = 'audit'
            survey_meta.append(row)
            continue

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(
                    row_format_string % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             question_name.encode('utf-8') + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            warnings.append(
                row_format_string % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    'control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                                "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)
                if 'intent' in new_json_dict:
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \
                        new_json_dict['intent']

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                   'questions' not in cascading_choices[0]:
                    raise PyXFormError(
                        "Found a cascading_select " + cascading_level +
                        ", but could not find " + cascading_level +
                        "in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    def replace_prefix(d, prefix):
                        for k, v in d.items():
                            if isinstance(v, basestring):
                                d[k] = v.replace('$PREFIX$', prefix)
                            elif isinstance(v, dict):
                                d[k] = replace_prefix(v, prefix)
                            elif isinstance(v, list):
                                d[k] = map(
                                    lambda x: replace_prefix(x, prefix), v)
                        return d

                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    warnings.append(
                        row_format_string % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME:
                                "reserved_name_for_field_list_labels_" +
                                str(row_number),
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                tags = osm_tags.get(parse_dict.get('list_name'))
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            parent_children_array.append(new_dict)
            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = [] + survey_meta

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
예제 #4
0
 def validate(self):
     if not is_valid_xml_tag(self.name):
         msg = "The name '%s' is an invalid xml tag. Names must begin with" \
               " a letter, colon, or underscore, subsequent characters can" \
               " include numbers, dashes, and periods." % self.name
         raise PyXFormError(msg)
예제 #5
0
 def validate(self):
     if not is_valid_xml_tag(self.name):
         msg = "The name '%s' is an invalid xml tag. Names must begin with" \
               " a letter, colon, or underscore, subsequent characters can" \
               " include numbers, dashes, and periods." % self.name
         raise PyXFormError(msg)