def validate(self):
    """Raise ``PyXFormError`` when ``self.name`` is not a valid XML tag.

    The name is checked with ``is_valid_xml_tag``. When it is rejected,
    ``INVALID_XFORM_TAG_REGEXP`` is searched against the name so the error
    message can quote the offending text.

    Raises:
        PyXFormError: if ``is_valid_xml_tag(self.name)`` is falsy.
    """
    if is_valid_xml_tag(self.name):
        return

    invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name)
    if invalid_char is None:
        # re.search() returns None when the pattern matches nothing in the
        # name.  Calling .group(0) on it would raise AttributeError and hide
        # the real problem, so fall back to a message without the character.
        msg = (
            "The name '{}' is an invalid XML tag. Names must begin with a "
            "letter, colon, or underscore, subsequent characters can "
            "include numbers, dashes, and periods".format(self.name)
        )
    else:
        msg = (
            "The name '{}' is an invalid XML tag, it contains an "
            "invalid character '{}'. Names must begin with a letter, "
            "colon, or underscore, subsequent characters can include "
            "numbers, dashes, and periods".format(
                self.name, invalid_char.group(0))
        )
    raise PyXFormError(msg)
def validate(self):
    """Raise ``PyXFormError`` when ``self.name`` is not a valid XML tag.

    The name is checked with ``is_valid_xml_tag``. When it is rejected,
    ``INVALID_XFORM_TAG_REGEXP`` is searched against the name so the error
    message can quote the offending text.

    Raises:
        PyXFormError: if ``is_valid_xml_tag(self.name)`` is falsy.
    """
    if is_valid_xml_tag(self.name):
        return

    invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name)
    if invalid_char is None:
        # re.search() returns None when the pattern matches nothing in the
        # name.  Calling .group(0) on it would raise AttributeError and hide
        # the real problem, so fall back to a message without the character.
        msg = (
            "The name '{}' is an invalid XML tag. Names must begin with a "
            "letter, colon, or underscore, subsequent characters can "
            "include numbers, dashes, and periods".format(self.name)
        )
    else:
        msg = (
            "The name '{}' is an invalid XML tag, it contains an "
            "invalid character '{}'. Names must begin with a letter, "
            "colon, or underscore, subsequent characters can include "
            "numbers, dashes, and periods".format(
                self.name, invalid_char.group(0))
        )
    raise PyXFormError(msg)
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    """
    Convert a spreadsheet-like workbook dictionary into the json form dict.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict.
                    Sheet names are lower-cased before they are looked up.
                    Rows of the survey sheet may be modified in place
                    (e.g. the "disabled" key is popped, generated names
                    are written back).
    form_name -- The spreadsheet's filename. It is passed through
                 unicode() and used as the default name / id_string.
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language
       suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
       A default language given on the settings sheet takes precedence over
       this argument.
    warnings -- an optional list which warnings will be appended to.
                When omitted, a fresh list is used and the warnings are
                not visible to the caller.

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing/empty or lacks a
    "type" column, and for invalid rows (question without type or name,
    invalid names, unmatched begin/end statements, unknown list names, ...).
    """
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    workbook_dict = {x.lower(): y for x, y in workbook_dict.items()}
    for row in workbook_dict.get(constants.SURVEY, []):
        is_valid = 'type' in [z.lower() for z in row]
        if is_valid:
            break
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
    replace_smart_quotes_in_dict(settings)

    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    for choice_item in choices_sheet:
        replace_smart_quotes_in_dict(choice_item)
    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        default_language)
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                warnings.append(
                    "On the choices sheet there is a option with no label. "
                    + info)
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            # Iterate over a snapshot of the keys because entries are
            # deleted from the option while looping.
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    if headername not in warnedabout:
                        warnedabout.add(headername)
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): "
            + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = dealias_and_group_headers(workbook_dict.get(constants.OSM, []),
                                          aliases.list_header,
                                          True)
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>("
                                   + '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>("
                                     + '|'.join(aliases.control.keys())
                                     + r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys())
        + r")) (?P<list_name>\S+)"
        + "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>("
        + '|'.join(aliases.cascading.keys())
        + r")) (?P<cascading_level>\S+)?$")
    # Raw string for the tail so that "\S" is not treated as a (deprecated)
    # unknown string escape; the resulting pattern is unchanged.
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys())
        + r')) (?P<list_name>\S+)')

    def replace_prefix(d, prefix):
        """Replace '$PREFIX$' in every string of d (in place); return d."""
        for k, v in d.items():
            if isinstance(v, basestring):
                d[k] = v.replace('$PREFIX$', prefix)
            elif isinstance(v, dict):
                d[k] = replace_prefix(v, prefix)
            elif isinstance(v, list):
                # A real list (not map()): on Python 3 map() is lazy, so
                # the nested replacement would never run and a map object
                # would end up in the json dict.
                d[k] = [replace_prefix(x, prefix) for x in v]
        return d

    # Rows from the survey sheet that should be nested in meta
    survey_meta = []

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
                    str(row))
                continue
            raise PyXFormError(
                row_format_string % row_number +
                " Question with no type.\n" + str(row))

        # Pull out questions that will go in meta block
        if question_type == 'audit':
            # Force audit name to always be "audit" to follow XForms spec
            if 'name' in row and row['name'] not in [None, '', 'audit']:
                raise PyXFormError(
                    row_format_string % row_number +
                    " Audits must always be named 'audit.'" +
                    " The name column should be left blank.")

            row['name'] = 'audit'
            survey_meta.append(row)
            continue

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(
                    row_format_string % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
            else:
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            # On Python 2 encode() yields a byte str that concatenates with
            # the str literals below (original behaviour).  On Python 3 it
            # yields bytes, and str + bytes raises TypeError, which would
            # hide the real error; use the text itself in that case.
            name_for_message = question_name.encode('utf-8')
            if not isinstance(name_for_message, str):
                name_for_message = question_name
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             name_for_message + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #       Not sure if we should throw warnings for groups...
            #       Warnings can be ignored so I'm not too concerned
            #       about false positives.
            warnings.append(
                row_format_string % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = []
                new_json_dict[constants.CHILDREN] = child_list
                # Compare by value: "is" only works while both names happen
                # to refer to the very same string object.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    'control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    parent_children_array.append({
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        },
                        "type": "calculate",
                    })
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name":
                                "generated_table_list_label_" + str(row_number)
                        }
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                                new_json_dict[constants.LABEL]
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                                new_json_dict['hint']
                            del new_json_dict['hint']
                        child_list.append(generated_label_element)

                if 'intent' in new_json_dict:
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \
                        new_json_dict['intent']

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                        'questions' not in cascading_choices[0]:
                    raise PyXFormError(
                        "Found a cascading_select " + cascading_level +
                        ", but could not find " + cascading_level +
                        " in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = 'bind' in row
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']
                    parent_children_array.append(
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    warnings.append(
                        row_format_string % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                    else:
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME:
                                "reserved_name_for_field_list_labels_" +
                                str(row_number),
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                parent_children_array.append(new_json_dict)
                if specify_other_question:
                    parent_children_array.append(specify_other_question)
                continue

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            osm_list_name = parse_dict.get('list_name')
            if osm_list_name is not None:
                tags = osm_tags.get(osm_list_name)
                if tags is None:
                    # Without this guard the loop below fails with a bare
                    # "NoneType is not iterable" TypeError.
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in " + str(constants.OSM) +
                        " sheet: " + osm_list_name)
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            parent_children_array.append(new_dict)

            continue

        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            parent_children_array.append(new_dict)
            continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " +
                           str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        add_flat_annotations(stack[0][1])

    meta_children = [] + survey_meta

    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
    else:
        # Automatically add an instanceID element:
        meta_children.append({
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            },
            "type": "calculate",
        })

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        meta_children.append({
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            },
            "type": "calculate",
        })

    if len(meta_children) > 0:
        meta_element = \
            {
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                },
                "children": meta_children
            }
        noop, survey_children_array = stack[0]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
def validate(self):
    """Check that ``self.name`` is usable as an XML tag name.

    Raises:
        PyXFormError: if ``is_valid_xml_tag`` rejects ``self.name``.
    """
    # Guard clause: nothing to report for a valid name.
    if is_valid_xml_tag(self.name):
        return

    template = (
        "The name '%s' is an invalid xml tag. Names must begin with"
        " a letter, colon, or underscore, subsequent characters can"
        " include numbers, dashes, and periods."
    )
    raise PyXFormError(template % self.name)