def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a workbook dictionary into the nested dict of the json form spec.

    workbook_dict -- nested dictionaries representing a spreadsheet.
        should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default language
           translation.
        2. In the workbook if media/labels/hints that do not have a language
           suffix will be treated as though their suffix is the default
           language. If the default language is used as a suffix for
           media/labels/hints, then the suffixless version will be
           overwritten.
        A default language given on the settings sheet takes precedence over
        this argument.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing, a choice has no
    name, a typeless row has both a name and a label, a name is missing or is
    not a valid xml tag, a referenced list is unknown, begin/end statements
    do not match, a select_multiple choice name contains a space, a table
    list mixes list names, or instanceID is omitted on an encrypted form.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    row_format_string = "[row : %s]"

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them or single
    # colons to delimit grouped headers. Single colons are bad because they
    # conflict with the xform namespace syntax (i.e. jr:constraintMsg), so we
    # only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):

    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons
    )
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option
    # should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = yes_no_aliases.get(settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        # No default version is set: a date-based default (yyyymmddhh) was
        # left out because it might cause problems for formhub.
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine
    # them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases, use_double_colons, default_language
    )

    choices = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME
    )

    # Make sure all the options have the required properties:
    # a missing name is fatal, a missing label only produces a warning.
    for list_name, options in choices.items():
        for option in options:
            if "name" not in option:
                info = "[list_name : " + list_name + "]"
                raise PyXFormError("On the choices sheet there is a option with no name. " + info)
            if "label" not in option:
                info = "[list_name : " + list_name + "]"
                warnings.append("On the choices sheet there is a option with no label. " + info)

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]

    # Process the headers. Text cleaning is on unless the settings sheet
    # turns clean_text_values off.
    clean_text_values_enabled = yes_no_aliases.get(settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases, use_double_colons, default_language
    )
    survey_sheet = dealias_types(survey_sheet)

    ##################################
    # Parse the survey sheet while generating a survey in our json format:

    # We start at 1 because the column header row is not included in the
    # survey sheet (presumably).
    row_number = 1

    # A stack is used to keep track of begin/end expressions. Each entry is
    # (control type, children list that rows are currently appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]

    # table_list tracks table-list groups: None outside of one, True right
    # after the group begins, and the list name once the first select in the
    # group has been seen.
    table_list = None

    # For efficiency we compile all the regular expressions that will be used
    # to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))$"
    )
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>("
        + "|".join(control_aliases.keys())
        + r"))( (over )?(?P<list_name>\S+))?$"
    )
    select_regexp = re.compile(
        r"^(?P<select_command>("
        + "|".join(select_aliases.keys())
        + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>("
        + "|".join(cascading_aliases.keys())
        + r")) (?P<cascading_level>\S+)?$"
    )

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be
        # disabled.
        if u"disabled" in row:
            warnings.append(
                row_format_string % row_number
                + " The 'disabled' column header is not part of the current spec."
                + "We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            if yes_no_aliases.get(disabled):
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # A typeless row lacking a name or a label is treated as a
            # comment row and skipped with a warning; a typeless row that has
            # both is an error.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    row_format_string % row_number
                    + " Row without name, text, or label is being skipped:\n"
                    + str(row)
                )
                continue
            raise PyXFormError(row_format_string % row_number + " Question with no type.")

        # Check if the question is actually a setting specified on the survey
        # sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                # The end statement must close the innermost open control,
                # and the root entry of the stack can never be closed.
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number
                        + " Unmatched end statement. Previous control type: "
                        + str(prev_control_type)
                        + ", Control type: "
                        + str(control_type)
                    )
                stack.pop()
                table_list = None
                continue

        # Make sure the row has a valid name
        if constants.NAME not in row:
            # TODO: It could be slick if had nameless groups generate a flat
            # model with only a body element.
            if row["type"] == "note":
                # autogenerate names for notes without them
                row["name"] = "generated_note_name_" + str(row_number)
            else:
                raise PyXFormError(
                    row_format_string % row_number + " Question or group with no name."
                )
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + question_name + "]"
            error_message += "Names must begin with a letter, colon, or underscore."
            error_message += "Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (
            constants.LABEL not in row
            and row.get(constants.MEDIA) is None
            and question_type not in label_optional_types
        ):
            # TODO: Should there be a default label?
            # Not sure if we should throw warnings for groups...
            # Warnings can be ignored so I'm not too concerned about false
            # positives.
            warnings.append(
                row_format_string % row_number + " Question has no label: " + str(row)
            )

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # Its children array becomes the new top of the stack, so
                # following questions are nested under it until an end
                # command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list

                # Compared by value: identity comparison of strings only
                # works by accident of how the constant is shared.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number + " Repeat loop without list name."
                        )
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number
                            + " List name not in columns sheet: "
                            + list_name
                        )
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if "label" in new_json_dict or "hint" in new_json_dict:
                        generated_label_element = {
                            "type": "note",
                            "name": "generated_table_list_label_" + str(row_number),
                        }
                        # The label and hint are moved from the group onto
                        # the generated note.
                        if "label" in new_json_dict:
                            generated_label_element[constants.LABEL] = new_json_dict[
                                constants.LABEL
                            ]
                            del new_json_dict[constants.LABEL]
                        if "hint" in new_json_dict:
                            generated_label_element["hint"] = new_json_dict["hint"]
                            del new_json_dict["hint"]
                        child_list.append(generated_label_element)

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number + " Cascading select needs a name."
                    )
                cascading_json = get_cascading_json(
                    cascading_choices, cascading_prefix, cascading_level
                )
                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(
                        row_format_string % row_number
                        + " List name not in choices sheet: "
                        + list_name
                    )

                # Validate select_multiple choice names by making sure they
                # have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if " " in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME]
                                + "] in ["
                                + list_name
                                + "]"
                            )

                # Only the type is tagged here; no extra "specify other"
                # question is generated in this function.
                # NOTE(review): an earlier comment said or_other questions
                # are handled in the survey builder -- confirm.
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict["itemset"] = list_name
                if row.get("choice_filter"):
                    # Filtered selects get every choice list attached to the
                    # survey root instead of their own list on the question.
                    json_dict["choices"] = choices
                else:
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    if not isinstance(table_list, basestring):
                        # Then this row is the first select in a table list:
                        # remember its list and emit a header row of labels.
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            # Adding row number for uniqueness
                            constants.NAME: "reserved_name_for_field_list_labels_"
                            + str(row_number),
                            constants.CONTROL: {u"appearance": u"label"},
                            # Do we care about filtered selects in table lists?
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)

                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += (
                            " Badly formatted table list, list names don't match: "
                            + table_list
                            + " vs. "
                            + list_name
                        )
                        raise PyXFormError(error_message)

                    new_json_dict.setdefault(u"control", {})[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    # Automatically add an instanceID element, unless the settings ask for it
    # to be omitted. Omitting it is refused when a public key is set.
    if yes_no_aliases.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError("Cannot omit instanceID, it is required for encryption.")
    else:
        meta_element = {
            "name": "meta",
            "type": "group",
            "control": {"bodyless": True},
            "children": [
                {
                    "name": "instanceID",
                    "bind": {"readonly": "true()", "calculate": "concat('uuid:', uuid())"},
                    "type": "calculate",
                }
            ],
        }
        # stack[0] is the root entry, i.e. the survey's own children list.
        survey_children_array = stack[0][1]
        survey_children_array.append(meta_element)

    # print_pyobj_to_json(json_dict)
    return json_dict
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a workbook dictionary into the nested dict of the json form spec.

    workbook_dict -- nested dictionaries representing a spreadsheet.
        should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
        1. In the xform the default language is the language reverted to
           when there is no translation available for some itext element.
           Because of this every itext element must have a default language
           translation.
        2. In the workbook if media/labels/hints that do not have a language
           suffix will be treated as though their suffix is the default
           language. If the default language is used as a suffix for
           media/labels/hints, then the suffixless version will be
           overwritten.
        A default language given on the settings sheet takes precedence over
        this argument.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing, a typeless row has
    both a name and a label, a name is missing or is not a valid xml tag, a
    referenced list is unknown, begin/end statements do not match, a
    select_multiple choice name contains a space, or a table list mixes list
    names.
    """
    if warnings is None:
        # Set warnings to a list that will be discarded.
        warnings = []

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them or single
    # colons to delimit grouped headers. Single colons are bad because they
    # conflict with the xform namespace syntax (i.e. jr:constraintMsg), so we
    # only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):

    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []), settings_header_aliases, use_double_colons
    )
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true, indicates a none option
    # should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = (
            yes_no_aliases.get(settings[u"add_none_option"], u"false()") == u"true()"
        )

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: [],
    }
    # Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated, but we combine
    # them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases, use_double_colons, default_language
    )

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases, use_double_colons, default_language
    )

    choices = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet, constants.LIST_NAME
    )

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]

    # Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases, use_double_colons, default_language
    )
    survey_sheet = dealias_types(survey_sheet)

    ##################################
    # Parse the survey sheet while generating a survey in our json format:

    # We start at 1 because the column header row is not included in the
    # survey sheet (presumably).
    row_number = 1

    # A stack is used to keep track of begin/end expressions. Each entry is
    # (control type, children list that rows are currently appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]

    # If a group has a table-list appearance flag, table_list is set to the
    # name of the list once the first select in the group has been seen.
    # begin_table_list is True between the start of such a group and that
    # first select.
    table_list = None
    begin_table_list = False

    # For efficiency we compile all the regular expressions that will be used
    # to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" + "|".join(control_aliases.keys()) + r"))$"
    )
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>("
        + "|".join(control_aliases.keys())
        + r"))( (over )?(?P<list_name>\S+))?$"
    )
    select_regexp = re.compile(
        r"^(?P<select_command>("
        + "|".join(select_aliases.keys())
        + r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$"
    )
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>("
        + "|".join(cascading_aliases.keys())
        + r")) (?P<cascading_level>\S+)?$"
    )

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        # Disabled should probably be first so the attributes below can be
        # disabled.
        if u"disabled" in row:
            warnings.append(
                "The 'disabled' column header is not part of the current spec. We recommend using relevant instead."
            )
            disabled = row.pop(u"disabled")
            # Normalise yes/no style values through the alias table; values
            # the table does not know are compared as given.
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        # skip empty rows
        if len(row) == 0:
            continue

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # A typeless row lacking a name or a label is treated as a
            # comment row and skipped with a warning; a typeless row that has
            # both is an error.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append(
                    "Row wihtout name, text, or label is being skipped "
                    + str(row_number)
                    + ": "
                    + str(row)
                )
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        # Check if the question is actually a setting specified on the survey
        # sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                # The end statement must close the innermost open control,
                # and the root entry of the stack can never be closed.
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        "Unmatched end statement. Previous control type: "
                        + str(prev_control_type)
                        + ", Control type: "
                        + str(control_type)
                    )
                stack.pop()
                table_list = None
                # A table-list group that contained no select must not leak
                # its pending flag into selects that follow the group.
                begin_table_list = False
                continue

        # Make sure the question has a valid name. The raw value is checked
        # before converting, because unicode(None) is the truthy string
        # u"None" and would hide a missing name.
        raw_name = row.get(constants.NAME)
        question_name = u"" if raw_name is None else unicode(raw_name)
        if not question_name:
            raise PyXFormError("Question with no name on row " + str(row_number))
        if not is_valid_xml_tag(question_name):
            error_message = (
                "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            )
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (
            constants.LABEL not in row
            and row.get(constants.MEDIA) is None
            and question_type not in label_optional_types
        ):
            # TODO: Should there be a default label?
            # Not sure if we should throw warnings for groups...
            # Warnings can be ignored so I'm not too concerned about false
            # positives.
            warnings.append(
                "Warning unlabeled question in row " + str(row_number) + ": " + str(row)
            )

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # Its children array becomes the new top of the stack, so
                # following questions are nested under it until an end
                # command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list

                # Compared by value: identity comparison of strings only
                # works by accident of how the constant is shared.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            "Repeat without list name " + " Error on row: " + str(row_number)
                        )
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            "List name not in columns sheet: "
                            + list_name
                            + " Error on row: "
                            + str(row_number)
                        )
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        "Cascading select needs a name. Error on row: %s" % row_number
                    )
                cascading_json = get_cascading_json(
                    cascading_choices, cascading_prefix, cascading_level
                )
                for c in cascading_json:
                    parent_children_array.append(c)
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError(
                        "List name not in choices sheet: "
                        + list_name
                        + " Error on row: "
                        + str(row_number)
                    )

                # Validate select_multiple choice names by making sure they
                # have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added to multiple choice selects. See ["
                                + choice[constants.NAME]
                                + "] in ["
                                + list_name
                                + "]"
                            )

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list:
                        # If this row is the first select in a table list:
                        # remember its list and emit a header row of labels.
                        table_list = list_name
                        table_list_header = {
                            constants.TYPE: select_type,
                            # Adding row number for uniqueness
                            constants.NAME: "reserved_name_for_field_list_labels_"
                            + str(row_number),
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += (
                            "Badly formatted table list, list names don't match: "
                            + table_list
                            + " vs. "
                            + list_name
                        )
                        raise PyXFormError(error_message)

                    new_json_dict.setdefault(u"control", {})[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        parent_children_array.append(row)

    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))

    # print_pyobj_to_json(json_dict)
    return json_dict
def workbook_to_json(workbook_dict, form_name=None, default_language=u"default", warnings=None):
    """
    Convert a parsed spreadsheet workbook into the nested json form dictionary.

    workbook_dict -- nested dictionaries representing a spreadsheet.
                     should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a language
       suffix will be treated as though their suffix is the default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.

    raises PyXFormError when the survey sheet is missing, when a row has a
    name and label but no type, when a question has a missing or invalid
    name, when begin/end control statements are unbalanced, when a
    referenced list name is absent from the choices, when a select-multiple
    choice name contains a space, or when the selects inside a table-list
    group do not all use the same list.
    """
    if warnings is None:
        #Set warnings to a list that will be discarded.
        warnings = []

    #Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    #We check for double colons to determine whether to use them or single colons to delimit grouped headers.
    #Single colons are bad because they conflict with the xform namespace syntax (i.e. jr:constraintMsg),
    #so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    #Break the spreadsheet dict into easier to access objects (settings, choices, survey_sheet):
    ########### Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        settings_header_aliases,
        use_double_colons)
    #Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if settings_sheet else {}

    default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)

    #add_none_option is a boolean that when true, indicates a none option should automatically be added to selects.
    #It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        #Unrecognized values are treated as false.
        settings[u"add_none_option"] = (
            yes_no_aliases.get(settings[u"add_none_option"], u"false()") == u"true()")

    #Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.DEFAULT_LANGUAGE: default_language,
        constants.CHILDREN: [],
    }
    #Here the default settings are overridden by those in the settings sheet
    json_dict.update(settings)

    ########### Choices sheet ##########
    #Columns and "choices and columns" sheets are deprecated, but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, list_header_aliases, use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, list_header_aliases, use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    choices_sheet = dealias_and_group_headers(
        choices_sheet, list_header_aliases, use_double_colons, default_language)

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        constants.LIST_NAME)
    choices = combined_lists

    ########### Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, {})

    ########### Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError("You must have a sheet named (case-sensitive): " + constants.SURVEY)
    survey_sheet = workbook_dict[constants.SURVEY]
    #Process the headers:
    survey_sheet = clean_unicode_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, survey_header_aliases, use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)
    ##################################

    #Parse the survey sheet while generating a survey in our json format:

    row_number = 1 #We start at 1 because the column header row is not included in the survey sheet (presumably).
    #A stack is used to keep track of begin/end expressions.
    #Each entry is (control type, list that children of that control are appended to).
    stack = [(None, json_dict.get(constants.CHILDREN))]
    #If a group has a table-list appearance flag this will be set to the name of the list
    table_list = None
    #True between a table-list "begin" row and the first select that follows it.
    begin_table_list = False
    #For efficiency we compile all the regular expressions that will be used to parse types:
    end_control_regex = re.compile(
        r"^(?P<end>end)(\s|_)(?P<type>(" +
        '|'.join(control_aliases.keys()) + r"))$")
    begin_control_regex = re.compile(
        r"^(?P<begin>begin)(\s|_)(?P<type>(" +
        '|'.join(control_aliases.keys()) +
        r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" +
        '|'.join(select_aliases.keys()) +
        r")) (?P<list_name>\S+)( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(cascading_aliases.keys()) +
        r")) (?P<cascading_level>\S+)?$")

    for row in survey_sheet:
        row_number += 1
        prev_control_type, parent_children_array = stack[-1]

        #Disabled should probably be first so the attributes below can be disabled.
        if u"disabled" in row:
            warnings.append("The 'disabled' column header is not part of the current spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if disabled in yes_no_aliases:
                disabled = yes_no_aliases[disabled]
            if disabled == 'true()':
                continue

        #skip empty rows
        if not row:
            continue

        #Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            #A typeless row lacking a name or a label is treated as a comment row and skipped with a warning.
            if not ((constants.NAME in row) and (constants.LABEL in row)):
                warnings.append("Row wihtout name, text, or label is being skipped " + str(row_number) + ": " + str(row))
                continue
            raise PyXFormError("Question with no type on row " + str(row_number))

        #Check if the question is actually a setting specified on the survey sheet
        settings_type = settings_header_aliases.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))
            continue

        #Try to parse question as a end control statement (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = control_aliases[parse_dict["type"]]
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        "Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                stack.pop()
                table_list = None
                continue

        #Make sure the question has a valid name.
        #A missing name must not be passed through unicode(): unicode(None) is
        #the truthy string u"None", which would defeat the check below.
        raw_name = row.get(constants.NAME)
        question_name = unicode(raw_name) if raw_name is not None else u""
        if not question_name:
            raise PyXFormError("Question with no name on row " + str(row_number))
        if not is_valid_xml_tag(question_name):
            error_message = "Invalid question name [" + question_name + "] on row " + str(row_number) + "\n"
            error_message += "Names must begin with a letter, colon, or underscore. Subsequent characters can include numbers, dashes, and periods."
            raise PyXFormError(error_message)

        if (constants.LABEL not in row and
                row.get(constants.MEDIA) is None and
                question_type not in label_optional_types):
            #TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned about false positives.
            warnings.append("Warning unlabeled question in row " + str(row_number) + ": " + str(row))

        #Try to parse question as begin control statement (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                #Create a new json dict with children, and the proper type, and add it to parent_children_array in place of a question.
                #parent_children_array will then be set to its children array (so following questions are nested under it)
                #until an end command is encountered.
                control_type = control_aliases[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                #Compare by value; identity of string objects is not guaranteed.
                if control_type == constants.LOOP:
                    if not parse_dict.get("list_name"):
                        #TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError("Repeat without list name " + " Error on row: " + str(row_number))
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError("List name not in columns sheet: " + list_name + " Error on row: " + str(row_number))
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if new_json_dict.get(u"control", {}).get(u"appearance") == constants.TABLE_LIST:
                    begin_table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"

                parent_children_array.append(new_json_dict)
                stack.append((control_type, child_list))
                continue

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError("Cascading select needs a name. Error on row: %s" % row_number)
                cascading_json = get_cascading_json(cascading_choices, cascading_prefix, cascading_level)
                for c in cascading_json:
                    parent_children_array.append(c)
                continue # so the row isn't put in as is

        #Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = select_aliases[parse_dict["select_command"]]
                list_name = parse_dict["list_name"]

                if list_name not in choices:
                    raise PyXFormError("List name not in choices sheet: " + list_name + " Error on row: " + str(row_number))

                #Validate select_multiple choice names by making sure they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError("Choice names with spaces cannot be added to multiple choice selects. See [" + choice[constants.NAME] + "] in [" + list_name + "]")

                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type
                new_json_dict[constants.CHOICES] = choices[list_name]

                #Code to deal with table_list appearance flags (for groups of selects)
                if table_list or begin_table_list:
                    if begin_table_list: #If this row is the first select in a table list
                        table_list = list_name
                        #A label-only select is inserted ahead of the first select of the table list.
                        table_list_header = {
                            constants.TYPE: select_type,
                            constants.NAME: "reserved_name_for_field_list_labels_" + str(row_number), #Adding row number for uniqueness
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                        }
                        parent_children_array.append(table_list_header)
                        begin_table_list = False

                    if table_list != list_name:
                        error_message = "Error on row: " + str(row_number) + "\n"
                        error_message += "Badly formatted table list, list names don't match: " + table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"

                parent_children_array.append(new_json_dict)
                continue

        #TODO: Consider adding some question_type validation here.

        #Put the row in the json dict as is:
        parent_children_array.append(row)

    #Anything left above the root entry is a begin statement that was never closed.
    if len(stack) != 1:
        raise PyXFormError("unmatched begin statement: " + str(stack[-1][0]))
    #print_pyobj_to_json(json_dict)
    return json_dict