예제 #1
def xls_value_to_unicode(value, value_type, datemode):
    """
    Take a xls formatted value and try to make a unicode string.

    :param value: the raw cell value.
    :param value_type: the cell type, compared against the xlrd
        ``XL_CELL_*`` constants.
    :param datemode: date mode forwarded to ``xlrd.xldate_as_tuple``.
    :return: the text form of the cell value.
    """
    if value_type == xlrd.XL_CELL_BOOLEAN:
        return "TRUE" if value else "FALSE"

    if value_type == xlrd.XL_CELL_NUMBER:
        # Try to display as an int if possible, so 3.0 is rendered as "3".
        int_value = int(value)
        if int_value == value:
            return unicode(int_value)
        return unicode(value)

    # Compare by value: identity checks on int constants only work by accident.
    if value_type == xlrd.XL_CELL_DATE:
        # NOTE: it is better for form authors to single quote dates so they
        # arrive as strings; excel formatted dates are converted here instead.
        datetime_or_time_only = xlrd.xldate_as_tuple(value, datemode)
        if datetime_or_time_only[:3] == (0, 0, 0):
            # A (0, 0, 0) date part means the cell holds a time only.
            return unicode(datetime.time(*datetime_or_time_only[3:]))
        return unicode(datetime.datetime(*datetime_or_time_only))

    # Ensure unicode and replace nbsp spaces with normal ones to avoid this
    # issue: https://github.com/modilabs/pyxform/issues/83
    return unicode(value).replace(unichr(160), " ")
예제 #2
    def insert_output_values(self, text, context=None):
        """
        Replace all the ${variables} in text with xpaths.

        Returns a tuple of the resulting text and a boolean indicating
        whether any replacement actually changed the escaped text.
        """
        def _var_repl_output_function(matchobj):
            return self._var_repl_output_function(matchobj, context)

        # There was a bug where escaping is completely turned off in labels
        # where variable replacement is used.
        # For example, `${name} < 3` causes an error but `< 3` does not.
        # The workaround is to do the string escaping prior to variable
        # replacement:
        text_node = PatchedText()
        text_node.data = text
        xml_text = text_node.toxml()

        # Only substitute when there is reason to: at this point < is already
        # &lt; and returning the escaped text needlessly would get it escaped
        # again to &amp;lt;
        if "{" in unicode(xml_text):
            result = re.sub(
                BRACKETED_TAG_REGEX, _var_repl_output_function, unicode(xml_text)
            )
            return result, result != xml_text
        return text, False
예제 #3
 def xls_value_to_unicode(value, value_type):
     """
     Take a xls formatted value and try to make a unicode string.

     ``workbook`` is not a parameter; its ``datemode`` is read from the
     surrounding scope when a date cell is converted.

     :param value: the raw cell value.
     :param value_type: the cell type, compared against the xlrd
         ``XL_CELL_*`` constants.
     :return: the text form of the cell value.
     """
     if value_type == xlrd.XL_CELL_BOOLEAN:
         return u"TRUE" if value else u"FALSE"

     if value_type == xlrd.XL_CELL_NUMBER:
         # Try to display as an int if possible, so 3.0 is rendered as "3".
         int_value = int(value)
         if int_value == value:
             return unicode(int_value)
         return unicode(value)

     # Compare by value: identity checks on int constants only work by accident.
     if value_type == xlrd.XL_CELL_DATE:
         # NOTE: it is better for form authors to single quote dates so they
         # arrive as strings; excel formatted dates are converted here instead.
         datetime_or_time_only = xlrd.xldate_as_tuple(
             value, workbook.datemode)
         if datetime_or_time_only[:3] == (0, 0, 0):
             # A (0, 0, 0) date part means the cell holds a time only.
             return unicode(datetime.time(*datetime_or_time_only[3:]))
         return unicode(datetime.datetime(*datetime_or_time_only))

     # Ensure unicode and replace nbsp spaces with normal ones to avoid this
     # issue: https://github.com/modilabs/pyxform/issues/83
     return unicode(value).replace(unichr(160), ' ')
예제 #4
    def insert_output_values(self, text):
        """
        Replace all the ${variables} in text with xpaths.

        Returns a tuple of the resulting text and a boolean indicating
        whether any replacement actually changed the escaped text.
        """
        # There was a bug where escaping is completely turned off in labels
        # where variable replacement is used.
        # For example, `${name} < 3` causes an error but `< 3` does not.
        # The workaround is to do the string escaping prior to variable
        # replacement:
        text_node = PatchedText()
        text_node.data = text
        xml_text = text_node.toxml()

        bracketed_tag = r"\$\{(.*?)\}"
        # Only substitute when there is reason to: at this point < is already
        # &lt; and returning the escaped text needlessly would get it escaped
        # again to &amp;lt;
        if '{' in unicode(xml_text):
            result = re.sub(
                bracketed_tag, self._var_repl_output_function,
                unicode(xml_text))
            return result, result != xml_text
        return text, False
예제 #5
 def parse(self, xml_str):
     """
     Parse an XML string and cache its DOM, nested dict and flat dict forms.

     The input is stripped, and whitespace runs between a ``>`` and the
     following ``<`` are removed before parsing. Keys of the flat dict are
     the path components joined with "/", with the first component dropped.
     """
     compact_xml = re.sub(
         unicode(r">\s+<"), unicode("><"), xml_str.strip())
     self._xml_obj = minidom.parseString(compact_xml)
     self._root_node = self._xml_obj.documentElement
     self._dict = _xml_node_to_dict(self._root_node)
     flat = self._flat_dict = {}
     for segments, leaf in _flatten_dict(self._dict, []):
         flat_key = "/".join(segments[1:])
         flat[flat_key] = leaf
예제 #6
 def parse(self, xml_str):
     """
     Parse an XML string and cache its DOM, nested dict and flat dict forms.

     The input is stripped, and whitespace runs between a ``>`` and the
     following ``<`` are removed before parsing. Keys of the flat dict are
     the path components joined with "/", with the first component dropped.
     """
     compact_xml = re.sub(
         unicode(r">\s+<"), unicode("><"), xml_str.strip())
     self._xml_obj = minidom.parseString(compact_xml)
     self._root_node = self._xml_obj.documentElement
     self._dict = _xml_node_to_dict(self._root_node)
     flat = self._flat_dict = {}
     for segments, leaf in _flatten_dict(self._dict, []):
         flat_key = "/".join(segments[1:])
         flat[flat_key] = leaf
예제 #7
    def _generate_last_saved_instance(element):
        for expression_type in constants.EXTERNAL_INSTANCES:
            last_saved_expression = re.search(
            if last_saved_expression:
                return True

        return re.search(LAST_SAVED_REGEX, unicode(
            element["choice_filter"])) or re.search(
                LAST_SAVED_REGEX, unicode(element["default"]))
예제 #8
def csv_to_dict(path_or_file):
    if isinstance(path_or_file, basestring):
        csv_data = open(path_or_file, 'rb')
        csv_data = path_or_file

    _dict = OrderedDict()

    def first_column_as_sheet_name(row):
        if len(row) == 0:
            return None, None
        elif len(row) == 1:
            return row[0], None
            s_or_c = row[0]
            content = row[1:]
            if s_or_c == '':
                s_or_c = None
            # concatenate all the strings in content
            if reduce(lambda x, y: x + y, content) == '':
                # content is a list of empty strings
                content = None
            return s_or_c, content

    reader = csv.reader(csv_data, encoding='utf-8')
    sheet_name = None
    current_headers = None
    for row in reader:
        survey_or_choices, content = first_column_as_sheet_name(row)
        if survey_or_choices is not None:
            sheet_name = survey_or_choices
            if sheet_name not in _dict:
                _dict[unicode(sheet_name)] = []
            current_headers = None
        if content is not None:
            if current_headers is None:
                current_headers = content
                _dict[u"%s_header" % sheet_name] = \
                _d = OrderedDict()
                for key, val in zip(current_headers, content):
                    if val != "":
                        # Slight modification so values are striped
                        # this is because csvs often spaces following commas
                        # (but the csv reader might already handle that.)
                        _d[unicode(key)] = unicode(val.strip())
    return _dict
예제 #10
파일: survey.py 프로젝트: gushil/pyxform
    def _generate_static_instances(list_name, choice_list):
        Generates <instance> elements for static data
        (e.g. choices for select type questions)

        Note that per commit message 0578242 and in xls2json.py R539, an
        instance is only output for select items defined in the choices sheet
        when the item has a choice_filter, and it is that way for backwards
        instance_element_list = []
        multi_language = isinstance(choice_list[0].get("label"), dict)
        has_media = bool(choice_list[0].get("media"))

        for idx, choice in enumerate(choice_list):
            choice_element_list = []
            # Add a unique id to the choice element in case there is itext
            # it references
            if (
                or has_media
                or has_dynamic_label(choice_list, multi_language)
                itext_id = "-".join([list_name, str(idx)])
                choice_element_list.append(node("itextId", itext_id))

            for name, value in sorted(choice.items()):
                if isinstance(value, basestring) and name != "label":
                    choice_element_list.append(node(name, unicode(value)))
                if (
                    not multi_language
                    and not has_media
                    and not has_dynamic_label(choice_list, multi_language)
                    and isinstance(value, basestring)
                    and name == "label"
                    choice_element_list.append(node(name, unicode(value)))

            instance_element_list.append(node("item", *choice_element_list))

        return InstanceInfo(
                "instance", node("root", *instance_element_list), id=list_name
예제 #12
    def insert_xpaths(self, text):
        """
        Replace all instances of ${var} with the xpath to var.

        ``text`` is converted to unicode first, so non-string values are
        accepted. Each match is handed to ``self._var_repl_function``.
        """
        bracketed_tag = r"\$\{(.*?)\}"

        return re.sub(bracketed_tag, self._var_repl_function, unicode(text))
예제 #13
 def test_get_bin_paths__unsupported_raises(self):
     """Should raise an error if a mapping for the file name isn't found."""
     file_path = self.last_check = os.path.join(TEST_PATH, "bacon.zip")
     with self.assertRaises(PyXFormError) as ctx:
     self.assertIn("Did not find", unicode(ctx.exception))
예제 #15
 def _write_json(file_path, content):
     Save the JSON data to a file.
     with io.open(file_path, mode="w", newline="\n") as out_file:
         data = json.dumps(content, indent=2, sort_keys=True)
예제 #17
    def _generate_static_instances(list_name, choice_list):
        Generates <instance> elements for static data
        (e.g. choices for select type questions)

        Note that per commit message 0578242 and in xls2json.py R539, an
        instance is only output for select items defined in the choices sheet
        when the item has a choice_filter, and it is that way for backwards
        instance_element_list = []
        for idx, choice in enumerate(choice_list):
            choice_element_list = []
            # Add a unique id to the choice element in case there is itext
            # it references
            itext_id = '-'.join(['static_instance', list_name, str(idx)])
            choice_element_list.append(node("itextId", itext_id))

            for choicePropertyName, choicePropertyValue in choice.items():
                if isinstance(choicePropertyValue, basestring) \
                        and choicePropertyName != 'label':
                        node(choicePropertyName, unicode(choicePropertyValue)))
            instance_element_list.append(node("item", *choice_element_list))
        return InstanceInfo(type=u"choice",
                                          node("root", *instance_element_list),
예제 #18
파일: survey.py 프로젝트: XLSForm/pyxform
    def _generate_static_instances(list_name, choice_list):
        Generates <instance> elements for static data
        (e.g. choices for select type questions)

        Note that per commit message 0578242 and in xls2json.py R539, an
        instance is only output for select items defined in the choices sheet
        when the item has a choice_filter, and it is that way for backwards
        instance_element_list = []
        for idx, choice in enumerate(choice_list):
            choice_element_list = []
            # Add a unique id to the choice element in case there is itext
            # it references
            itext_id = "-".join(["static_instance", list_name, str(idx)])
            choice_element_list.append(node("itextId", itext_id))

            for name, value in choice.items():
                if isinstance(value, basestring) and name != "label":
                    choice_element_list.append(node(name, unicode(value)))

            instance_element_list.append(node("item", *choice_element_list))

        return InstanceInfo(
                "instance", node("root", *instance_element_list), id=list_name
예제 #21
파일: survey.py 프로젝트: jpic/pyxform
    def _generate_static_instances(self):
        Generates <instance> elements for static data
        (e.g. choices for select type questions)
        for list_name, choice_list in self.choices.items():
            instance_element_list = []
            for idx, choice in zip(range(len(choice_list)), choice_list):
                choice_element_list = []
                # Add a unique id to the choice element incase there is itext
                # it refrences
                itext_id = '-'.join(['static_instance', list_name, str(idx)])
                choice_element_list.append(node("itextId", itext_id))

                for choicePropertyName, choicePropertyValue in choice.items():
                    if isinstance(choicePropertyValue, basestring) \
                            and choicePropertyName != 'label':
            yield node("instance",
                       node("root", *instance_element_list),
예제 #22
예제 #23
 def __init__(self, path_or_file):
     """
     Build the object from a spreadsheet path or an open file object.

     When a file object is given, its ``name`` is used as the path. The
     workbook is parsed into ``self._dict`` and the id, name, print name
     and title are all set to the unicode form of the file name.
     """
     path = path_or_file
     # isinstance (rather than an exact type check) also accepts subclasses
     # of the file type.
     if isinstance(path_or_file, file):
         path = path.name
     self._dict = parse_file_to_workbook_dict(path)
     self._path = path
     self._id = unicode(get_filename(path))
     self._name = self._print_name = self._title = self._id
예제 #24
    def insert_xpaths(self, text, context, use_current=False):
        """
        Replace all instances of ${var} with the xpath to var.

        ``text`` is converted to unicode first. ``context`` and
        ``use_current`` are passed through unchanged to
        ``self._var_repl_function`` for every match.
        """
        def _var_repl_function(matchobj):
            return self._var_repl_function(matchobj, context, use_current)

        return re.sub(BRACKETED_TAG_REGEX, _var_repl_function, unicode(text))
예제 #25
    def test_check__fail__not_installed(self):
        """Should raise an error if there's no installation detected."""
        self.update_info.installed_path = os.path.join(TEST_PATH, ".nothing")
        with self.assertRaises(PyXFormError) as ctx:

        error = unicode(ctx.exception)
        self.assertIn("Check failed!", error)
        self.assertIn("No installed release found", error)
예제 #27
        def get_pulldata_functions(element):
            Returns a list of different pulldata(... function strings if
            pulldata function is defined at least once for any of:
            calculate, constraint, readonly, required, relevant

            :param: element (pyxform.survey.Survey):
            functions_present = []
            for formula_name in constants.EXTERNAL_INSTANCES:
                if "pulldata(" in unicode(element["bind"].get(formula_name)):
            if "pulldata(" in unicode(element["choice_filter"]):
            if "pulldata(" in unicode(element["default"]):

            return functions_present
예제 #28
 def xml_instance(self):
     """
     Build the instance node for this element.

     Every value under the ``instance`` key is passed through the root
     survey's ``insert_xpaths`` and used as a node attribute. A truthy
     ``default`` becomes the node's content.
     """
     root = self.get_root()
     attrs = {}
     attrs.update(self.get(u'instance', {}))
     for attr_name in list(attrs):
         attrs[attr_name] = root.insert_xpaths(attrs[attr_name])
     if not self.get(u"default"):
         return node(self.name, **attrs)
     return node(self.name, unicode(self.get(u"default")), **attrs)
예제 #29
 def _cleanup_errors(error_message):
     """
     Tidy up an error message and split it into lines.

     Slash-separated paths of two or more segments are rewritten by
     ``ErrorCleaner._replace_xpath_with_tokens``. Lines that repeat the
     line directly before them are dropped.

     :param error_message: the raw error text.
     :return: list of lines without consecutive duplicates.
     """
     # Raw string: "\-" is not a valid escape in a normal string literal.
     pattern = r"(/[a-z0-9\-_]+(?:/[a-z0-9\-_]+)+)"
     error_message = re.sub(
         pattern, ErrorCleaner._replace_xpath_with_tokens,
         error_message, flags=re.I)
     lines = unicode(error_message).strip().splitlines()
     # The first line has no predecessor, so it is always kept.
     no_dupes = [line for i, line in enumerate(lines)
                 if i == 0 or line != lines[i - 1]]
     return no_dupes
예제 #30
    def test_find_download_url__no_files(self):
        """Should raise an error if no files attached to release."""
        file_name = "windows.zip"
        json_data = self.updater._read_json(file_path=self.latest_odk)

        with self.assertRaises(PyXFormError) as ctx:
예제 #32
    def xml_instance(self, **kwargs):
        """
        Build the instance node for this element.

        Every value under the ``instance`` key is passed through the root
        survey's ``insert_xpaths`` (with this element as context) and used
        as a node attribute. A truthy ``default`` becomes the node's content
        unless ``default_is_dynamic`` reports it as dynamic.
        """
        root = self.get_root()
        attrs = {}
        attrs.update(self.get("instance", {}))
        for attr_name in list(attrs):
            attrs[attr_name] = root.insert_xpaths(attrs[attr_name], self)

        if not self.get("default") or default_is_dynamic(self.default, self.type):
            return node(self.name, **attrs)
        return node(self.name, unicode(self.get("default")), **attrs)
예제 #33
 def __init__(self, path_or_file):
     """
     Build the object from a spreadsheet path or a file-like object.

     If the argument has a ``name`` attribute, that is used as the path;
     otherwise the argument itself is. The workbook is parsed into
     ``self._dict`` and the id, name, print name and title are all set to
     the unicode form of the file name.
     """
     # Same as try: path_or_file.name / except AttributeError: keep as-is.
     path = getattr(path_or_file, "name", path_or_file)
     self._dict = parse_file_to_workbook_dict(path)
     self._path = path
     self._id = unicode(get_filename(path))
     self._name = self._print_name = self._title = self._id
예제 #34
    def insert_xpaths(self, text, context, use_current=False):
        """
        Replace all instances of ${var} with the xpath to var.

        ``text`` is converted to unicode first. ``context`` and
        ``use_current`` are passed through unchanged to
        ``self._var_repl_function`` for every match.
        """
        def _var_repl_function(matchobj):
            return self._var_repl_function(matchobj, context, use_current)

        bracketed_tag = r"\$\{(.*?)\}"

        return re.sub(bracketed_tag, _var_repl_function, unicode(text))
예제 #35
 def _cleanup_errors(error_message):
     """
     Tidy up an error message and split it into lines.

     Slash-separated paths of two or more segments are rewritten by
     ``ErrorCleaner._replace_xpath_with_tokens``. Lines that repeat the
     line directly before them are dropped.

     :param error_message: the raw error text.
     :return: list of lines without consecutive duplicates.
     """
     pattern = r"(/[a-z0-9\-_]+(?:/[a-z0-9\-_]+)+)"
     error_message = re.sub(
         pattern, ErrorCleaner._replace_xpath_with_tokens, error_message, flags=re.I
     )
     lines = unicode(error_message).strip().splitlines()
     # The first line has no predecessor, so it is always kept.
     no_dupes = [
         line for i, line in enumerate(lines) if i == 0 or line != lines[i - 1]
     ]
     return no_dupes
예제 #36
파일: question.py 프로젝트: XLSForm/pyxform
    def xml_instance(self, **kwargs):
        """
        Build the instance node for this element.

        Every value under the ``instance`` key is passed through the root
        survey's ``insert_xpaths`` (with this element as context) and used
        as a node attribute. A truthy ``default`` becomes the node's content.
        """
        root = self.get_root()
        attrs = {}
        attrs.update(self.get("instance", {}))
        for attr_name in list(attrs):
            attrs[attr_name] = root.insert_xpaths(attrs[attr_name], self)

        if not self.get("default"):
            return node(self.name, **attrs)
        return node(self.name, unicode(self.get("default")), **attrs)
예제 #37
파일: survey.py 프로젝트: XLSForm/pyxform
    def insert_xpaths(self, text, context, use_current=False):
        """
        Replace all instances of ${var} with the xpath to var.

        ``text`` is converted to unicode first. ``context`` and
        ``use_current`` are passed through unchanged to
        ``self._var_repl_function`` for every match.
        """

        def _var_repl_function(matchobj):
            return self._var_repl_function(matchobj, context, use_current)

        bracketed_tag = r"\$\{(.*?)\}"

        return re.sub(bracketed_tag, _var_repl_function, unicode(text))
예제 #38
예제 #41
예제 #42
예제 #43
def parse_file_to_json(path, default_name=None, default_language=u"default",
                       warnings=None, file_object=None):
    """
    A wrapper for workbook_to_json.

    :param path: path of the spreadsheet to parse.
    :param default_name: form name; when None, the unicode form of the file
        name derived from ``path`` is used.
    :param default_language: passed through to ``workbook_to_json``.
    :param warnings: optional list handed to ``workbook_to_json``; a fresh
        list is created when None.
    :param file_object: passed through to ``parse_file_to_workbook_dict``.
    :return: whatever ``workbook_to_json`` returns for the parsed workbook.
    """
    if warnings is None:
        warnings = []
    workbook_dict = parse_file_to_workbook_dict(path, file_object)
    if default_name is None:
        default_name = unicode(get_filename(path))
    return workbook_to_json(
        workbook_dict, default_name, default_language, warnings)
예제 #44
def main_cli():
        parser = _create_parser()
        args = parser.parse_args()
        kwargs = args.__dict__.copy()
        del kwargs["command"]
    except PyXFormError as main_error:
    if 0 < len(capture_handler.watcher.records):
        for line in capture_handler.watcher.output["INFO"]:
예제 #45
예제 #46
 def test_unzip_extract_file__bad_crc_raises(self):
     """Should raise an error if the zip file CRC doesn't match."""
     with get_temp_dir() as temp_dir, ZipFile(
         self.zip_file, mode="r"
     ) as zip_file, self.assertRaises(BadZipFile) as ctx:
         zip_item = [
             x for x in zip_file.infolist() if x.filename.endswith("validate")
         zip_item.CRC = 12345
         file_out_path = os.path.join(temp_dir, "validate")
             open_zip_file=zip_file, zip_item=zip_item, file_out_path=file_out_path
     self.assertIn("Bad CRC-32 for file", unicode(ctx.exception))
예제 #47
예제 #48
파일: updater.py 프로젝트: XLSForm/pyxform
def main_cli():
    logger = logging.getLogger(name="pyxform_validator_update")

        parser = _create_parser()
        args = parser.parse_args()
        kwargs = args.__dict__.copy()
        del kwargs["command"]
    except PyXFormError as main_error:
    if 0 < len(capture_handler.watcher.records):
        for line in capture_handler.watcher.output["INFO"]:
예제 #49
    def xml_instance(self):
        """
        Build the instance node via ``Section.xml_instance`` and decorate it.

        The entries of ``self.attribute`` are applied first, then ``id``,
        and then ``xmlns`` and ``version`` when their values are truthy.
        """
        instance_node = Section.xml_instance(self)

        # Custom attributes go on first to prevent them overwriting id and
        # version, which are set afterwards.
        for attr_name, attr_value in self.attribute.items():
            instance_node.setAttribute(unicode(attr_name), attr_value)

        instance_node.setAttribute(u"id", self.id_string)

        # Optional attributes, in the order they are written to the node.
        optional_attributes = (
            (u"xmlns", self.instance_xmlns),
            (u"version", self.version),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                instance_node.setAttribute(attr_name, attr_value)

        return instance_node
예제 #51
    def test_check__fail__install_check(self):
        """Should raise an error if the installation check fails."""
        new = self.utc_now - timedelta(minutes=15.0)

        with get_temp_dir() as mod_root, self.assertRaises(PyXFormError) as ctx:
            update_info = get_update_info(check_ok=True, mod_root=mod_root)
            update_info.latest_path = self.install_fake_old
                file_path=update_info.last_check_path, content=new

            self.updater.update(update_info=update_info, file_name="linux.zip")
            update_info.install_check = install_check_fail

        error = unicode(ctx.exception)
        self.assertIn("Check failed!", error)
        self.assertIn("installed release does not appear to work", error)
예제 #52
    def test_update__installed__fail__already_latest(self):
        """Should stop install and raise an error with relevant info."""
        new = self.utc_now - timedelta(minutes=15.0)

        with get_temp_dir() as mod_root, self.assertRaises(PyXFormError) as ctx:
            update_info = get_update_info(check_ok=True, mod_root=mod_root)
            update_info.latest_path = self.install_fake
                file_path=update_info.last_check_path, content=new

            self.updater.update(update_info=update_info, file_name="linux.zip")
            update_info.latest_path = self.install_fake
            self.updater.update(update_info=update_info, file_name="linux.zip")

        error = unicode(ctx.exception)
        self.assertIn("Update failed!", error)
        self.assertIn("installed release appears to be the latest", error)
예제 #53
    def test_update__not_installed__fail__install_check(self):
        """Should stop install and raise an error with relevant info."""
        new = self.utc_now - timedelta(minutes=15.0)

        with get_temp_dir() as mod_root, self.assertRaises(PyXFormError) as ctx:
            update_info = get_update_info(check_ok=False, mod_root=mod_root)
            update_info.latest_path = self.install_fake
                file_path=update_info.last_check_path, content=new

            self.updater.update(update_info=update_info, file_name="linux.zip")

        error = unicode(ctx.exception)
        self.assertIn("Update failed!", error)
        self.assertIn("latest release does not appear to work", error)
예제 #54
파일: updater.py 프로젝트: XLSForm/pyxform
    def _install(update_info, file_name):
        Install the latest release.
            latest = _UpdateHandler._get_latest(update_info=update_info)
            file_path = os.path.join(update_info.bin_new_path, file_name)
            new_bin_file_path = os.path.join(
                update_info.bin_new_path, update_info.validator_basename

            if os.path.exists(update_info.bin_new_path):

            installed = os.path.join(update_info.bin_new_path, "installed.json")
            _UpdateHandler._write_json(file_path=installed, content=latest)
            url = _UpdateHandler._find_download_url(
                update_info=update_info, json_data=latest, file_name=file_name
            _UpdateHandler._download_file(url=url, file_path=file_path)

            if is_zipfile(file_path) and os.path.splitext(file_path)[1] == ".zip":
                os.rename(file_path, new_bin_file_path)

            # For macos / linux: chmod ug+x the new bin file. No-op on Windows.
            current_mode = os.stat(new_bin_file_path).st_mode
            os.chmod(new_bin_file_path, current_mode | S_IXUSR | S_IXGRP)

        except PyXFormError as e:
            raise PyXFormError("\n\nUpdate failed!\n\n" + unicode(e))
            return latest
예제 #55
파일: survey.py 프로젝트: XLSForm/pyxform
    def xml_instance(self, **kwargs):
        """
        Build the instance node via ``Section.xml_instance`` and decorate it.

        The entries of ``self.attribute`` are applied first, then ``id``,
        and then ``xmlns``, ``version``, ``odk:prefix`` and ``odk:delimiter``
        when their values are truthy.
        """
        instance_node = Section.xml_instance(self, **kwargs)

        # Custom attributes go on first to prevent them overwriting id and
        # version, which are set afterwards.
        for attr_name, attr_value in self.attribute.items():
            instance_node.setAttribute(unicode(attr_name), attr_value)

        instance_node.setAttribute("id", self.id_string)

        # Optional attributes, in the order they are written to the node.
        optional_attributes = (
            ("xmlns", self.instance_xmlns),
            ("version", self.version),
            ("odk:prefix", self.prefix),
            ("odk:delimiter", self.delimiter),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                instance_node.setAttribute(attr_name, attr_value)

        return instance_node
예제 #56
 def get_abbreviated_xpath(self):
     """
     Return the "/"-joined names of this element's lineage, minus the first.

     When the lineage has fewer than two entries, the name of its first
     entry is returned instead.
     """
     lineage = self.get_lineage()
     if len(lineage) >= 2:
         return u"/".join([unicode(n.name) for n in lineage[1:]])
     # This fallback must sit outside the branch above; nested under it, it
     # was unreachable and a short lineage produced None.
     return lineage[0].name
예제 #57
파일: updater.py 프로젝트: XLSForm/pyxform
 def _write_last_check(file_path, content):
     Write the .last_check file.
     with io.open(file_path, mode="w", newline="\n") as out_file:
예제 #58
def workbook_to_json(
        workbook_dict, form_name=None,
        default_language=u"default", warnings=None):
    # Build the nested survey dictionary (json_dict) from a workbook
    # dictionary and return it. Problems are either raised as PyXFormError
    # or appended to the `warnings` list.
    # NOTE(review): this copy of the function looks truncated in many places
    # (docstring quotes missing, empty `if`/`for` bodies, unclosed calls and
    # dict literals, dangling assignments). The gaps are flagged with
    # NOTE(review) comments below; restore them from the original source
    # before relying on this text.
    workbook_dict -- nested dictionaries representing a spreadsheet.
                    should be similar to those returned by xls_to_dict
    form_name -- The spreadsheet's filename
    default_language -- default_language does two things:
    1. In the xform the default language is the language reverted to when
       there is no translation available for some itext element. Because
       of this every itext element must have a default language translation.
    2. In the workbook if media/labels/hints that do not have a
       language suffix will be treated as though their suffix is the
       default language.
       If the default language is used as a suffix for media/labels/hints,
       then the suffixless version will be overwritten.
    warnings -- an optional list which warnings will be appended to

    returns a nested dictionary equivalent to the format specified in the
    json form spec.
    # ensure required headers are present
    if warnings is None:
        warnings = []
    is_valid = False
    # Lower-case the sheet names so the sheet lookups below do not depend on
    # the caller's capitalisation (assumes the constants are lower-case --
    # confirm).
    workbook_dict = {x.lower(): y for x,y in workbook_dict.items()}
    # The survey sheet counts as valid once any row has a 'type' key
    # (compared case-insensitively).
    for row in workbook_dict.get(constants.SURVEY, []):
        is_valid = 'type' in [z.lower() for z in row]
        if is_valid:
            # NOTE(review): body missing here -- presumably `break`; confirm.
    if not is_valid:
        raise PyXFormError(
            u"The survey sheet is either empty or missing important "
            u"column headers.")

    # Prefix used for row-specific warning and error messages.
    row_format_string = '[row : %s]'

    # Make sure the passed in vars are unicode
    form_name = unicode(form_name)
    default_language = unicode(default_language)

    # We check for double columns to determine whether to use them
    # or single colons to delimit grouped headers.
    # Single colons are bad because they conflict with with the xform namespace
    # syntax (i.e. jr:constraintMsg),
    # so we only use them if we have to for backwards compatibility.
    use_double_colons = has_double_colon(workbook_dict)

    # Break the spreadsheet dict into easier to access objects
    # (settings, choices, survey_sheet):
    # ########## Settings sheet ##########
    settings_sheet = dealias_and_group_headers(
        workbook_dict.get(constants.SETTINGS, []),
        aliases.settings_header, use_double_colons)
    # Only the first row of the settings sheet is used.
    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}

    # A default language given on the settings sheet wins over the argument.
    default_language = settings.get(
        constants.DEFAULT_LANGUAGE, default_language)

    # add_none_option is a boolean that when true,
    # indicates a none option should automatically be added to selects.
    # It should probably be deprecated but I haven't checked yet.
    if u"add_none_option" in settings:
        settings[u"add_none_option"] = aliases.yes_no.get(
            settings[u"add_none_option"], False)

    # Here we create our json dict root with default settings:
    id_string = settings.get(constants.ID_STRING, form_name)
    sms_keyword = settings.get(constants.SMS_KEYWORD, id_string)
    json_dict = {
        constants.TYPE: constants.SURVEY,
        constants.NAME: form_name,
        constants.TITLE: id_string,
        constants.ID_STRING: id_string,
        constants.SMS_KEYWORD: sms_keyword,
        constants.DEFAULT_LANGUAGE: default_language,
        # By default the version is based on the date and time yyyymmddhh
        # Leaving default version out for now since it might cause
        # problems for formhub.
        # constants.VERSION : datetime.datetime.now().strftime("%Y%m%d%H"),
        constants.CHILDREN: []
    # NOTE(review): the closing brace of json_dict and the statement that
    # applies the settings-sheet overrides (described by the next comment)
    # are not present here; confirm against the original source.
    # Here the default settings are overridden by those in the settings sheet

    # ########## Choices sheet ##########
    # Columns and "choices and columns" sheets are deprecated,
    # but we combine them with the choices sheet for backwards-compatibility.
    choices_and_columns_sheet = workbook_dict.get(
        constants.CHOICES_AND_COLUMNS, {})
    choices_and_columns_sheet = dealias_and_group_headers(
        choices_and_columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    columns_sheet = workbook_dict.get(constants.COLUMNS, [])
    columns_sheet = dealias_and_group_headers(
        columns_sheet, aliases.list_header,
        use_double_colons, default_language)

    choices_sheet = workbook_dict.get(constants.CHOICES, [])
    for choice_item in choices_sheet:
        # NOTE(review): loop body missing here; what is done to each
        # choice_item cannot be told from this text.

    choices_sheet = dealias_and_group_headers(
        choices_sheet, aliases.list_header, use_double_colons,
        # NOTE(review): the call above is not closed; presumably a final
        # `default_language)` argument was lost, as in the two calls above.
    # ########## Cascading Select sheet ###########
    cascading_choices = workbook_dict.get(constants.CASCADING_CHOICES, [])
    if len(cascading_choices):
        if 'choices' in cascading_choices[0]:
            choices_sheet = choices_sheet + cascading_choices[0]['choices']

    combined_lists = group_dictionaries_by_key(
        choices_and_columns_sheet + choices_sheet + columns_sheet,
        # NOTE(review): call not closed; the grouping-key argument is
        # missing (the later call for osm_tags uses constants.LIST_NAME).

    choices = combined_lists
    # Make sure all the options have the required properties:
    warnedabout = set()
    for list_name, options in choices.items():
        for option in options:
            if 'name' not in option:
                info = "[list_name : " + list_name + ']'
                raise PyXFormError("On the choices sheet there is "
                                   "a option with no name. " + info)
            if 'label' not in option:
                info = "[list_name : " + list_name + ']'
                # NOTE(review): the opening of this statement (presumably
                # `warnings.append(`) and its tail are missing.
                    "On the choices sheet there is a option with no label. " +
            # chrislrobert's fix for a cryptic error message:
            # see: https://code.google.com/p/opendatakit/issues/detail?id=832&start=200 # noqa
            # Iterate over a snapshot of the keys because entries are
            # deleted from `option` inside the loop.
            option_keys = list(option.keys())
            for headername in option_keys:
                # Using warnings and removing the bad columns
                # instead of throwing errors because some forms
                # use choices column headers for notes.
                if ' ' in headername:
                    # NOTE(review): nothing visible here adds headername to
                    # warnedabout, so as shown this check never suppresses a
                    # repeated warning; an `add` call was probably lost.
                    if headername not in warnedabout:
                        warnings.append("On the choices sheet there is " +
                                        "a column (\"" +
                                        headername +
                                        "\") with an illegal header. " +
                                        "Headers cannot include spaces.")
                    del option[headername]
                elif headername == '':
                    warnings.append("On the choices sheet there is a value" +
                                    " in a column with no header.")
                    del option[headername]
    # ########## Survey sheet ###########
    if constants.SURVEY not in workbook_dict:
        raise PyXFormError(
            "You must have a sheet named (case-sensitive): " +
            # NOTE(review): the rest of this message and the closing
            # parenthesis are missing.
    survey_sheet = workbook_dict[constants.SURVEY]
    # Process the headers:
    # Text cleaning is on unless the settings sheet turns it off
    # (the default looked up in aliases.yes_no is "true()").
    clean_text_values_enabled = aliases.yes_no.get(
        settings.get("clean_text_values", "true()"))
    if clean_text_values_enabled:
        survey_sheet = clean_text_values(survey_sheet)
    survey_sheet = dealias_and_group_headers(
        survey_sheet, aliases.survey_header,
        use_double_colons, default_language)
    survey_sheet = dealias_types(survey_sheet)

    osm_sheet = dealias_and_group_headers(workbook_dict.get(constants.OSM, []),
        # NOTE(review): the remaining arguments and closing parenthesis of
        # this call are missing.
    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
    # #################################

    # Parse the survey sheet while generating a survey in our json format:

    row_number = 1  # We start at 1 because the column header row is not
    #                 included in the survey sheet (presumably).
    # A stack is used to keep track of begin/end expressions
    # Each entry is (control type, children list); the bottom entry is the
    # survey root with control type None.
    stack = [(None, json_dict.get(constants.CHILDREN))]
    # If a group has a table-list appearance flag
    # this will be set to the name of the list
    table_list = None
    # For efficiency we compile all the regular expressions
    # that will be used to parse types:
    end_control_regex = re.compile(r"^(?P<end>end)(\s|_)(?P<type>(" +
                                   '|'.join(aliases.control.keys()) + r"))$")
    begin_control_regex = re.compile(r"^(?P<begin>begin)(\s|_)(?P<type>(" +
                                     '|'.join(aliases.control.keys()) +
                                     r"))( (over )?(?P<list_name>\S+))?$")
    select_regexp = re.compile(
        r"^(?P<select_command>(" + '|'.join(aliases.select.keys()) +
        r")) (?P<list_name>\S+)" +
        "( (?P<specify_other>(or specify other|or_other|or other)))?$")
    cascading_regexp = re.compile(
        r"^(?P<cascading_command>(" +
        '|'.join(aliases.cascading.keys()) +
        r")) (?P<cascading_level>\S+)?$")
    # NOTE(review): the last literal below is not a raw string, unlike its
    # neighbours; `\S` only works because Python leaves unrecognised escapes
    # in place (newer versions warn about this). This pattern is also not
    # anchored with ^...$ like the others.
    osm_regexp = re.compile(
        r"(?P<osm_command>(" + '|'.join(aliases.osm.keys()) +
        ')) (?P<list_name>\S+)')

    # Rows from the survey sheet that should be nested in meta
    survey_meta = []

    for row in survey_sheet:
        row_number += 1
        # Children of the innermost open control (or the survey root).
        prev_control_type, parent_children_array = stack[-1]
        # Disabled should probably be first
        # so the attributes below can be disabled.
        if u"disabled" in row:
            # NOTE(review): the opening of this statement (presumably
            # `warnings.append(`) is missing.
                row_format_string % row_number +
                " The 'disabled' column header is not part of the current" +
                " spec. We recommend using relevant instead.")
            disabled = row.pop(u"disabled")
            if aliases.yes_no.get(disabled):
                # NOTE(review): body missing -- presumably `continue` to
                # skip the disabled row; confirm.

        # skip empty rows
        if len(row) == 0:
            # NOTE(review): body missing -- presumably `continue`; confirm.

        # Get question type
        question_type = row.get(constants.TYPE)
        if not question_type:
            # if name and label are also missing,
            # then its a comment row, and we skip it with warning
            if not ((constants.NAME in row) or (constants.LABEL in row)):
                # NOTE(review): the start (presumably `warnings.append(`)
                # and end of this statement, plus the skip itself, are
                # missing.
                    row_format_string % row_number +
                    " Row without name, text, or label is being skipped:\n" +
            raise PyXFormError(
                row_format_string % row_number +
                " Question with no type.\n" + str(row))

        # Pull out questions that will go in meta block
        if question_type == 'audit':
            # Force audit name to always be "audit" to follow XForms spec
            if 'name' in row and row['name'] not in [None, '', 'audit']:
                raise PyXFormError(row_format_string % row_number +
                    " Audits must always be named 'audit.'" +
                    " The name column should be left blank.")

            row['name'] = 'audit'
            # NOTE(review): nothing visible here stores the row in
            # survey_meta, despite the comment above; lines were probably
            # lost at this point.

        if question_type == 'calculate':
            calculation = row.get('bind', {}).get('calculate')
            if not calculation:
                raise PyXFormError(
                    row_format_string % row_number + " Missing calculation.")

        # Check if the question is actually a setting specified
        # on the survey sheet
        settings_type = aliases.settings_header.get(question_type)
        if settings_type:
            json_dict[settings_type] = unicode(row.get(constants.NAME))

        # Try to parse question as a end control statement
        # (i.e. end loop/repeat/group):
        end_control_parse = end_control_regex.search(question_type)
        if end_control_parse:
            parse_dict = end_control_parse.groupdict()
            if parse_dict.get("end") and "type" in parse_dict:
                control_type = aliases.control[parse_dict["type"]]
                # An end must match the innermost open control, and the
                # survey root (stack depth 1) can never be closed.
                if prev_control_type != control_type or len(stack) == 1:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Unmatched end statement. Previous control type: " +
                        str(prev_control_type) +
                        ", Control type: " + str(control_type))
                # NOTE(review): no stack pop or `continue` is visible after
                # the check; presumably lost -- confirm.
                table_list = None

        # Make sure the row has a valid name
        if constants.NAME not in row:
            if row['type'] == 'note':
                # autogenerate names for notes without them
                row['name'] = "generated_note_name_" + str(row_number)
            # elif 'group' in row['type'].lower():
            #     # autogenerate names for groups without them
            #     row['name'] = "generated_group_name_" + str(row_number)
                # NOTE(review): as shown this raise runs even for notes that
                # were just given a name; an `else:` was probably lost.
                raise PyXFormError(row_format_string % row_number +
                                   " Question or group with no name.")
        question_name = unicode(row[constants.NAME])
        if not is_valid_xml_tag(question_name):
            error_message = row_format_string % row_number
            error_message += " Invalid question name [" + \
                             question_name.encode('utf-8') + "] "
            error_message += "Names must begin with a letter, colon,"\
                             + " or underscore."
            error_message += "Subsequent characters can include numbers," \
                             + " dashes, and periods."
            raise PyXFormError(error_message)

        if constants.LABEL not in row and \
           row.get(constants.MEDIA) is None and \
           question_type not in aliases.label_optional_types:
            # TODO: Should there be a default label?
            #      Not sure if we should throw warnings for groups...
            #      Warnings can be ignored so I'm not too concerned
            #      about false positives.
            # NOTE(review): the opening of this statement (presumably
            # `warnings.append(`) is missing.
                row_format_string % row_number +
                " Question has no label: " + str(row))

        # Try to parse question as begin control statement
        # (i.e. begin loop/repeat/group):
        begin_control_parse = begin_control_regex.search(question_type)
        if begin_control_parse:
            parse_dict = begin_control_parse.groupdict()
            if parse_dict.get("begin") and "type" in parse_dict:
                # Create a new json dict with children, and the proper type,
                # and add it to parent_children_array in place of a question.
                # parent_children_array will then be set to its children array
                # (so following questions are nested under it)
                # until an end command is encountered.
                control_type = aliases.control[parse_dict["type"]]
                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = control_type
                child_list = list()
                new_json_dict[constants.CHILDREN] = child_list
                # NOTE(review): identity (`is`) comparison against a
                # constant; `==` may be what is intended -- confirm the type
                # of constants.LOOP.
                if control_type is constants.LOOP:
                    if not parse_dict.get("list_name"):
                        # TODO: Perhaps warn and make repeat into a group?
                        raise PyXFormError(
                            row_format_string % row_number +
                            " Repeat loop without list name.")
                    list_name = parse_dict["list_name"]
                    if list_name not in choices:
                        raise PyXFormError(
                            row_format_string % row_number +
                            " List name not in columns sheet: " + list_name)
                    new_json_dict[constants.COLUMNS] = choices[list_name]

                # Generate a new node for the jr:count column so
                # xpath expressions can be used.
                repeat_count_expression = new_json_dict.get(
                    'control', {}).get('jr:count')
                if repeat_count_expression:
                    generated_node_name = new_json_dict['name'] + "_count"
                    # NOTE(review): the dict entries below have lost their
                    # enclosing statement and braces (presumably an append
                    # of a generated calculate node); confirm.
                        "name": generated_node_name,
                        "bind": {
                            "readonly": "true()",
                            "calculate": repeat_count_expression,
                        "type": "calculate",
                    # The repeat count now refers to the generated node.
                    new_json_dict['control']['jr:count'] = \
                        "${" + generated_node_name + "}"

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                ctrl_ap = new_json_dict.get(u"control", {}).get(u"appearance")
                if ctrl_ap == constants.TABLE_LIST:
                    table_list = True
                    new_json_dict[u"control"][u"appearance"] = u"field-list"
                    # Generate a note label element so hints and labels
                    # work as expected in table-lists.
                    # see https://github.com/modilabs/pyxform/issues/62
                    if 'label' in new_json_dict or 'hint' in new_json_dict:
                        # NOTE(review): the dict literal below is broken (a
                        # key and the closing brace are missing) and the two
                        # assignments further down have lost their
                        # right-hand sides.
                        generated_label_element = {
                            "type": "note",
                                "generated_table_list_label_" + str(row_number)
                        if 'label' in new_json_dict:
                            generated_label_element[constants.LABEL] = \
                            del new_json_dict[constants.LABEL]
                        if 'hint' in new_json_dict:
                            generated_label_element['hint'] = \
                            del new_json_dict['hint']
                if 'intent' in new_json_dict:
                    # NOTE(review): the second assignment below has lost its
                    # right-hand side.
                    new_json_dict['control'] = \
                        new_json_dict.get(u"control", {})
                    new_json_dict['control']['intent'] = \

                # Following rows are collected into child_list until the
                # matching end statement.
                # NOTE(review): no statement adding new_json_dict to
                # parent_children_array is visible in this branch, and no
                # `continue` follows the push; presumably lost -- confirm.
                stack.append((control_type, child_list))

        # try to parse as a cascading select
        cascading_parse = cascading_regexp.search(question_type)
        if cascading_parse:
            parse_dict = cascading_parse.groupdict()
            if parse_dict.get("cascading_command"):
                cascading_level = parse_dict["cascading_level"]
                cascading_prefix = row.get(constants.NAME)
                if not cascading_prefix:
                    raise PyXFormError(
                        row_format_string % row_number +
                        " Cascading select needs a name.")
                # cascading_json = get_cascading_json(
                # cascading_choices, cascading_prefix, cascading_level)
                if len(cascading_choices) <= 0 or \
                   'questions' not in cascading_choices[0]:
                    raise PyXFormError(
                        "Found a cascading_select " + cascading_level +
                        ", but could not find " + cascading_level +
                        "in cascades sheet.")
                cascading_json = cascading_choices[0]['questions']
                json_dict['choices'] = choices
                include_bindings = False
                if 'bind' in row:
                    include_bindings = True
                for cq in cascading_json:
                    # include bindings
                    if include_bindings:
                        cq['bind'] = row['bind']

                    # Recursively substitute the '$PREFIX$' placeholder in
                    # every string value of d (in place) and return d.
                    def replace_prefix(d, prefix):
                        for k, v in d.items():
                            if isinstance(v, basestring):
                                d[k] = v.replace('$PREFIX$', prefix)
                            elif isinstance(v, dict):
                                d[k] = replace_prefix(v, prefix)
                            elif isinstance(v, list):
                                # NOTE(review): under Python 3 `map` returns
                                # a lazy iterator rather than a list.
                                d[k] = map(
                                    lambda x: replace_prefix(x, prefix), v)
                        return d

                    # NOTE(review): the start of the statement below is
                    # missing (presumably an append to
                    # parent_children_array).
                        replace_prefix(cq, cascading_prefix))
                continue  # so the row isn't put in as is

        # Try to parse question as a select:
        select_parse = select_regexp.search(question_type)
        if select_parse:
            parse_dict = select_parse.groupdict()
            if parse_dict.get("select_command"):
                select_type = aliases.select[parse_dict["select_command"]]
                if select_type == 'select one external' \
                        and 'choice_filter' not in row:
                    # NOTE(review): the opening of this statement
                    # (presumably `warnings.append(`) is missing.
                        row_format_string % row_number +
                        u" select one external is only meant for"
                        u" filtered selects.")
                    # Fall back to a plain select one.
                    select_type = aliases.select['select_one']
                list_name = parse_dict["list_name"]
                list_file_name, file_extension = os.path.splitext(list_name)

                # Lists named like *.csv / *.xml files and external selects
                # are exempt from the choices-sheet lookup.
                if list_name not in choices \
                        and select_type != 'select one external' \
                        and file_extension not in ['.csv', '.xml']:
                    if not choices:
                        raise PyXFormError(
                            u"There should be a choices sheet in this xlsform."
                            u" Please ensure that the choices sheet name is "
                            u"all in small caps and has columns 'list name', "
                            u"'name', and 'label' (or aliased column names).")
                    raise PyXFormError(
                        row_format_string % row_number +
                        " List name not in choices sheet: " + list_name)

                # Validate select_multiple choice names by making sure
                # they have no spaces (will cause errors in exports).
                if select_type == constants.SELECT_ALL_THAT_APPLY \
                        and file_extension not in ['.csv', '.xml']:
                    for choice in choices[list_name]:
                        if ' ' in choice[constants.NAME]:
                            raise PyXFormError(
                                "Choice names with spaces cannot be added "
                                "to multiple choice selects. See [" +
                                choice[constants.NAME] + "] in [" +
                                list_name + "]")

                specify_other_question = None
                if parse_dict.get("specify_other") is not None:
                    select_type += u" or specify other"
                    # With this code we no longer need to handle or_other
                    # questions in survey builder.
                    # However, it depends on being able to use choice filters
                    # and xpath expressions that return empty sets.
                    # choices[list_name].append(
                    # {
                    #     'name': 'other',
                    #     'label': {default_language : 'Other'},
                    #     'orOther': 'true',
                    # })
                    # or_other_xpath = 'isNull(orOther)'
                    # if 'choice_filter' in row:
                    #   row['choice_filter'] += ' or ' + or_other_xpath
                    # else:
                    #   row['choice_filter'] = or_other_xpath

                    # specify_other_question = \
                    # {
                    #       'type':'text',
                    #       'name': row['name'] + '_specify_other',
                    #       'label':
                    #        'Specify Other for:\n"' + row['label'] + '"',
                    #       'bind' : {'relevant':
                    #                "selected(../%s, 'other')" % row['name']},
                    #     }

                new_json_dict = row.copy()
                new_json_dict[constants.TYPE] = select_type

                # NOTE(review): as shown, both 'query' and 'itemset' are set
                # for external selects and both assignments run in the
                # '.csv'/'.xml' branch; `else:` lines were probably lost in
                # this section -- confirm.
                if row.get('choice_filter'):
                    if select_type == 'select one external':
                        new_json_dict['query'] = list_name
                        new_json_dict['itemset'] = list_name
                        json_dict['choices'] = choices
                elif file_extension in ['.csv', '.xml']:
                    new_json_dict['itemset'] = list_name
                    new_json_dict[constants.CHOICES] = choices[list_name]

                # Code to deal with table_list appearance flags
                # (for groups of selects)
                if table_list is not None:
                    # Then this row is the first select in a table list
                    if not isinstance(table_list, basestring):
                        table_list = list_name
                        # NOTE(review): the dict literal below is broken (a
                        # key, part of a value and the closing brace are
                        # missing), and no statement using table_list_header
                        # is visible.
                        table_list_header = {
                            constants.TYPE: select_type,
                                "reserved_name_for_field_list_labels_" +
                            # Adding row number for uniqueness # noqa
                            constants.CONTROL: {u"appearance": u"label"},
                            constants.CHOICES: choices[list_name],
                            # Do we care about filtered selects in table lists?
                            # 'itemset' : list_name,

                    # Every select inside one table list must use the list
                    # recorded from the first select.
                    if table_list != list_name:
                        error_message = row_format_string % row_number
                        error_message += " Badly formatted table list," \
                                         " list names don't match: " + \
                                         table_list + " vs. " + list_name
                        raise PyXFormError(error_message)

                    control = new_json_dict[u"control"] = \
                        new_json_dict.get(u"control", {})
                    control[u"appearance"] = "list-nolabel"
                # NOTE(review): no statement adding new_json_dict to
                # parent_children_array is visible in this branch, and the
                # `if` below has no body.
                if specify_other_question:

        # Try to parse question as osm:
        osm_parse = osm_regexp.search(question_type)
        if osm_parse:
            parse_dict = osm_parse.groupdict()
            new_dict = row.copy()
            new_dict['type'] = constants.OSM

            if parse_dict.get('list_name') is not None:
                tags = osm_tags.get(parse_dict.get('list_name'))
                # Attach nested choices to any tag whose name is itself a
                # list in osm_tags.
                for tag in tags:
                    if osm_tags.get(tag.get('name')):
                        tag['choices'] = osm_tags.get(tag.get('name'))
                new_dict['tags'] = tags

            # NOTE(review): nothing visible here stores new_dict; presumably
            # an append and `continue` were lost.


        # range question_type
        if question_type == 'range':
            new_dict = process_range_question_type(row)
            # NOTE(review): new_dict is not used after this in the visible
            # code; presumably an append and `continue` were lost.

        # TODO: Consider adding some question_type validation here.

        # Put the row in the json dict as is:
        # NOTE(review): the statement this comment describes is missing.

    # Anything left above the root entry is a begin without a matching end.
    if len(stack) != 1:
        raise PyXFormError("Unmatched begin statement: " + str(stack[-1][0]))

    if settings.get('flat', False):
        # print "Generating flattened instance..."
        # NOTE(review): this `if` has no executable body here.

    # Start from a copy of the rows collected for the meta block.
    meta_children = [] + survey_meta

    # NOTE(review): the instanceID entries below have lost their enclosing
    # statement and braces, and probably an `else:` before them (as shown
    # they would sit inside the omit_instanceID branch); confirm.
    if aliases.yes_no.get(settings.get("omit_instanceID")):
        if settings.get("public_key"):
            raise PyXFormError(
                "Cannot omit instanceID, it is required for encryption.")
        # Automatically add an instanceID element:
            "name": "instanceID",
            "bind": {
                "readonly": "true()",
                "calculate": settings.get(
                    "instance_id", "concat('uuid:', uuid())"),
            "type": "calculate",

    if 'instance_name' in settings:
        # Automatically add an instanceName element:
        # NOTE(review): the enclosing statement and braces of the entries
        # below are missing.
            "name": "instanceName",
            "bind": {
                "calculate": settings['instance_name']
            "type": "calculate",

    if len(meta_children) > 0:
        # NOTE(review): the dict literal for meta_element is broken (braces
        # missing) and no statement adding it to survey_children_array is
        # visible.
        meta_element = \
                "name": "meta",
                "type": "group",
                "control": {
                    "bodyless": True
                "children": meta_children
        noop, survey_children_array = stack[0]

    # print_pyobj_to_json(json_dict)
    return json_dict