def test_xl_date_ambigous_with_exception(self):
    """Test that an invalid date on a non-standard sheet raises PyXFormError with a clear message."""
    filename = "xl_date_ambiguous_v1.xlsx"
    path_to_excel_file = os.path.join(DIR, "bug_example_xls", filename)
    with self.assertRaises(PyXFormError) as e:
        xls_to_dict(path_to_excel_file)
    msg = (
        "The xls file provided has an invalid date on the"
        " survey sheet, under the default column on row number 5"
    )
    self.assertEqual(msg, str(e.exception))
def _b64_xls_to_dict(base64_encoded_upload):
    decoded_str = base64.b64decode(base64_encoded_upload)
    try:
        xls_with_renamed_sheet = rename_xls_sheet(
            BytesIO(decoded_str), from_sheet='library', to_sheet='survey')
    except ConflictSheetError:
        raise ValueError('An import cannot have both "survey" and'
                         ' "library" sheets.')
    except NoFromSheetError:
        # library did not exist in the xls file
        survey_dict = xls2json_backends.xls_to_dict(BytesIO(decoded_str))
    else:
        survey_dict = xls2json_backends.xls_to_dict(xls_with_renamed_sheet)
        survey_dict['library'] = survey_dict.pop('survey')
    return _strip_header_keys(survey_dict)
def test_default_sheet_name_to_survey(self):
    xls_path = utils.path_to_text_fixture("survey_no_name.xlsx")
    dict_value = xls_to_dict(xls_path)
    print(json.dumps(dict_value))
    self.assertTrue("survey" in json.dumps(dict_value))
    self.assertTrue("state" in json.dumps(dict_value))
    self.assertTrue("The State" in json.dumps(dict_value))
def _load_assets_from_url(self, url, messages, **kwargs):
    destination = kwargs.get('destination', False)
    destination_kls = kwargs.get('destination_kls', False)
    has_necessary_perm = kwargs.get('has_necessary_perm', False)

    req = requests.get(url, allow_redirects=True)
    fif = HttpContentParse(request=req).parse()
    fif.remove_invalid_assets()
    fif.remove_empty_collections()

    destination_collection = destination \
        if (destination_kls == 'collection') else False

    if destination_collection and not has_necessary_perm:
        # redundant check
        raise exceptions.PermissionDenied(
            'user cannot load assets into this collection')

    collections_to_assign = []
    for item in fif._parsed:
        extra_args = {
            'owner': self.user,
            'name': item._name_base,
        }

        if item.get_type() == 'collection':
            item._orm = create_assets(item.get_type(), extra_args)
        elif item.get_type() == 'asset':
            kontent = xls2json_backends.xls_to_dict(item.readable)
            if not destination:
                extra_args['content'] = _strip_header_keys(kontent)
                item._orm = create_assets(item.get_type(), extra_args)
            else:
                # The below is copied from `_parse_b64_upload` pretty much as is
                # TODO: review and test carefully
                asset = destination
                asset.content = kontent
                asset.save()
                messages['updated'].append({
                    'uid': asset.uid,
                    'kind': 'asset',
                    'owner__username': self.user.username,
                })

        if item.parent:
            collections_to_assign.append([
                item._orm,
                item.parent._orm,
            ])
        elif destination_collection:
            collections_to_assign.append([
                item._orm,
                destination_collection,
            ])

    for (orm_obj, parent_item) in collections_to_assign:
        orm_obj.parent = parent_item
        orm_obj.save()
def test_xls_to_dict__extra_sheet_names_are_returned_by_parser(self):
    """Should return all sheet names so that later steps can do spellcheck."""
    d = xls_to_dict(os.path.join(example_xls.PATH, "extra_sheet_names.xlsx"))
    self.assertIn("survey", d)
    self.assertIn("my_sheet", d)
    self.assertIn("stettings", d)
    self.assertIn("choices", d)
def _parse_input(self):
    if self.filetype == "xls":
        self._dict = xls_to_dict(self._path)
    elif self.filetype == "csv":
        self._dict = csv_to_dict(self._path)
    self._sheet_names = self._dict.keys()
    self._set_choices_and_columns_sheet_name()
    self._strip_unicode_values()
    self._fix_int_values()
    self._group_dictionaries()
def test_equivalency(self):
    equivalent_fixtures = ['group', 'loop',  # 'gps',
                           'specify_other', 'include', 'text_and_integer',
                           'include_json', 'yes_or_no_question']
    for fixture in equivalent_fixtures:
        xls_path = utils.path_to_text_fixture("%s.xls" % fixture)
        csv_path = utils.path_to_text_fixture("%s.csv" % fixture)
        xls_inp = xls_to_dict(xls_path)
        csv_inp = csv_to_dict(csv_path)
        self.assertEqual(csv_inp, xls_inp)
def test_equivalency(self):
    equivalent_fixtures = ['group', 'loop',  # 'gps',
                           'specify_other', 'include', 'text_and_integer',
                           'include_json', 'yes_or_no_question']
    for fixture in equivalent_fixtures:
        xls_path = utils.path_to_text_fixture("%s.xls" % fixture)
        csv_path = utils.path_to_text_fixture("%s.csv" % fixture)
        xls_inp = xls_to_dict(xls_path)
        csv_inp = csv_to_dict(csv_path)
        self.maxDiff = None
        self.assertEqual(csv_inp, xls_inp)
def test_xls_to_dict(self):
    # convert a CSV to XLS using our new method
    new_xls = pyxform_utils.convert_csv_to_xls(simple_yn)
    # convert our new XLS to dict (using pyxform)
    xls_dict = xls2json_backends.xls_to_dict(new_xls)
    # convert the original CSV to dict (using pyxform)
    csv_dict = xls2json_backends.csv_to_dict(StringIO(simple_yn))
    # Our function, "pyxform_utils.convert_csv_to_xls", performs (CSV -> XLS).
    # This assertion tests equivalence of
    #   (CSV) -> dict_representation
    #   (CSV -> XLS) -> dict_representation
    self.assertEqual(csv_dict, xls_dict)
def test_equivalency(self):
    equivalent_fixtures = [
        "group",
        "loop",
        # 'gps',
        "specify_other",
        "include",
        "text_and_integer",
        "include_json",
        "yes_or_no_question",
    ]
    for fixture in equivalent_fixtures:
        xls_path = utils.path_to_text_fixture("%s.xls" % fixture)
        csv_path = utils.path_to_text_fixture("%s.csv" % fixture)
        xls_inp = xls_to_dict(xls_path)
        csv_inp = csv_to_dict(csv_path)
        self.maxDiff = None
        self.assertEqual(csv_inp, xls_inp)
def _load_assets_from_url(self, url, messages, **kwargs):
    destination = kwargs.get('destination', False)
    destination_kls = kwargs.get('destination_kls', False)
    has_necessary_perm = kwargs.get('has_necessary_perm', False)

    req = requests.get(url, allow_redirects=True)
    fif = HttpContentParse(request=req).parse()
    fif.remove_invalid_assets()
    fif.remove_empty_collections()

    destination_collection = destination \
        if (destination_kls == 'collection') else False

    if destination_collection and not has_necessary_perm:
        # redundant check
        raise exceptions.PermissionDenied(
            'user cannot load assets into this collection')

    collections_to_assign = []
    for item in fif._parsed:
        extra_args = {
            'owner': self.user,
            'name': item._name_base,
        }
        if item.get_type() == 'collection':
            item._orm = create_assets(item.get_type(), extra_args)
        elif item.get_type() == 'asset':
            kontent = xls2json_backends.xls_to_dict(item.readable)
            extra_args['content'] = _strip_header_keys(kontent)
            item._orm = create_assets(item.get_type(), extra_args)
        if item.parent:
            collections_to_assign.append([
                item._orm,
                item.parent._orm,
            ])
        elif destination_collection:
            collections_to_assign.append([
                item._orm,
                destination_collection,
            ])

    for (orm_obj, parent_item) in collections_to_assign:
        orm_obj.parent = parent_item
        orm_obj.save()
def parse_file_to_workbook_dict(path, file_object=None):
    """
    Given an xls or csv workbook file, use xls2json_backends to create
    a python workbook_dict.
    workbook_dicts are organized as follows:
    {sheetname : [{column_header : column_value_in_array_indexed_row}]}
    """
    (filepath, filename) = os.path.split(path)
    if not filename:
        raise PyXFormError("No filename.")
    (shortname, extension) = os.path.splitext(filename)
    if not extension:
        raise PyXFormError("No extension.")
    if extension == ".xls" or extension == ".xlsx":
        return xls_to_dict(file_object if file_object is not None else path)
    elif extension == ".csv":
        return csv_to_dict(file_object if file_object is not None else path)
    else:
        raise PyXFormError("File was not recognized")
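A minimal usage sketch of the parser above; the file name and the column values shown in the comment are hypothetical, added only to illustrate the documented workbook_dict shape.

# Hypothetical usage: parse an XLSForm and walk the "survey" sheet rows.
workbook_dict = parse_file_to_workbook_dict("example_form.xlsx")  # hypothetical path
for row in workbook_dict.get("survey", []):
    # each row is a dict keyed by column header,
    # e.g. {"type": "text", "name": "respondent_name", "label": "Name"}
    print(row.get("type"), row.get("name"))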
def to_csv_io(self, asset_xls_io, id_string):
    '''
    Convert the output of `Asset.to_xls_io()` or
    `Asset.to_versioned_xls_io()` into a CSV appropriate for
    KC's `text_xls_form`
    '''
    xls_dict = xls_to_dict(asset_xls_io)
    csv_io = cStringIO.StringIO()
    writer = unicodecsv.writer(
        csv_io, delimiter=',', quotechar='"',
        quoting=unicodecsv.QUOTE_MINIMAL)

    settings_arr = xls_dict.get('settings', [])
    if len(settings_arr) == 0:
        settings_dict = {}
    else:
        settings_dict = settings_arr[0]
    if 'form_id' in settings_dict:
        del settings_dict['form_id']
    settings_dict['id_string'] = id_string
    settings_dict['form_title'] = self.asset.name
    xls_dict['settings'] = [settings_dict]

    for sheet_name, rows in xls_dict.items():
        if re.search(r'_header$', sheet_name):
            continue

        writer.writerow([sheet_name])
        out_keys = []
        out_rows = []
        for row in rows:
            out_row = []
            for key in row.keys():
                if key not in out_keys:
                    out_keys.append(key)
            for out_key in out_keys:
                out_row.append(row.get(out_key, None))
            out_rows.append(out_row)
        writer.writerow([None] + out_keys)
        for out_row in out_rows:
            writer.writerow([None] + out_row)
    return csv_io
def _b64_xls_to_dict(base64_encoded_upload):
    decoded_str = base64.b64decode(base64_encoded_upload)
    survey_dict = xls2json_backends.xls_to_dict(BytesIO(decoded_str))
    return _strip_header_keys(survey_dict)
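A hedged usage sketch of _b64_xls_to_dict above, assuming it is called from the same module (so xls2json_backends and _strip_header_keys are in scope); the file path is a hypothetical example.

# Hypothetical caller: base64-encode an XLSForm file and parse it.
import base64

with open("example_form.xlsx", "rb") as f:  # hypothetical upload
    encoded_upload = base64.b64encode(f.read())
survey_dict = _b64_xls_to_dict(encoded_upload)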
                list_name, village_code, village_label, village_label,
                ao, region, district))
            self._set_ao(village)
            self._set_region_info(region, village, ao)
            self._set_district_info(district, village, region, ao)
        return data

    def write_location_cascade_csv(self, filename='location.csv'):
        headers = ('list name', 'name', 'label:English', 'label:Somali',
                   'ao', 'region', 'district')
        with codecs.open(filename, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(headers)
            writer.writerows(self.ao_cascade())
            writer.writerows(self.region_cascade())
            writer.writerows(self.district_cascade())
            writer.writerows(self.village_cascade())


if __name__ == '__main__':
    path = 'data.xlsx'
    doc = xls_to_dict(path)
    vdb = doc['VDB']
    districts = doc['District Names']
    regions = doc['Region Name']
    g = GenerateLocationCascade(regions, vdb)
    g.write_location_cascade_csv()
            for j in i['children']:
                if 'label' in j:
                    j['group'] = g
                    l.append(add_question(j))
        else:
            i['group'] = g
            l.append(add_question(i))
    return l


if __name__ == '__main__':
    XLS_FILE = 'data/codebook_xls.xls'

    # we can convert the xls to a better-shaped json with pyxform
    from pyxform.xls2json_backends import xls_to_dict
    workbook_dict = xls_to_dict(XLS_FILE)

    from pyxform.xls2json import workbook_to_json
    workbook_json = workbook_to_json(workbook_dict, form_name=None,
                                     default_language=u"default",
                                     warnings=None)

    # cast the questions into a list of objects, each with type, name, label,
    # group and choices
    l = feed_workbook(workbook_json['children'])

    rows = ""
    for i in l:
        rows = rows + i.to_html()

    import html_utils as h
    html = h.html_head + h.html_body_header + ' '.join(
def parse_xlsform(fp):
    warnings = []
    json_survey = xls2json.workbook_to_json(
        xls_to_dict(fp), None, 'default', warnings)
    survey = builder.create_survey_element_from_dict(json_survey)
    return survey.xml().toprettyxml(indent=' ')
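A hedged usage sketch of parse_xlsform above, assuming the names it relies on come from pyxform; the imports and the form path are assumptions added for illustration, not taken from the source.

# Usage sketch (assumed imports; "household_survey.xlsx" is a hypothetical path).
from pyxform import builder, xls2json
from pyxform.xls2json_backends import xls_to_dict

with open("household_survey.xlsx", "rb") as fp:
    print(parse_xlsform(fp))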