def convert_csv_to_xls(csv_repr):
    """
    Convert a "sheeted" CSV representation of an XLSForm into an in-memory
    XLS workbook, returned as a seekable file-like object.

    This method should be moved into pyxform
    """
    # FIX: blank lines in the CSV become empty sheet names downstream and
    # make `workbook.add_sheet()` raise `invalid worksheet name ''`; strip
    # them out before any further processing
    csv_repr = ''.join([
        line for line in csv_repr.splitlines(True)
        if line.strip().strip('"')
    ])

    def _add_contents_to_sheet(sheet, contents):
        # Union of column headers across all rows, preserving first-seen
        # order
        cols = []
        for row in contents:
            for key in row.keys():
                if key not in cols:
                    cols.append(key)
        for ci, col in enumerate(cols):
            sheet.write(0, ci, col)
        for ri, row in enumerate(contents):
            for ci, col in enumerate(cols):
                val = row.get(col, None)
                if val:
                    sheet.write(ri + 1, ci, val)

    # Round-trip through UTF-8 to validate the input encoding
    encoded_csv = csv_repr.decode("utf-8").encode("utf-8")
    dict_repr = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded_csv))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            cur_sheet = workbook.add_sheet(sheet_name)
            _add_contents_to_sheet(cur_sheet, dict_repr[sheet_name])
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    string_io = StringIO.StringIO()
    workbook.save(string_io)
    string_io.seek(0)
    return string_io
def convert_csv_to_xls(csv_repr):
    """ This method should be moved into pyxform """
    def _add_contents_to_sheet(sheet, contents):
        # Collect the union of headers across rows, first-seen order
        headers = []
        for record in contents:
            for field in record.keys():
                if field not in headers:
                    headers.append(field)
        for col_idx, header in enumerate(headers):
            sheet.write(0, col_idx, header)
        for row_idx, record in enumerate(contents):
            for col_idx, header in enumerate(headers):
                cell = record.get(header, None)
                if cell:
                    sheet.write(row_idx + 1, col_idx, cell)

    # Round-trip through UTF-8 to validate the input encoding
    encoded_csv = csv_repr.decode("utf-8").encode("utf-8")
    dict_repr = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded_csv))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            _add_contents_to_sheet(workbook.add_sheet(sheet_name),
                                   dict_repr[sheet_name])
    string_io = StringIO.StringIO()
    workbook.save(string_io)
    string_io.seek(0)
    return string_io
def csv_to_xls(csv_repr):
    """
    Convert a "sheeted" CSV representation of an XLSForm into an in-memory
    XLS workbook, returned as a seekable file-like object.
    """
    # BUG FIX: the previous code iterated the CSV string directly
    # (`for line in csv_repr`), which yields individual *characters*, so
    # the filter stripped every whitespace and quote character from the
    # data instead of removing blank lines. Iterate over real lines
    # (keeping their endings) and drop only blank ones, which would
    # otherwise produce an `invalid worksheet name ''` error downstream.
    csv_repr = ''.join([
        line for line in csv_repr.splitlines(True)
        if line.strip().strip('"')
    ])

    def _add_contents_to_sheet(sheet, contents):
        # Union of column headers across all rows, preserving first-seen
        # order
        cols = []
        for row in contents:
            for key in row.keys():
                if key not in cols:
                    cols.append(key)
        for ci, col in enumerate(cols):
            sheet.write(0, ci, col)
        for ri, row in enumerate(contents):
            for ci, col in enumerate(cols):
                val = row.get(col, None)
                if val:
                    sheet.write(ri + 1, ci, val)

    # Round-trip through UTF-8 to validate the input encoding
    encoded_csv = csv_repr.decode("utf-8").encode("utf-8")
    dict_repr = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded_csv))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            cur_sheet = workbook.add_sheet(sheet_name)
            _add_contents_to_sheet(cur_sheet, dict_repr[sheet_name])
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    string_io = StringIO.StringIO()
    workbook.save(string_io)
    string_io.seek(0)
    return string_io
def test_a_unicode_csv_works(self):
    """
    Simply tests that xls2json_backends.csv_to_dict does not have a
    problem with a csv with unicode characters
    """
    fixture_path = utils.path_to_text_fixture("utf_csv.csv")
    parsed = csv_to_dict(fixture_path)
    serialized = json.dumps(parsed)
    # json.dumps escapes non-ASCII, so the emoji appears as the literal
    # six-character sequence \ud83c
    self.assertTrue("\\ud83c" in serialized)
def test_a_unicode_csv_works(self):
    """
    Simply tests that xls2json_backends.csv_to_dict does not have a
    problem with a csv with unicode characters
    """
    utf_csv_path = utils.path_to_text_fixture("utf_csv.csv")
    dict_value = csv_to_dict(utf_csv_path)
    # FIX: `json.dumps` (ensure_ascii=True by default) emits non-ASCII
    # characters as literal `\uXXXX` escapes, i.e. the six characters
    # backslash-u-d-8-3-c. The previous literal "\ud83c" only matched that
    # text on Python 2 (where \u is not an escape in byte strings); on
    # Python 3 it denotes a lone surrogate code point that can never occur
    # in the ASCII output of `dumps`. Escape the backslash explicitly.
    self.assertTrue("\\ud83c" in json.dumps(dict_value))
def convert_csv_to_ss_structure(csv_repr):
    """
    Parse a "sheeted" CSV string into {sheet_name: rows}, dropping the
    synthetic "*_header" entries added by pyxform.xls2json_backends.
    """
    dict_repr = dict(
        xls2json_backends.csv_to_dict(
            StringIO.StringIO(csv_repr.encode("utf-8"))))
    # FIX: snapshot the keys before deleting — removing entries while
    # iterating a live key view raises RuntimeError on Python 3 (Python 2
    # only worked because keys() returned a list)
    for key in list(dict_repr.keys()):
        if re.match('.*_header$', key):
            del dict_repr[key]
    return dict_repr
def create_survey_from_csv_text(
    csv_text,
    default_name='KoBoFormSurvey',
    default_language=u'default',
    warnings=None,
):
    """Build a survey from a "sheeted" CSV string representation."""
    csv_stream = StringIO.StringIO(csv_text.encode("utf-8"))
    workbook_dict = xls2json_backends.csv_to_dict(csv_stream)
    return create_survey_from_ss_struct(
        workbook_dict, default_name, default_language, warnings)
def test_equivalency(self):
    """Each fixture must parse identically from its .xls and .csv forms."""
    equivalent_fixtures = ['group', 'loop', #'gps',
                           'specify_other', 'include', 'text_and_integer', \
                           'include_json', 'yes_or_no_question']
    for fixture in equivalent_fixtures:
        from_xls = xls_to_dict(
            utils.path_to_text_fixture("%s.xls" % fixture))
        from_csv = csv_to_dict(
            utils.path_to_text_fixture("%s.csv" % fixture))
        self.assertEqual(from_csv, from_xls)
def _parse_input(self):
    """
    Load the workbook at ``self._path`` into ``self._dict`` and run the
    normalization passes over it.
    """
    if self.filetype == "xls":
        self._dict = xls_to_dict(self._path)
    elif self.filetype == "csv":
        self._dict = csv_to_dict(self._path)
    # NOTE(review): any other filetype leaves self._dict untouched here —
    # presumably validated upstream; confirm
    self._sheet_names = self._dict.keys()
    # Post-processing passes; order matters (grouping happens last)
    self._set_choices_and_columns_sheet_name()
    self._strip_unicode_values()
    self._fix_int_values()
    self._group_dictionaries()
def test_equivalency(self):
    """The .xls and .csv versions of each fixture must parse the same."""
    equivalent_fixtures = ['group', 'loop', #'gps',
                           'specify_other', 'include', 'text_and_integer', \
                           'include_json', 'yes_or_no_question']
    # Show full diffs on failure
    self.maxDiff = None
    for fixture in equivalent_fixtures:
        xls_result = xls_to_dict(
            utils.path_to_text_fixture("%s.xls" % fixture))
        csv_result = csv_to_dict(
            utils.path_to_text_fixture("%s.csv" % fixture))
        self.assertEqual(csv_result, xls_result)
def convert_csv_to_xls(csv_repr):
    """Convert a "sheeted" CSV string into an in-memory XLS workbook."""
    csv_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    dict_repr = xls2json_backends.csv_to_dict(csv_stream)
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if re.match(r".*_header$", sheet_name):
            continue
        cur_sheet = workbook.add_sheet(sheet_name)
        _add_contents_to_sheet(cur_sheet, dict_repr[sheet_name])
    output = StringIO.StringIO()
    workbook.save(output)
    output.seek(0)
    return output
def test_xls_to_dict(self):
    """
    (CSV -> XLS -> dict) must equal (CSV -> dict).
    """
    # convert a CSV to XLS using our new method
    generated_xls = pyxform_utils.convert_csv_to_xls(simple_yn)
    # convert our new XLS to dict (using pyxform)
    from_xls = xls2json_backends.xls_to_dict(generated_xls)
    # convert the original CSV to dict (using pyxform)
    from_csv = xls2json_backends.csv_to_dict(StringIO(simple_yn))
    # Our function, "pyxform_utils.csv_to_xls" performs (CSV -> XLS).
    # This assertion tests equivalence of
    #   (CSV)        -> dict_representation
    #   (CSV -> XLS) -> dict_representation
    self.assertEqual(from_csv, from_xls)
def publish(self, user, id_string=None):
    """
    Validate the form and publish the XLSForm it references on behalf of
    `user`.

    The XLSForm may arrive as pasted CSV text ('text_xls_form'), an
    uploaded file ('xls_file'), or a URL ('xls_url' / 'dropbox_xls_url');
    whichever is present is saved to default storage and handed to
    `publish_xls_form()`.

    NOTE(review): implicitly returns None when the form is invalid.
    """
    if self.is_valid():
        # If a text (csv) representation of the xlsform is present,
        # this will save the file and pass it instead of the 'xls_file'
        # field.
        if 'text_xls_form' in self.cleaned_data\
                and self.cleaned_data['text_xls_form'].strip():
            csv_data = self.cleaned_data['text_xls_form']
            # "Note that any text-based field - such as CharField or
            # EmailField - always cleans the input into a Unicode string"
            # (https://docs.djangoproject.com/en/1.8/ref/forms/api/#django.forms.Form.cleaned_data).
            csv_data = csv_data.encode('utf-8')
            # requires that csv forms have a settings with an id_string or
            # form_id
            _sheets = csv_to_dict(StringIO(csv_data))
            try:
                _settings = _sheets['settings'][0]
                if 'id_string' in _settings:
                    _name = '%s.csv' % _settings['id_string']
                else:
                    _name = '%s.csv' % _settings['form_id']
            except (KeyError, IndexError) as e:
                raise ValueError('CSV XLSForms must have a settings sheet'
                                 ' and id_string or form_id')
            cleaned_xls_file = \
                default_storage.save(
                    upload_to(None, _name, user.username),
                    ContentFile(csv_data))
        else:
            cleaned_xls_file = self.cleaned_data['xls_file']
        if not cleaned_xls_file:
            # Fall back to fetching the XLS from a URL (plain or Dropbox)
            cleaned_url = self.cleaned_data['xls_url']
            if cleaned_url.strip() == '':
                cleaned_url = self.cleaned_data['dropbox_xls_url']
            cleaned_xls_file = urlparse(cleaned_url)
            # Derive a storage filename from the last two URL path segments
            cleaned_xls_file = \
                '_'.join(cleaned_xls_file.path.split('/')[-2:])
            if cleaned_xls_file[-4:] != '.xls':
                cleaned_xls_file += '.xls'
            cleaned_xls_file = \
                upload_to(None, cleaned_xls_file, user.username)
            self.validate(cleaned_url)
            xls_data = ContentFile(urllib2.urlopen(cleaned_url).read())
            cleaned_xls_file = \
                default_storage.save(cleaned_xls_file, xls_data)
        # publish the xls
        return publish_xls_form(cleaned_xls_file, user, id_string)
def publish(self, user, id_string=None):
    """
    Validate the form and publish the XLSForm it references on behalf of
    `user`.

    The XLSForm may arrive as pasted CSV text ('text_xls_form'), an
    uploaded file ('xls_file'), or a URL ('xls_url' / 'dropbox_xls_url');
    whichever is present is saved to default storage and handed to
    `publish_xls_form()`.

    NOTE(review): implicitly returns None when the form is invalid.
    """
    if self.is_valid():
        # If a text (csv) representation of the xlsform is present,
        # this will save the file and pass it instead of the 'xls_file'
        # field.
        if 'text_xls_form' in self.cleaned_data\
                and self.cleaned_data['text_xls_form'].strip():
            csv_data = self.cleaned_data['text_xls_form']
            # "Note that any text-based field - such as CharField or
            # EmailField - always cleans the input into a Unicode string"
            # (https://docs.djangoproject.com/en/1.8/ref/forms/api/#django.forms.Form.cleaned_data).
            csv_data = csv_data.encode('utf-8')
            # requires that csv forms have a settings with an id_string or
            # form_id
            _sheets = csv_to_dict(StringIO(csv_data))
            try:
                _settings = _sheets['settings'][0]
                if 'id_string' in _settings:
                    _name = '%s.csv' % _settings['id_string']
                else:
                    _name = '%s.csv' % _settings['form_id']
            except (KeyError, IndexError) as e:
                raise ValueError('CSV XLSForms must have a settings sheet'
                                 ' and id_string or form_id')
            cleaned_xls_file = \
                default_storage.save(
                    upload_to(None, _name, user.username),
                    ContentFile(csv_data))
        else:
            cleaned_xls_file = self.cleaned_data['xls_file']
        if not cleaned_xls_file:
            # Fall back to fetching the XLS from a URL (plain or Dropbox)
            cleaned_url = self.cleaned_data['xls_url']
            if cleaned_url.strip() == u'':
                cleaned_url = self.cleaned_data['dropbox_xls_url']
            cleaned_xls_file = urlparse(cleaned_url)
            # Derive a storage filename from the last two URL path segments
            cleaned_xls_file = \
                '_'.join(cleaned_xls_file.path.split('/')[-2:])
            if cleaned_xls_file[-4:] != '.xls':
                cleaned_xls_file += '.xls'
            cleaned_xls_file = \
                upload_to(None, cleaned_xls_file, user.username)
            self.validate(cleaned_url)
            xls_data = ContentFile(urllib2.urlopen(cleaned_url).read())
            cleaned_xls_file = \
                default_storage.save(cleaned_xls_file, xls_data)
        # publish the xls
        return publish_xls_form(cleaned_xls_file, user, id_string)
def test_equivalency(self):
    """A fixture's .xls and .csv sources must yield the same dict."""
    equivalent_fixtures = [
        "group",
        "loop",
        #'gps',
        "specify_other",
        "include",
        "text_and_integer",
        "include_json",
        "yes_or_no_question",
    ]
    self.maxDiff = None
    for fixture in equivalent_fixtures:
        from_xls = xls_to_dict(
            utils.path_to_text_fixture("%s.xls" % fixture))
        from_csv = csv_to_dict(
            utils.path_to_text_fixture("%s.csv" % fixture))
        self.assertEqual(from_csv, from_xls)
def test_equivalency(self):
    """Parsing must not depend on whether the source is .xls or .csv."""
    equivalent_fixtures = [
        "group",
        "loop",
        # 'gps',
        "specify_other",
        "include",
        "text_and_integer",
        "include_json",
        "yes_or_no_question",
    ]
    self.maxDiff = None
    for fixture in equivalent_fixtures:
        xls_result = xls_to_dict(
            utils.path_to_text_fixture("%s.xls" % fixture))
        csv_result = csv_to_dict(
            utils.path_to_text_fixture("%s.csv" % fixture))
        self.assertEqual(csv_result, xls_result)
def _xform_to_asset_content(xform):
    """Fetch the XLSForm for `xform` from KC and convert it to KPI content."""
    # Load the xlsform from the KC API to avoid having to deal
    # with S3 credentials, etc.
    user = xform.user
    response = _kc_forms_api_request(user.auth_token, xform.pk, xlsform=True)
    status = response.status_code
    if status != 200:
        message = u'unable to load xls ({})'.format(status)
        # 404 is only a warning; any other non-200 status is an error
        if status == 404:
            raise SyncKCXFormsWarning(message)
        raise SyncKCXFormsError(message)

    # Convert the xlsform to KPI JSON
    xls_io = io.BytesIO(response.content)
    if xform.xls.name.endswith('.csv'):
        dict_repr = xls2json_backends.csv_to_dict(xls_io)
        xls_io = _convert_dict_to_xls(dict_repr)
    asset_content = _xlsform_to_kpi_content_schema(xls_io)
    return asset_content
def _xform_to_asset_content(xform):
    """Fetch the XLSForm for `xform` from KC and convert it to KPI content."""
    # Load the xlsform from the KC API to avoid having to deal
    # with S3 credentials, etc.
    response = _kc_forms_api_request(
        xform.user.auth_token, xform.pk, xlsform=True)
    message = u'unable to load xls ({})'.format(response.status_code)
    if response.status_code == 404:
        # Missing XLS is only a warning
        raise SyncKCXFormsWarning(message)
    if response.status_code != 200:
        raise SyncKCXFormsError(message)

    # Convert the xlsform to KPI JSON
    xls_io = io.BytesIO(response.content)
    if xform.xls.name.endswith('.csv'):
        xls_io = _convert_dict_to_xls(xls2json_backends.csv_to_dict(xls_io))
    return _xlsform_to_kpi_content_schema(xls_io)
def parse_file_to_workbook_dict(path, file_object=None):
    """
    Given a xls or csv workbook file use xls2json_backends to create a
    python workbook_dict.

    workbook_dicts are organized as follows:
    {sheetname : [{column_header : column_value_in_array_indexed_row}]}
    """
    filename = os.path.split(path)[1]
    if not filename:
        raise PyXFormError("No filename.")
    extension = os.path.splitext(filename)[1]
    if not extension:
        raise PyXFormError("No extension.")
    # Prefer the already-open file object when one was supplied
    source = path if file_object is None else file_object
    if extension in (".xls", ".xlsx"):
        return xls_to_dict(source)
    if extension == ".csv":
        return csv_to_dict(source)
    raise PyXFormError("File was not recognized")
def convert_csv_to_xls(csv_repr):
    """
    This method should be moved into pyxform
    """
    # There should not be any blank lines in the "sheeted" CSV
    # representation, but often times there are. Strip them out before any
    # further processing; otherwise, `convert_csv_to_xls()` will raise an
    # `invalid worksheet name ''` exception
    non_blank_lines = [
        line for line in csv_repr.splitlines(True)
        if line.strip().strip('"')
    ]
    csv_repr = ''.join(non_blank_lines)

    def _add_contents_to_sheet(sheet, contents):
        # Union of column headers, preserving first-seen order
        headers = []
        for record in contents:
            for field in record.keys():
                if field not in headers:
                    headers.append(field)
        for col_index, header in enumerate(headers):
            sheet.write(0, col_index, header)
        for row_index, record in enumerate(contents):
            for col_index, header in enumerate(headers):
                cell = record.get(header, None)
                if cell:
                    sheet.write(row_index + 1, col_index, cell)

    dict_repr = xls2json_backends.csv_to_dict(io.BytesIO(csv_repr))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            _add_contents_to_sheet(workbook.add_sheet(sheet_name),
                                   dict_repr[sheet_name])
    bytes_io = io.BytesIO()
    workbook.save(bytes_io)
    bytes_io.seek(0)
    return bytes_io
def convert_csv_to_xls(csv_repr):
    """
    This method should be moved into pyxform
    """
    # There should not be any blank lines in the "sheeted" CSV
    # representation, but often times there are. Strip them out before any
    # further processing; otherwise, `convert_csv_to_xls()` will raise an
    # `invalid worksheet name u''` exception
    kept_lines = [line for line in csv_repr.splitlines(True)
                  if line.strip().strip('"')]
    csv_repr = ''.join(kept_lines)

    def _add_contents_to_sheet(sheet, contents):
        # Union of column headers across rows, first-seen order
        headers = []
        for record in contents:
            for field in record.keys():
                if field not in headers:
                    headers.append(field)
        for col_idx, header in enumerate(headers):
            sheet.write(0, col_idx, header)
        for row_idx, record in enumerate(contents):
            for col_idx, header in enumerate(headers):
                cell = record.get(header, None)
                if cell:
                    sheet.write(row_idx + 1, col_idx, cell)

    # Round-trip through UTF-8 to validate the input encoding
    encoded_csv = csv_repr.decode("utf-8").encode("utf-8")
    dict_repr = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded_csv))
    workbook = xlwt.Workbook()
    for sheet_name in dict_repr.keys():
        # pyxform.xls2json_backends adds "_header" items for each sheet.....
        if not re.match(r".*_header$", sheet_name):
            _add_contents_to_sheet(workbook.add_sheet(sheet_name),
                                   dict_repr[sheet_name])
    # TODO: As XLS files are binary, I believe this should be `io.BytesIO()`.
    string_io = StringIO.StringIO()
    workbook.save(string_io)
    string_io.seek(0)
    return string_io
def convert_csv_to_xls(csv_repr):
    """Convert a "sheeted" CSV string to an XLS workbook via a dict."""
    csv_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    dict_repr = xls2json_backends.csv_to_dict(csv_stream)
    return convert_dict_to_xls(dict_repr)
def _csv_to_dict(content):
    """
    Parse a "sheeted" CSV string into {sheet_name: rows}, discarding the
    synthetic "*_header" sheets that pyxform's csv_to_dict adds.
    """
    sheets = csv_to_dict(StringIO(content.encode('utf-8')))
    return {
        name: rows for (name, rows) in sheets.items()
        if not re.search(r'_header$', name)
    }
def create_survey_from_csv_text(csv_text,
                                default_name='KoBoFormSurvey',
                                default_language=u'default',
                                warnings=None,
                                ):
    """Build a survey from CSV text by way of a pyxform workbook dict."""
    encoded = csv_text.encode("utf-8")
    workbook_dict = xls2json_backends.csv_to_dict(StringIO.StringIO(encoded))
    return create_survey_from_ss_struct(
        workbook_dict, default_name, default_language, warnings)
def create_survey_from_csv_text(csv_text,
                                default_name='KoBoFormSurvey',
                                default_language=u'default',
                                warnings=None,
                                ):
    """Parse CSV text into a pyxform survey element."""
    workbook_dict = xls2json_backends.csv_to_dict(
        StringIO.StringIO(csv_text.encode("utf-8")))
    survey_json = xls2json.workbook_to_json(
        workbook_dict, default_name, default_language, warnings)
    # pyxform requires a name; reuse the id_string
    survey_json[u'name'] = survey_json[u'id_string']
    return builder.create_survey_element_from_dict(survey_json)
def handle(self, *args, **options):
    """
    Sync KoBoCAT XForms into KPI assets for users who prefer the KPI form
    builder (or for all users / a single user, per the command options),
    using timestamp comparison to decide whether an existing asset needs
    updating.
    """
    if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
        raise ImproperlyConfigured(
            'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
            'configured before using this command'
        )
    if options.get('quiet'):
        # Do not output anything
        def print_str(string):
            pass
    else:
        # Output status messages
        def print_str(string):
            print string

    def print_tabular(*args):
        # One tab-separated status row per form processed
        print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

    users = User.objects.all()
    print_str('%d total users' % users.count())
    # A specific user or everyone?
    if options.get('username'):
        users = User.objects.filter(username=options.get('username'))
    print_str('%d users selected' % users.count())
    # Only users who prefer KPI or all users?
    if not options.get('all_users'):
        users = users.filter(
            models.Q(formbuilderpreference__preferred_builder=
                     FormBuilderPreference.KPI) |
            models.Q(formbuilderpreference=None)  # KPI is the default now
        )
        print_str('%d of selected users prefer KPI' % users.count())
    # We'll be copying the date fields from KC, so don't auto-update them
    _set_auto_field_update(Asset, "date_created", False)
    _set_auto_field_update(Asset, "date_modified", False)
    for user in users:
        (token, created) = Token.objects.get_or_create(user=user)
        existing_surveys = user.assets.filter(asset_type='survey')
        # Each asset that the user has already deployed to KC should have a
        # form uuid stored in its deployment data
        kpi_deployed_uuids = {}
        for existing_survey in existing_surveys:
            dd = existing_survey._deployment_data
            if 'backend_response' in dd:
                kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                    existing_survey.pk
        # Use our stub model to access KC's XForm objects
        xforms = user.xforms.all()
        for xform in xforms:
            try:
                update_existing = False
                if xform.uuid in kpi_deployed_uuids:
                    # This KC form already has a corresponding KPI asset,
                    # but the user may have directly updated the form on KC
                    # after deploying from KPI. If so, then the KPI asset
                    # must be updated with the contents of the KC form
                    asset = user.assets.get(
                        pk=kpi_deployed_uuids[xform.uuid])
                    time_diff = xform.date_modified - asset.date_modified
                    # Format the timedelta in a sane way, per
                    # http://stackoverflow.com/a/8408947
                    if time_diff < datetime.timedelta(0):
                        time_diff_str = '-{}'.format(-time_diff)
                    else:
                        time_diff_str = '+{}'.format(time_diff)
                    # If KC timestamp is not sufficiently ahead of the KPI
                    # timestamp, we assume the KC form content was not
                    # updated since the last KPI deployment
                    if time_diff <= TIMESTAMP_DIFFERENCE_TOLERANCE:
                        print_tabular(
                            'NOOP',
                            user.username,
                            xform.id_string,
                            asset.uid,
                            time_diff_str
                        )
                        continue
                    else:
                        update_existing = True
                # Load the xlsform from the KC API to avoid having to deal
                # with S3 credentials, etc.
                response = kc_forms_api_request(
                    token, xform.pk, xlsform=True)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        u'unable to load xls ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    logging.warning(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))
                    continue
                # Convert the xlsform to KPI JSON
                xls_io = io.BytesIO(response.content)
                if xform.xls.name.endswith('.csv'):
                    dict_repr = xls2json_backends.csv_to_dict(xls_io)
                    xls_io = convert_dict_to_xls(dict_repr)
                asset_content = xlsform_to_kpi_content_schema(xls_io)
                # Get the form data from KC
                response = kc_forms_api_request(token, xform.pk)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        'unable to load form data ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    logging.error(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))
                    continue
                deployment_data = response.json()
                with transaction.atomic():
                    if not update_existing:
                        # This is an orphaned KC form. Build a new asset to
                        # match it
                        asset = Asset()
                        asset.asset_type = 'survey'
                        asset.owner = user
                        asset.date_created = dateutil.parser.parse(
                            deployment_data['date_created'])
                    # Update the asset's modification date and content
                    # regardless of whether it's a new asset or an existing
                    # one being updated
                    asset.date_modified = dateutil.parser.parse(
                        deployment_data['date_modified'])
                    asset.content = asset_content
                    asset.save()
                    # If this user already has an identically-named asset,
                    # append `xform.id_string` in parentheses for
                    # clarification
                    if Asset.objects.filter(
                            owner=user, name=asset.name).exists():
                        asset.name = u'{} ({})'.format(
                            asset.name, xform.id_string)
                        # `store_data()` handles saving the asset
                    # Copy the deployment-related data
                    kc_deployment = KobocatDeploymentBackend(asset)
                    kc_deployment.store_data({
                        'backend': 'kobocat',
                        'identifier': kc_deployment.make_identifier(
                            user.username, xform.id_string),
                        'active': xform.downloadable,
                        'backend_response': deployment_data,
                        'version': asset.version_id
                    })
                if update_existing:
                    print_tabular(
                        'UPDATE',
                        user.username,
                        xform.id_string,
                        asset.uid,
                        time_diff_str
                    )
                else:
                    print_tabular(
                        'CREATE',
                        user.username,
                        xform.id_string,
                        asset.uid,
                    )
            except Exception as e:
                # Catch-all so one bad form does not abort the whole sync
                error_information = [
                    'FAIL',
                    user.username,
                    xform.id_string,
                    repr(e)
                ]
                print_tabular(*error_information)
                logging.exception(u'sync_kobocat_xforms: {}'.format(
                    u', '.join(error_information)))
    # Restore normal auto-update behavior for the date fields
    _set_auto_field_update(Asset, "date_created", True)
    _set_auto_field_update(Asset, "date_modified", True)
def handle(self, *args, **options):
    """
    Sync KoBoCAT XForms into KPI assets, using content hashes (falling
    back to timestamp comparison) to decide whether an existing asset
    needs updating.
    """
    if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
        raise ImproperlyConfigured(
            'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
            'configured before using this command')
    if options.get('quiet'):
        # Do not output anything
        def print_str(string):
            pass
    else:
        # Output status messages
        def print_str(string):
            print string

    def print_tabular(*args):
        # One tab-separated status row per form processed
        print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

    users = User.objects.all()
    print_str('%d total users' % users.count())
    # A specific user or everyone?
    if options.get('username'):
        users = User.objects.filter(username=options.get('username'))
    print_str('%d users selected' % users.count())
    # Only users who prefer KPI or all users?
    if not options.get('all_users'):
        users = users.filter(
            models.Q(formbuilderpreference__preferred_builder=
                     FormBuilderPreference.KPI) |
            models.Q(formbuilderpreference=None)  # KPI is the default now
        )
        print_str('%d of selected users prefer KPI' % users.count())
    # We'll be copying the date fields from KC, so don't auto-update them
    _set_auto_field_update(Asset, "date_created", False)
    _set_auto_field_update(Asset, "date_modified", False)
    for user in users:
        (token, created) = Token.objects.get_or_create(user=user)
        existing_surveys = user.assets.filter(asset_type='survey')
        # Each asset that the user has already deployed to KC should have a
        # form uuid stored in its deployment data
        kpi_deployed_uuids = {}
        for existing_survey in existing_surveys:
            dd = existing_survey._deployment_data
            if 'backend_response' in dd:
                kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                    existing_survey.pk
        # Use our stub model to access KC's XForm objects
        xforms = user.xforms.all()
        for xform in xforms:
            try:
                if xform.uuid in kpi_deployed_uuids:
                    # This KC form already has a corresponding KPI asset,
                    # but the user may have directly updated the form on KC
                    # after deploying from KPI. If so, then the KPI asset
                    # must be updated with the contents of the KC form
                    asset = user.assets.get(
                        pk=kpi_deployed_uuids[xform.uuid])
                    non_content_operation = 'NOOP'
                    # First, compare hashes to see if the KC form content
                    # has changed since the last deployment
                    backend_response = asset._deployment_data[
                        'backend_response']
                    if 'hash' in backend_response:
                        update_existing = backend_response['hash'] \
                            != xform.prefixed_hash
                        diff_str = 'hashes {}'.format(
                            'differ' if update_existing else 'match')
                    else:
                        # KC's `date_modified` is nearly useless, because
                        # every new submission changes it to the current
                        # time, and when there are no submissions, merely
                        # loading the projects list does the same (see
                        # https://github.com/kobotoolbox/kpi/issues/661#issuecomment-218073765).
                        # Still, in cases where KPI does not yet know the
                        # hash, comparing timestamps can sometimes save us
                        # from creating duplicate asset versions
                        time_diff = xform.date_modified - \
                            asset.date_modified
                        # Format the timedelta in a sane way, per
                        # http://stackoverflow.com/a/8408947
                        if time_diff < datetime.timedelta(0):
                            diff_str = '-{}'.format(-time_diff)
                        else:
                            diff_str = '+{}'.format(time_diff)
                        # If KC timestamp is sufficiently ahead of the KPI
                        # timestamp, we assume the KC form content was
                        # updated since the last KPI deployment
                        if time_diff > TIMESTAMP_DIFFERENCE_TOLERANCE:
                            update_existing = True
                        else:
                            update_existing = False
                            # We don't need an update, but we should copy
                            # the hash from KC to KPI for future reference
                            backend_response['hash'] = xform.prefixed_hash
                            asset.save(adjust_content=False)
                            print_tabular('HASH', user.username,
                                          xform.id_string, asset.uid,
                                          diff_str)
                    if not update_existing:
                        # Check to see if the asset name matches the xform
                        # title. Per #857, the xform title takes priority.
                        # The first check is a cheap one:
                        if asset.name != xform.title:
                            # Now do a full check of the name
                            desired_name = make_name_for_asset(
                                asset, xform)
                            if asset.name != desired_name:
                                asset.name = desired_name
                                asset.save(adjust_content=False)
                                non_content_operation = 'NAME'
                        # No further update needed. Skip to the next form
                        print_tabular(non_content_operation, user.username,
                                      xform.id_string, asset.uid, diff_str)
                        continue
                else:
                    update_existing = False
                # Load the xlsform from the KC API to avoid having to deal
                # with S3 credentials, etc.
                response = kc_forms_api_request(token, xform.pk,
                                                xlsform=True)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        u'unable to load xls ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    logging.warning(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))
                    continue
                # Convert the xlsform to KPI JSON
                xls_io = io.BytesIO(response.content)
                if xform.xls.name.endswith('.csv'):
                    dict_repr = xls2json_backends.csv_to_dict(xls_io)
                    xls_io = convert_dict_to_xls(dict_repr)
                asset_content = xlsform_to_kpi_content_schema(xls_io)
                # Get the form data from KC
                response = kc_forms_api_request(token, xform.pk)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        'unable to load form data ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    # Don't spam the log when KC responds with 404, which
                    # indicates that the form's XLS is missing from S3
                    if response.status_code != 404:
                        logging.error(u'sync_kobocat_xforms: {}'.format(
                            u', '.join(error_information)))
                    continue
                deployment_data = response.json()
                with transaction.atomic():
                    if not update_existing:
                        # This is an orphaned KC form. Build a new asset to
                        # match it
                        asset = Asset(asset_type='survey', owner=user)
                        asset.date_created = dateutil.parser.parse(
                            deployment_data['date_created'])
                    # Update the asset's modification date and content
                    # regardless of whether it's a new asset or an existing
                    # one being updated
                    asset.date_modified = dateutil.parser.parse(
                        deployment_data['date_modified'])
                    # we may want to do standardize the content (by calling
                    # `asset._standardize(asset_content)`), but this also
                    # could cause errors on unexpected forms so we can
                    # defer this until later.
                    asset.content = asset_content
                    asset.save(adjust_content=False)
                    asset.name = make_name_for_asset(asset, xform)
                    # Copy the deployment-related data
                    kc_deployment = KobocatDeploymentBackend(asset)
                    kc_deployment.store_data({
                        'backend': 'kobocat',
                        'identifier': kc_deployment.make_identifier(
                            user.username, xform.id_string),
                        'active': xform.downloadable,
                        'backend_response': deployment_data,
                        'version': asset.version_id
                    })
                    asset._mark_latest_version_as_deployed()
                    asset.save()
                if update_existing:
                    print_tabular('UPDATE', user.username, xform.id_string,
                                  asset.uid, diff_str)
                else:
                    print_tabular(
                        'CREATE',
                        user.username,
                        xform.id_string,
                        asset.uid,
                    )
            except Exception as e:
                # Catch-all so one bad form does not abort the whole sync
                error_information = [
                    'FAIL',
                    user.username,
                    xform.id_string,
                    repr(e)
                ]
                print_tabular(*error_information)
                logging.exception(u'sync_kobocat_xforms: {}'.format(
                    u', '.join(error_information)))
    # Restore normal auto-update behavior for the date fields
    _set_auto_field_update(Asset, "date_created", True)
    _set_auto_field_update(Asset, "date_modified", True)
def convert_csv_to_xls(csv_repr):
    """Convert a "sheeted" CSV string to an XLS workbook via a dict."""
    csv_stream = StringIO.StringIO(csv_repr.encode("utf-8"))
    sheeted = xls2json_backends.csv_to_dict(csv_stream)
    return convert_dict_to_xls(sheeted)
def test_order_of_dict_values(self):
    """
    Sheet and column order from csv_to_dict must match the CSV's order.
    """
    csv_dict = xls2json_backends.csv_to_dict(
        StringIO(sample_for_ordered_columns))
    # FIX: `dict.keys()` is not indexable/comparable-as-a-list on
    # Python 3; materialize a list first. On Python 2 `list(d.keys())`
    # equals `d.keys()`, so behavior is unchanged there.
    self.assertEqual(list(csv_dict.keys())[0], "survey")
    survey = csv_dict.get("survey")
    self.assertEqual(list(survey[0].keys()),
                     ["name", "type", "label", "required"])
def handle(self, *args, **options):
    """
    Sync KoBoCAT XForms into KPI assets for users who prefer the KPI form
    builder (or for all users / a single user, per the command options),
    using timestamp comparison to decide whether an existing asset needs
    updating.
    """
    if not settings.KOBOCAT_URL or not settings.KOBOCAT_INTERNAL_URL:
        raise ImproperlyConfigured(
            'Both KOBOCAT_URL and KOBOCAT_INTERNAL_URL must be '
            'configured before using this command')
    if options.get('quiet'):
        # Do not output anything
        def print_str(string):
            pass
    else:
        # Output status messages
        def print_str(string):
            print string

    def print_tabular(*args):
        # One tab-separated status row per form processed
        print_str(u'\t'.join(map(lambda x: u'{}'.format(x), args)))

    users = User.objects.all()
    print_str('%d total users' % users.count())
    # A specific user or everyone?
    if options.get('username'):
        users = User.objects.filter(username=options.get('username'))
    print_str('%d users selected' % users.count())
    # Only users who prefer KPI or all users?
    if not options.get('all_users'):
        users = users.filter(
            models.Q(formbuilderpreference__preferred_builder=
                     FormBuilderPreference.KPI) |
            models.Q(formbuilderpreference=None)  # KPI is the default now
        )
        print_str('%d of selected users prefer KPI' % users.count())
    # We'll be copying the date fields from KC, so don't auto-update them
    _set_auto_field_update(Asset, "date_created", False)
    _set_auto_field_update(Asset, "date_modified", False)
    for user in users:
        (token, created) = Token.objects.get_or_create(user=user)
        existing_surveys = user.assets.filter(asset_type='survey')
        # Each asset that the user has already deployed to KC should have a
        # form uuid stored in its deployment data
        kpi_deployed_uuids = {}
        for existing_survey in existing_surveys:
            dd = existing_survey._deployment_data
            if 'backend_response' in dd:
                kpi_deployed_uuids[dd['backend_response']['uuid']] = \
                    existing_survey.pk
        # Use our stub model to access KC's XForm objects
        xforms = user.xforms.all()
        for xform in xforms:
            try:
                update_existing = False
                if xform.uuid in kpi_deployed_uuids:
                    # This KC form already has a corresponding KPI asset,
                    # but the user may have directly updated the form on KC
                    # after deploying from KPI. If so, then the KPI asset
                    # must be updated with the contents of the KC form
                    asset = user.assets.get(
                        pk=kpi_deployed_uuids[xform.uuid])
                    time_diff = xform.date_modified - asset.date_modified
                    # Format the timedelta in a sane way, per
                    # http://stackoverflow.com/a/8408947
                    if time_diff < datetime.timedelta(0):
                        time_diff_str = '-{}'.format(-time_diff)
                    else:
                        time_diff_str = '+{}'.format(time_diff)
                    # If KC timestamp is not sufficiently ahead of the KPI
                    # timestamp, we assume the KC form content was not
                    # updated since the last KPI deployment
                    if time_diff <= TIMESTAMP_DIFFERENCE_TOLERANCE:
                        print_tabular('NOOP', user.username,
                                      xform.id_string, asset.uid,
                                      time_diff_str)
                        continue
                    else:
                        update_existing = True
                # Load the xlsform from the KC API to avoid having to deal
                # with S3 credentials, etc.
                response = kc_forms_api_request(token, xform.pk,
                                                xlsform=True)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        u'unable to load xls ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    logging.warning(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))
                    continue
                # Convert the xlsform to KPI JSON
                xls_io = io.BytesIO(response.content)
                if xform.xls.name.endswith('.csv'):
                    dict_repr = xls2json_backends.csv_to_dict(xls_io)
                    xls_io = convert_dict_to_xls(dict_repr)
                asset_content = xlsform_to_kpi_content_schema(xls_io)
                # Get the form data from KC
                response = kc_forms_api_request(token, xform.pk)
                if response.status_code != 200:
                    error_information = [
                        'FAIL',
                        user.username,
                        xform.id_string,
                        'unable to load form data ({})'.format(
                            response.status_code)
                    ]
                    print_tabular(*error_information)
                    logging.error(u'sync_kobocat_xforms: {}'.format(
                        u', '.join(error_information)))
                    continue
                deployment_data = response.json()
                with transaction.atomic():
                    if not update_existing:
                        # This is an orphaned KC form. Build a new asset to
                        # match it
                        asset = Asset()
                        asset.asset_type = 'survey'
                        asset.owner = user
                        asset.date_created = dateutil.parser.parse(
                            deployment_data['date_created'])
                    # Update the asset's modification date and content
                    # regardless of whether it's a new asset or an existing
                    # one being updated
                    asset.date_modified = dateutil.parser.parse(
                        deployment_data['date_modified'])
                    asset.content = asset_content
                    asset.save()
                    # If this user already has an identically-named asset,
                    # append `xform.id_string` in parentheses for
                    # clarification
                    if Asset.objects.filter(owner=user,
                                            name=asset.name).exists():
                        asset.name = u'{} ({})'.format(
                            asset.name, xform.id_string)
                        # `store_data()` handles saving the asset
                    # Copy the deployment-related data
                    kc_deployment = KobocatDeploymentBackend(asset)
                    kc_deployment.store_data({
                        'backend': 'kobocat',
                        'identifier': kc_deployment.make_identifier(
                            user.username, xform.id_string),
                        'active': xform.downloadable,
                        'backend_response': deployment_data,
                        'version': asset.version_id
                    })
                if update_existing:
                    print_tabular('UPDATE', user.username,
                                  xform.id_string, asset.uid,
                                  time_diff_str)
                else:
                    print_tabular(
                        'CREATE',
                        user.username,
                        xform.id_string,
                        asset.uid,
                    )
            except Exception as e:
                # Catch-all so one bad form does not abort the whole sync
                error_information = [
                    'FAIL',
                    user.username,
                    xform.id_string,
                    repr(e)
                ]
                print_tabular(*error_information)
                logging.exception(u'sync_kobocat_xforms: {}'.format(
                    u', '.join(error_information)))
    # Restore normal auto-update behavior for the date fields
    _set_auto_field_update(Asset, "date_created", True)
    _set_auto_field_update(Asset, "date_modified", True)
def convert_csv_to_ss_structure(csv_repr):
    """
    Parse a "sheeted" CSV string into {sheet_name: rows}, dropping the
    synthetic "*_header" entries added by pyxform.xls2json_backends.
    """
    dict_repr = dict(xls2json_backends.csv_to_dict(
        StringIO.StringIO(csv_repr.encode("utf-8"))))
    # FIX: snapshot the keys before deleting — removing entries while
    # iterating a live key view raises RuntimeError on Python 3 (Python 2
    # only worked because keys() returned a list)
    for key in list(dict_repr.keys()):
        if re.match('.*_header$', key):
            del dict_repr[key]
    return dict_repr