def setUpClass(cls):
    """Build the app fixture and the workbook we expect downloads to produce."""
    super(BulkAppTranslationDownloadTest, cls).setUpClass()
    cls.app = Application.wrap(cls.get_json("app"))
    # Todo, refactor this into BulkAppTranslationTestBase.upload_raw_excel_translations
    excel_buffer = StringIO()
    export_raw(cls.excel_headers, cls.excel_data, excel_buffer, format=Format.XLS_2007)
    with tempfile.TemporaryFile(suffix='.xlsx') as tmp:
        # Round-trip the exported bytes through a real file so the
        # workbook reader sees the same thing an uploaded file would be.
        tmp.write(excel_buffer.getvalue())
        reader = WorkbookJSONReader(tmp)
        cls.expected_workbook = []
        for sheet in reader.worksheets:
            cls.expected_workbook.append({'name': sheet.title, 'rows': list(sheet)})
def uploaded_file(self):
    """Return the uploaded bulk file parsed as an Excel workbook.

    :raises BulkUploadCasesException: when no file was uploaded, when the
        file is a CSV (no longer supported), when the content cannot be
        decoded, or when the workbook columns cannot be parsed.
    """
    try:
        bulk_file = self.request.FILES['bulk_upload_file']
    except KeyError:
        raise BulkUploadCasesException(_("No files uploaded"))
    try:
        return WorkbookJSONReader(bulk_file)
    except InvalidExcelFileException:
        try:
            # Fix: decode as utf-8 (not ascii), matching the other
            # uploaded_file implementation in this file, so CSVs containing
            # non-ASCII text still get the helpful "CSV not supported"
            # message instead of falling through to "Unrecognized format".
            csv.DictReader(io.StringIO(bulk_file.read().decode('utf-8'),
                                       newline=None))
            raise BulkUploadCasesException(_("CommCare HQ no longer supports CSV upload. "
                                             "Please convert to Excel 2007 or higher (.xlsx) "
                                             "and try again."))
        except UnicodeDecodeError:
            raise BulkUploadCasesException(_("Unrecognized format"))
    except JSONReaderError as e:
        # Interpolating the exception itself renders the same message as
        # e.message and also works on Python 3, where .message is gone.
        raise BulkUploadCasesException(_('Your upload was unsuccessful. %s') % e)
def get_app_translation_workbook(file_or_filename):
    """Parse an uploaded app-translation file into a workbook.

    Returns ``(workbook, msgs)`` on success or ``(False, msgs)`` on failure,
    where ``msgs`` is a list of (message_function, text) tuples.
    """
    msgs = []
    try:
        workbook = WorkbookJSONReader(file_or_filename)
    # todo: HeaderValueError does not belong here
    except (HeaderValueError, InvalidExcelFileException) as e:
        msgs.append((messages.error, _(APP_TRANSLATION_UPLOAD_FAIL_MESSAGE).format(e)))
        return False, msgs
    except JSONReaderError as e:
        column_error = _(
            "App Translation Failed! There is an issue with Excel columns. Error details: {}."
        ).format(e)
        msgs.append((messages.error, column_error))
        return False, msgs
    return workbook, msgs
def __init__(self, task, file_ref_id):
    """Set up progress tracking for a workbook-processing task.

    :param task: celery task to report progress on (may be falsy)
    :param file_ref_id: id of the cached download holding the workbook
    :raises UnknownFileRefException: if the file reference cannot be found
        (e.g. it has expired)
    """
    self.start = self.last_update = datetime.now()
    self.task = task
    self.progress = 0
    self.total_rows = 100
    if getattr(settings, 'CELERY_TASK_ALWAYS_EAGER', False):
        # Log progress since tasks are executed synchronously when
        # CELERY_TASK_ALWAYS_EAGER is true
        self.log = logging.getLogger(__name__).info
    else:
        self.log = lambda *a, **k: None
    if self.task:
        DownloadBase.set_progress(self.task, 0, 100)
    download_ref = DownloadBase.get(file_ref_id)
    if download_ref is None:
        # Fix: corrected typo in the error message ("wih" -> "with")
        raise UnknownFileRefException(
            "Could not find file with ref %s. It may have expired" % file_ref_id)
    self.workbook = WorkbookJSONReader(download_ref.get_filename())
def process_ui_translation_upload(app, trans_file): trans_dict = defaultdict(dict) # Use this to hard fail and not update any translations error_properties = [] # Use this to pass warnings without failing hard warnings = [] workbook = WorkbookJSONReader(trans_file) commcare_version = get_commcare_version_from_workbook(workbook.wb) try: translations = workbook.get_worksheet(title='translations') except WorksheetNotFound: error_properties.append( _('Could not find sheet "translations" in uploaded file.')) return trans_dict, error_properties, warnings commcare_ui_strings = list( load_translations('en', version=2, commcare_version=commcare_version).keys()) default_trans = get_default_translations_for_download( app, commcare_version) lang_with_defaults = app.langs[get_index_for_defaults(app.langs)] for row in translations: if row["property"] not in commcare_ui_strings: # Add a warning for unknown properties, but still add them to the translation dict warnings.append( row["property"] + " is not a known CommCare UI string, but we added it anyway") for lang in app.langs: if row.get(lang): all_parameters = re.findall(r"\$.*?}", row[lang]) for param in all_parameters: if not re.match(r"\$\{[0-9]+}", param): error_properties.append(row["property"] + ' - ' + row[lang]) if not (lang_with_defaults == lang and row[lang] == default_trans.get(row["property"], "")): trans_dict[lang].update({row["property"]: row[lang]}) return trans_dict, error_properties, warnings
def uploaded_file(self):
    """Return the uploaded bulk file as a workbook, enforcing the size cap.

    :raises BulkUploadCasesException: for a missing file, an oversized file,
        an unsupported (e.g. CSV) file type, undecodable content, or a
        workbook column error.
    """
    try:
        bulk_file = self.request.FILES['bulk_upload_file']
        if bulk_file.size > self.MAX_SIZE:
            raise BulkUploadCasesException(_(u"File size too large. "
                                             "Please upload file less than"
                                             " {size} Megabytes").format(size=self.MAX_SIZE / self.ONE_MB))
    except KeyError:
        raise BulkUploadCasesException(_("No files uploaded"))
    try:
        return WorkbookJSONReader(bulk_file)
    except InvalidExcelFileException:
        try:
            csv.DictReader(io.StringIO(bulk_file.read().decode('utf-8'),
                                       newline=None))
            # Fix: added the missing space after "file type." -- the two
            # fragments previously rendered as "...file type.Please convert..."
            raise BulkUploadCasesException(_("CommCare HQ does not support that file type. "
                                             "Please convert to Excel 2007 or higher (.xlsx) "
                                             "and try again."))
        except UnicodeDecodeError:
            raise BulkUploadCasesException(_("Unrecognized format"))
    except JSONReaderError as e:
        # Interpolating the exception itself renders the same message as
        # e.message and also works on Python 3, where .message is gone.
        raise BulkUploadCasesException(_('Your upload was unsuccessful. %s') % e)
def clean_message_bank_file(self):
    """Validate the uploaded message-bank workbook.

    Checks every row for an ID (a letter A-H plus at least one more
    character, ASCII only, unique) and a Message (non-empty, ASCII only,
    at most 160 characters).

    :return: list of {"msg_id": ..., "text": ...} dicts, one per data row
    :raises ValidationError: describing the first problem found
    """
    value = self.cleaned_data.get("message_bank_file")
    if not value:
        raise ValidationError(_("Please choose a file."))
    try:
        workbook = WorkbookJSONReader(value)
    except InvalidExcelFileException:
        raise ValidationError(
            _("Invalid format. Please convert to Excel 2007 or higher (.xlsx) and try again."))
    try:
        worksheet = workbook.get_worksheet()
    except WorksheetNotFound:
        raise ValidationError(_("Workbook has no worksheets."))
    message_ids = {}
    # Renamed from "messages": the old local shadowed the
    # django.contrib.messages module used elsewhere in this file.
    parsed_messages = []
    row_num = 2  # data starts on row 2; row 1 is the header row
    for row in worksheet:
        if "ID" not in row:
            raise ValidationError(_("Column 'ID' not found."))
        if "Message" not in row:
            raise ValidationError(_("Column 'Message' not found."))
        msg_id = row.get("ID")
        text = row.get("Message")
        try:
            assert isinstance(msg_id, six.string_types)
            msg_id = msg_id.strip()
            assert len(msg_id) > 1
            assert msg_id[0].upper() in "ABCDEFGH"
        except Exception:
            raise ValidationError(
                _("Invalid ID at row %(row_num)s") % {"row_num": row_num})
        if msg_id in message_ids:
            raise ValidationError(
                _("Duplicate ID at row %(row_num)s") % {"row_num": row_num})
        try:
            assert isinstance(text, six.string_types)
            text = text.strip()
            assert len(text) > 0
        except Exception:
            raise ValidationError(
                _("Invalid Message at row %(row_num)s") % {"row_num": row_num})
        try:
            msg_id.encode("ascii")
        except Exception:
            raise ValidationError(
                _("ID at row %(row_num)s contains invalid character(s)")
                % {"row_num": row_num})
        try:
            text.encode("ascii")
        except Exception:
            raise ValidationError(
                _("Message at row %(row_num)s contains invalid character(s)")
                % {"row_num": row_num})
        if len(text) > 160:
            raise ValidationError(
                _("Message at row %(row_num)s is longer than 160 characters.")
                % {"row_num": row_num})
        parsed_messages.append({
            "msg_id": msg_id,
            "text": text,
        })
        message_ids[msg_id] = True
        row_num += 1
    return parsed_messages
def test_excel_formula_reading(self):
    """A formula cell should be read back as its computed value."""
    path = self.get_path('formula_sheet', 'xlsx')
    rows = list(WorkbookJSONReader(path).get_worksheet('Sheet1'))
    # Instead of =SUM(1,1)
    self.assertEqual(rows[0]['formula'], 2)
def post(self, request, *args, **kwargs):
    """View's dispatch method automatically calls this.

    Parses the uploaded user workbook, validates headers and usernames,
    then kicks off a bulk_upload_async celery task and redirects to its
    status page.
    """
    upload = request.FILES.get('bulk_upload_file')
    try:
        self.workbook = WorkbookJSONReader(upload)
    except InvalidExcelFileException:
        try:
            # If the payload parses as ASCII CSV, tell the user CSV is no
            # longer supported. Note the ascii decode: a CSV containing
            # non-ASCII text falls through to "Unrecognized format" below.
            csv.DictReader(
                io.StringIO(upload.read().decode('ascii'), newline=None))
            return HttpResponseBadRequest(
                "CommCare HQ no longer supports CSV upload. "
                "Please convert to Excel 2007 or higher (.xlsx) "
                "and try again.")
        except UnicodeDecodeError:
            return HttpResponseBadRequest("Unrecognized format")
    except JSONReaderError as e:
        messages.error(request,
                       'Your upload was unsuccessful. %s' % e.message)
        return self.get(request, *args, **kwargs)
    except HeaderValueError as e:
        return HttpResponseBadRequest(
            "Upload encountered a data type error: %s" % e.message)
    # Prefer a sheet titled 'users'; otherwise fall back to the first sheet.
    try:
        self.user_specs = self.workbook.get_worksheet(title='users')
    except WorksheetNotFound:
        try:
            self.user_specs = self.workbook.get_worksheet()
        except WorksheetNotFound:
            return HttpResponseBadRequest("Workbook has no worksheets")
    # The 'groups' sheet is optional.
    try:
        self.group_specs = self.workbook.get_worksheet(title='groups')
    except WorksheetNotFound:
        self.group_specs = []
    try:
        check_headers(self.user_specs)
    except UserUploadError as e:
        messages.error(request, _(e.message))
        return HttpResponseRedirect(
            reverse(UploadCommCareUsers.urlname, args=[self.domain]))
    # convert to list here because iterator destroys the row once it has
    # been read the first time
    self.user_specs = list(self.user_specs)
    for user_spec in self.user_specs:
        try:
            user_spec['username'] = enforce_string_type(
                user_spec['username'])
        except StringTypeRequiredError:
            messages.error(
                request,
                _("Error: Expected username to be a Text type for username {0}"
                  ).format(user_spec['username']))
            return HttpResponseRedirect(
                reverse(UploadCommCareUsers.urlname, args=[self.domain]))
    try:
        check_existing_usernames(self.user_specs, self.domain)
    except UserUploadError as e:
        messages.error(request, _(e.message))
        return HttpResponseRedirect(
            reverse(UploadCommCareUsers.urlname, args=[self.domain]))
    try:
        check_duplicate_usernames(self.user_specs)
    except UserUploadError as e:
        messages.error(request, _(e.message))
        return HttpResponseRedirect(
            reverse(UploadCommCareUsers.urlname, args=[self.domain]))
    # All validation passed: hand off to the async task (1-hour expiry on
    # the download reference) and redirect to its status page.
    task_ref = expose_cached_download(payload=None,
                                     expiry=1 * 60 * 60,
                                     file_extension=None)
    task = bulk_upload_async.delay(
        self.domain,
        self.user_specs,
        list(self.group_specs),
    )
    task_ref.set_task(task)
    return HttpResponseRedirect(
        reverse(UserUploadStatusView.urlname,
                args=[self.domain, task_ref.download_id]))
def process_bulk_app_translation_upload(app, f):
    """
    Process the bulk upload file for the given app.
    We return these message tuples instead of calling them now to allow this
    function to be used independently of request objects.

    :param app: the application whose translations are being updated
    :param f: uploaded file accepted by WorkbookJSONReader
    :return: Returns a list of message tuples. The first item in each tuple is
    a function like django.contrib.messages.error, and the second is a string.
    """
    msgs = []
    headers = expected_bulk_app_sheet_headers(app)
    expected_sheets = {h[0]: h[1] for h in headers}
    processed_sheets = set()
    try:
        workbook = WorkbookJSONReader(f)
    # todo: HeaderValueError does not belong here
    except (HeaderValueError, InvalidExcelFileException) as e:
        msgs.append(
            (messages.error, _(APP_TRANSLATION_UPLOAD_FAIL_MESSAGE).format(e)))
        return msgs
    except JSONReaderError as e:
        msgs.append((
            messages.error,
            _("App Translation Failed! There is an issue with excel columns. Error details: {}."
              ).format(e)))
        return msgs
    for sheet in workbook.worksheets:
        # sheet.__iter__ can only be called once, so cache the result
        rows = get_unicode_dicts(sheet)

        # CHECK FOR REPEAT SHEET
        if sheet.worksheet.title in processed_sheets:
            # Fix: the original applied % only to the second string fragment
            # ('occurrence has been processed' % title), which contains no
            # placeholder and therefore raised TypeError whenever a sheet
            # actually was repeated. Implicit concatenation keeps %s in scope.
            msgs.append((messages.error,
                         'Sheet "%s" was repeated. Only the first '
                         'occurrence has been processed' % sheet.worksheet.title))
            continue

        # CHECK FOR BAD SHEET NAME
        expected_columns = expected_sheets.get(sheet.worksheet.title, None)
        if expected_columns is None:
            msgs.append((messages.error,
                         'Skipping sheet "%s", did not recognize title'
                         % sheet.worksheet.title))
            continue

        # CHECK FOR MISSING KEY COLUMN
        # NOTE(review): this branch tests for "Modules and Forms" while the
        # processing branch below tests for "Modules_and_forms" -- the two
        # can't both match one sheet title; confirm the canonical name
        # against expected_bulk_app_sheet_headers().
        if sheet.worksheet.title == "Modules and Forms":
            # Several columns on this sheet could be used to uniquely identify
            # rows. Using sheet_name for now, but unique_id could also be used.
            if expected_columns[1] not in sheet.headers:
                msgs.append((messages.error,
                             'Skipping sheet "%s", could not find "%s" column'
                             % (sheet.worksheet.title, expected_columns[1])))
                continue
        elif expected_columns[0] == "case_property":
            # It's a module sheet
            if (expected_columns[0] not in sheet.headers
                    or expected_columns[1] not in sheet.headers):
                msgs.append((messages.error,
                             'Skipping sheet "%s", could not find case_property'
                             ' or list_or_detail column.' % sheet.worksheet.title))
                continue
        else:
            # It's a form sheet
            if expected_columns[0] not in sheet.headers:
                msgs.append((messages.error,
                             'Skipping sheet "%s", could not find label column'
                             % sheet.worksheet.title))
                continue
        processed_sheets.add(sheet.worksheet.title)

        # CHECK FOR MISSING COLUMNS
        missing_cols = set(expected_columns) - set(sheet.headers)
        if len(missing_cols) > 0:
            # Fix: join with ", " -- the original " ," rendered the column
            # list with the space before the comma.
            msgs.append((messages.warning,
                         'Sheet "%s" has fewer columns than expected. '
                         'Sheet will be processed but the following'
                         ' translations will be unchanged: %s'
                         % (sheet.worksheet.title, ", ".join(missing_cols))))

        # CHECK FOR EXTRA COLUMNS
        extra_cols = set(sheet.headers) - set(expected_columns)
        if len(extra_cols) > 0:
            msgs.append((
                messages.warning,
                'Sheet "%s" has unrecognized columns. '
                'Sheet will be processed but ignoring the following columns: %s'
                % (sheet.worksheet.title, ", ".join(extra_cols))))

        # NOTE: At the moment there is no missing row detection.
        # This could be added if we want though
        # (it is not that bad if a user leaves out a row)
        if sheet.worksheet.title == "Modules_and_forms":
            # It's the first sheet
            ms = _process_modules_and_forms_sheet(rows, app)
            msgs.extend(ms)
        elif sheet.headers[0] == "case_property":
            # It's a module sheet
            ms = _update_case_list_translations(sheet, rows, app)
            msgs.extend(ms)
        else:
            # It's a form sheet
            ms = update_form_translations(sheet, rows, missing_cols, app)
            msgs.extend(ms)
    msgs.append((messages.success, _("App Translations Updated!")))
    return msgs
def get_worksheet(self, title):
    """Export self.headers/self.data to an in-memory xlsx and return the named sheet."""
    buffer = BytesIO()
    export_raw(self.headers, self.data, buffer, format=Format.XLS_2007)
    buffer.seek(0)
    # WorkbookJSONReader consumes the stream in its __init__
    reader = WorkbookJSONReader(buffer)
    return reader.worksheets_by_title[title]
def test_archive_forms_wrong_domain(self):
    """Archiving against the wrong domain should report one error per form."""
    workbook = WorkbookJSONReader(join(BASE_PATH, BASIC_XLSX))
    rows = list(workbook.get_worksheet())
    response = archive_forms_old('wrong_domain', 'user1', self.username, rows)
    self.assertEqual(len(response['errors']), len(self.xforms),
                     "Error when wrong domain")