def add_upload_file(self, raw_file_string: bytes, file_name: str):
    """Decode an uploaded file, normalize its newlines and store it.

    The raw bytes are decoded by ``general_functions.decode_bytes``; every
    line ending in the decoded text is then converted to a single line feed
    before the text is handed to ``self.add_file``.

    :param raw_file_string: the raw (undecoded) bytes of the uploaded file.
    :param file_name: name of the file; it is passed to ``add_file`` as both
        of its first two arguments.
    """
    decoded_file_string = general_functions.decode_bytes(
        raw_bytes=raw_file_string)

    # Line encodings:
    #   \n    Unix, OS X
    #   \r    Mac OS 9
    #   \r\n  Windows (CR+LF)
    # Normalize over the whole text instead of sniffing only a prefix: a
    # file whose first line break lies beyond the sniffed prefix would
    # otherwise keep its original endings.  CR+LF is collapsed first so the
    # second pass only sees lone CRs, which become line breaks rather than
    # being deleted (deleting them would merge adjacent lines).
    decoded_file_string = decoded_file_string.replace(
        '\r\n', '\n').replace('\r', '\n')

    # Add the file to the FileManager
    self.add_file(file_name, file_name, decoded_file_string)
def prepare_additional_options(opt_uploads: Dict[str, FileStorage],
                               storage_options: List[str],
                               storage_folder: str,
                               storage_filenames: List[str]) -> List[str]:
    """Collect the strings for the "Additional Options" scrub section.

    :param opt_uploads: A mapping from upload field name to the uploaded
        file object for the additional scrubbing options.
    :param storage_options: A list of option names; a field whose name
        (with surrounding square brackets stripped) appears here is loaded
        from storage when no new file was uploaded for it.
    :param storage_folder: The path of the storage folder.
    :param storage_filenames: Filenames used to load stored options; indexed
        by the position of the field name in sorted order.
    :return: A list of eight strings: the four file-based option strings
        followed by the four manual text fields read from the request form.
    """
    upload_keys = (
        'consolidations_file[]',
        'lemmas_file[]',
        'special_characters_file[]',
        'stop_words_file[]',
    )
    manual_keys = (
        'consolidations',
        'lemmas',
        'special_characters',
        'stop_words',
    )
    file_strings = dict.fromkeys(upload_keys + manual_keys, '')

    for position, field in enumerate(sorted(opt_uploads)):
        upload = opt_uploads[field]

        if upload.filename:
            # A new file was uploaded: decode it, then rewind the stream so
            # it can be read again later.
            raw_content = upload.read()
            file_strings[field] = general_functions.decode_bytes(raw_content)
            upload.seek(0)
            continue

        if field.strip('[]') in storage_options:
            # No new upload, but the option is selected: use the stored copy.
            file_strings[field] = load_scrub_optional_upload(
                storage_folder, storage_filenames[position])
            continue

        # Neither uploaded nor selected: clear the name kept in the session.
        session['scrubbingoptions']['file_uploads'][field] = ''
        file_strings[field] = ""

    # Order of the result:
    # cons_file_string, lem_file_string, sc_file_string, sw_kw_file_string,
    # cons_manual, lem_manual, sc_manual, and sw_kw_manual
    from_files = [file_strings.get(name) for name in upload_keys]
    from_form = [request.form[name] for name in manual_keys]
    return from_files + from_form
def prepare_additional_options(opt_uploads: Dict[str, FileStorage],
                               storage_options: List[str],
                               storage_folder: str,
                               storage_filenames: List[str]) -> List[str]:
    """Collect the strings for the "Additional Options" scrub section.

    :param opt_uploads: A mapping from upload field name to the uploaded
        file object for the additional scrubbing options.
    :param storage_options: A list of option names; a field whose name
        (with surrounding square brackets stripped) appears here is loaded
        from storage when no new file was uploaded for it.
    :param storage_folder: The path of the storage folder.
    :param storage_filenames: Filenames used to load stored options; indexed
        by the position of the field name in sorted order.
    :return: A list of eight strings: the four file-based option strings
        followed by the four manual text fields read from the request form.
    """
    select_keys = (
        'consfileselect[]',
        'lemfileselect[]',
        'scfileselect[]',
        'swfileselect[]',
    )
    manual_keys = (
        'manualconsolidations',
        'manuallemmas',
        'manualspecialchars',
        'manualstopwords',
    )
    file_strings = {name: '' for name in select_keys + manual_keys}

    for position, field in enumerate(sorted(opt_uploads)):
        upload = opt_uploads[field]
        has_new_upload = bool(upload.filename)

        if has_new_upload:
            # Decode the fresh upload, then rewind the stream so it can be
            # read again later.
            raw_content = upload.read()
            file_strings[field] = general_functions.decode_bytes(raw_content)
            upload.seek(0)
        elif field.strip('[]') in storage_options:
            # No new upload, but the option is selected: use the stored copy.
            stored_name = storage_filenames[position]
            file_strings[field] = load_scrub_optional_upload(
                storage_folder, stored_name)
        else:
            # Neither uploaded nor selected: clear the name kept in the
            # session.
            session['scrubbingoptions']['optuploadnames'][field] = ''
            file_strings[field] = ""

    # Order of the result:
    # cons_file_string, lem_file_string, sc_file_string, sw_kw_file_string,
    # cons_manual, lem_manual, sc_manual, and sw_kw_manual
    all_options = [file_strings.get(name) for name in select_keys]
    all_options.extend(request.form[name] for name in manual_keys)
    return all_options
def test_python_string_decoding(self):
    """Check that decode_bytes returns an already-decoded str unchanged."""
    already_text = "Hello, world!"
    result = decode_bytes(already_text)
    assert result == already_text
def test_windows_1251_decoding(self):
    """Check that windows-1251 encoded Cyrillic text is decoded correctly."""
    expected = (
        'сегодняшнее домашнее задание.'
        ' Настенные часы висят на стене. '
    )
    encoded = expected.encode('windows-1251')
    assert decode_bytes(encoded) == expected
def test_iso8859_1_decoding(self):
    """Check that iso-8859-1 encoded text is decoded correctly."""
    expected = 'Äpple'
    encoded = expected.encode('iso-8859-1')
    assert decode_bytes(encoded) == expected
def test_utf8_decoding(self):
    """Check that utf-8 encoded text is decoded correctly."""
    expected = 'España'
    encoded = expected.encode('utf-8')
    assert decode_bytes(encoded) == expected
def test_utf16_decoding(self):
    """Check that utf-16 encoded Cyrillic text is decoded correctly."""
    expected = 'абвгдежзийкл'
    encoded = expected.encode('utf-16')
    assert decode_bytes(encoded) == expected
def test_gb2312_decoding(self):
    """Check that gb2312 encoded Chinese text is decoded correctly."""
    expected = '做戏之说做戏之'
    encoded = expected.encode('gb2312')
    assert decode_bytes(encoded) == expected