def check_file_paths_parsing_error(main, file_paths):
    """Partition file paths by whether their contents can be decoded.

    Parameters:
        main: application object providing ``settings_custom``
        file_paths: iterable of file paths to probe

    Returns:
        (file_paths_pass, file_paths_parsing_error) — paths that decoded
        cleanly (or are not text-based formats) vs. paths that raised while
        being read with the chosen encoding.
    """
    file_paths_parsing_error = []
    file_paths_pass = []

    if file_paths:
        for file_path in file_paths:
            file_path = wl_misc.get_normalized_path(file_path)

            # Only text-based formats are probed; all other types pass through
            if os.path.splitext(file_path)[1] in [
                '.txt', '.csv', '.htm', '.html', '.xml', '.tmx'
            ]:
                if main.settings_custom['files']['auto_detection_settings'][
                        'detect_encodings']:
                    encoding = wl_detection.detect_encoding(main, file_path)
                else:
                    encoding = main.settings_custom['auto_detection'][
                        'default_settings']['default_encoding']

                try:
                    # Iterate the file to force decoding of every line; the
                    # text itself is not needed (the original accumulated it
                    # into an unused string, which was quadratic)
                    with open(file_path, 'r', encoding=encoding) as f:
                        for line in f:
                            pass
                # Narrowed from a bare "except:" so KeyboardInterrupt and
                # SystemExit are no longer swallowed
                except Exception:
                    file_paths_parsing_error.append(file_path)
                else:
                    file_paths_pass.append(file_path)
            else:
                file_paths_pass.append(file_path)

    return file_paths_pass, file_paths_parsing_error
def test_detection_encoding(file_path):
    """Verify that the detected encoding matches the code embedded in the
    file name, e.g. ``sample (utf_8).txt``."""
    file_name = os.path.basename(file_path)

    print(f'Detecting encoding for file "{file_name}"... ', end='')

    detected = wl_detection.detect_encoding(main, file_path)
    # The expected encoding code sits between the last "(...)" pair before ".txt"
    expected = re.search(r'(?<=\()[^\(\)]+?(?=\)\.txt)', file_name).group()

    print(f'Detected: {detected}')

    assert detected == expected
def check_files_parsing_error(main, files):
    """Partition files by whether their contents can be decoded.

    Parameters:
        main: application object providing ``settings_custom``
        files: either a list of Wordless file dicts (with ``'path'`` and
            ``'encoding'`` keys) or a list of raw file path strings

    Returns:
        (files_ok, files_parsing_error) — elements of ``files`` that decoded
        cleanly (or are not in the probed formats) vs. those that raised
        while being read.
    """
    files_parsing_error = []
    files_ok = []

    if files:
        # Wordless files
        if isinstance(files[0], dict):
            for file in files:
                file_path = file['path']

                if os.path.splitext(file_path)[1] in [
                    '.csv', '.htm', '.html', '.xml', '.tmx', '.lrc'
                ]:
                    try:
                        # Force decoding of every line; contents are unused
                        with open(file_path, 'r',
                                  encoding=file['encoding']) as f:
                            for line in f:
                                pass
                    # Narrowed from a bare "except:"
                    except Exception:
                        files_parsing_error.append(file)
                    else:
                        files_ok.append(file)
                else:
                    files_ok.append(file)
        # File paths
        elif isinstance(files[0], str):
            for file_path in files:
                file_path = wl_misc.get_normalized_path(file_path)

                if os.path.splitext(file_path)[1] in [
                    '.csv', '.htm', '.html', '.xml', '.tmx', '.lrc'
                ]:
                    if main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding, _ = wl_detection.detect_encoding(
                            main, file_path)
                    else:
                        encoding = main.settings_custom['auto_detection'][
                            'default_settings']['default_encoding']

                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            for line in f:
                                pass
                    except Exception:
                        files_parsing_error.append(file_path)
                    else:
                        files_ok.append(file_path)
                else:
                    files_ok.append(file_path)

    return files_ok, files_parsing_error
def test_detection_encoding(file_name):
    """Verify encoding detection against the code embedded in the file name
    (e.g. ``sample (UTF-8).txt`` -> ``utf_8``)."""
    file = {
        'path': f'wl_tests/files/wl_utils/wl_detection/encoding/{file_name}'
    }
    file['name'] = os.path.basename(file['path'])
    file['encoding'] = 'utf_8'

    detected = wl_detection.detect_encoding(main, file['path'])

    # Normalize the expected code from the file name to the detector's
    # convention: lowercase, hyphens replaced by underscores
    expected = re.search(r'(?<=\()[^\(\)]+?(?=\)\.txt)', file_name).group()
    expected = expected.lower().replace('-', '_')

    assert detected == expected
def _new_file(self, file_path): new_file = {} detection_success_encoding = True detection_success_text_type = True detection_success_lang = True new_file['selected'] = True new_file['path'] = file_path new_file['name'], _ = os.path.splitext( os.path.basename(new_file['path'])) new_file['name_old'] = new_file['name'] # Detect encodings if self.main.settings_custom['files']['auto_detection_settings'][ 'detect_encodings']: (new_file['encoding'], detection_success_encoding) = wl_detection.detect_encoding( self.main, new_file['path']) else: new_file['encoding'] = self.main.settings_custom['auto_detection'][ 'default_settings']['default_encoding'] # Detect text types if self.main.settings_custom['files']['auto_detection_settings'][ 'detect_text_types']: (new_file['text_type'], detection_success_text_type) = wl_detection.detect_text_type( self.main, new_file) else: new_file['text_type'] = self.main.settings_custom[ 'auto_detection']['default_settings']['default_text_type'] # Detect languages if self.main.settings_custom['files']['auto_detection_settings'][ 'detect_langs']: (new_file['lang'], detection_success_lang) = wl_detection.detect_lang( self.main, new_file) else: new_file['lang'] = self.main.settings_custom['auto_detection'][ 'default_settings']['default_lang'] return (new_file, detection_success_encoding, detection_success_text_type, detection_success_lang)
def check_file_paths_empty(main, file_paths):
    """Partition file paths into non-empty and empty files.

    A text file counts as empty when every line is blank or whitespace;
    other file types count as empty when their size is zero.

    Parameters:
        main: application object providing ``settings_custom``
        file_paths: iterable of file paths

    Returns:
        (file_paths_pass, file_paths_empty)
    """
    file_paths_empty = []
    file_paths_pass = []

    if file_paths:
        for file_path in file_paths:
            file_path = wl_misc.get_normalized_path(file_path)

            # Text files
            if os.path.splitext(file_path)[1] in [
                '.txt', '.csv', '.htm', '.html', '.xml', '.tmx'
            ]:
                if main.settings_custom['files']['auto_detection_settings'][
                        'detect_encodings']:
                    encoding = wl_detection.detect_encoding(main, file_path)
                else:
                    encoding = main.settings_custom['auto_detection'][
                        'default_settings']['default_encoding']

                try:
                    with open(file_path, 'r', encoding=encoding) as f:
                        # Empty iff every line is blank (all() short-circuits
                        # on the first non-blank line)
                        empty_file = all(not line.strip() for line in f)
                # Narrowed from a bare "except:". Unreadable/undecodable
                # files are deliberately passed through: decoding problems
                # are reported by a separate check, not treated as "empty"
                except Exception:
                    file_paths_pass.append(file_path)
                else:
                    if empty_file:
                        file_paths_empty.append(file_path)
                    else:
                        file_paths_pass.append(file_path)
            # Other file types
            else:
                if os.stat(file_path).st_size:
                    file_paths_pass.append(file_path)
                else:
                    file_paths_empty.append(file_path)

    return file_paths_pass, file_paths_empty
def run(self):
    """Worker entry point: convert each selected file to plain text (when
    needed), register the results via ``wl_files._new_file``, then signal
    completion.

    Side effects: writes converted copies into the configured temp-files
    directory, updates the import default path setting, and emits
    ``progress_updated`` / ``worker_done`` Qt signals.
    """
    new_files = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)

        for i, file_path in enumerate(self.file_paths):
            self.progress_updated.emit(
                self.tr(f'Opening files ... ({i + 1}/{len_file_paths})'))

            # Temp-file destination directory and output encoding come from
            # the user's import settings
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])
            default_encoding = self.main.settings_custom['import'][
                'temp_files']['default_encoding']

            file_path = wl_misc.get_normalized_path(file_path)
            file_name, file_ext = os.path.splitext(
                os.path.basename(file_path))
            file_ext = file_ext.lower()

            # Text files
            if file_ext == '.txt':
                # Plain text needs no conversion — register directly
                new_files.append(self.main.wl_files._new_file(file_path))
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # NOTE(review): '.xls' passes the extension check above
                    # but has no converter branch below, so nothing is ever
                    # written to new_path for .xls inputs — confirm whether
                    # .xls support was intended here.

                    # Word documents
                    if file_ext == '.docx':
                        lines = []  # NOTE(review): dead local, never used

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            # Paragraphs become lines; table rows become
                            # tab-separated lines
                            for block in self.iter_block_items(doc):
                                if type(block
                                        ) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []

                                        for cell in row:
                                            cells.append(' '.join([
                                                item.text for item in
                                                self.iter_cell_items(cell)
                                            ]))

                                        f.write('\t'.join(cells) + '\n')
                    # Excel workbooks
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(
                                file_path, data_only=True)

                            # Each worksheet row becomes a tab-separated line
                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    # NOTE(review): "!= None" should be
                                    # "is not None"
                                    f.write('\t'.join([
                                        (cell.value
                                         if cell.value != None else '')
                                        for cell in row
                                    ]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding
                    if self.main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(
                            self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom[
                            'auto_detection']['default_settings'][
                                'default_encoding']

                    # CSV files
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        # Rows become tab-separated lines
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='',
                                      encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)

                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]
                    # HTML files
                    elif file_ext in ['.htm', '.html']:
                        # Keep visible text only
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]
                    # XML files
                    elif file_ext == '.xml':
                        # XML is copied as-is, only re-encoded
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            xml_text = f.read()

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.xml'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(xml_text)

                        new_paths = [new_path]
                    # Translation memory files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        # Each <tu> holds a source and a target <seg>;
                        # they are split into two separate text files
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_target.txt'))

                        with open(path_src, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]

                # Register every converted file (txt=False: already plain
                # text in the temp dir, skip re-conversion)
                for new_path in new_paths:
                    new_files.append(
                        self.main.wl_files._new_file(new_path, txt=False))

        # Remember the directory of the first opened file as the next
        # default import location
        self.main.settings_custom['import']['files'][
            'default_path'] = wl_misc.get_normalized_dir(self.file_paths[0])

    self.progress_updated.emit(self.tr('Updating table ...'))

    time.sleep(0.1)

    self.worker_done.emit(new_files)
def _new_file(self, file_path, txt=True): new_file = {} detect_pass_encoding = True detect_pass_lang = True new_file['selected'] = True new_file['path'] = file_path if new_file['path'].endswith('.txt'): new_file['tokenized'] = 'No' new_file['tagged'] = 'No' elif new_file['path'].endswith('.xml'): new_file['tokenized'] = 'Yes' new_file['tagged'] = 'Yes' new_file['name'], _ = os.path.splitext( os.path.basename(new_file['path'])) new_file['name_old'] = new_file['name'] # Detect encodings if self.main.settings_custom['files']['auto_detection_settings'][ 'detect_encodings']: new_file['encoding'] = wl_detection.detect_encoding( self.main, new_file['path']) else: new_file['encoding'] = self.main.settings_custom['auto_detection'][ 'default_settings']['default_encoding'] # Detect languages if self.main.settings_custom['files']['auto_detection_settings'][ 'detect_langs']: new_file['lang'] = wl_detection.detect_lang(self.main, new_file) else: new_file['lang'] = self.main.settings_custom['auto_detection'][ 'default_settings']['default_lang'] if txt: default_dir = wl_checking_misc.check_dir( self.main.settings_custom['import']['temp_files'] ['default_path']) new_file['path'] = os.path.join(default_dir, re.split(r'[/\\]', file_path)[-1]) new_file['path'] = wl_checking_misc.check_new_path( new_file['path']) # Remove header tags tags_header = [] for _, _, tag_opening, _ in self.main.settings_custom['tags'][ 'tags_header']: tags_header.append(tag_opening[1:-1]) text = '' with open(file_path, 'r', encoding=new_file['encoding']) as f: for line in f: text += line # The "lxml" parser will add <html><body> to the text, which is undesirable with open(new_file['path'], 'w', encoding='utf_8') as f: soup = bs4.BeautifulSoup(text, features='html.parser') for tag_header in tags_header: for header_element in soup.select(tag_header): header_element.decompose() f.write(str(soup)) return new_file
def check_files_empty(main, files):
    """Partition files into non-empty and empty files.

    A text file counts as empty when every line is blank or whitespace;
    other file types count as empty when their size is zero.

    Parameters:
        main: application object providing ``settings_custom``
        files: either a list of Wordless file dicts (with ``'path'`` and
            ``'encoding'`` keys) or a list of raw file path strings

    Returns:
        (files_ok, files_empty)
    """
    files_empty = []
    files_ok = []

    text_exts = ['.txt', '.csv', '.htm', '.html', '.xml', '.tmx', '.lrc']

    if files:
        # Wordless files
        if isinstance(files[0], dict):
            for file in files:
                file_path = file['path']

                # Text files
                if os.path.splitext(file_path)[1] in text_exts:
                    try:
                        with open(file_path, 'r',
                                  encoding=file['encoding']) as f:
                            # Empty iff every line is blank (short-circuits
                            # on the first non-blank line)
                            empty_file = all(not line.strip() for line in f)
                    # Narrowed from a bare "except:". Undecodable files are
                    # deliberately kept in files_ok: a separate decoding
                    # check reports them
                    except Exception:
                        files_ok.append(file)
                    else:
                        if empty_file:
                            files_empty.append(file)
                        else:
                            files_ok.append(file)
                # Other file types
                else:
                    if os.stat(file_path).st_size:
                        files_ok.append(file)
                    else:
                        files_empty.append(file)
        # File paths
        elif isinstance(files[0], str):
            for file_path in files:
                file_path = wl_misc.get_normalized_path(file_path)

                # Text files
                if os.path.splitext(file_path)[1] in text_exts:
                    if main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding, _ = wl_detection.detect_encoding(
                            main, file_path)
                    else:
                        encoding = main.settings_custom['auto_detection'][
                            'default_settings']['default_encoding']

                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            empty_file = all(not line.strip() for line in f)
                    except Exception:
                        files_ok.append(file_path)
                    else:
                        if empty_file:
                            files_empty.append(file_path)
                        else:
                            files_ok.append(file_path)
                # Other file types
                else:
                    if os.stat(file_path).st_size:
                        files_ok.append(file_path)
                    else:
                        files_empty.append(file_path)

    return files_ok, files_empty
def import_list(self, settings):
    """Import items into this list widget from one or more text files.

    Opens a file dialog, checks the chosen files for emptiness and
    decoding errors, and either shows an error dialog or loads all new,
    de-duplicated lines into the list.

    Parameters:
        settings: key under ``settings_custom['import']`` naming the
            settings group for this list (default path, encoding options)

    Side effects: updates the group's default import path, mutates the
    list model, emits ``itemChanged``, and shows status-bar messages.
    """
    files = []

    # Fall back to the default-settings path if the remembered one is gone
    if os.path.exists(
            self.main.settings_custom['import'][settings]['default_path']):
        default_dir = self.main.settings_custom['import'][settings][
            'default_path']
    else:
        default_dir = self.main.settings_default['import'][settings][
            'default_path']

    file_paths = QFileDialog.getOpenFileNames(
        self.main, self.tr('Import from File(s)'), default_dir,
        self.tr('Text File (*.txt)'))[0]

    if file_paths:
        # Remember the chosen directory for next time
        self.main.settings_custom['import'][settings][
            'default_path'] = os.path.normpath(
                os.path.dirname(file_paths[0]))

        # Detect encodings
        if self.main.settings_custom['import'][settings][
                'detect_encodings']:
            for file_path in file_paths:
                files.append({
                    'path': wl_misc.get_normalized_path(file_path),
                    'encoding':
                        wl_detection.detect_encoding(self.main,
                                                     file_path)[0]
                })
        else:
            for file_path in file_paths:
                files.append({
                    'path': wl_misc.get_normalized_path(file_path),
                    'encoding': self.main.settings_custom['auto_detection']
                    ['default_settings']['default_encoding']
                })

        # Filter out empty files first, then files that fail to decode
        files_ok, files_empty = wl_checking_file.check_files_empty(
            self.main, files)
        files_ok, files_decoding_error = wl_checking_file.check_files_decoding_error(
            self.main, files_ok)

        # Extract file paths
        files_empty = [file['path'] for file in files_empty]
        files_decoding_error = [
            file['path'] for file in files_decoding_error
        ]

        if files_empty or files_decoding_error:
            wl_dialog_error.wl_dialog_error_import(
                self.main, files_empty=files_empty,
                files_decoding_error=files_decoding_error)

            wl_msg.wl_msg_import_list_error(self.main)
        else:
            # Check duplicate items
            items_to_import = []
            items_cur = self.get_items()
            num_prev = len(items_cur)

            for file in files_ok:
                with open(file['path'], 'r',
                          encoding=file['encoding']) as f:
                    for line in f:
                        line = line.strip()

                        # Skip lines already present in the list
                        if line not in items_cur:
                            items_to_import.append(line)

            # OrderedDict.fromkeys de-duplicates while preserving order
            self.load_items(
                collections.OrderedDict.fromkeys(items_to_import))
            self.itemChanged.emit(self.item(0))

            wl_msg.wl_msg_import_list_success(self.main, num_prev,
                                              len(self.get_items()))
def imp_list(self):
    """Import items into this list widget from one or more text files.

    Opens a file dialog, rejects empty files with an error dialog, and
    otherwise appends all new, non-duplicate lines to the list model.

    Side effects: updates the remembered import path, mutates the list
    model, opens dialogs, and shows status-bar messages.
    """
    # Fall back to the default-settings path if the remembered one is gone
    if os.path.exists(self.main.settings_custom['imp'][self.settings]
                      ['default_path']):
        default_dir = self.main.settings_custom['imp'][
            self.settings]['default_path']
    else:
        default_dir = self.main.settings_default['imp'][
            self.settings]['default_path']

    file_paths = QFileDialog.getOpenFileNames(
        self.main,
        _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Import from Files'),
        default_dir,
        _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Text File (*.txt)'))[0]

    if file_paths:
        # Modify default path
        self.main.settings_custom['imp'][
            self.settings]['default_path'] = os.path.normpath(
                os.path.dirname(file_paths[0]))

        file_paths, file_paths_empty = wl_checking_files.check_file_paths_empty(
            self.main, file_paths)

        if file_paths_empty:
            dialog_err_files = wl_dialogs_errs.Wl_Dialog_Err_Files(
                self.main,
                _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'Import Error'))
            dialog_err_files.label_err.set_text(
                _tr(
                    'Wl_List_Add_Ins_Del_Clr_Imp_Exp', '''
                        <div>
                            An error occurred during import, please check the following files and try again.
                        </div>
                    '''))

            dialog_err_files.table_err_files.model().setRowCount(
                len(file_paths_empty))

            # Suspend repaints while the error table is populated
            dialog_err_files.table_err_files.disable_updates()

            for i, file_path in enumerate(file_paths_empty):
                dialog_err_files.table_err_files.model().setItem(
                    i, 0,
                    QStandardItem(
                        _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp',
                            'Empty File')))
                dialog_err_files.table_err_files.model().setItem(
                    i, 1, QStandardItem(file_path))

            dialog_err_files.table_err_files.enable_updates()

            dialog_err_files.open()

            # Fixed typo: "occured" -> "occurred"
            self.main.statusBar().showMessage(
                _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp',
                    'An error occurred during import!'))
        else:
            # Check duplicate items
            items_to_imp = []
            items_cur = self.model().stringList()
            num_prev = len(items_cur)

            for file_path in file_paths:
                # Detect encodings
                if self.main.settings_custom['imp'][
                        self.settings]['detect_encodings']:
                    encoding = wl_detection.detect_encoding(
                        self.main, file_path)
                else:
                    encoding = self.main.settings_custom['imp'][
                        self.settings]['default_encoding']

                # errors='replace': undecodable bytes must not abort import
                with open(file_path, 'r', encoding=encoding,
                          errors='replace') as f:
                    text = f.read()

                for line in text.split('\n'):
                    line = line.strip()

                    # Skip blank lines and lines already in the list
                    if line and line not in items_cur:
                        items_to_imp.append(line)

            self._add_items(items_to_imp)

            num_imps = self.model().rowCount() - num_prev
            msg_item = _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp',
                           'item') if num_imps == 1 else _tr(
                               'Wl_List_Add_Ins_Del_Clr_Imp_Exp', 'items')

            self.main.statusBar().showMessage(
                _tr('Wl_List_Add_Ins_Del_Clr_Imp_Exp',
                    '{} {} has been successfully imported into the list.').
                format(num_imps, msg_item))
def run(self):
    """Worker entry point: convert each selected file to plain text (when
    needed), register the results via ``wl_files._new_file``, collect
    detection failures, then signal completion.

    Side effects: writes converted copies into the configured temp-files
    directory, updates the import default path setting, and emits
    ``progress_updated`` / ``worker_done`` Qt signals. ``worker_done``
    carries the new files plus lists of paths whose encoding, text type,
    or language could not be auto-detected.
    """
    new_files = []
    files_detection_error_encoding = []
    files_detection_error_text_type = []
    files_detection_error_lang = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)

        for i, file_path in enumerate(self.file_paths):
            self.progress_updated.emit(
                self.tr(f'Opening files ... ({i + 1}/{len_file_paths})'))

            # Temp-file destination directory and output encoding come from
            # the user's import settings
            default_dir = wl_checking_misc.check_dir(
                self.main.settings_custom['import']['temp_files']
                ['default_path'])
            default_encoding = self.main.settings_custom['import'][
                'temp_files']['default_encoding']

            file_path = wl_misc.get_normalized_path(file_path)
            file_name, file_ext = os.path.splitext(
                os.path.basename(file_path))
            file_ext = file_ext.lower()

            # Text files
            if file_ext == '.txt':
                # Plain text needs no conversion — register directly and
                # record any detection failures
                (new_file, detection_success_encoding,
                 detection_success_text_type, detection_success_lang
                 ) = self.main.wl_files._new_file(file_path)

                new_files.append(new_file)

                if not detection_success_encoding:
                    files_detection_error_encoding.append(new_file['path'])
                if not detection_success_text_type:
                    files_detection_error_text_type.append(
                        new_file['path'])
                if not detection_success_lang:
                    files_detection_error_lang.append(new_file['path'])
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    new_path = wl_checking_misc.check_new_path(
                        os.path.join(default_dir, f'{file_name}.txt'))

                    # Word documents
                    if file_ext == '.docx':
                        lines = []  # NOTE(review): dead local, never used

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            # Paragraphs become lines; table rows become
                            # tab-separated lines
                            for block in self.iter_block_items(doc):
                                if type(block
                                        ) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []

                                        for cell in row:
                                            cells.append(' '.join([
                                                item.text for item in
                                                self.iter_cell_items(cell)
                                            ]))

                                        f.write('\t'.join(cells) + '\n')
                    # Excel workbooks
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(
                                file_path, data_only=True)

                            # Each worksheet row becomes a tab-separated line
                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    # NOTE(review): "!= None" should be
                                    # "is not None"
                                    f.write('\t'.join([
                                        (cell.value
                                         if cell.value != None else '')
                                        for cell in row
                                    ]) + '\n')
                    elif file_ext == '.xls':
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            workbook = xlrd.open_workbook(file_path)

                            for i_sheet in range(workbook.nsheets):
                                worksheet = workbook.sheet_by_index(i_sheet)

                                for row in range(worksheet.nrows):
                                    # NOTE(review): cell_value may return
                                    # non-str (float) — join would raise;
                                    # confirm inputs are text cells
                                    f.write('\t'.join([
                                        worksheet.cell_value(row, col)
                                        for col in range(worksheet.ncols)
                                    ]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding
                    if self.main.settings_custom['files'][
                            'auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wl_detection.detect_encoding(
                            self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom[
                            'auto_detection']['default_settings'][
                                'default_encoding']

                    # CSV files
                    if file_ext == '.csv':
                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        # Rows become tab-separated lines
                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='',
                                      encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)

                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]
                    # HTML files
                    elif file_ext in ['.htm', '.html']:
                        # Keep visible text only
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]
                    # XML files
                    elif file_ext == '.xml':
                        # XML contents are copied verbatim into a .txt copy,
                        # only re-encoded
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            xml_text = f.read()

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            f.write(xml_text)

                        new_paths = [new_path]
                    # Translation memory files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        # Each <tu> holds a source and a target <seg>;
                        # they are split into two separate text files
                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_source.txt'))
                        path_target = wl_checking_misc.check_new_path(
                            os.path.join(default_dir,
                                         f'{file_name}_target.txt'))

                        with open(path_src, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w',
                                  encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]
                    # Lyrics files
                    elif file_ext == '.lrc':
                        lyrics = {}

                        with open(file_path, 'r',
                                  encoding=encoding_code) as f:
                            for line in f:
                                time_tags = []

                                line = line.strip()

                                # Strip time tags
                                while re.search(r'^\[[^\]]+?\]', line):
                                    time_tags.append(
                                        re.search(r'^\[[^\]]+?\]',
                                                  line).group())

                                    line = line[len(time_tags[-1]):].strip()

                                # Strip word time tags
                                line = re.sub(r'<[^>]+?>', r'', line)
                                line = re.sub(r'\s{2,}', r' ', line).strip()

                                # Keep only lines tagged with a valid
                                # [mm:ss.xx] timestamp
                                for time_tag in time_tags:
                                    if re.search(
                                            r'^\[[0-9]{2}:[0-5][0-9]\.[0-9]{2}\]$',
                                            time_tag):
                                        lyrics[time_tag] = line

                        new_path = wl_checking_misc.check_new_path(
                            os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w',
                                  encoding=default_encoding) as f:
                            # Sorting the [mm:ss.xx] keys restores playback
                            # order. NOTE(review): the loop variable shadows
                            # the "lyrics" dict (harmless since sorted()
                            # materialized the items, but fragile)
                            for _, lyrics in sorted(lyrics.items()):
                                f.write(f'{lyrics}\n')

                        new_paths = [new_path]

                # Register every converted file and record detection failures
                for new_path in new_paths:
                    (new_file, detection_success_encoding,
                     detection_success_text_type, detection_success_lang
                     ) = self.main.wl_files._new_file(new_path)

                    new_files.append(new_file)

                    if not detection_success_encoding:
                        files_detection_error_encoding.append(
                            new_file['path'])
                    if not detection_success_text_type:
                        files_detection_error_text_type.append(
                            new_file['path'])
                    if not detection_success_lang:
                        files_detection_error_lang.append(new_file['path'])

        # Remember the directory of the first opened file as the next
        # default import location
        self.main.settings_custom['import']['files'][
            'default_path'] = wl_misc.get_normalized_dir(self.file_paths[0])

    self.progress_updated.emit(self.tr('Updating table ...'))

    time.sleep(0.1)

    self.worker_done.emit(new_files, files_detection_error_encoding,
                          files_detection_error_text_type,
                          files_detection_error_lang)
def _new_file(self, file_path, txt=True):
    """Build a new file record for *file_path*.

    Encoding, text type and language are auto-detected when enabled,
    otherwise defaults are used. When ``txt`` is True the file is copied
    into the temp-files directory with configured header-tag regions
    stripped line by line.

    Returns:
        (new_file, detection_success_encoding,
         detection_success_text_type, detection_success_lang)
    """
    new_file = {}

    detection_success_encoding = True
    detection_success_text_type = True
    detection_success_lang = True

    new_file['selected'] = True
    new_file['path'] = file_path
    new_file['name'], _ = os.path.splitext(
        os.path.basename(new_file['path']))
    new_file['name_old'] = new_file['name']

    # Detect encodings
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_encodings']:
        (new_file['encoding'],
         detection_success_encoding) = wl_detection.detect_encoding(
             self.main, new_file['path'])
    else:
        new_file['encoding'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_encoding']

    # Detect text types
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_text_types']:
        (new_file['text_type'],
         detection_success_text_type) = wl_detection.detect_text_type(
             self.main, new_file)
    else:
        new_file['text_type'] = self.main.settings_custom[
            'auto_detection']['default_settings']['default_text_type']

    # Detect languages
    if self.main.settings_custom['files']['auto_detection_settings'][
            'detect_langs']:
        (new_file['lang'],
         detection_success_lang) = wl_detection.detect_lang(
             self.main, new_file)
    else:
        new_file['lang'] = self.main.settings_custom['auto_detection'][
            'default_settings']['default_lang']

    # Remove header tags
    tags_header_opening = []
    tags_header_closing = []

    if txt:
        default_dir = wl_checking_misc.check_dir(
            self.main.settings_custom['import']['temp_files']
            ['default_path'])

        # Re-home the record's path into the temp directory.
        # NOTE(review): re.escape() escapes for *pattern* syntax, but here
        # its output is used inside a re.sub *replacement* template, where
        # escape rules differ — on paths containing regex metacharacters
        # this can produce "bad escape" errors or wrong output. Also the
        # pattern assumes backslash path separators (Windows). Confirm.
        new_file['path'] = re.sub(r'^.+?\\([^\\]+?$)',
                                  fr'{re.escape(default_dir)}\\\1',
                                  file_path)
        new_file['path'] = wl_checking_misc.check_new_path(
            new_file['path'])

        # Build alternation patterns matching from an opening header tag to
        # end of line, and from start of line to a closing header tag
        for tag_opening, tag_closing in self.main.settings_custom['tags'][
                'tags_header']:
            tags_header_opening.append(fr"{tag_opening}.+?")
            tags_header_closing.append(fr".+?{tag_closing}")

        tag_header_opening = '|'.join(tags_header_opening)
        tag_header_closing = '|'.join(tags_header_closing)

        # Copy the file, dropping everything between an opening and a
        # closing header tag (inclusive); output is always UTF-8
        with open(file_path, 'r',
                  encoding=new_file['encoding']) as f, open(
                      new_file['path'], 'w', encoding='utf_8') as f_temp:
            # True while we are inside an unclosed header region
            tags_header = False

            for line in f:
                if tags_header:
                    # NOTE(review): lines wholly inside a header region
                    # (no closing tag on the line) are silently dropped —
                    # appears intentional
                    if re.search(tag_header_closing, line):
                        f_temp.write(re.sub(tag_header_closing, '', line))

                        tags_header = False
                elif re.search(tag_header_opening, line):
                    f_temp.write(re.sub(tag_header_opening, '', line))

                    tags_header = True
                else:
                    f_temp.write(line)

    return (new_file, detection_success_encoding,
            detection_success_text_type, detection_success_lang)