def __init__(self):
    super().__init__()

    self.threads_check_updates = []

    self.setWindowTitle(self.tr('Wordless'))
    self.setWindowIcon(QIcon(wordless_misc.get_abs_path('imgs/wordless_icon.ico')))

    # Default settings
    wordless_settings_default.init_settings_default(self)

    # Custom settings
    path_settings = wordless_misc.get_abs_path('wordless_settings.pickle')

    if os.path.exists(path_settings):
        with open(path_settings, 'rb') as f:
            settings_custom = pickle.load(f)

        if wordless_checking_misc.check_custom_settings(settings_custom, self.settings_default):
            self.settings_custom = settings_custom
        else:
            self.settings_custom = copy.deepcopy(self.settings_default)
    else:
        self.settings_custom = copy.deepcopy(self.settings_default)

    # Global settings
    wordless_settings_global.init_settings_global(self)

    # Settings
    self.wordless_settings = wordless_settings.Wordless_Settings(self)

    # Menu
    self.init_menu()

    # Work Area & File Area
    self.init_central_widget()

    # Status Bar
    self.statusBar().showMessage(self.tr('Ready!'))
    self.statusBar().setFixedHeight(22)
    self.statusBar().setStyleSheet('''
        QStatusBar {
            background-color: #D0D0D0;
        }
    ''')

    # Check for updates on startup
    if self.settings_custom['general']['update_settings']['check_updates_on_startup']:
        self.dialog_check_updates = self.help_check_updates(on_startup=True)

    self.load_settings()

    # Fix layout on macOS
    if platform.system() == 'Darwin':
        self.fix_macos_layout(self)
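# A minimal, standalone sketch of the settings bootstrap above: load pickled
# custom settings if present, validate them, and otherwise fall back to a deep
# copy of the defaults. The file name and the validation hook are placeholders
# for illustration, not the Wordless API.
import copy
import os
import pickle

def load_or_default_settings(path_settings, settings_default, validate=lambda settings: True):
    if os.path.exists(path_settings):
        with open(path_settings, 'rb') as f:
            settings_custom = pickle.load(f)

        # Keep the loaded settings only if they pass validation
        if validate(settings_custom):
            return settings_custom

    return copy.deepcopy(settings_default)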
def check_files_unsupported(main, files):
    files_unsupported = []
    files_ok = []

    file_exts = [
        ext
        for file_type in main.settings_global['file_types']['files']
        for ext in re.findall(r'(?<=\*)\.[a-z]+', file_type)
    ]

    if files:
        # Wordless files
        if type(files[0]) == dict:
            for file in files:
                if os.path.splitext(file['path'])[1].lower() not in file_exts:
                    files_unsupported.append(file)
                else:
                    files_ok.append(file)
        # File paths
        elif type(files[0]) == str:
            for file_path in files:
                file_path = wordless_misc.get_abs_path(file_path)

                if os.path.splitext(file_path)[1].lower() not in file_exts:
                    files_unsupported.append(file_path)
                else:
                    files_ok.append(file_path)

    return files_ok, files_unsupported
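# A standalone check of the extension-extraction regex used above: it pulls
# each ".ext" that follows a "*" in Qt-style file-type filter strings. The
# filter strings here are illustrative, not the actual settings values.
import re

file_types = ['Text Files (*.txt)', 'Web Pages (*.htm;*.html)']
file_exts = [
    ext
    for file_type in file_types
    for ext in re.findall(r'(?<=\*)\.[a-z]+', file_type)
]
print(file_exts)  # ['.txt', '.htm', '.html']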
def __init__(self, main, title):
    super().__init__(main)

    self.main = main

    self.setWindowTitle(title)
    self.setWindowIcon(QIcon(wordless_misc.get_abs_path('imgs/wordless_icon.ico')))

    self.setWindowFlag(Qt.MSWindowsFixedSizeDialogHint, True)
    self.setWindowFlag(Qt.WindowContextHelpButtonHint, False)
def __init__(self):
    super().__init__(QPixmap(wordless_misc.get_abs_path('imgs/wordless_loading.png')))

    msg_font = QFont('Times New Roman')
    msg_font.setPixelSize(14)

    self.setFont(msg_font)
    self.showMessage(
        self.tr(' Loading Wordless ...\n Please wait, it should only take a few seconds.'),
        color=Qt.white,
        alignment=Qt.AlignLeft | Qt.AlignBottom
    )
def check_files_parsing_error(main, files):
    files_parsing_error = []
    files_ok = []

    if files:
        # Wordless files
        if type(files[0]) == dict:
            for file in files:
                file_path = file['path']

                if os.path.splitext(file_path)[1] in ['.csv', '.htm', '.html', '.xml', '.tmx', '.lrc']:
                    try:
                        with open(file_path, 'r', encoding=file['encoding']) as f:
                            for line in f:
                                pass
                    except:
                        files_parsing_error.append(file)
                    else:
                        files_ok.append(file)
                else:
                    files_ok.append(file)
        # File paths
        elif type(files[0]) == str:
            for file_path in files:
                file_path = wordless_misc.get_abs_path(file_path)

                if os.path.splitext(file_path)[1] in ['.csv', '.htm', '.html', '.xml', '.tmx', '.lrc']:
                    if main.settings_custom['files']['auto_detection_settings']['detect_encodings']:
                        encoding, _ = wordless_detection.detect_encoding(main, file_path)
                    else:
                        encoding = main.settings_custom['auto_detection']['default_settings']['default_encoding']

                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            for line in f:
                                pass
                    except:
                        files_parsing_error.append(file_path)
                    else:
                        files_ok.append(file_path)
                else:
                    files_ok.append(file_path)

    return files_ok, files_parsing_error
def __init__(self, main, title, width=0, height=0):
    super().__init__(main)

    self.main = main

    if width:
        self.setFixedWidth(width)
    if height:
        self.setFixedHeight(height)

    self.setWindowTitle(title)
    self.setWindowIcon(QIcon(wordless_misc.get_abs_path('imgs/wordless_icon.ico')))

    self.setWindowFlag(Qt.MSWindowsFixedSizeDialogHint, True)
    self.setWindowFlag(Qt.WindowContextHelpButtonHint, False)
def check_files_missing(main, files):
    files_missing = []
    files_ok = []

    if files:
        # Wordless files
        if type(files[0]) == dict:
            for file in files:
                if os.path.exists(file['path']):
                    files_ok.append(file)
                else:
                    files_missing.append(file)
        # File paths
        elif type(files[0]) == str:
            for file_path in files:
                file_path = wordless_misc.get_abs_path(file_path)

                if os.path.exists(file_path):
                    files_ok.append(file_path)
                else:
                    files_missing.append(file_path)

    return files_ok, files_missing
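# A minimal usage sketch for the checker above, using the dict ("Wordless
# files") branch, which only needs os.path.exists. `main` is part of the
# shared checker signature but is not read here, so None suffices for a
# standalone run; the paths are illustrative.
files = [
    {'path': 'corpus/novel.txt', 'encoding': 'utf_8'},
    {'path': 'corpus/does_not_exist.txt', 'encoding': 'utf_8'},
]
files_ok, files_missing = check_files_missing(None, files)

print([file['path'] for file in files_missing])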
def check_files_duplicate(main, files):
    files_duplicate = []
    files_ok = []

    if files:
        # Wordless files
        if type(files[0]) == dict:
            for file in files:
                if main.wordless_files.find_file_by_path(file['path']):
                    files_duplicate.append(file)
                else:
                    files_ok.append(file)
        # File paths
        elif type(files[0]) == str:
            for file_path in files:
                file_path = wordless_misc.get_abs_path(file_path)

                if main.wordless_files.find_file_by_path(file_path):
                    files_duplicate.append(file_path)
                else:
                    files_ok.append(file_path)

    return files_ok, files_duplicate
def restart(self):
    if getattr(sys, '_MEIPASS', False):
        if platform.system() == 'Windows':
            subprocess.Popen([wordless_misc.get_abs_path('Wordless.exe')])
        elif platform.system() == 'Darwin':
            subprocess.Popen([wordless_misc.get_abs_path('Wordless')])
        elif platform.system() == 'Linux':
            subprocess.Popen([wordless_misc.get_abs_path('Wordless')])
    else:
        if platform.system() == 'Windows':
            subprocess.Popen(['python', wordless_misc.get_abs_path(__file__)])
        elif platform.system() == 'Darwin':
            subprocess.Popen(['python3', wordless_misc.get_abs_path(__file__)])
        elif platform.system() == 'Linux':
            subprocess.Popen(['python3.7', wordless_misc.get_abs_path(__file__)])

    self.save_settings()

    sys.exit(0)
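# A standalone sketch of the frozen-vs-source check used in restart():
# PyInstaller one-file bundles set sys._MEIPASS to their temporary extraction
# directory, so its presence tells a bundled executable apart from a source
# checkout.
import sys

if getattr(sys, '_MEIPASS', False):
    print('Running from a PyInstaller bundle')
else:
    print('Running from source')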
def wordless_get_stop_words(main, lang, list_stop_words='default'):
    if list_stop_words == 'default':
        list_stop_words = main.settings_custom['stop_words']['stop_words'][lang]

    lang_639_1 = wordless_conversion.to_iso_639_1(main, lang)

    # Chinese (Simplified)
    if lang_639_1 == 'zh_cn':
        lang_639_1 = 'zh'

    if 'Stopwords ISO' in list_stop_words:
        # Norwegian Bokmål & Norwegian Nynorsk
        if lang_639_1 in ['nb', 'nn']:
            lang_639_1 = 'no'

        # Chinese (Traditional)
        if lang_639_1 == 'zh_tw':
            with open(wordless_misc.get_abs_path('stop_words/Stopwords ISO/stop_words_zh_tw.txt'), 'r', encoding='utf_8') as f:
                stop_words = [line.rstrip() for line in f]
        else:
            with open(wordless_misc.get_abs_path('stop_words/Stopwords ISO/stopwords_iso.json'), 'r', encoding='utf_8') as f:
                stop_words = json.load(f)[lang_639_1]
    elif 'spaCy' in list_stop_words:
        # Chinese (Traditional)
        if lang_639_1 == 'zh_tw':
            with open(wordless_misc.get_abs_path('stop_words/spaCy/stop_words_zh_tw.txt'), 'r', encoding='utf_8') as f:
                stop_words = [line.rstrip() for line in f]
        else:
            spacy_lang = importlib.import_module(f'spacy.lang.{lang_639_1}')

            stop_words = spacy_lang.STOP_WORDS
    elif 'NLTK' in list_stop_words:
        lang_texts = {
            'ara': 'arabic',
            'aze': 'azerbaijani',
            'dan': 'danish',
            'nld': 'dutch',
            'eng': 'english',
            'fin': 'finnish',
            'fra': 'french',
            'deu': 'german',
            'ell': 'greek',
            'hun': 'hungarian',
            'ind': 'indonesian',
            'ita': 'italian',
            'kaz': 'kazakh',
            'nep': 'nepali',
            # Norwegian Bokmål & Norwegian Nynorsk
            'nob': 'norwegian',
            'nno': 'norwegian',
            'por': 'portuguese',
            'ron': 'romanian',
            'rus': 'russian',
            'spa': 'spanish',
            'swe': 'swedish',
            'tur': 'turkish'
        }

        stop_words = nltk.corpus.stopwords.words(lang_texts[lang])
    # Greek (Ancient)
    elif list_stop_words == main.tr('grk-stoplist - Greek (Ancient) Stop Words'):
        with open(wordless_misc.get_abs_path('stop_words/grk-stoplist/stoplist-greek.txt'), 'r', encoding='utf_8') as f:
            stop_words = [line.rstrip() for line in f.readlines()]
    # Thai
    elif list_stop_words == main.tr('PyThaiNLP - Thai Stop Words'):
        stop_words = pythainlp.corpus.common.thai_stopwords()
    # Custom Lists
    elif list_stop_words == main.tr('Custom List'):
        stop_words = main.settings_custom['stop_words']['custom_lists'][lang]

    return sorted(stop_words)
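# A standalone sketch of the dynamic spaCy import above: each spacy.lang.<code>
# submodule exposes a STOP_WORDS set, so the language module can be resolved
# from an ISO 639-1 code at runtime (requires spaCy to be installed).
import importlib

spacy_lang = importlib.import_module('spacy.lang.en')
print(sorted(spacy_lang.STOP_WORDS)[:5])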
def wordless_lemmatize(main, tokens, lang, text_type=('untokenized', 'untagged'), lemmatizer='default'):
    empty_offsets = []
    mapping_lemmas = {}
    lemmas = []

    tokens = [str(token) for token in tokens]

    re_tags_all = wordless_matching.get_re_tags(main, tags='all')
    re_tags_pos = wordless_matching.get_re_tags(main, tags='pos')
    re_tags_non_pos = wordless_matching.get_re_tags(main, tags='non_pos')

    if text_type[1] == 'tagged_both':
        tags = [''.join(re.findall(re_tags_all, token)) for token in tokens]
        tokens = [re.sub(re_tags_all, '', token) for token in tokens]
    elif text_type[1] == 'tagged_pos':
        tags = [''.join(re.findall(re_tags_pos, token)) for token in tokens]
        tokens = [re.sub(re_tags_pos, '', token) for token in tokens]
    elif text_type[1] == 'tagged_non_pos':
        tags = [''.join(re.findall(re_tags_non_pos, token)) for token in tokens]
        tokens = [re.sub(re_tags_non_pos, '', token) for token in tokens]
    else:
        tags = [''] * len(tokens)

    # Record empty tokens
    for i, token in reversed(list(enumerate(tokens))):
        if not token.strip():
            tokens.remove(token)

            empty_offsets.append(i)

    wordless_text_utils.check_lemmatizers(main, lang)

    if tokens and lang in main.settings_global['lemmatizers']:
        if lemmatizer == 'default':
            lemmatizer = main.settings_custom['lemmatization']['lemmatizers'][lang]

        # Dutch, English, French, German, Greek (Modern), Italian, Portuguese, Spanish
        if 'spaCy' in lemmatizer:
            nlp = main.__dict__[f'spacy_nlp_{lang}']
            doc = spacy.tokens.Doc(nlp.vocab, words=tokens)
            nlp.tagger(doc)

            lemmas = [token.lemma_ for token in doc]
        # English
        elif lemmatizer == main.tr('NLTK - WordNet Lemmatizer'):
            word_net_lemmatizer = nltk.WordNetLemmatizer()

            for token, pos in wordless_pos_tag(
                main, tokens,
                lang='eng',
                pos_tagger='NLTK - Perceptron POS Tagger',
                tagset='universal'
            ):
                if pos == 'ADJ':
                    lemmas.append(word_net_lemmatizer.lemmatize(token, pos=nltk.corpus.wordnet.ADJ))
                elif pos in ['NOUN', 'PROPN']:
                    lemmas.append(word_net_lemmatizer.lemmatize(token, pos=nltk.corpus.wordnet.NOUN))
                elif pos == 'ADV':
                    lemmas.append(word_net_lemmatizer.lemmatize(token, pos=nltk.corpus.wordnet.ADV))
                elif pos in ['VERB', 'AUX']:
                    lemmas.append(word_net_lemmatizer.lemmatize(token, pos=nltk.corpus.wordnet.VERB))
                else:
                    lemmas.append(word_net_lemmatizer.lemmatize(token))
        # Greek (Ancient)
        elif lemmatizer == main.tr('lemmalist-greek - Greek (Ancient) Lemma List'):
            with open(wordless_misc.get_abs_path('lemmatization/lemmalist-greek/lemmalist-greek.txt'), 'r', encoding='utf_8') as f:
                for line in f.readlines():
                    line = line.rstrip()

                    if line:
                        lemma, *words = line.split()

                        for word in words:
                            mapping_lemmas[word] = lemma
        # Russian & Ukrainian
        elif lemmatizer == main.tr('pymorphy2 - Morphological Analyzer'):
            if lang == 'rus':
                morphological_analyzer = pymorphy2.MorphAnalyzer(lang='ru')
            else:
                morphological_analyzer = pymorphy2.MorphAnalyzer(lang='uk')

            for token in tokens:
                lemmas.append(morphological_analyzer.parse(token)[0].normal_form)
        # Tibetan
        elif lemmatizer == main.tr('pybo - Tibetan Lemmatizer'):
            word_tokenizer = main.settings_custom['word_tokenization']['word_tokenizers'][lang]

            wordless_text_utils.check_pybo_tokenizers(main, word_tokenizer=word_tokenizer)

            if word_tokenizer == main.tr('pybo - Tibetan Word Tokenizer (GMD)'):
                tokens = main.pybo_tokenizer_gmd.tokenize(' '.join(tokens))
            elif word_tokenizer == main.tr('pybo - Tibetan Word Tokenizer (POS)'):
                tokens = main.pybo_tokenizer_pos.tokenize(' '.join(tokens))
            elif word_tokenizer == main.tr('pybo - Tibetan Word Tokenizer (tsikchen)'):
                tokens = main.pybo_tokenizer_tsikchen.tokenize(' '.join(tokens))

            for token in tokens:
                if token.lemma:
                    lemmas.append(token.lemma)
                else:
                    lemmas.append(token.text)
        # Other Languages
        elif 'Lemmatization Lists' in lemmatizer:
            lang = wordless_conversion.to_iso_639_1(main, lang)

            with open(wordless_misc.get_abs_path(f'lemmatization/Lemmatization Lists/lemmatization-{lang}.txt'), 'r', encoding='utf_8_sig') as f:
                for line in f:
                    try:
                        lemma, word = line.rstrip().split('\t')

                        mapping_lemmas[word] = lemma
                    except:
                        pass
    else:
        lemmas = tokens

    if mapping_lemmas:
        lemmas = [mapping_lemmas.get(token, token) for token in tokens]

    # Insert empty lemmas
    for empty_offset in empty_offsets:
        lemmas.insert(empty_offset, '')

    return [lemma + tag for lemma, tag in zip(lemmas, tags)]
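# A standalone sketch of the WordNet branch above: NLTK's WordNetLemmatizer
# needs a coarse part of speech to pick the right lemma (requires the NLTK
# "wordnet" data to be downloaded once).
import nltk

# nltk.download('wordnet')  # one-time setup
word_net_lemmatizer = nltk.WordNetLemmatizer()

print(word_net_lemmatizer.lemmatize('better', pos=nltk.corpus.wordnet.ADJ))   # good
print(word_net_lemmatizer.lemmatize('running', pos=nltk.corpus.wordnet.VERB)) # run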
def wordless_get_stop_words(main, lang, list_stop_words='default'):
    if list_stop_words == 'default':
        list_stop_words = main.settings_custom['stop_words']['stop_words'][lang]

    lang_639_1 = wordless_conversion.to_iso_639_1(main, lang)

    # Chinese (Simplified)
    if lang_639_1 == 'zh_cn':
        lang_639_1 = 'zh'

    # extra-stopwords
    if 'extra-stopwords' in list_stop_words:
        LANG_TEXTS = {
            'sqi': 'albanian',
            'ara': 'arabic',
            'hye': 'armenian',
            'eus': 'basque',
            'bel': 'belarusian',
            'ben': 'bengali',
            'bul': 'bulgarian',
            'cat': 'catalan',
            'zho_cn': 'chinese',
            # Chinese (Traditional)
            'zho_tw': 'chinese-traditional',
            'hrv': 'croatian',
            'ces': 'czech',
            'dan': 'danish',
            'nld': 'dutch',
            'eng': 'english',
            'est': 'estonian',
            'fin': 'finnish',
            'fra': 'french',
            'glg': 'galician',
            'deu': 'german',
            'ell': 'greek',
            'hau': 'hausa',
            'heb': 'hebrew',
            'hin': 'hindi',
            'hun': 'hungarian',
            'isl': 'icelandic',
            'ind': 'indonesian',
            'gle': 'irish',
            'ita': 'italian',
            'jpn': 'japanese',
            'kor': 'korean',
            'kur': 'kurdish',
            'lav': 'latvian',
            'lit': 'lithuanian',
            'msa': 'malay',
            'mar': 'marathi',
            'mon': 'mongolian',
            'nep': 'nepali',
            # Norwegian Bokmål & Norwegian Nynorsk
            'nob': 'norwegian',
            'nno': 'norwegian',
            'fas': 'persian',
            'pol': 'polish',
            'por': 'portuguese',
            'ron': 'romanian',
            'rus': 'russian',
            'srp_cyrl': 'serbian-cyrillic',
            'srp_latn': 'serbian',
            'slk': 'slovak',
            'slv': 'slovenian',
            'spa': 'spanish',
            'swa': 'swahili',
            'swe': 'swedish',
            'tgl': 'tagalog',
            'tel': 'telugu',
            'tha': 'thai',
            'tur': 'turkish',
            'ukr': 'ukranian',
            'urd': 'urdu',
            'vie': 'vietnamese',
            'yor': 'yoruba'
        }

        with open(wordless_misc.get_abs_path(f'stop_words/extra-stopwords/{LANG_TEXTS[lang]}'), 'r', encoding='utf_8') as f:
            stop_words = [line.rstrip() for line in f if not line.startswith('#')]
    # NLTK
    elif 'NLTK' in list_stop_words:
        LANG_TEXTS = {
            'ara': 'arabic',
            'aze': 'azerbaijani',
            'dan': 'danish',
            'nld': 'dutch',
            'eng': 'english',
            'fin': 'finnish',
            'fra': 'french',
            'deu': 'german',
            'ell': 'greek',
            'hun': 'hungarian',
            'ind': 'indonesian',
            'ita': 'italian',
            'kaz': 'kazakh',
            'nep': 'nepali',
            # Norwegian Bokmål & Norwegian Nynorsk
            'nob': 'norwegian',
            'nno': 'norwegian',
            'por': 'portuguese',
            'ron': 'romanian',
            'rus': 'russian',
            'slv': 'slovene',
            'spa': 'spanish',
            'swe': 'swedish',
            'tgk': 'tajik',
            'tur': 'turkish'
        }

        stop_words = nltk.corpus.stopwords.words(LANG_TEXTS[lang])
    # spaCy
    elif 'spaCy' in list_stop_words:
        # Chinese (Traditional)
        if lang_639_1 == 'zh_tw':
            with open(wordless_misc.get_abs_path('stop_words/spaCy/stop_words_zh_tw.txt'), 'r', encoding='utf_8') as f:
                stop_words = [line.rstrip() for line in f]
        else:
            # Serbian (Cyrillic) & Serbian (Latin)
            if lang_639_1 == 'sr_cyrl':
                spacy_lang = importlib.import_module('spacy.lang.sr')

                stop_words = spacy_lang.STOP_WORDS
            elif lang_639_1 == 'sr_latn':
                spacy_lang = importlib.import_module('spacy.lang.sr')

                stop_words = spacy_lang.STOP_WORDS
                stop_words = wordless_text_utils.to_srp_latn(stop_words)
            else:
                spacy_lang = importlib.import_module(f'spacy.lang.{lang_639_1}')

                stop_words = spacy_lang.STOP_WORDS
    # Stopwords ISO
    elif 'Stopwords ISO' in list_stop_words:
        # Norwegian Bokmål & Norwegian Nynorsk
        if lang_639_1 in ['nb', 'nn']:
            lang_639_1 = 'no'

        # Chinese (Traditional)
        if lang_639_1 == 'zh_tw':
            with open(wordless_misc.get_abs_path('stop_words/Stopwords ISO/stop_words_zh_tw.txt'), 'r', encoding='utf_8') as f:
                stop_words = [line.rstrip() for line in f]
        else:
            with open(wordless_misc.get_abs_path('stop_words/Stopwords ISO/stopwords_iso.json'), 'r', encoding='utf_8') as f:
                stop_words = json.load(f)[lang_639_1]
    # Greek (Ancient)
    elif list_stop_words == main.tr('grk-stoplist - Greek (Ancient) Stop Words'):
        with open(wordless_misc.get_abs_path('stop_words/grk-stoplist/stoplist-greek.txt'), 'r', encoding='utf_8') as f:
            stop_words = [line.rstrip() for line in f.readlines()]
    # Thai
    elif list_stop_words == main.tr('PyThaiNLP - Thai Stop Words'):
        stop_words = pythainlp.corpus.common.thai_stopwords()
    # Custom Lists
    elif list_stop_words == main.tr('Custom List'):
        stop_words = main.settings_custom['stop_words']['custom_lists'][lang]

    # Remove empty tokens
    stop_words = [stop_word for stop_word in stop_words if stop_word]

    return sorted(set(stop_words))
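# A standalone sketch of the NLTK branch above (requires the NLTK "stopwords"
# corpus to be downloaded once).
import nltk

# nltk.download('stopwords')  # one-time setup
stop_words = nltk.corpus.stopwords.words('english')
print(stop_words[:5])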
def add_files(self):
    new_files = []

    files_detection_failed_encoding = []
    files_detection_failed_text_type = []
    files_detection_failed_lang = []

    if self.file_paths:
        len_file_paths = len(self.file_paths)

        for i, file_path in enumerate(self.file_paths):
            self.progress_updated.emit(self.tr(f'Loading files ... ({i + 1}/{len_file_paths})'))

            default_dir = wordless_checking_misc.check_dir(self.main.settings_custom['import']['temp_files']['default_path'])
            default_encoding = self.main.settings_custom['import']['temp_files']['default_encoding']

            file_path = wordless_misc.get_abs_path(file_path)
            file_name, file_ext = os.path.splitext(os.path.basename(file_path))
            file_ext = file_ext.lower()

            # Text Files
            if file_ext == '.txt':
                (new_file,
                 detection_success_encoding,
                 detection_success_text_type,
                 detection_success_lang) = self.main.wordless_files._new_file(file_path)

                new_files.append(new_file)

                if not detection_success_encoding:
                    files_detection_failed_encoding.append(new_file['path'])
                if not detection_success_text_type:
                    files_detection_failed_text_type.append(new_file['path'])
                if not detection_success_lang:
                    files_detection_failed_lang.append(new_file['path'])
            else:
                if file_ext in ['.docx', '.xlsx', '.xls']:
                    new_path = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}.txt'))

                    # Word Documents
                    if file_ext == '.docx':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            doc = docx.Document(file_path)

                            for block in self.iter_block_items(doc):
                                if type(block) == docx.text.paragraph.Paragraph:
                                    f.write(f'{block.text}\n')
                                elif type(block) == docx.table.Table:
                                    for row in self.iter_visual_cells(block):
                                        cells = []

                                        for cell in row:
                                            cells.append(' '.join([item.text for item in self.iter_cell_items(cell)]))

                                        f.write('\t'.join(cells) + '\n')
                    # Excel Workbooks
                    elif file_ext == '.xlsx':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            workbook = openpyxl.load_workbook(file_path, data_only=True)

                            for worksheet_name in workbook.sheetnames:
                                worksheet = workbook[worksheet_name]

                                for row in worksheet.rows:
                                    # Cell values may be numbers, so convert them to strings before joining
                                    f.write('\t'.join([str(cell.value) if cell.value is not None else '' for cell in row]) + '\n')
                    elif file_ext == '.xls':
                        with open(new_path, 'w', encoding=default_encoding) as f:
                            workbook = xlrd.open_workbook(file_path)

                            for i_sheet in range(workbook.nsheets):
                                worksheet = workbook.sheet_by_index(i_sheet)

                                for row in range(worksheet.nrows):
                                    # Cell values may be numbers, so convert them to strings before joining
                                    f.write('\t'.join([str(worksheet.cell_value(row, col)) for col in range(worksheet.ncols)]) + '\n')

                    new_paths = [new_path]
                else:
                    # Detect encoding
                    if self.main.settings_custom['files']['auto_detection_settings']['detect_encodings']:
                        encoding_code, _ = wordless_detection.detect_encoding(self.main, file_path)
                    else:
                        encoding_code = self.main.settings_custom['encoding_detection']['default_settings']['default_encoding']

                    # CSV Files
                    if file_ext == '.csv':
                        new_path = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w', encoding=default_encoding) as f:
                            with open(file_path, 'r', newline='', encoding=encoding_code) as f_csv:
                                csv_reader = csv.reader(f_csv)

                                for row in csv_reader:
                                    f.write('\t'.join(row) + '\n')

                        new_paths = [new_path]
                    # HTML Files
                    elif file_ext in ['.htm', '.html']:
                        with open(file_path, 'r', encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml')

                        new_path = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w', encoding=default_encoding) as f:
                            f.write(soup.get_text())

                        new_paths = [new_path]
                    # Translation Memory Files
                    elif file_ext == '.tmx':
                        lines_src = []
                        lines_target = []

                        with open(file_path, 'r', encoding=encoding_code) as f:
                            soup = bs4.BeautifulSoup(f.read(), 'lxml-xml')

                            for tu in soup.find_all('tu'):
                                seg_src, seg_target = tu.find_all('seg')

                                lines_src.append(seg_src.get_text())
                                lines_target.append(seg_target.get_text())

                        path_src = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}_source.txt'))
                        path_target = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}_target.txt'))

                        with open(path_src, 'w', encoding=default_encoding) as f:
                            f.write('\n'.join(lines_src))
                            f.write('\n')

                        with open(path_target, 'w', encoding=default_encoding) as f:
                            f.write('\n'.join(lines_target))
                            f.write('\n')

                        new_paths = [path_src, path_target]
                    # Lyrics Files
                    elif file_ext == '.lrc':
                        lyrics = {}

                        with open(file_path, 'r', encoding=encoding_code) as f:
                            for line in f:
                                time_tags = []

                                line = line.strip()

                                # Strip time tags
                                while re.search(r'^\[[^\]]+?\]', line):
                                    time_tags.append(re.search(r'^\[[^\]]+?\]', line).group())

                                    line = line[len(time_tags[-1]):].strip()

                                # Strip word time tags
                                line = re.sub(r'<[^>]+?>', r'', line)
                                line = re.sub(r'\s{2,}', r' ', line).strip()

                                # Keep only well-formed [mm:ss.xx] time tags
                                for time_tag in time_tags:
                                    if re.search(r'^\[[0-9]{2}:[0-5][0-9]\.[0-9]{2}\]$', time_tag):
                                        lyrics[time_tag] = line

                        new_path = wordless_checking_misc.check_new_path(os.path.join(default_dir, f'{file_name}.txt'))

                        with open(new_path, 'w', encoding=default_encoding) as f:
                            # Write lyrics sorted by time tag (the loop variable is
                            # renamed so it does not shadow the dict)
                            for _, lyric in sorted(lyrics.items()):
                                f.write(f'{lyric}\n')

                        new_paths = [new_path]

                for new_path in new_paths:
                    (new_file,
                     detection_success_encoding,
                     detection_success_text_type,
                     detection_success_lang) = self.main.wordless_files._new_file(new_path)

                    new_files.append(new_file)

                    if not detection_success_encoding:
                        files_detection_failed_encoding.append(new_file['path'])
                    if not detection_success_text_type:
                        files_detection_failed_text_type.append(new_file['path'])
                    if not detection_success_lang:
                        files_detection_failed_lang.append(new_file['path'])

        self.main.settings_custom['import']['files']['default_path'] = wordless_misc.get_abs_path(os.path.dirname(self.file_paths[0]))

    self.files_added.emit(new_files,
                          files_detection_failed_encoding,
                          files_detection_failed_text_type,
                          files_detection_failed_lang)
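# A standalone sketch of the .lrc parsing above: leading time tags are peeled
# off one at a time, inline word time tags are stripped, and runs of spaces
# are collapsed. The sample line is illustrative.
import re

line = '[00:12.34][00:45.67]Some <00:12.50>lyric text'
time_tags = []

while re.search(r'^\[[^\]]+?\]', line):
    time_tags.append(re.search(r'^\[[^\]]+?\]', line).group())

    line = line[len(time_tags[-1]):].strip()

line = re.sub(r'<[^>]+?>', r'', line)
line = re.sub(r'\s{2,}', r' ', line).strip()

print(time_tags)  # ['[00:12.34]', '[00:45.67]']
print(line)       # Some lyric text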
def get_path(file_name):
    return wordless_misc.get_abs_path(f'wordless_tests/files/checking/{file_name}')
def import_list(self, settings):
    files = []

    if os.path.exists(self.main.settings_custom['import'][settings]['default_path']):
        default_dir = self.main.settings_custom['import'][settings]['default_path']
    else:
        default_dir = self.main.settings_default['import'][settings]['default_path']

    file_paths = QFileDialog.getOpenFileNames(
        self.main,
        self.tr('Import from File(s)'),
        default_dir,
        self.tr('Text File (*.txt)')
    )[0]

    if file_paths:
        self.main.settings_custom['import'][settings]['default_path'] = os.path.normpath(os.path.dirname(file_paths[0]))

        # Detect encodings
        if self.main.settings_custom['import'][settings]['detect_encodings']:
            for file_path in file_paths:
                files.append({
                    'path': wordless_misc.get_abs_path(file_path),
                    'encoding': wordless_detection.detect_encoding(self.main, file_path)[0]
                })
        else:
            for file_path in file_paths:
                files.append({
                    'path': wordless_misc.get_abs_path(file_path),
                    'encoding': self.main.settings_custom['auto_detection']['default_settings']['default_encoding']
                })

        files_ok, files_empty = wordless_checking_file.check_files_empty(self.main, files)
        files_ok, files_decoding_error = wordless_checking_file.check_files_decoding_error(self.main, files_ok)

        # Extract file paths
        files_empty = [file['path'] for file in files_empty]
        files_decoding_error = [file['path'] for file in files_decoding_error]

        if files_empty or files_decoding_error:
            wordless_dialog_error.wordless_dialog_error_import(
                self.main,
                files_empty=files_empty,
                files_decoding_error=files_decoding_error
            )

            wordless_msg.wordless_msg_import_list_error(self.main)
        else:
            # Check duplicate items
            items_to_import = []
            items_cur = self.get_items()

            num_prev = len(items_cur)

            for file in files_ok:
                with open(file['path'], 'r', encoding=file['encoding']) as f:
                    for line in f:
                        line = line.strip()

                        if line not in items_cur:
                            items_to_import.append(line)

            self.load_items(collections.OrderedDict.fromkeys(items_to_import))
            self.itemChanged.emit(self.item(0))

            wordless_msg.wordless_msg_import_list_success(self.main, num_prev, len(self.get_items()))
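# A standalone sketch of the deduplication step above: OrderedDict.fromkeys
# removes duplicates while preserving the order in which items first appear.
import collections

items_to_import = ['apple', 'pear', 'apple', 'banana', 'pear']
print(list(collections.OrderedDict.fromkeys(items_to_import)))
# ['apple', 'pear', 'banana']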
def check_files_empty(main, files):
    files_empty = []
    files_ok = []

    if files:
        # Wordless files
        if type(files[0]) == dict:
            for file in files:
                file_path = file['path']

                # Text files
                if os.path.splitext(file_path)[1] in ['.txt', '.csv', '.htm', '.html', '.xml', '.tmx', '.lrc']:
                    try:
                        with open(file_path, 'r', encoding=file['encoding']) as f:
                            empty_file = True

                            for line in f:
                                if line.strip():
                                    empty_file = False

                                    break

                        if empty_file:
                            files_empty.append(file)
                        else:
                            files_ok.append(file)
                    except:
                        files_ok.append(file)
                # Other file types
                else:
                    if os.stat(file_path).st_size:
                        files_ok.append(file)
                    else:
                        files_empty.append(file)
        # File paths
        elif type(files[0]) == str:
            for file_path in files:
                file_path = wordless_misc.get_abs_path(file_path)

                # Text files
                if os.path.splitext(file_path)[1] in ['.txt', '.csv', '.htm', '.html', '.xml', '.tmx', '.lrc']:
                    if main.settings_custom['files']['auto_detection_settings']['detect_encodings']:
                        encoding, _ = wordless_detection.detect_encoding(main, file_path)
                    else:
                        encoding = main.settings_custom['auto_detection']['default_settings']['default_encoding']

                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            empty_file = True

                            for line in f:
                                if line.strip():
                                    empty_file = False

                                    break

                        if empty_file:
                            files_empty.append(file_path)
                        else:
                            files_ok.append(file_path)
                    except:
                        files_ok.append(file_path)
                # Other file types
                else:
                    if os.stat(file_path).st_size:
                        files_ok.append(file_path)
                    else:
                        files_empty.append(file_path)

    return files_ok, files_empty
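# A standalone illustration of why the check above reads text files line by
# line instead of trusting file size: a whitespace-only file has a nonzero
# size but contains no usable text.
import os
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write('   \n\t\n')

    file_path = f.name

print(os.stat(file_path).st_size)           # nonzero

with open(file_path, 'r', encoding='utf_8') as f:
    print(any(line.strip() for line in f))  # False -> treated as empty

os.remove(file_path)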