invalid_txt = Path(target_invalid_path).open(mode='w', encoding='utf8', errors='ignore') valid_txt = Path(target_valid_path).open(mode='w', encoding='utf8', errors='ignore') with Path(source_path).open(mode='r', encoding='utf8', errors='ignore') as sample_txt: count_total = 0 count = 0 for idx, line in enumerate(sample_txt): line = line.strip() if len(line) > 0: # if not is_valid(line.replace(' ', '')) and contains_invalid_unicode_characters(line.replace(' ', '')): if contains_invalid_unicode_characters(line.replace(' ', '')): invalid_txt.write(line + '\n') count += 1 if count % 100000 == 0: print( 'Wrote %d lines in invalid file from a total of %d lines' % (count, count_total)) else: valid_txt.write(line + '\n') count_total += 1 print('Wrote %d lines in invalid file from a total of %d lines' % (count, count_total)) invalid_txt.close() valid_txt.close() sample_txt.close()