def handle_text(self, line_delimiter="\n"):
    """Convert ``self.text`` into formatted output lines and return them.

    The field delimiter is detected with ``helpers.find_delimiter`` and,
    when that yields a falsy value, with
    ``helpers.find_delimiter_euristic``; the result is stored in
    ``self.delimiter``.  Each line of ``self.text`` that passes
    ``helpers.is_string_good`` is cleaned with ``helpers.handle_line`` and
    split on the delimiter, and its first two fields are substituted into
    the template returned by ``helpers.handle_words_or_word``.

    Args:
        line_delimiter: Separator used to split ``self.text`` into lines.

    Returns:
        The concatenation of all non-empty formatted lines, each followed
        by ``"\\n"``.  The same string is stored in ``self.handled_text``.
    """
    self.delimiter = (
        helpers.find_delimiter(self.text)
        or helpers.find_delimiter_euristic(self.text)
    )

    # The output is assembled here, one formatted entry per accepted line.
    formatted_lines = []
    for raw_line in self.text.split(line_delimiter):
        if not helpers.is_string_good(raw_line, self.delimiter):
            continue

        cleaned = helpers.handle_line(raw_line)
        words = self.split_string_to_words(cleaned)

        try:
            english, russian = cleaned.split(self.delimiter)[:2]
        except ValueError:
            # The line could not be split into two fields: report and skip.
            logging.error("problem with delimiter: {} in line:{}".format(self.delimiter, cleaned))
            continue

        # The two fields may arrive in the opposite order; swap them so the
        # template receives them in (eng, rus) order.
        if helpers.is_english(russian) or helpers.is_russian(english):
            english, russian = russian, english

        template = helpers.handle_words_or_word(words)
        formatted = template.format(eng=english, rus=russian, tab="\t")
        if formatted:
            formatted_lines.append(formatted + "\n")

    result_text = "".join(formatted_lines)
    self.handled_text = result_text
    return result_text
def split_string_to_words(self, line):
    """Return the regexp matches found in *line*, minus the popular words.

    Args:
        line: Text to scan.

    Returns:
        A list of every match of ``self.left_part_regexp`` in *line* that is
        not contained in ``self.set_of_popular_words``, in match order.  An
        empty list is returned when
        ``helpers.is_string_good(line, self.delimiter)`` is false.
    """
    if not helpers.is_string_good(line, self.delimiter):
        return []

    kept = []
    for candidate in self.left_part_regexp.findall(line):
        if candidate in self.set_of_popular_words:
            continue
        kept.append(candidate)
    return kept