def print_commons_marked(self, diff_file):
    """Print all source rows, tagging those whose word also occurs in diff_file.

    A header row is printed first, followed by every row returned by
    self._parse_src(). A row whose normalized original word is contained in
    the normalized words parsed from diff_file gets 'marked' prepended to
    its tags. Each word from diff_file marks at most one source row, because
    it is removed from the pending set on its first match. Words from
    diff_file that matched no source row are finally printed as rows tagged
    'new'.

    Keyword arguments:
    diff_file -- input handed to self._reader.parse(); its rows supply the
                 words to look for (presumably a file path -- confirm
                 against the reader implementation)
    """
    src_rows = self._parse_src()
    pending = Wordanalyzer.get_normalized_words(self._reader.parse(diff_file))

    # Take the column layout from the first row if there is one, otherwise
    # fall back to the configured '|'-separated format.
    if len(src_rows) > 0:
        header = src_rows[0].get_column_format()
    else:
        header = self._column_format.split('|')
    self.print_header_row(header)

    for entry in src_rows:
        key = Translator.normalize_word(entry.original_word)
        if key in pending:
            # Keep existing tags and put 'marked' in front of them.
            entry.tags = ('marked %s' % entry.tags) if len(entry.tags) > 0 else 'marked'
            # Only the first row with this normalized word gets marked.
            pending.remove(key)
            # NOTE(review): debug output via plain print(); confirm it is
            # still wanted alongside the CSV rows.
            print('Debug: tags = %s' % entry.tags)
        self.print_csv_row(entry)

    # Whatever is still pending never appeared in the source rows.
    for leftover in pending:
        self.print_csv_row([leftover, '', '', '', '', '', '', 'new'])
def print_new_words_es(self):
    """Print the source rows that contain at least one word missing from
    wordlist_es.WORD_COLLECTION_ES.

    For every row from self._parse_src() the original word is expanded with
    Translator.resolve_word_list() and each result is normalized. If all
    normalized words are already part of the collection, a 'Removed entry'
    message is printed instead of the row; otherwise the row is passed to
    self.print_csv_row().
    """
    for entry in self._parse_src():
        candidates = Translator.resolve_word_list(entry.original_word)
        normalized = {Translator.normalize_word(candidate) for candidate in candidates}

        # Subset test: True when every normalized word is in the collection.
        already_known = normalized <= wordlist_es.WORD_COLLECTION_ES
        if not already_known:
            self.print_csv_row(entry)
            continue

        print('Removed entry %s (Normalized: %s)' % (entry.original_word, ', '.join(normalized)))
def get_normalized_words(rows):
    """Return the set of normalized original words of the given rows.

    Keyword arguments:
    rows -- iterable of objects with an original_word attribute

    Each original_word is passed through Translator.normalize_word();
    duplicates collapse because the result is a set.
    """
    return {Translator.normalize_word(entry.original_word) for entry in rows}