def thai_len_2(self, values, eng_fn_len):
    """Build the (title, first name, last name) fields from two-word readings.

    Three attempts are made, in order:

    1. The first words are matched against the 'tha' titles; on a match the
       most frequent second word is returned as the first name.
    2. The second words are matched against the titles (the readings may be
       badly cropped); on a match only the title is returned.
    3. A title embedded in the first word is searched for, and the most
       frequent value word-by-word is used as the name.

    Args:
        values: Candidate readings handed to the ``utils`` helpers.
            NOTE(review): presumably a list of OCR strings -- confirm.
        eng_fn_len: When greater than 1 the whole value is returned as the
            first name; otherwise the value is split into first/last name.
            NOTE(review): presumably the word count of the English
            first-name field -- confirm with the caller.

    Returns:
        A 3-tuple ``(title_field, firstname_field, lastname_field)`` where
        each element is either a ``(text, score)`` pair or ``EMPTY_FIELD``.

    Raises:
        ValueError: In the final fallback, if the chosen value does not
            split into exactly two whitespace-separated words.
    """
    leading_words = utils.map_to_words(values, i=0)
    title, sim = self.choose_title(leading_words, 'tha')
    if sim > TITLE_MIN_SIM:
        # Title + firstname
        name_candidates = utils.map_to_words(values, i=1)
        firstname, ratio = utils.most_frequent(name_candidates, with_ratio=True)
        return ((title, sim), (firstname, ratio), EMPTY_FIELD)

    # Look at the second words, in case of bad cropping
    trailing_words = utils.map_to_words(values, i=1)
    title, sim = self.choose_title(trailing_words, 'tha')
    if sim > TITLE_MIN_SIM:
        return ((title, sim), EMPTY_FIELD, EMPTY_FIELD)

    # Look whether a title is included in the first word
    title, stripped_values = self.find_included_title(values, 'tha')
    if title != '':
        values = stripped_values
    value, ratio = utils.most_frequent_by_word(values)

    # An embedded title is reported with the threshold itself as its score.
    title_field = (title, TITLE_MIN_SIM)
    if eng_fn_len > 1:
        return (title_field, (value, ratio), EMPTY_FIELD)

    # Strict two-way unpack: fails unless there are exactly two words.
    fn, ln = value.split()
    return (title_field, (fn, ratio), (ln, ratio))
def thai_len_1(self, values):
    """Classify single-word readings as either a title or a name.

    Args:
        values: Candidate readings passed to ``choose_title`` and
            ``utils.most_frequent``.

    Returns:
        A 3-tuple of fields. If the best 'tha' title similarity exceeds
        ``TITLE_MIN_SIM`` only the first field is filled with
        ``(title, similarity)``; otherwise only the second field is filled
        with the most frequent value and its ratio. Unfilled positions hold
        ``EMPTY_FIELD``.
    """
    best_title, score = self.choose_title(values, 'tha')
    if score > TITLE_MIN_SIM:
        title_field = (best_title, score)
        return (title_field, EMPTY_FIELD, EMPTY_FIELD)

    # Not a title: fall back to the most common reading as the name.
    top_value, top_ratio = utils.most_frequent(values, with_ratio=True)
    name_field = (top_value, top_ratio)
    return (EMPTY_FIELD, name_field, EMPTY_FIELD)
def eng_fn_len_1(self, values):
    """Classify single-word English readings as either a title or a name.

    Args:
        values: Candidate readings passed to ``choose_title`` and
            ``utils.most_frequent``.

    Returns:
        A 2-tuple ``(title_field, name_field)``. If the best 'eng' title
        similarity exceeds ``TITLE_MIN_SIM`` the result is
        ``((title, similarity), EMPTY_FIELD)``. Otherwise it is
        ``(EMPTY_FIELD, (name, ratio))`` where ``name`` is the most frequent
        value with short words removed.
    """
    best_title, score = self.choose_title(values, 'eng')
    if score > TITLE_MIN_SIM:
        return ((best_title, score), EMPTY_FIELD)

    raw_name, ratio = utils.most_frequent(values, with_ratio=True)
    # Keep words longer than two characters, plus the literal word 'Na'.
    kept_words = [
        word for word in raw_name.split()
        if len(word) > 2 or word == 'Na'
    ]
    return (EMPTY_FIELD, (' '.join(kept_words), ratio))
def eng_extract_lastname(self, eng_fn_img):
    """Extract a name and its ratio from an image of an English field.

    Args:
        eng_fn_img: Input forwarded unchanged to
            ``eng_get_clean_correct_values``.
            NOTE(review): presumably an image crop -- confirm.

    Returns:
        ``EMPTY_FIELD`` when the reported length is 0; otherwise a
        ``(name, ratio)`` pair. The name comes from
        ``utils.most_frequent`` when the length is 1 and from
        ``utils.most_frequent_by_word`` for any other length, and has its
        short words removed.
    """
    values, length = self.eng_get_clean_correct_values(eng_fn_img)
    if length == 0:
        return EMPTY_FIELD

    if length == 1:
        raw_name, ratio = utils.most_frequent(values, with_ratio=True)
    else:
        raw_name, ratio = utils.most_frequent_by_word(values)

    # Keep words longer than two characters, plus the literal word 'Na'.
    kept_words = [
        word for word in raw_name.split()
        if len(word) > 2 or word == 'Na'
    ]
    return ' '.join(kept_words), ratio
def eng_fn_len_2_more(self, values, length):
    """Split multi-word English readings into a title and a name.

    The first word of each reading is matched against the 'eng' titles and
    the remaining words (from index 1 on) form the name.

    Args:
        values: Candidate readings passed to ``utils.map_to_words``.
        length: When exactly 2, the name is picked with
            ``utils.most_frequent``; for any other value it is picked with
            ``utils.most_frequent_by_word``.

    Returns:
        ``((title, similarity), (name, ratio))``. The title is returned with
        its similarity as-is; it is not compared against ``TITLE_MIN_SIM``
        in this method. Short words are removed from the name.
    """
    leading_words = utils.map_to_words(values, i=0)
    title, sim = self.choose_title(leading_words, 'eng')

    remaining = utils.map_to_words(values, start=1)
    if length == 2:
        raw_name, ratio = utils.most_frequent(remaining, with_ratio=True)
    else:
        raw_name, ratio = utils.most_frequent_by_word(remaining)

    # Keep words longer than two characters, plus the literal word 'Na'.
    kept_words = [
        word for word in raw_name.split()
        if len(word) > 2 or word == 'Na'
    ]
    return ((title, sim), (' '.join(kept_words), ratio))