def calc_text_token_distance(cls, attr_name, item_list, selected_item):
    """Score every item by how close its text tokens are to the selected item's.

    The ``attr_name`` values of ``item_list`` are turned into token data with
    ``vector_utils.to_vector`` and clustered with
    ``vector_utils.make_text_clusters``. The selected item's tokens are then
    classified against those clusters, and the distance from that target
    vector to each item vector is converted to an inverse-distance score
    ``1 - log(d)``.

    Args:
        attr_name: Name of the attribute holding the text to compare.
        item_list: Items to score; passed to ``vector_utils.to_vector``.
        selected_item: Reference item whose ``attr_name`` value is compared
            against every entry in ``item_list``.

    Returns:
        Whatever ``cls.normalize`` returns for the list of inverse-distance
        scores (one score per vector produced by ``make_text_clusters``).

    Raises:
        NotCalculatable: If ``selected_item`` is falsy or its ``attr_name``
            attribute is falsy (None, empty string, ...).
    """
    # Read the attribute once; a falsy selected_item short-circuits the lookup.
    attr_value = getattr(selected_item, attr_name) if selected_item else None
    if not attr_value:
        raise NotCalculatable("selected item's " + attr_name + " is None")

    item_tokens = vector_utils.to_vector(attr_name, item_list)
    tokens = vector_utils.to_value(attr_value)
    clusters, vectors = vector_utils.make_text_clusters(item_tokens)
    target_vector = vector_utils.classify_text_tokens(tokens, clusters)
    distances = [vector_utils.calc_vector_distance(target_vector, v) for v in vectors]

    # log(0) is undefined, so an exact match gets a fixed score of 4.
    # 1 - log(d) exceeds 4 once d < e**-3 (~0.0498); clamp so that a
    # near match can never outrank an exact match.
    zero_distance_score = 4
    inv_distance = [
        zero_distance_score if d == 0 else min(zero_distance_score, 1 - math.log(d))
        for d in distances
    ]
    return cls.normalize(inv_distance)
def test_classify_text_token(self):
    """Run ``vector_utils.make_text_clusters`` on a small fixture and print it.

    The fixture holds three token lists, two of which differ only in the
    case of their first token ("AAA" vs "aaa"). No assertions are made;
    the result is printed for manual inspection.
    """
    token_lists = [
        ["AAA", "BBB", "CCC"],
        ["AAA", "ZZZ"],
        ["aaa", "BBB", "CCC"],
    ]
    result = vector_utils.make_text_clusters(token_lists)
    print(result)