Beispiel #1
0
    def make_pattern(self, item_list, selected_item):
        """
        Build critique patterns that compare every item with the selected item.

        For each attribute named in the rule table, every item in ``item_list``
        is judged against ``selected_item`` as greater (+1), smaller (-1) or
        equal / not comparable (0).  The judgements are then counted for every
        single attribute and every pair of attributes, separately for the
        "+" (all greater) and "-" (all smaller) direction.

        :param item_list: items that are compared with the selected item
        :param selected_item: item that serves as the comparison baseline
        :return: list of ``self.pattern_type(key, support)`` objects for every
            pattern counted more than once, sorted ascending by their
            ``score`` attribute.  ``key`` looks like ``"+:attr"`` or
            ``"-:attr_a,attr_b"`` and ``support`` is the match count divided
            by ``len(item_list)``.
        """

        def judge_pattern(t_value, b_value):
            # Falsy values (None, 0, empty) are treated as "not comparable".
            if not (t_value and b_value):
                return 0
            if t_value > b_value:
                return 1
            if t_value < b_value:
                return -1
            return 0

        pt_keys = []
        pt_judged = {}
        for name in self.__rules.keys():
            selected_value = vector_utils.to_vector(name, [selected_item])[0]

            # an attribute the selected item does not have cannot be criticized
            if selected_value is None:
                continue

            attribute_values = vector_utils.to_vector(name, item_list)
            pt_keys.append(name)
            pt_judged[name] = [judge_pattern(a_v, selected_value) for a_v in attribute_values]

        if not pt_keys:
            return []

        length = len(item_list)

        # Single attributes first, then pairs.  The combinations and their
        # labels do not depend on the item, so they are built only once.
        combos = [
            (combi, ",".join(combi))
            for cnt in (1, 2)
            for combi in itertools.combinations(pt_keys, cnt)
        ]

        pt_counter = Counter()
        for p_index in range(length):
            for combi, label in combos:
                judged = [pt_judged[a_k][p_index] for a_k in combi]
                # a pattern matches only if all its attributes point the same way
                if all(j == 1 for j in judged):
                    pt_counter["+:" + label] += 1
                elif all(j == -1 for j in judged):
                    pt_counter["-:" + label] += 1

        # keep patterns supported by at least two items, ordered by score
        patterns = [
            self.pattern_type(key, count / length)
            for key, count in pt_counter.items()
            if count > 1
        ]
        return sorted(patterns, key=lambda p: p.score)
 def __assert_vector_integrity(self, obj_list, target_attribute, is_print=True):
     """
     Assert that ``vector_manager.to_vector`` yields one value per object and
     that each value equals ``vector_manager.to_value`` of that object's
     attribute.

     :param obj_list: objects the vector is built from
     :param target_attribute: name of the attribute that is vectorized
     :param is_print: when true, print every vector value while checking
     """
     attributes = vector_manager.to_vector(target_attribute, obj_list)
     # assertEqual instead of the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(len(obj_list), len(attributes))
     for obj, value in zip(obj_list, attributes):
         if is_print:
             print(value)
         self.assertEqual(vector_manager.to_value(getattr(obj, target_attribute)), value)
Beispiel #3
0
 def calc_near_is_better(cls, attr_name, item_list, selected_item):
     """
     Score items by how close their attribute is to the selected item's value.

     The attribute vector of ``item_list`` is normalized with the selected
     item's value passed as ``prop``; every normalized value ``v`` is then
     turned into ``1 - abs(v)``.  ``None`` entries are passed through unchanged.

     :param attr_name: name of the attribute to evaluate
     :param item_list: items to score
     :param selected_item: item whose attribute value is the reference point
     :return: list with one score (or ``None``) per normalized value
     :raises NotCalculatable: if ``selected_item`` or its attribute is falsy
     """
     if not (selected_item and getattr(selected_item, attr_name)):
         raise NotCalculatable("selected item's " + attr_name + " is None")

     item_values = vector_utils.to_vector(attr_name, item_list)
     reference = getattr(selected_item, attr_name)

     scores = []
     for offset in cls.__normalize(item_values, prop=reference):
         scores.append(None if offset is None else 1 - abs(offset))
     return scores
    def test_calc_more_is_better(self):
        """
        Check ``ItemEvaluator.calc_more_is_better`` against a min-max scaling
        of the "reviews" vector computed directly in the test.
        """
        data = self.__create_test_data()

        values = vector_manager.to_vector("reviews", data)
        present = [v for v in values if v]
        max_value = max(present)
        min_value = min(present)

        calc_returned = ItemEvaluator.calc_more_is_better("reviews", data)

        # Compare position by position so that skipped (falsy) inputs do not
        # shift the expected values against the returned ones.
        # NOTE(review): assumes the evaluator returns one entry per input item,
        # as the other calc_* methods appear to do - confirm.
        self.assertEqual(len(values), len(calc_returned))
        for value, returned in zip(values, calc_returned):
            if not value:
                continue
            expected = (value - min_value) / (max_value - min_value)
            self.assertLess(abs(expected - returned), 1e-5)
            print(expected)
    def test_calc_near_is_better(self):
        """
        Check ``ItemEvaluator.calc_near_is_better`` against a closeness score
        for the "bpm" vector computed directly in the test.
        """
        data = self.__create_test_data()

        values = vector_manager.to_vector("bpm", data)
        present = [v for v in values if v]
        max_value = max(present)
        min_value = min(present)
        selected = EvaluateItem().set_params(100, 10, datetime(2010, 4, 1, 0, 0))

        calc_returned = ItemEvaluator.calc_near_is_better("bpm", data, selected)

        # Compare position by position so that skipped (falsy) inputs do not
        # shift the expected values against the returned ones; the evaluator
        # yields one entry per normalized value, keeping None entries in place.
        self.assertEqual(len(values), len(calc_returned))
        for value, returned in zip(values, calc_returned):
            if not value:
                continue
            expected = 1 - abs(value - selected.bpm) / (max_value - min_value)
            self.assertLess(abs(expected - returned), 1e-5)
            print(expected)
Beispiel #6
0
    def calc_text_token_distance(cls, attr_name, item_list, selected_item):
        """
        Score items by how close their text tokens are to the selected item's.

        The token vectors of ``item_list`` are clustered, the selected item's
        tokens are classified against those clusters, and the distance between
        the resulting vector and every item vector is converted to a closeness
        value (``1 - log(distance)``, or 4 for a distance of exactly 0) before
        being normalized.

        :param attr_name: name of the text attribute to evaluate
        :param item_list: items to score
        :param selected_item: item whose tokens are the reference
        :return: result of ``cls.normalize`` applied to the closeness values
        :raises NotCalculatable: if ``selected_item`` or its attribute is falsy
        """
        if not (selected_item and getattr(selected_item, attr_name)):
            raise NotCalculatable("selected item's " + attr_name + " is None")

        def to_closeness(distance):
            # 4 is large enough in f(x) = 1-log(x)
            # NOTE(review): for 0 < distance < e**-3 the log branch exceeds 4 -
            # confirm that distances this small cannot occur.
            if distance == 0:
                return 4
            return 1 - math.log(distance)

        candidate_tokens = vector_utils.to_vector(attr_name, item_list)
        selected_tokens = vector_utils.to_value(getattr(selected_item, attr_name))

        clusters, item_vectors = vector_utils.make_text_clusters(candidate_tokens)
        selected_vector = vector_utils.classify_text_tokens(selected_tokens, clusters)

        distances = []
        for item_vector in item_vectors:
            distances.append(vector_utils.calc_vector_distance(selected_vector, item_vector))

        return cls.normalize([to_closeness(d) for d in distances])
Beispiel #7
0
 def calc_less_is_better(cls, attr_name, item_list, selected_item=None):
     """
     Normalize the attribute vector of ``item_list`` with
     ``normalize_value_type="max"``.

     :param attr_name: name of the attribute to evaluate
     :param item_list: items to score
     :param selected_item: not used by this method
     :return: whatever the class's private normalize helper returns for the vector
     """
     return cls.__normalize(
         vector_utils.to_vector(attr_name, item_list),
         normalize_value_type="max",
     )