Example #1
0
    def predict(self, tupla, prediction=None, w=1):
        """Return the per-class probability mass assigned to *tupla* by this subtree.

        tupla: mapping with '<feat>.mean', '<feat>.std', '<feat>.l' and
            '<feat>.r' entries describing the feature's distribution.
        prediction: accumulator dict {class: mass}; built from the training
            classes on the root call. (Was a mutable default ``{}``; use
            ``None`` so no dict is shared across calls.)
        w: probability mass that reaches this node (1 at the root).
        """
        # Root call: start one zeroed bucket per known class.
        if not prediction:
            prediction = {c: 0.0 for c in self.data['class'].unique()}

        if self.is_leaf:
            # All mass reaching a leaf goes to the leaf's class.
            aux = deepcopy(prediction)
            aux[self.clase] += w
            return aux

        # NOTE(review): edge cases may be missing here, as handled in
        # get_menores / get_mayores.
        feature_name = self.feat_name.replace('.mean', '')
        mean = tupla[feature_name + '.mean']
        std = tupla[feature_name + '.std']
        l = tupla[feature_name + '.l']
        r = tupla[feature_name + '.r']
        pivote = self.feat_value

        # Compute the cdf once (the original evaluated it twice) and split
        # the incoming mass across both children, capped at 1.
        cum_prob = pyRF_prob.cdf(pivote, mean, std, l, r)
        w_left = min(w * cum_prob, 1)
        w_right = min(w * (1 - cum_prob), 1)

        a = self.right.predict(tupla, prediction, w_right)
        b = self.left.predict(tupla, prediction, w_left)

        # Element-wise sum of both children's dictionaries.
        return {key: a[key] + b[key] for key in a}
Example #2
0
    def predict(self, tupla, prediction=None, w=1):
        """Return the per-class probability mass assigned to *tupla* by this subtree.

        tupla: mapping with '<feat>.mean', '<feat>.std', '<feat>.l' and
            '<feat>.r' entries describing the feature's distribution.
        prediction: accumulator dict {class: mass}; built from the training
            classes on the root call. (Was a mutable default ``{}``; use
            ``None`` so no dict is shared across calls.)
        w: probability mass that reaches this node (1 at the root).
        """
        # Root call: start one zeroed bucket per known class.
        if not prediction:
            prediction = {c: 0.0 for c in self.data['class'].unique()}

        if self.is_leaf:
            # All mass reaching a leaf goes to the leaf's class.
            aux = deepcopy(prediction)
            aux[self.clase] += w
            return aux

        # NOTE(review): edge cases may be missing here, as handled in
        # get_menores / get_mayores.

        # BUG FIX: feature_name was used below without ever being defined,
        # raising NameError on any internal node. Derive it from the split
        # feature exactly as the sibling implementation does.
        feature_name = self.feat_name.replace('.mean', '')
        mean = tupla[feature_name + '.mean']
        std = tupla[feature_name + '.std']
        l = tupla[feature_name + '.l']
        r = tupla[feature_name + '.r']
        pivote = self.feat_value

        w_left = min(w * pyRF_prob.cdf(pivote, mean, std, l, r), 1)
        w_right = min(w * (1 - pyRF_prob.cdf(pivote, mean, std, l, r)), 1)

        a = self.right.predict(tupla, prediction, w_right)
        b = self.left.predict(tupla, prediction, w_left)

        # Element-wise sum of both children's dictionaries.
        return {key: a[key] + b[key] for key in a}
Example #3
0
    def test_split_at_right_border(self):
        """The cdf evaluated at the right bound must return the full mass (1)."""
        cases = [(8, 5, 1, 2, 8),
                 (5, 5, 1, 2, 5),
                 (4, 5, 1, 2, 4)]
        for pivote, mean, std, left, right in cases:
            feature_mass = pyRF_prob.cdf(pivote, mean, std, left, right)
            self.assertEqual(feature_mass, 1)
Example #4
0
    def test_split_at_right_border(self):
        """Splitting exactly at the right border leaves all mass below the pivot."""
        self.assertEqual(pyRF_prob.cdf(8, 5, 1, 2, 8), 1)
        self.assertEqual(pyRF_prob.cdf(5, 5, 1, 2, 5), 1)
        self.assertEqual(pyRF_prob.cdf(4, 5, 1, 2, 4), 1)
Example #5
0
    def test_split_at_left_border(self):
        """The cdf evaluated at the left bound must report zero accumulated mass."""
        # Argument order: pyRF_prob.cdf(pivote, mean, std, left_bound, right_bound)
        for args in ((2, 5, 1, 2, 8),
                     (5, 5, 1, 5, 8),
                     (6, 5, 1, 6, 8)):
            self.assertEqual(pyRF_prob.cdf(*args), 0)
Example #6
0
    def test_split_at_left_border(self):
        """Splitting exactly at the left border leaves no mass below the pivot."""
        # Argument order: pyRF_prob.cdf(pivote, mean, std, left_bound, right_bound)
        self.assertEqual(pyRF_prob.cdf(2, 5, 1, 2, 8), 0)
        self.assertEqual(pyRF_prob.cdf(5, 5, 1, 5, 8), 0)
        self.assertEqual(pyRF_prob.cdf(6, 5, 1, 6, 8), 0)
Example #7
0
def split_tuples_by_pivot(w_list, mean_list, std_list, left_bound_list, right_bound_list,
                          class_list, pivote):
    """Divide a group of data according to a pivot.

    Walks all the tuples (given as parallel lists) and accumulates, per
    class, the probability mass that falls on each side of the pivot.

    Returns:
        menores: dict {class: mass} for the data inferior to the pivot
        mayores: dict {class: mass} for the data superior to the pivot
    """
    # (Body re-indented to the standard 4 spaces; it was indented 8.)
    clases = set(class_list)
    menores = {c: 0.0 for c in clases}
    mayores = {c: 0.0 for c in clases}

    # Iterate the parallel lists together instead of indexing with
    # xrange(len(...)) — clearer and Python-3 compatible.
    for w, mean, std, left, right, cls in zip(w_list, mean_list, std_list,
                                              left_bound_list, right_bound_list,
                                              class_list):
        cum_prob = pyRF_prob.cdf(pivote, mean, std, left, right)

        # Numerical noise in the cdf can push the mass outside [0, 1].
        cum_prob = min(max(cum_prob, 0), 1)

        menores[cls] += w * cum_prob
        mayores[cls] += w * (1 - cum_prob)

    return menores, mayores
Example #8
0
def split_tuples_by_pivot(w_list, mean_list, std_list, left_bound_list,
                          right_bound_list, class_list, pivote):
    """Divide a group of data according to a pivot.

    Walks all the tuples (given as parallel lists) and accumulates, per
    class, the probability mass that falls on each side of the pivot.

    Returns:
        menores: dict {class: mass} for the data inferior to the pivot
        mayores: dict {class: mass} for the data superior to the pivot
    """
    clases = set(class_list)
    menores = {c: 0.0 for c in clases}
    mayores = {c: 0.0 for c in clases}

    # Iterate the parallel lists together instead of indexing with
    # xrange(len(...)) — clearer and Python-3 compatible.
    for w, mean, std, left, right, cls in zip(w_list, mean_list, std_list,
                                              left_bound_list,
                                              right_bound_list, class_list):
        cum_prob = pyRF_prob.cdf(pivote, mean, std, left, right)

        # Numerical noise in the cdf can push the mass outside [0, 1].
        cum_prob = min(max(cum_prob, 0), 1)

        menores[cls] += w * cum_prob
        mayores[cls] += w * (1 - cum_prob)

    return menores, mayores
Example #9
0
    def get_weight(self, tupla, pivote, feature_name, how):
        """Update the tuple's weight with the Gaussian mass accumulated between two bounds.

        pivote: cut value.
        how: which side of the cut the probability is computed for
                -> 'mayor': probability of falling between pivote and right_bound
                -> 'menor': probability of falling between left_bound and pivote
        """
        l_key = feature_name + '.l'
        r_key = feature_name + '.r'
        left_bound = tupla[l_key]
        right_bound = tupla[r_key]

        # Guard clause: the pivot does not intersect the tuple's support on
        # the requested side, so there is nothing to recompute.
        if (how == 'mayor' and left_bound >= pivote) or \
           (how == 'menor' and right_bound <= pivote):
            return tupla

        w = tupla['weight']
        mean = tupla[feature_name + '.mean']
        std = tupla[feature_name + '.std']

        feature_mass = pyRF_prob.cdf(pivote, mean, std, left_bound,
                                     right_bound)

        # The cdf can degenerate to NaN; fall back to all-or-nothing mass
        # depending on which side of the support the pivot lies.
        if math.isnan(feature_mass):
            beyond_right = pivote > right_bound
            if how == 'menor':
                feature_mass = 1.0 if beyond_right else 0.0
            else:
                feature_mass = 0.0 if beyond_right else 1.0

        def apply_mass(mass):
            # Weights below the minimum mass threshold are zeroed out.
            if mass >= self.min_mass_threshold:
                tupla['weight'] = min(w * mass, 1)
            else:
                tupla['weight'] = 0

        if how == 'menor':
            apply_mass(feature_mass)
            # tupla[feature_name+'.r'] = min(pivote, tupla[feature_name + '.r'])
            tupla[r_key] = pivote
            return tupla

        elif how == 'mayor':
            apply_mass(1 - feature_mass)
            # tupla[feature_name+'.l'] = max(pivote, tupla[feature_name + '.l'])
            tupla[l_key] = pivote
            return tupla
Example #10
0
    def predict(self, tupla, prediction=None, w=1.0):
        """Return the per-class probability mass assigned to *tupla* by this subtree.

        tupla: mapping with '<feat>.mean', '<feat>.std', '<feat>.l' and
            '<feat>.r' entries describing the feature's distribution.
        prediction: accumulator dict {class: mass}; built from self.classes
            on the root call. (Was a mutable default ``{}``; use ``None`` so
            no dict is shared across calls.)
        w: probability mass that reaches this node (1.0 at the root).
        """
        # Root call: start one zeroed bucket per known class.
        if not prediction:
            prediction = {c: 0.0 for c in self.classes}

        if self.is_leaf:
            # All mass reaching a leaf goes to the leaf's class.
            aux = deepcopy(prediction)
            aux[self.clase] += w
            return aux

        # NOTE(review): edge cases may be missing here, as handled in
        # get_menores / get_mayores.
        feature_name = self.feat_name.replace('.mean', '')
        mean = tupla[feature_name + '.mean']
        std = tupla[feature_name + '.std']
        l = tupla[feature_name + '.l']
        r = tupla[feature_name + '.r']
        pivote = self.feat_value

        aux_mass = pyRF_prob.cdf(pivote, mean, std, l, r)

        # The cdf can degenerate to NaN; fall back to all-or-nothing mass.
        # NOTE(review): the original flagged this fallback as "MAL FIX" —
        # it ignores the pivote < l case; confirm against pyRF_prob.
        if math.isnan(aux_mass):
            aux_mass = 1.0 if pivote > r else 0.0

        # Clamp numerical noise into [0, 1] (the original used a lambda with
        # a confusing (hi, lo, x) argument order for this).
        aux_mass = min(max(aux_mass, 0.0), 1.0)

        # (Removed a leftover Python-2 debug `print` of the two masses.)
        w_left = w * aux_mass
        w_right = w * (1.0 - aux_mass)

        a = self.right.predict(tupla, prediction, w_right)
        b = self.left.predict(tupla, prediction, w_left)

        # Element-wise sum of both children's dictionaries.
        return {key: a[key] + b[key] for key in a}
Example #11
0
    def get_weight(self, tupla, pivote, feature_name, how):
        """Update the tuple's weight with the Gaussian mass accumulated between two bounds.

        pivote: cut value.
        how: which side of the cut the probability is computed for
                -> 'mayor': probability of falling between pivote and right_bound
                -> 'menor': probability of falling between left_bound and pivote
        """
        import math  # local import: keeps this fix self-contained

        left_bound = tupla[feature_name + '.l']
        right_bound = tupla[feature_name + '.r']

        # The pivot does not intersect the tuple's support on the requested
        # side, so there is nothing to recompute.
        if left_bound >= pivote and how == 'mayor' or right_bound <= pivote and how == 'menor':
            return tupla

        else:
            w = tupla['weight']
            mean = tupla[feature_name + '.mean']
            std = tupla[feature_name + '.std']

            feature_mass = pyRF_prob.cdf(pivote, mean, std, left_bound, right_bound)

            # BUG FIX: the cdf can return NaN, which previously propagated
            # into tupla['weight']. Apply the same all-or-nothing fallback
            # used by the sibling implementation of this method.
            if math.isnan(feature_mass):
                if pivote > right_bound:
                    feature_mass = 1.0 if how == 'menor' else 0.0
                else:
                    feature_mass = 0.0 if how == 'menor' else 1.0

            if how == 'menor':
                # Weights below the minimum mass threshold are zeroed out.
                if (feature_mass >= self.min_mass_threshold):
                    tupla['weight'] = min(w * feature_mass, 1)
                else:
                    tupla['weight'] = 0
                # tupla[feature_name+'.r'] = min(pivote, tupla[feature_name + '.r'])
                tupla[feature_name + '.r'] = pivote
                return tupla

            elif how == 'mayor':
                feature_mass = 1 - feature_mass
                if (feature_mass >= self.min_mass_threshold):
                    tupla['weight'] = min(w * feature_mass, 1)
                else:
                    tupla['weight'] = 0
                # tupla[feature_name+'.l'] = max(pivote, tupla[feature_name + '.l'])
                tupla[feature_name + '.l'] = pivote
                return tupla
Example #12
0
    def split_tuples_by_pivot(self, w_list, mean_list, std_list, left_bound_list, right_bound_list,
                              class_list, pivote, menores, mayores):
        """divides a group of data according to a pivot
        It operates along all the data. And then returns two dictionaries with the total sum
        of the mass separated by class.
        Returns:
            menores: Dictionary for the data thats inferior than the pivot
            mayores: Dictionary for the data thats superior to the pivot
        """
        clip = lambda x, l, r: l if x < l else r if x > r else x

        # Este loop es fundamental paralelizarlo
        for i in xrange(len(class_list)):
            cum_prob = pyRF_prob.cdf(pivote, mean_list[i], std_list[i], left_bound_list[i],
                                     right_bound_list[i])

            cum_prob = clip(cum_prob, 0, 1)

            # En vez de agregar estas cantidades hago un submetodo que las retorne
            # Hago un map y dsp las unzipeo y las sumo segĂșn su clase
            menores[class_list[i]] += w_list[i] * cum_prob
            mayores[class_list[i]] += w_list[i] * (1 - cum_prob)

        return menores, mayores
Example #13
0
    def split_tuples_by_pivot(self, w_list, mean_list, std_list, left_bound_list, right_bound_list,
                              class_list, pivote, menores, mayores):
        """divides a group of data according to a pivot
        It operates along all the data. And then returns two dictionaries with the total sum
        of the mass separated by class.
        Returns:
            menores: Dictionary for the data thats inferior than the pivot
            mayores: Dictionary for the data thats superior to the pivot
        """
        clip = lambda x, l, r: l if x < l else r if x > r else x

        # Este loop es fundamental paralelizarlo
        for i in xrange(len(class_list)):
            cum_prob = pyRF_prob.cdf(pivote, mean_list[i], std_list[i], left_bound_list[i],
                                     right_bound_list[i])

            cum_prob = clip(cum_prob, 0, 1)

            # En vez de agregar estas cantidades hago un submetodo que las retorne
            # Hago un map y dsp las unzipeo y las sumo segĂșn su clase
            menores[class_list[i]] += w_list[i] * cum_prob
            mayores[class_list[i]] += w_list[i] * (1 - cum_prob)

        return menores, mayores