Example no. 1
    def get_attributes(self):
        """ return attributes by tornado Attributes type

            @return:
            attributes: Attributes
        """
        attributes = []
        dim_index = 0
        dataset_attributes = Dataset.get_attributes(self.dataset)

        for attr in dataset_attributes:
            for index in range(attr[1]):
                attribute = Attribute()
                attribute.set_name('{}_{}'.format(attr[0], index))
                attribute.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
                attribute.set_possible_values([])
                if self.split:
                    range_dim = self.configuration['split']['range'][
                        self.cur_data_slit][dim_index]
                else:
                    range_dim = self.configuration['all']['range'][dim_index]
                attribute.set_bounds_values(range_dim[0], range_dim[1])
                if attr[0] != 'cast':
                    attributes.append(attribute)
                dim_index += 1
        return [0, 1], attributes
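The loop above builds one numeric Attribute per dimension, named '<attribute>_<index>', and takes its bounds from the configuration's per-dimension range list. Below is a minimal stand-in for that logic, assuming a made-up configuration dict and (name, dimension count) pairs, with plain dicts in place of the Tornado Attribute objects:

# Stand-in for the naming/bounds logic above; 'configuration' and the
# (name, dimension count) pairs are made-up examples, not a real dataset.
configuration = {'all': {'range': [(0.0, 1.0), (-5.0, 5.0), (0.0, 100.0)]}}

def sketch_get_attributes(dataset_attributes, configuration):
    attributes = []
    dim_index = 0
    for name, n_dims in dataset_attributes:
        for index in range(n_dims):
            low, high = configuration['all']['range'][dim_index]
            attributes.append({'name': '{}_{}'.format(name, index),
                               'bounds': (low, high)})
            dim_index += 1
    return attributes

for a in sketch_get_attributes([('speed', 2), ('load', 1)], configuration):
    print(a)  # speed_0, speed_1, load_0 with their per-dimension bounds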
Example no. 2
    def construct_attribute(self):
        for attr in [('warning_level', self.le, [0, 1, 2])]:
            for index in range(attr[1]):
                attribute = Attribute()
                attribute.set_name('{}_{}'.format(attr[0], index))
                attribute.set_type(TornadoDic.NOMINAL_ATTRIBUTE)
                attribute.set_possible_values(attr[2])
                self.attributes.append(attribute)
Example no. 3
def construct_attribute():
    nb_property = []
    for attr in [('warning_level', 1, [1, 2, 3])]:
        for index in range(attr[1]):
            att = Attribute()
            att.set_name('{}_{}'.format(attr[0], index))
            att.set_type(TornadoDic.NOMINAL_ATTRIBUTE)
            att.set_possible_values(attr[2])
            nb_property.append(att)
    return nb_property
Example no. 4
import math
import operator
import random
from collections import OrderedDict

# Attribute, SuperClassifier and TornadoDic come from the Tornado data-stream framework.

class Perceptron(SuperClassifier):
    """This is the implementation of a single perceptron for learning from data streams."""

    LEARNER_NAME = TornadoDic.PERCEPTRON
    LEARNER_TYPE = TornadoDic.TRAINABLE
    LEARNER_CATEGORY = TornadoDic.NUM_CLASSIFIER

    __BIAS_ATTRIBUTE = Attribute()
    __BIAS_ATTRIBUTE.set_name("bias")
    __BIAS_ATTRIBUTE.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
    __BIAS_ATTRIBUTE.set_possible_values(1)

    def __init__(self, labels, attributes, learning_rate=1):
        super().__init__(labels, attributes)

        attributes.append(self.__BIAS_ATTRIBUTE)
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
        self.LEARNING_RATE = learning_rate

    def __initialize_weights(self):
        for c in self.CLASSES:
            self.WEIGHTS[c] = OrderedDict()
            for a in self.ATTRIBUTES:
                self.WEIGHTS[c][a.NAME] = 0.2 * random.random() - 0.1

    def train(self, instance):
        x = instance[0:len(instance) - 1]
        x.append(1)
        y_real = instance[len(instance) - 1]
        predictions = OrderedDict()
        for c in self.CLASSES:
            predictions[c] = self.predict(x, c)

        for c in self.CLASSES:
            actual = 1 if c == y_real else 0
            delta = (actual - predictions[c]) * predictions[c] * (1 - predictions[c])
            for i in range(0, len(instance)):
                self.WEIGHTS[c][self.ATTRIBUTES[i].NAME] += self.LEARNING_RATE * delta * x[i]
        self._IS_READY = True

    def predict(self, x, c):
        s = 0
        for i in range(0, len(x)):
            s += self.WEIGHTS[c][self.ATTRIBUTES[i].NAME] * x[i]
        p = 1 / (1 + math.exp(-s))
        return p

    def test(self, instance):
        if self._IS_READY:
            x = instance[0:len(instance) - 1]
            y = instance[len(instance) - 1]
            x.append(1)
            predictions = OrderedDict()
            for c in list(self.CLASSES):
                predictions[c] = self.predict(x, c)
            y_predicted = max(predictions.items(),
                              key=operator.itemgetter(1))[0]
            self.update_confusion_matrix(y, y_predicted)
            return y_predicted
        else:
            print("Please train a Perceptron classifier first!")
            exit()

    def get_prediction_prob_list(self, instance):

        # Just return a one-hot array

        if self._IS_READY:
            x = instance[0:len(instance) - 1]
            y = instance[len(instance) - 1]
            x.append(1)
            predictions = OrderedDict()
            for c in list(self.CLASSES):
                predictions[c] = self.predict(x, c)
            y_predicted = max(predictions.items(),
                              key=operator.itemgetter(1))[0]

            prob = []
            for i, c in enumerate(self.CLASSES):
                prob.append(int(c == y_predicted))

            return prob
        else:
            print("Please train a Perceptron classifier first!")
            exit()

    def reset(self):
        super()._reset_stats()
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
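train() applies a per-class delta rule through the logistic output: delta = (target - p) * p * (1 - p), and every weight then moves by learning_rate * delta * x[i]. A self-contained toy run of that update, with made-up two-feature data and plain lists instead of the Tornado attribute bookkeeping:

import math
import random

# Toy reproduction of the per-class update used in Perceptron.train();
# the weights and the instance are made up for illustration.
random.seed(0)
weights = {c: [0.2 * random.random() - 0.1 for _ in range(3)]  # 2 features + bias
           for c in ('a', 'b')}
learning_rate = 1.0

def predict(w, x):
    s = sum(wi * xi for wi, xi in zip(w, x))
    return 1 / (1 + math.exp(-s))

instance = [0.4, 0.7, 'a']   # two features, then the label
x = instance[:-1] + [1]      # append the bias input
y_real = instance[-1]

for c in weights:
    p = predict(weights[c], x)
    target = 1 if c == y_real else 0
    delta = (target - p) * p * (1 - p)
    for i in range(len(x)):
        weights[c][i] += learning_rate * delta * x[i]

print(weights)  # weights for class 'a' move up, weights for class 'b' move down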
Example no. 5
    def read(file_path):
        labels = []
        attributes = []
        attributes_min_max = []
        records = []
        data_flag = False
        reader = open(file_path, "r")
        for line in reader:
            
            if line.strip() == '':
                continue
            
            if line.startswith("@attribute") or line.startswith("@ATTRIBUTE"):

                line = line.strip('\n\r\t')
                line = line.split(' ')

                attribute_name = line[1]
                attribute_value_range = line[2]

                attribute = Attribute()
                attribute.set_name(attribute_name)
                if attribute_value_range.lower() in ['numeric', 'real', 'integer']:
                    attribute_type = TornadoDic.NUMERIC_ATTRIBUTE
                    attribute_value_range = []
                    attributes_min_max.append([0, 0])
                else:
                    attribute_type = TornadoDic.NOMINAL_ATTRIBUTE
                    attribute_value_range = attribute_value_range.strip('{}').replace("'", "")
                    attribute_value_range = attribute_value_range.split(',')
                    attributes_min_max.append([None, None])
                attribute.set_type(attribute_type)
                attribute.set_possible_values(attribute_value_range)

                attributes.append(attribute)

            elif line.startswith("@data") or line.startswith("@DATA"):
                data_flag = True
                labels = attributes[len(attributes) - 1].POSSIBLE_VALUES
                attributes.pop(len(attributes) - 1)
                continue

            elif data_flag is True:
                line = re.sub(r'\s+', '', line)
                elements = line.split(',')
                for i in range(0, len(elements) - 1):
                    if attributes[i].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                        elements[i] = float(elements[i])
                        min_value = attributes_min_max[i][0]
                        max_value = attributes_min_max[i][1]
                        if elements[i] < min_value:
                            min_value = elements[i]
                        elif elements[i] > max_value:
                            max_value = elements[i]
                        attributes_min_max[i] = [min_value, max_value]
                records.append(elements)
        reader.close()

        for i in range(0, len(attributes)):
            if attributes[i].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                attributes[i].set_bounds_values(attributes_min_max[i][0], attributes_min_max[i][1])

        return labels, attributes, records
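The header branch above comes down to how each @attribute line is tokenized: token 1 is the attribute name, token 2 is either a numeric keyword or a {...} value set. A framework-free sketch of just that branch, run on two invented sample lines:

# Framework-free sketch of the @attribute parsing branch above;
# the two sample lines are made up.
def sketch_parse_attribute(line):
    parts = line.strip('\n\r\t').split(' ')
    name, value_range = parts[1], parts[2]
    if value_range.lower() in ['numeric', 'real', 'integer']:
        return name, 'numeric', []
    values = value_range.strip('{}').replace("'", "").split(',')
    return name, 'nominal', values

print(sketch_parse_attribute("@attribute temperature numeric"))
# ('temperature', 'numeric', [])
print(sketch_parse_attribute("@attribute outlook {sunny,rainy}"))
# ('outlook', 'nominal', ['sunny', 'rainy'])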
Example no. 6
import random
from collections import OrderedDict

import numpy as np

# Attribute, SuperClassifier and TornadoDic come from the Tornado data-stream framework.

class Logistic(SuperClassifier):
    """basic logistic"""

    LEARNER_NAME = TornadoDic.LOGISTIC
    LEARNER_TYPE = TornadoDic.TRAINABLE
    LEARNER_CATEGORY = TornadoDic.NUM_CLASSIFIER

    __BIAS_ATTRIBUTE = Attribute()
    __BIAS_ATTRIBUTE.set_name("bias")
    __BIAS_ATTRIBUTE.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
    __BIAS_ATTRIBUTE.set_possible_values(1)

    def __init__(self, labels, attributes, learning_rate=0.8):
        super().__init__(labels, attributes)

        attributes.append(self.__BIAS_ATTRIBUTE)
        self.seen_label = labels
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
        self.LEARNING_RATE = learning_rate
        self.birthday = 0
        self.dataOfDeath = 0
        self.mse_it = 0
        self.squareErrors = 0
        self.id = 0
        random.seed(1)

    def __initialize_weights(self):
        for a in self.ATTRIBUTES:
            self.WEIGHTS[a.NAME] = 0.2 * random.random() - 0.1
            # self.WEIGHTS[a.NAME] = 1.0

    def train(self, instance, drift_status):
        x = instance[0:len(instance) - 1]
        x.append(1)
        y_real = instance[len(instance) - 1]
        prediction = self.predict(x)
        p = np.clip(prediction, 0.00001, 1-0.00001)
        err = y_real - p
        # update the weights and bias
        for i in range(len(instance)):
            self.WEIGHTS[self.ATTRIBUTES[i].NAME] += self.LEARNING_RATE * x[i] * err
        self._IS_READY = True

    def predict(self, x):
        s = 0
        for i in range(0, len(x)):
            s += self.WEIGHTS[self.ATTRIBUTES[i].NAME] * x[i]
        # print("value of s ==>", s)
        if s >= 0:
            return 1.0 / (1 + np.exp(-s))
        else:
            return np.exp(s) / (1 + np.exp(s))
        # p = 1.0 / (1 + np.exp(-s))
        # return p

    def getLoss(self, instance):
        x = instance[0:len(instance) - 1]
        y = instance[len(instance) - 1]
        x.append(1)
        y_predicted = self.predict(x)
        y_predicted = np.clip(y_predicted, 0.00001, 1-0.00001)
        loss = - y * np.log(y_predicted) - (1 - y) * np.log(1 - y_predicted)
        return loss

    def test(self, instance):
        x = instance[0:len(instance) - 1]
        x.append(1)
        y_predicted = self.predict(x)
        return [1-y_predicted, y_predicted]

    def reset(self):
        super()._reset_stats()
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
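predict() is a numerically stable sigmoid: for negative s it uses the algebraically equivalent exp(s) / (1 + exp(s)) so that np.exp is never called on a large positive argument. A small standalone check that the two forms agree and that the stable one handles extreme inputs (the test values are arbitrary):

import numpy as np

# Standalone check of the stable-sigmoid trick used in Logistic.predict();
# the test values are arbitrary.
def stable_sigmoid(s):
    if s >= 0:
        return 1.0 / (1 + np.exp(-s))
    return np.exp(s) / (1 + np.exp(s))

def naive_sigmoid(s):
    return 1.0 / (1 + np.exp(-s))

for s in [-3.0, -0.5, 0.0, 2.0]:
    assert np.isclose(stable_sigmoid(s), naive_sigmoid(s))

print(stable_sigmoid(-800.0))  # ~0.0; the naive form would overflow on np.exp(800)
print(stable_sigmoid(800.0))   # 1.0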