def get_attributes(self):
    """
    Build the numeric tornado attributes describing the dataset dimensions.

    For every entry returned by ``Dataset.get_attributes(self.dataset)`` one
    Attribute is created per index in ``range(entry[1])`` and named
    ``<entry[0]>_<index>``.  Its bounds are read from the ``range`` table of
    the configuration: the ``split`` section (indexed by
    ``self.cur_data_slit``) when ``self.split`` is truthy, the ``all``
    section otherwise.  Attributes belonging to the entry named ``cast`` are
    built but not included in the result.

    @return: tuple ``([0, 1], attributes)`` - the constant list ``[0, 1]``
        (presumably the class labels; confirm with callers) and the list of
        Attribute objects
    """
    collected = []
    position = 0
    for entry in Dataset.get_attributes(self.dataset):
        entry_name, entry_size = entry[0], entry[1]
        for offset in range(entry_size):
            numeric = Attribute()
            numeric.set_name('{}_{}'.format(entry_name, offset))
            numeric.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
            numeric.set_possible_values([])

            # Pick the per-dimension (lower, upper) table for the active mode.
            range_table = (
                self.configuration['split']['range'][self.cur_data_slit]
                if self.split
                else self.configuration['all']['range']
            )
            lower_upper = range_table[position]
            numeric.set_bounds_values(lower_upper[0], lower_upper[1])

            if entry_name != 'cast':
                collected.append(numeric)
            # NOTE(review): the dimension counter advances for every generated
            # attribute, including skipped 'cast' ones - confirm against the
            # original layout of this method.
            position += 1
    return [0, 1], collected
def construct_attribute(self):
    """Append the nominal ``warning_level_<i>`` attributes to ``self.attributes``.

    ``self.le`` attributes are created (``i`` runs over ``range(self.le)``).
    Each one is typed as a nominal attribute and receives the same
    ``[0, 1, 2]`` list object as its possible values.
    """
    base_name = 'warning_level'
    levels = [0, 1, 2]  # one list instance shared by all created attributes
    for position in range(self.le):
        nominal = Attribute()
        nominal.set_name('{}_{}'.format(base_name, position))
        nominal.set_type(TornadoDic.NOMINAL_ATTRIBUTE)
        nominal.set_possible_values(levels)
        self.attributes.append(nominal)
def construct_attribute():
    """Return the list of nominal attributes describing the warning level.

    The list holds a single attribute named ``warning_level_0`` whose
    possible values are ``[1, 2, 3]``.
    """
    base_name, how_many, levels = 'warning_level', 1, [1, 2, 3]

    def build(position):
        # Create one nominal attribute for the given position.
        nominal = Attribute()
        nominal.set_name('{}_{}'.format(base_name, position))
        nominal.set_type(TornadoDic.NOMINAL_ATTRIBUTE)
        nominal.set_possible_values(levels)
        return nominal

    return [build(position) for position in range(how_many)]
class Perceptron(SuperClassifier):
    """This is the implementation of a single perceptron for learning from data streams.

    One sigmoid unit is kept per class label: ``WEIGHTS[c][name]`` is the
    weight that the unit of class ``c`` gives to the attribute called
    ``name``.  Instances are lists whose last element is the label; the
    remaining elements are the inputs, to which a constant ``1`` is appended
    as the bias input.
    """

    LEARNER_NAME = TornadoDic.PERCEPTRON
    LEARNER_TYPE = TornadoDic.TRAINABLE
    LEARNER_CATEGORY = TornadoDic.NUM_CLASSIFIER

    # Extra attribute paired with the constant 1 input appended to every instance.
    __BIAS_ATTRIBUTE = Attribute()
    __BIAS_ATTRIBUTE.set_name("bias")
    __BIAS_ATTRIBUTE.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
    __BIAS_ATTRIBUTE.set_possible_values(1)

    def __init__(self, labels, attributes, learning_rate=1):
        """Create the learner and draw random initial weights.

        :param labels: class labels, forwarded to the base class
        :param attributes: list of attribute descriptors; the bias attribute
            is appended to this very list
        :param learning_rate: step size applied to every weight update
        """
        super().__init__(labels, attributes)
        # NOTE(review): this mutates the caller's list.  Presumably the base
        # class keeps a reference to it as self.ATTRIBUTES, which is how the
        # bias gets a weight - confirm in SuperClassifier before changing.
        attributes.append(self.__BIAS_ATTRIBUTE)
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
        self.LEARNING_RATE = learning_rate

    def __initialize_weights(self):
        """Fill WEIGHTS with one random value in [-0.1, 0.1) per class and attribute."""
        for c in self.CLASSES:
            self.WEIGHTS[c] = OrderedDict()
            for a in self.ATTRIBUTES:
                self.WEIGHTS[c][a.NAME] = 0.2 * random.random() - 0.1

    @staticmethod
    def __inputs_of(instance):
        """Return a copy of the instance without its label, plus the constant bias input."""
        x = instance[0:len(instance) - 1]
        x.append(1)
        return x

    def __class_scores(self, x):
        """Return an OrderedDict mapping every class label to its sigmoid output for ``x``."""
        scores = OrderedDict()
        for c in self.CLASSES:
            scores[c] = self.predict(x, c)
        return scores

    def __best_label(self, instance):
        """Return the class label whose unit has the highest output for ``instance``."""
        scores = self.__class_scores(self.__inputs_of(instance))
        return max(scores.items(), key=operator.itemgetter(1))[0]

    def train(self, instance):
        """Update the weights of every class unit with one labelled instance.

        Each unit is moved towards the target 1 (its class is the real label)
        or 0 (otherwise).  The step is the error multiplied by the sigmoid
        derivative ``p * (1 - p)``, the learning rate and the input value.

        :param instance: list of input values followed by the real label
        """
        x = self.__inputs_of(instance)
        y_real = instance[len(instance) - 1]
        # All outputs are computed before any weight changes.
        predictions = self.__class_scores(x)
        for c in self.CLASSES:
            actual = 1 if c == y_real else 0
            delta = (actual - predictions[c]) * predictions[c] * (1 - predictions[c])
            for i, value in enumerate(x):
                self.WEIGHTS[c][self.ATTRIBUTES[i].NAME] += self.LEARNING_RATE * delta * value
        self._IS_READY = True

    def predict(self, x, c):
        """Return the sigmoid output of the unit of class ``c`` for inputs ``x``.

        :param x: input values including the trailing bias input
        :param c: class label selecting the weight vector
        :return: value in [0, 1]
        """
        s = 0
        for i, value in enumerate(x):
            s += self.WEIGHTS[c][self.ATTRIBUTES[i].NAME] * value
        # Only ever exponentiate a non-positive number: math.exp(-s) raises
        # OverflowError once s drops below roughly -709.
        if s >= 0:
            return 1 / (1 + math.exp(-s))
        e = math.exp(s)
        return e / (1 + e)

    def test(self, instance):
        """Predict the label of ``instance`` and record it in the confusion matrix.

        Prints a message and exits the interpreter when the learner has not
        been trained yet.

        :param instance: list of input values followed by the real label
        :return: the predicted class label
        """
        if not self._IS_READY:
            print("Please train a Perceptron classifier first!")
            exit()
        y = instance[len(instance) - 1]
        y_predicted = self.__best_label(instance)
        self.update_confusion_matrix(y, y_predicted)
        return y_predicted

    def get_prediction_prob_list(self, instance):
        """Return a one-hot list over ``self.CLASSES`` marking the predicted label.

        Prints a message and exits the interpreter when the learner has not
        been trained yet.

        :param instance: list of input values followed by the real label
        :return: list with 1 at the predicted class and 0 elsewhere
        """
        if not self._IS_READY:
            print("Please train a Perceptron classifier first!")
            exit()
        y_predicted = self.__best_label(instance)
        return [int(c == y_predicted) for c in self.CLASSES]

    def reset(self):
        """Reset the base-class statistics and draw fresh random weights."""
        super()._reset_stats()
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
def read(file_path):
    """Parse an ARFF-style text file into labels, attributes and records.

    Lines starting with ``@attribute``/``@ATTRIBUTE`` declare an attribute:
    the second space-separated field is its name and the third its type.
    ``numeric``, ``real`` and ``integer`` (any case) give a numeric
    attribute; anything else is read as a comma-separated list of nominal
    values, optionally wrapped in braces and quotes.  At the ``@data``/
    ``@DATA`` line the last declared attribute is removed from the attribute
    list and its possible values become the labels.  Every later non-blank
    line is a comma-separated record; values of numeric attributes are
    converted to ``float`` and the last element is left as read.

    :param file_path: path of the file to read
    :return: tuple ``(labels, attributes, records)``
    :raises IndexError: if ``@data`` is reached before any attribute was
        declared, or an attribute line has fewer than three fields
    :raises ValueError: if a value of a numeric attribute is not a number
    """
    labels = []
    attributes = []
    attributes_min_max = []
    records = []
    in_data_section = False

    # The context manager closes the handle even when parsing raises.
    with open(file_path, "r") as reader:
        for line in reader:
            if line.strip() == '':
                continue

            if line.startswith(("@attribute", "@ATTRIBUTE")):
                fields = line.strip('\n\r\t').split(' ')
                attribute_name = fields[1]
                attribute_value_range = fields[2]

                attribute = Attribute()
                attribute.set_name(attribute_name)
                if attribute_value_range.lower() in ['numeric', 'real', 'integer']:
                    attribute_type = TornadoDic.NUMERIC_ATTRIBUTE
                    attribute_value_range = []
                    # NOTE(review): bounds start at [0, 0], so the reported
                    # range always contains 0 even when no value does - confirm
                    # this is intended before relying on the bounds.
                    attributes_min_max.append([0, 0])
                else:
                    attribute_type = TornadoDic.NOMINAL_ATTRIBUTE
                    attribute_value_range = attribute_value_range.strip('{}').replace("'", "")
                    attribute_value_range = attribute_value_range.split(',')
                    attributes_min_max.append([None, None])
                attribute.set_type(attribute_type)
                attribute.set_possible_values(attribute_value_range)
                attributes.append(attribute)

            elif line.startswith(("@data", "@DATA")):
                in_data_section = True
                # The last declared attribute is the class: keep its values
                # as labels and drop it from the attribute list.
                labels = attributes[-1].POSSIBLE_VALUES
                attributes.pop()

            elif in_data_section:
                elements = re.sub(r'\s+', '', line).split(',')
                # The final element is the label and is never converted.
                for i in range(len(elements) - 1):
                    if attributes[i].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                        elements[i] = float(elements[i])
                        min_value, max_value = attributes_min_max[i]
                        if elements[i] < min_value:
                            min_value = elements[i]
                        elif elements[i] > max_value:
                            max_value = elements[i]
                        attributes_min_max[i] = [min_value, max_value]
                records.append(elements)

    # zip stops at the shorter list, i.e. the attributes left after the
    # class attribute was removed.
    for attribute, (min_value, max_value) in zip(attributes, attributes_min_max):
        if attribute.TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
            attribute.set_bounds_values(min_value, max_value)

    return labels, attributes, records
class Logistic(SuperClassifier):
    """Online logistic learner with a single weight per attribute.

    ``WEIGHTS[name]`` is the weight of the attribute called ``name``.
    Instances are lists whose last element is the target; the remaining
    elements are the inputs, to which a constant ``1`` is appended as the
    bias input.
    """

    LEARNER_NAME = TornadoDic.LOGISTIC
    LEARNER_TYPE = TornadoDic.TRAINABLE
    LEARNER_CATEGORY = TornadoDic.NUM_CLASSIFIER

    # Extra attribute paired with the constant 1 input appended to every instance.
    __BIAS_ATTRIBUTE = Attribute()
    __BIAS_ATTRIBUTE.set_name("bias")
    __BIAS_ATTRIBUTE.set_type(TornadoDic.NUMERIC_ATTRIBUTE)
    __BIAS_ATTRIBUTE.set_possible_values(1)

    def __init__(self, labels, attributes, learning_rate=0.8):
        """Create the learner, draw random initial weights and reseed ``random``.

        :param labels: class labels, forwarded to the base class and kept as
            ``seen_label``
        :param attributes: list of attribute descriptors; the bias attribute
            is appended to this very list
        :param learning_rate: step size applied to every weight update
        """
        super().__init__(labels, attributes)
        attributes.append(self.__BIAS_ATTRIBUTE)
        self.seen_label = labels
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()
        self.LEARNING_RATE = learning_rate
        # Bookkeeping counters, all starting at zero.
        self.birthday = self.dataOfDeath = self.mse_it = self.squareErrors = self.id = 0
        # NOTE(review): the global generator is seeded only after this
        # instance's weights were drawn - confirm the order is intended.
        random.seed(1)

    def __initialize_weights(self):
        """Give every attribute a random weight in [-0.1, 0.1)."""
        for descriptor in self.ATTRIBUTES:
            self.WEIGHTS[descriptor.NAME] = 0.2 * random.random() - 0.1

    def train(self, instance, drift_status):
        """Update all weights with one labelled instance.

        Each weight moves by ``learning rate * input * (target - p)``, where
        ``p`` is the prediction clipped to [0.00001, 0.99999].

        :param instance: list of input values followed by the target
        :param drift_status: accepted but not used by this method
        """
        features = instance[:-1]
        features.append(1)
        target = instance[-1]
        clipped = np.clip(self.predict(features), 0.00001, 1-0.00001)
        residual = target - clipped
        # One update per input, bias included.
        for position, value in enumerate(features):
            self.WEIGHTS[self.ATTRIBUTES[position].NAME] += self.LEARNING_RATE * value * residual
        self._IS_READY = True

    def predict(self, x):
        """Return the sigmoid of the weighted sum of ``x``.

        :param x: input values including the trailing bias input
        :return: value in [0, 1]
        """
        activation = 0
        for position, value in enumerate(x):
            activation += self.WEIGHTS[self.ATTRIBUTES[position].NAME] * value
        # Two algebraically equal forms, chosen so the exponent is never positive.
        if activation >= 0:
            return 1.0 / (1 + np.exp(-activation))
        grown = np.exp(activation)
        return grown / (1 + grown)

    def getLoss(self, instance):
        """Return the log loss of the prediction for ``instance``.

        The prediction is clipped to [0.00001, 0.99999] before the logarithms
        are taken.

        :param instance: list of input values followed by the target
        :return: ``-y * log(p) - (1 - y) * log(1 - p)``
        """
        features = instance[:-1]
        target = instance[-1]
        features.append(1)
        clipped = np.clip(self.predict(features), 0.00001, 1-0.00001)
        return - target * np.log(clipped) - (1 - target) * np.log(1 - clipped)

    def test(self, instance):
        """Return ``[1 - p, p]`` where ``p`` is the prediction for ``instance``.

        :param instance: list of input values followed by the target
        """
        features = instance[:-1]
        features.append(1)
        positive = self.predict(features)
        return [1-positive, positive]

    def reset(self):
        """Reset the base-class statistics and draw fresh random weights."""
        super()._reset_stats()
        self.WEIGHTS = OrderedDict()
        self.__initialize_weights()