def trapezoidalExperimentOLSF(input_dataset, dataset_name):
    """Average the OLSF stream error over several shuffled trapezoidal runs.

    Every round re-seeds ``random`` with ``parameters.seed``, shuffles
    ``input_dataset`` in place, sends a deep copy through
    ``preprocess2.removeDataTrapezoidal`` and trains a new
    ``olsf.classifier`` on the result.

    Args:
        input_dataset: Sequence of instances. It is shuffled in place on
            every round, so the caller's ordering is not preserved.
        dataset_name: Name shown in the progress message.

    Returns:
        The element-wise sum of the per-round stream errors divided by
        ``parameters.rounds``.
    """

    def fresh_trapezoidal_stream():
        # The removal routine always receives a deep copy, never the
        # caller's list itself.
        snapshot = copy.deepcopy(input_dataset)
        return preprocess2.removeDataTrapezoidal(snapshot)

    print("Trapezoidal experiment with OLSF: " + str(dataset_name))
    summed_errors = np.zeros(len(input_dataset))
    # Only the disabled plotFeatures call below would consume this.
    feature_summary = list(map(len, fresh_trapezoidal_stream()))

    for round_no in range(parameters.rounds):
        print("Round: " + str(round_no))
        random.seed(parameters.seed)
        random.shuffle(input_dataset)
        model = olsf.classifier(fresh_trapezoidal_stream(), [])
        outcome = model.train()
        classifier_summary, round_error = outcome
        summed_errors = np.add(summed_errors, round_error)

    mean_errors = np.divide(summed_errors, parameters.rounds)
    # Plotting is currently disabled:
    # misc.plotError(mean_errors, dataset_name)
    # misc.plotFeatures(feature_summary, dataset_name)
    # misc.plotClassifierDimension(classifier_summary, dataset_name)
    print(model.weight_dict)  # weights of the classifier from the last round
    return mean_errors
def preprocessData(data, mode='variable'):
    """Convert sparse instance dicts into dense feature rows and labels.

    The data is deep-copied, shuffled with a fixed seed (50) and passed
    through one of the ``preprocess2`` removal routines. Every feature key
    seen in any resulting row becomes a column; rows that lack a key get 0
    in that column.

    Args:
        data: Sequence of instances. Each instance is assumed to be a dict
            mapping feature keys to values plus a ``'class_label'`` entry
            (TODO confirm against ``preprocess2``). It is not modified.
        mode: ``'trapezoidal'`` to use ``preprocess2.removeDataTrapezoidal``
            or ``'variable'`` to use ``preprocess2.removeRandomData``.

    Returns:
        A tuple ``(X, y)``: ``X`` is a list of feature rows whose columns are
        ordered by sorted feature key, and ``y`` is the list of class
        labels (0 for a row without one).

    Raises:
        ValueError: If ``mode`` is not one of the two supported values.
        KeyError: If no row carries a ``'class_label'`` entry.
    """
    # Reject an unknown mode up front; otherwise no dataset would be built
    # and the failure would surface later as an unbound local.
    if mode not in ('trapezoidal', 'variable'):
        raise ValueError(
            "mode must be 'trapezoidal' or 'variable', got %r" % (mode,))

    random.seed(50)
    shuffled = copy.deepcopy(data)
    random.shuffle(shuffled)
    if mode == 'trapezoidal':
        dataset = preprocess2.removeDataTrapezoidal(shuffled)
    else:
        dataset = preprocess2.removeRandomData(shuffled)

    dataset = list(dataset)
    if not dataset:
        # Nothing to convert; there is no row to derive columns from.
        return [], []

    all_keys = set().union(*(row.keys() for row in dataset))
    if 'class_label' not in all_keys:
        raise KeyError('class_label')

    # Take the columns from the keys themselves rather than from
    # range(start, len(row)): that range stops one short when keys are
    # 1-based and breaks on gaps. Assumes feature keys are mutually
    # orderable (e.g. all ints).
    feature_keys = sorted(all_keys - {'class_label'})

    X = [[row.get(key, 0) for key in feature_keys] for row in dataset]
    y = [row.get('class_label', 0) for row in dataset]
    return X, y
def train(self, training_set, dataset_name, mode):
    """Train a scikit-learn online baseline on trapezoidal data streams.

    For each of ``parameters.rounds`` rounds the data is deep-copied,
    shuffled, passed through ``preprocess2.removeDataTrapezoidal`` and
    handed to ``self.data_preprocessor`` (expected to populate ``self.X``
    and ``self.y``). The stream is then processed one instance at a time:
    predict, count a mistake if the prediction is wrong, then update both
    classifiers with ``partial_fit``.

    Args:
        training_set: Sequence of instances; ``len(training_set[0]) - 1``
            is used as the number of features.
        dataset_name: Passed to ``misc.plotError``.
        mode: One of ``'ASGD'``, ``'Perceptron'``, ``'PA1'`` or ``'PA2'``.
            Any other value leaves ``self.classifier`` and
            ``self.classifier2`` as they were before the call.

    Returns:
        The running error rate (mistakes so far / instances seen) of the
        last round, as a list with one entry per instance.
    """
    self.data = training_set

    if mode == 'ASGD':
        self.classifier = SGDClassifier(average=True, max_iter=1,
                                        penalty='elasticnet', l1_ratio=0.5)
        self.classifier2 = SGDClassifier(average=False, max_iter=1,
                                         penalty='elasticnet', l1_ratio=0.5)
    elif mode == 'Perceptron':
        self.classifier = Perceptron(max_iter=1)
        # Same pattern as the PA branches: the second model starts from the
        # same estimator and becomes independent once cloned below.
        self.classifier2 = self.classifier
    elif mode == 'PA1':
        # C has to be positive; 1.0 matches the PA2 configuration.
        self.classifier = PassiveAggressiveClassifier(loss='hinge', C=1.0,
                                                      max_iter=1)
        self.classifier2 = self.classifier
    elif mode == 'PA2':
        self.classifier = PassiveAggressiveClassifier(loss='squared_hinge',
                                                      C=1.0, max_iter=1)
        self.classifier2 = self.classifier

    # Assumes each instance has exactly one non-feature entry (presumably
    # the class label) -- TODO confirm.
    n_features = len(self.data[0]) - 1
    init = np.zeros(n_features).reshape(1, -1)

    for _round in range(0, parameters.rounds):
        train_error_vector = []
        iterations = 0
        train_error = 0

        copydata = copy.deepcopy(self.data)
        random.shuffle(copydata)
        # NOTE(review): the original comment here read "or trapezoidal";
        # presumably another preprocess2 removal routine can be swapped in.
        self.data_preprocessor(preprocess2.removeDataTrapezoidal(copydata))

        # Start every round from unfitted copies of the configured models.
        self.classifier = clone(self.classifier)
        self.classifier2 = clone(self.classifier2)

        classes = np.unique(self.y)
        # Prime both models on an all-zero sample so that coef_ exists
        # before the first prediction. The negated first label presumably
        # assumes +/-1 labels -- TODO confirm.
        self.classifier.partial_fit(init, [-self.y[0]], classes)
        self.classifier2.partial_fit(init, [-self.y[0]], classes)

        # NOTE(review): this accumulator is reset every round and its
        # average is never returned or plotted; only the last round's
        # curve leaves this method. Left as is to keep the return value.
        total_error_vector = np.zeros(len(self.y))
        self.variance_vector = [np.ones(len(training_set[0]) - 1)]
        self.average_vector = [np.zeros(len(training_set[0]) - 1)]

        for idx in range(0, len(self.y)):
            row = [self.X[idx]]
            label = self.y[idx]
            iterations = idx + 1

            # Predict with the coefficients produced by update_metadata,
            # then restore the learned ones before the actual update.
            old = self.classifier.coef_
            self.classifier.coef_ = self.update_metadata(iterations)
            result = self.classifier.predict(row)
            self.classifier.coef_ = old

            if result[0] != label:
                train_error += 1

            self.classifier.partial_fit(row, [label], classes)
            self.classifier2.partial_fit(row, [label], classes)
            train_error_vector.append(train_error / iterations)

        total_error_vector = np.add(train_error_vector, total_error_vector)

    total_error_vector = np.divide(total_error_vector, parameters.rounds)
    # Plot every 50th point of the last round's error curve.
    misc.plotError(train_error_vector[0::50], dataset_name)
    return train_error_vector