Code example #1
0
File: experiment.py  Project: CopperWasp/OLVF
def trapezoidalExperimentOLSF(input_dataset, dataset_name):
    """Run OLSF on a trapezoidal feature stream and return the per-step
    error vector averaged over parameters.rounds independent rounds."""
    print("Trapezoidal experiment with OLSF: " + str(dataset_name))
    cumulative_errors = np.zeros(len(input_dataset))
    # Per-row feature counts of the trapezoidal stream (kept for the
    # commented-out plotting calls below).
    feature_summary = []
    for stream_row in preprocess2.removeDataTrapezoidal(copy.deepcopy(input_dataset)):
        feature_summary.append(len(stream_row))
    for round_index in range(parameters.rounds):
        print("Round: " + str(round_index))
        # Re-seeding each round means every round sees the same shuffle order.
        random.seed(parameters.seed)
        random.shuffle(input_dataset)
        stream = preprocess2.removeDataTrapezoidal(copy.deepcopy(input_dataset))
        current_classifier = olsf.classifier(stream, [])
        classifier_summary, stream_error = current_classifier.train()
        cumulative_errors = np.add(cumulative_errors, stream_error)
    average_error_vector = np.divide(cumulative_errors, parameters.rounds)
    #misc.plotError(average_error_vector, dataset_name)
    #misc.plotFeatures(feature_summary, dataset_name)
    #misc.plotClassifierDimension(classifier_summary, dataset_name)
    print(current_classifier.weight_dict)
    return average_error_vector
Code example #2
0
File: olsf_vectorbased.py  Project: CopperWasp/OLVF
def preprocessData(data, mode='variable'):
    """Convert a list of {feature_index: value, ..., 'class_label': y} dicts
    into a dense feature matrix X and a label list y.

    Rows missing a feature receive 0 for it, so every X row has the same
    length. The row order follows a seeded shuffle of a deep copy of *data*
    (the input is not mutated).

    Parameters:
        data: list of dicts with integer feature keys plus 'class_label'.
        mode: 'trapezoidal' or 'variable' — which preprocess2 feature-removal
              scheme to apply. Any other value raises NameError (unchanged
              from the original behavior).

    Returns:
        (X, y): list of equal-length feature rows, list of labels.
    """
    random.seed(50)  # fixed seed so the shuffle/removal is reproducible
    copydata = copy.deepcopy(data)
    random.shuffle(copydata)
    if mode == 'trapezoidal': dataset = preprocess2.removeDataTrapezoidal(copydata)
    if mode == 'variable': dataset = preprocess2.removeRandomData(copydata)
    # Union of every key seen in any row; used to zero-fill missing features.
    all_keys = set().union(*(d.keys() for d in dataset))
    X, y = [], []
    for row in dataset:
        for key in all_keys:
            if key not in row:
                row[key] = 0
        y.append(row['class_label'])
        del row['class_label']
    # BUG FIX: the original derived a start index from the loop variable
    # leaking out of the previous loop, then iterated range(start, len(row)).
    # When feature keys start at 1 (keys 1..N, len(row) == N) that dropped the
    # last feature. Iterating the sorted feature keys is correct for either
    # 0-based or 1-based indexing.
    feature_keys = sorted(all_keys - {'class_label'})
    for row in dataset:
        X.append([row[k] for k in feature_keys])
    return X, y
Code example #3
0
File: asgd_based_olvf.py  Project: CopperWasp/OLVF
    def train(self, training_set, dataset_name,
              mode):  #uses self.training_dataset
        """Train an online baseline classifier on a trapezoidal stream and
        return the running error-rate curve averaged over parameters.rounds.

        Parameters:
            training_set: list of rows; len(row) - 1 is the feature count.
            dataset_name: label passed to misc.plotError.
            mode: one of 'ASGD', 'Perceptron', 'PA1', 'PA2'.

        Returns:
            numpy array of cumulative error rates (one entry per stream step),
            averaged over all rounds.
        """
        self.data = training_set
        if mode == 'ASGD':
            self.classifier = SGDClassifier(average=True,
                                            max_iter=1,
                                            penalty='elasticnet',
                                            l1_ratio=0.5)
            self.classifier2 = SGDClassifier(average=False,
                                             max_iter=1,
                                             penalty='elasticnet',
                                             l1_ratio=0.5)
        elif mode == 'Perceptron':
            self.classifier = Perceptron(max_iter=1)
            # BUG FIX: was `self.classifiers` — an undefined attribute that
            # raised AttributeError whenever mode == 'Perceptron'.
            self.classifier2 = self.classifier
        elif mode == 'PA1':
            # BUG FIX: the aggressiveness parameter C must be positive; the
            # original C=-1.0 was a typo (cf. PA2 below, which uses C=1.0).
            self.classifier = PassiveAggressiveClassifier(loss='hinge',
                                                          C=1.0,
                                                          max_iter=1)
            self.classifier2 = self.classifier
        elif mode == 'PA2':
            self.classifier = PassiveAggressiveClassifier(loss='squared_hinge',
                                                          C=1.0,
                                                          max_iter=1)
            self.classifier2 = self.classifier

        init = np.zeros(len(self.data[0]) - 1).reshape(1, -1)
        # BUG FIX: the accumulator must survive across rounds; it was
        # re-zeroed inside the round loop, so only the last round was summed.
        total_error_vector = None
        for round_idx in range(parameters.rounds):
            train_error_vector = []
            iterations = 0
            train_error = 0
            copydata = copy.deepcopy(self.data)
            random.shuffle(copydata)
            self.data_preprocessor(
                preprocess2.removeDataTrapezoidal(copydata))  #or trapezoidal
            # Fresh, unfitted estimators with identical hyperparameters.
            self.classifier = clone(self.classifier)
            self.classifier2 = clone(self.classifier2)
            # Warm-start partial_fit so every class is registered up front.
            # NOTE(review): the dummy label -self.y[0] presumably supplies the
            # opposite class of a +/-1 binary problem — confirm.
            self.classifier.partial_fit(init, [-self.y[0]], np.unique(self.y))
            self.classifier2.partial_fit(init, [-self.y[0]], np.unique(self.y))
            if total_error_vector is None:
                total_error_vector = np.zeros(len(self.y))
            self.variance_vector = [np.ones(len(training_set[0]) - 1)]
            self.average_vector = [np.zeros(len(training_set[0]) - 1)]
            # BUG FIX: the inner loop no longer shadows the round index.
            for step in range(len(self.y)):
                row = [self.X[step]]
                label = self.y[step]
                iterations = step + 1
                # Predict with the meta/averaged weights, then restore the
                # estimator's own coefficients before the online update.
                old = self.classifier.coef_
                self.classifier.coef_ = self.update_metadata(iterations)
                result = self.classifier.predict(row)
                self.classifier.coef_ = old
                if result[0] != label:
                    train_error += 1

                self.classifier.partial_fit(row, [label], np.unique(self.y))
                self.classifier2.partial_fit(row, [label], np.unique(self.y))

                # Running error rate after `iterations` examples.
                train_error_vector.append(train_error / iterations)
            total_error_vector = np.add(train_error_vector, total_error_vector)
        total_error_vector = np.divide(total_error_vector, parameters.rounds)
        # BUG FIX: plot and return the averaged curve — the original computed
        # the average and then discarded it in favour of the last round's raw
        # train_error_vector.
        misc.plotError(total_error_vector[0::50], dataset_name)
        return total_error_vector