Ejemplo n.º 1
0
    def trainNormalization(self):
        """Learn label and feature normalization from the training data.

        Reads ``self.labels`` and ``self.vectors`` and replaces both with
        their normalized forms, storing the learned parameters on ``self``.

        Labels:
          * ``TYPE_TWOCLASS`` / ``TYPE_MULTICLASS``: each distinct label is
            mapped to a consecutive integer id in order of first appearance.
            ``self.class_map`` holds label -> id and ``self.class_inv`` holds
            id -> label.
          * ``TYPE_REGRESSION``: labels are shifted by ``self.reg_mean`` and
            divided by ``self.reg_std``.

        Vectors:
          * ``NORM_AUTO`` is resolved to ``NORM_PCA`` when the vectors have
            more than 128 elements, otherwise to ``NORM_VALUE``; the resolved
            value is written back to ``self.norm``.
          * ``NORM_VALUE``: per-column mean (``self.dmean``) and standard
            deviation (``self.dstd``) are learned and applied.
          * ``NORM_PCA``: a ``PCA`` object is trained on the vectors and
            stored in ``self.pca``; the vectors are replaced by their
            whitened projections.
          * Any other value of ``self.norm`` leaves ``self.vectors`` as is.

        Raises:
            AssertionError: if there are fewer than two labels, if the number
                of distinct classes does not fit ``self.type``, or if the
                vectors are not all one-dimensional with the same shape.
        """
        assert len(self.labels) >= 2

        if self.type == TYPE_TWOCLASS or self.type == TYPE_MULTICLASS:
            # Learn the classes: ids are handed out in order of first
            # appearance, so the next free id is always the current size.
            self.class_map = {}
            for label in self.labels:
                # `in` instead of has_key(): has_key() is gone in Python 3.
                if label not in self.class_map:
                    self.class_map[label] = len(self.class_map)
            n_classes = len(self.class_map)

            if self.type == TYPE_MULTICLASS:
                assert n_classes >= 2
            if self.type == TYPE_TWOCLASS:
                assert n_classes == 2

            # Inverse lookup (id -> original label).  items() instead of
            # iteritems() so this runs on both Python 2 and Python 3.
            self.class_inv = dict(
                (index, label) for label, index in self.class_map.items()
            )

            self.labels = [self.class_map[label] for label in self.labels]

        if self.type == TYPE_REGRESSION:
            self.reg_mean = mean(self.labels)
            self.reg_std = std(self.labels)

            # NOTE(review): if every label is identical reg_std is zero and
            # this divides by zero; left unguarded so the stored statistics
            # keep their original meaning -- confirm whether callers need it.
            self.labels = [
                (label - self.reg_mean) / self.reg_std for label in self.labels
            ]

        # Test length: every vector must be 1-D and share the same shape.
        shape = self.vectors[0].shape
        assert len(shape) == 1

        for each in self.vectors:
            assert shape == each.shape

        # Create a data matrix with one row per training vector.
        data = array(self.vectors, 'd')
        if self.norm == NORM_AUTO:
            self.norm = NORM_VALUE
            if data.shape[1] > 128:
                self.norm = NORM_PCA

        # Setup value normalization
        if self.norm == NORM_VALUE:
            self.dmean = data.mean(axis=0)
            self.dstd = data.std(axis=0)
            # NOTE(review): a column that is constant across all vectors has
            # dstd == 0, so the division below is a division by zero for that
            # column -- confirm whether that case can occur.
            self.vectors = (data - self.dmean) / self.dstd

        elif self.norm == NORM_PCA:
            self.pca = PCA()
            for vec in self.vectors:
                self.pca.addFeature(vec)

            # pca_basis > 1 is passed to train() as `number`; otherwise it is
            # passed as `energy`.
            if self.pca_basis > 1:
                self.pca.train(drop_front=self.pca_drop, number=self.pca_basis)
            else:
                self.pca.train(drop_front=self.pca_drop, energy=self.pca_basis)

            # Project every vector first, then build the matrix once.  The
            # original rebuilt the array inside the loop on every iteration,
            # which gave the same final result at quadratic cost.
            projected = [
                self.pca.project(each, whiten=True) for each in self.vectors
            ]
            self.vectors = array(projected, 'd')