Example #1
0
    def __init__(self, data, index=None, pca=False, min_std=10e-9):
        """Normalize data, build the column mask and store the training data.

        data is kept unchanged as self.data. A ZScore instance is created
        from it (replace_inf=True) and used to normalize it. Columns whose
        normalized sum is NaN are excluded from the mask. If index is not
        None, only the columns addressed by index can be selected. With pca
        set, the filtered data is projected with a PCA (minfrac=0.05),
        otherwise the filtered data itself becomes self.traindata.

        NOTE: min_std is accepted but not used in this method.
        """
        self.data = data
        self._pca = None
        self._zs = ZScore(data, replace_inf=True)

        zscores = self._zs.normalize(data)

        # A NaN anywhere in a column turns the column sum into NaN, so a
        # single sum per column is enough to flag unusable columns.
        nan_free = ~np.isnan(zscores.sum(axis=0))

        if index is None:
            selected = np.ones(nan_free.shape, dtype=bool)
        else:
            selected = np.zeros(nan_free.shape, dtype=bool)
            selected[index] = True
        selected &= nan_free
        self._mask = selected

        kept = self.filter(zscores)
        if not pca:
            self.traindata = kept
            return

        self._pca = PCA(kept, minfrac=0.05)
        self.traindata = self._pca.project(kept)
Example #2
0
    def __init__(self, data, index=None, pca=False, min_std=10e-9):
        """Set up normalization, the feature mask and the training data.

        The raw data is stored as self.data and normalized by a ZScore
        instance built from it (replace_inf=True). A column is kept only if
        the sum of its normalized values is not NaN and, when index is
        given, only if index addresses it. If pca is true the kept columns
        are projected with a PCA (minfrac=0.05) before being stored as
        self.traindata.

        NOTE: min_std is part of the signature but is not used here.
        """
        self.data = data
        self._pca = None
        self._zs = ZScore(data, replace_inf=True)

        normalized = self._zs.normalize(data)

        # Columns whose sum is NaN contain at least one NaN entry.
        usable = ~np.isnan(normalized.sum(axis=0))

        if index is None:
            chosen = np.ones(usable.shape, dtype=bool)
        else:
            # Start from "nothing selected" and switch on the requested
            # columns only.
            chosen = np.zeros(usable.shape, dtype=bool)
            chosen[index] = True
        chosen &= usable
        self._mask = chosen

        reduced = self.filter(normalized)
        if not pca:
            self.traindata = reduced
            return

        self._pca = PCA(reduced, minfrac=0.05)
        self.traindata = self._pca.project(reduced)
Example #3
0
class PreProcessor(object):
    """Normalize a data matrix and select the usable feature columns.

    The data is normalized with a ZScore instance built from it. Columns
    whose normalized sum is NaN are removed (intended to drop columns that
    contain NaN's or have zero variance). If index is None all features are
    taken into account, otherwise only those specified with index. Index can
    be an integer or a list of integers. With pca=True the remaining columns
    are additionally projected with a PCA (minfrac=0.05).

    Calling the instance applies the same normalize/filter/project chain to
    new data. NOTE: min_std is accepted by the constructor but not used.
    """

    def __init__(self, data, index=None, pca=False, min_std=10e-9):
        self.data = data
        self._pca = None
        self._zs = ZScore(data, replace_inf=True)

        zscores = self._zs.normalize(data)

        # A NaN anywhere in a column turns the column sum into NaN, so a
        # single sum per column is enough to flag unusable columns.
        nan_free = ~np.isnan(zscores.sum(axis=0))

        if index is None:
            selected = np.ones(nan_free.shape, dtype=bool)
        else:
            selected = np.zeros(nan_free.shape, dtype=bool)
            selected[index] = True
        selected &= nan_free
        self._mask = selected

        kept = self.filter(zscores)
        if not pca:
            self.traindata = kept
            return

        self._pca = PCA(kept, minfrac=0.05)
        self.traindata = self._pca.project(kept)

    @property
    def std(self):
        """Standard deviation of the raw data along axis 0."""
        raw = self.data
        return raw.std(axis=0)

    @property
    def mean(self):
        """Mean of the raw data along axis 0."""
        raw = self.data
        return raw.mean(axis=0)

    @property
    def mask(self):
        """Boolean mask of the columns kept by filter()."""
        return self._mask

    @property
    def nfeatures(self):
        """Number of columns of the training data."""
        _, ncols = self.traindata.shape[:2]
        return ncols

    @property
    def nsamples(self):
        """Number of rows of the raw data."""
        return self.data.shape[0]

    def normalize(self, data):
        """Normalize data with the ZScore instance created in __init__."""
        zscore = self._zs
        return zscore.normalize(data)

    def filter(self, data):
        """Return only the columns of data selected by the mask."""
        columns = self._mask
        return data[:, columns]

    def __call__(self, data):
        """Normalize and filter data, projecting it if a PCA is set."""
        prepared = self.filter(self.normalize(data))
        if self._pca is not None:
            return self._pca.project(prepared)
        return prepared
Example #4
0
class PreProcessor(object):
    """Z-score normalize data and drop feature columns that are unusable.

    A ZScore instance built from the data performs the normalization.
    Columns whose normalized sum is NaN are removed (intended to drop
    columns that contain NaN's or have zero variance). If index is None all
    features are taken into account, otherwise only those specified with
    index. Index can be an integer or a list of integers. If pca is true,
    the remaining columns are projected with a PCA (minfrac=0.05).

    The instance can be called on new data to apply the same steps.
    NOTE: min_std is accepted by the constructor but not used.
    """

    def __init__(self, data, index=None, pca=False, min_std=10e-9):
        self.data = data
        self._pca = None
        self._zs = ZScore(data, replace_inf=True)

        normalized = self._zs.normalize(data)

        # Columns whose sum is NaN contain at least one NaN entry.
        usable = ~np.isnan(normalized.sum(axis=0))

        if index is None:
            chosen = np.ones(usable.shape, dtype=bool)
        else:
            # Start from "nothing selected" and switch on the requested
            # columns only.
            chosen = np.zeros(usable.shape, dtype=bool)
            chosen[index] = True
        chosen &= usable
        self._mask = chosen

        reduced = self.filter(normalized)
        if not pca:
            self.traindata = reduced
            return

        self._pca = PCA(reduced, minfrac=0.05)
        self.traindata = self._pca.project(reduced)

    @property
    def std(self):
        """Per-column standard deviation (axis 0) of the raw data."""
        source = self.data
        return source.std(axis=0)

    @property
    def mean(self):
        """Per-column mean (axis 0) of the raw data."""
        source = self.data
        return source.mean(axis=0)

    @property
    def mask(self):
        """Boolean column mask used by filter()."""
        return self._mask

    @property
    def nfeatures(self):
        """Size of the second axis of the training data."""
        _, width = self.traindata.shape[:2]
        return width

    @property
    def nsamples(self):
        """Size of the first axis of the raw data."""
        return self.data.shape[0]

    def normalize(self, data):
        """Apply the stored ZScore normalization to data."""
        scaler = self._zs
        return scaler.normalize(data)

    def filter(self, data):
        """Select the masked columns of data."""
        keep = self._mask
        return data[:, keep]

    def __call__(self, data):
        """Run normalize and filter on data, then project if a PCA exists."""
        processed = self.filter(self.normalize(data))
        if self._pca is not None:
            return self._pca.project(processed)
        return processed