Code example #1
import os

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler


class Scaler(object):
    def __init__(self, root_dir, train_csv, resize, scaler="std"):
        if scaler == "minmax":
            self.online_scaler = MinMaxScaler()
        elif scaler == "std":
            self.online_scaler = StandardScaler()
        elif scaler == "robust":
            self.online_scaler = RobustScaler()
        else:
            raise ValueError("unknown scaler: %s" % scaler)

        self.root_dir = root_dir
        self.train_csv = train_csv
        self.resize = resize
        # fit the scaler incrementally on the training data
        self.fit_data()
    
    def __call__(self, image):
        img = np.asarray(image, dtype=np.float64)
        x, y = img.shape
        # flatten to a single sample so the fitted scaler can transform it
        img = img.reshape(1, x * y)
        img = self.online_scaler.transform(img)
        img = img.reshape(x, y, 1)
        # clip before the uint8 cast, otherwise out-of-range values wrap around
        img = np.clip(img * 255, 0, 255)
        img = np.uint8(img)

        return img

    def fit_data(self):
        print("======> fitting training data")
        csv_data = pd.read_csv(self.train_csv)
        for row in csv_data.itertuples():
            # the first CSV column is assumed to hold the image file name
            img_name = os.path.join(self.root_dir, row[1])
            image = Image.open(img_name)
            image = image.resize((self.resize, self.resize))
            image = np.asarray(image, dtype=np.float64)
            x, y = image.shape
            image = image.reshape(1, x * y)
            # partial_fit accumulates the scaler's statistics one image at a time
            self.online_scaler.partial_fit(image)

    # reshape returns a view of `data`, so no extra copy is made
    def save_images(self, path, data, img_names, labels):
        print("======> saving data")
        data = data.reshape((-1, 224, 224))
        for im, img_name, label in zip(data, img_names, labels):
            label_dir = os.path.join(path, str(label))
            if not os.path.exists(label_dir):
                os.makedirs(label_dir)
            # scale to 0-255 so the saved image works with ToTensor() in PyTorch
            im = Image.fromarray(np.uint8(im * 255))
            im.save(os.path.join(label_dir, img_name))
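A minimal usage sketch, assuming grayscale input images and a torchvision-style pipeline; the paths and CSV layout are hypothetical, not part of the original snippet:

from torchvision import transforms

# hypothetical paths; the CSV's first column is assumed to list file names
scaler = Scaler(root_dir="data/train", train_csv="data/train.csv",
                resize=224, scaler="std")

# Scaler is callable, so it slots into a transform pipeline; it returns a
# uint8 HxWx1 array, which ToTensor() converts to a float tensor in [0, 1]
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    scaler,
    transforms.ToTensor(),
])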
Code example #2
import os
import pickle

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler, Normalizer,
                                   RobustScaler, StandardScaler)

# directory where the fitted scaler is persisted between batches;
# the original snippet leaves TEMP_DIR undefined, so this is an assumption
TEMP_DIR = "tmp"


class ListScaler(BaseEstimator, TransformerMixin):
    '''
    Given a list of point clouds, partially fits a given scaler on each
    cloud, then transforms each cloud with it.

    Parameters
    ----------
    scaler : str, default "standard"
        Name of the sklearn scaler to apply: one of "standard", "normal",
        "min_max", "max_abs" or "robust" ("log" is not implemented yet).
    '''

    _SCALERS = {
        "standard": StandardScaler,
        "normal": Normalizer,
        "min_max": MinMaxScaler,
        "max_abs": MaxAbsScaler,
        "robust": RobustScaler,
    }

    def __init__(self, scaler="standard"):
        if scaler == "log":
            raise NotImplementedError("log scaling is not implemented yet")
        if scaler not in self._SCALERS:
            raise ValueError("unknown scaler: %s" % scaler)
        self.scaler = self._SCALERS[scaler]()

    def fit(self, X, y=None):
        '''Partially fit the scaler on each segment in X.'''

        for segment in X:
            self.scaler.partial_fit(segment)

        # for batch learning the scaler must also be persisted, so later
        # batches can continue fitting it (see partial_train)
        with open(os.path.join(TEMP_DIR, 'total_scaler.pkl'), "wb") as f:
            pickle.dump(self.scaler, f)

        return self

    def partial_train(self, X):
        '''Continue fitting the persisted scaler on a further batch of segments.'''

        # load the previously persisted scaler
        with open(os.path.join(TEMP_DIR, 'total_scaler.pkl'), "rb") as f:
            total_scaler = pickle.load(f)
        # fit it on the new batch
        for segment in X:
            total_scaler.partial_fit(segment)
        # persist it again for the next batch
        with open(os.path.join(TEMP_DIR, 'total_scaler.pkl'), "wb") as f:
            pickle.dump(total_scaler, f)

    def transform(self, X):
        '''Transform each segment with the in-memory scaler.'''

        new_segments = []
        for segment in X:
            new_segments.append(self.scaler.transform(segment))
        return new_segments

    def total_transform(self, X):
        '''Transform each segment with the persisted, batch-fitted scaler.'''

        # load the scaler fitted across all batches
        with open(os.path.join(TEMP_DIR, 'total_scaler.pkl'), "rb") as f:
            total_scaler = pickle.load(f)
        new_segments = []
        for segment in X:
            new_segments.append(total_scaler.transform(segment))
        return new_segments