Example #1
    # Assumed module-level imports for this snippet:
    #   from tensorflow.keras.datasets import fashion_mnist
    #   from tensorflow.keras import utils
    #   from sklearn.preprocessing import StandardScaler
    #   from sklearn.model_selection import train_test_split
    #   from skimage import exposure
    #   from zca import ZCA
    def __load_data(self, img_preprocessing: str = None):
        (x_train, y_train), (x_val_test,
                             y_val_test) = fashion_mnist.load_data()

        x_train = x_train.astype('float32') / 255.0
        x_val_test = x_val_test.astype('float32') / 255.0

        if img_preprocessing == "std_normal":
            x_train_flat = x_train.reshape(-1, 28 * 28)
            x_val_test_flat = x_val_test.reshape(-1, 28 * 28)

            std = StandardScaler().fit(x_train_flat)
            x_train = std.transform(x_train_flat).reshape(-1, 28, 28)
            x_val_test = std.transform(x_val_test_flat).reshape(-1, 28, 28)

        elif img_preprocessing == "eq_hist":
            x_train = exposure.equalize_hist(x_train)
            x_val_test = exposure.equalize_hist(x_val_test)

        elif img_preprocessing == "zca_whiting":
            x_train_flat = x_train.reshape(-1, 28 * 28)
            x_val_test_flat = x_val_test.reshape(-1, 28 * 28)
            zca = ZCA().fit(x_train_flat)
            x_train = zca.transform(x_train_flat).reshape(-1, 28, 28)
            x_val_test = zca.transform(x_val_test_flat).reshape(-1, 28, 28)

        x_train = x_train.reshape(-1, 28, 28, 1)
        x_val_test = x_val_test.reshape(-1, 28, 28, 1)

        x_test, x_val, y_test, y_val = train_test_split(x_val_test,
                                                        y_val_test,
                                                        train_size=0.5,
                                                        random_state=42)
        y_train = utils.to_categorical(y_train, 10)
        y_val = utils.to_categorical(y_val, 10)
        y_test = utils.to_categorical(y_test, 10)

        return x_train, y_train, x_val, y_val, x_test, y_test
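
All four examples lean on a ZCA transformer exposing a scikit-learn-style fit/transform API (presumably the `zca` PyPI package imported in Example #3). For readers without that package, a minimal numpy sketch of what such a whitener computes; SimpleZCA and its eps parameter are illustrative names, not part of the examples:

import numpy as np

class SimpleZCA:
    """Minimal ZCA whitening sketch: rotate into the PCA basis of the
    training covariance, rescale each direction to unit variance, and
    rotate back so the result stays close to the original space."""

    def __init__(self, eps=1e-6):
        self.eps = eps  # regularizer for near-zero eigenvalues

    def fit(self, X):
        # X: (n_samples, n_features)
        self.mean_ = X.mean(axis=0)
        cov = np.cov(X - self.mean_, rowvar=False)
        U, S, _ = np.linalg.svd(cov)
        # W = U diag(1 / sqrt(S + eps)) U^T, the symmetric whitening matrix
        self.whiten_ = (U / np.sqrt(S + self.eps)) @ U.T
        return self

    def transform(self, X):
        return (X - self.mean_) @ self.whiten_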
Example #2
def sample_data(frames_per_packet, batch_size, start_frame_for_period=None, batch_step=1):
    # Assumes module-level: numpy as np, cv2, os.path.join as join, ZCA,
    # and the globals save_data_folder, full_matrix, cap and binarize_frame.
    import progressbar

    r = []
    X_brain_buffer = np.memmap(join(save_data_folder, 'X_brain_buffer_whitened.npy'),
                               dtype=np.float32, mode='w+',
                               shape=(batch_size, frames_per_packet, full_matrix.shape[1]))
    X_images_buffer = np.memmap(join(save_data_folder, 'X_images_buffer.npy'),
                                dtype=np.float32, mode='w+',
                                shape=(batch_size, 112 // 2, 150 // 2, 3))
    Y_buffer = np.memmap(join(save_data_folder, 'Y_buffer.npy'),
                         dtype=np.float32, mode='w+',
                         shape=(batch_size, 112 // 2, 150 // 2))

    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    bar = progressbar.ProgressBar(max_value=batch_size)
    samples_without_SVD_convergence = 0
    for i in range(batch_size):

        if start_frame_for_period is None:
            r_int = np.random.randint(total - frames_per_packet)
        else:
            r_int = start_frame_for_period + i * batch_step
        r.append(r_int)

        X = np.empty((frames_per_packet, full_matrix.shape[1]))

        for j in range(frames_per_packet):

            x = full_matrix[r_int + j]
            X[j, :] = np.array(x, dtype=np.float32, copy=False)

            if j == 0:
                cap.set(cv2.CAP_PROP_POS_FRAMES, r_int)
                ret, frame = cap.read()
                # Binarize the input frame
                binarized_frame = binarize_frame(frame)
                binarized_frame = cv2.resize(binarized_frame, (150 // 2, 112 // 2), interpolation=cv2.INTER_AREA)
                X_images_buffer[i, :, :, :] = np.array(binarized_frame, dtype=np.float32, copy=False)
            elif j == frames_per_packet-1:
                cap.set(cv2.CAP_PROP_POS_FRAMES, r_int + frames_per_packet)
                ret, frame = cap.read()
                y = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                y = cv2.resize(y, (150 // 2, 112 // 2), interpolation=cv2.INTER_AREA)
                Y_buffer[i, :, :] = np.array(y/255, dtype=np.float32, copy=False)

        # Whiten brain data
        X = X.T
        try:
            zca_transform = ZCA().fit(X)
            X_Whitened = zca_transform.transform(X)
            X_brain_buffer[i] = X_Whitened.T
        except np.linalg.LinAlgError:  # if SVD does not converge, reuse the previous sample
            X_brain_buffer[i] = X_brain_buffer[i - 1]
            X_images_buffer[i] = X_images_buffer[i - 1]
            Y_buffer[i] = Y_buffer[i - 1]
            samples_without_SVD_convergence += 1
            print(samples_without_SVD_convergence)

        bar.update(i)

    r = np.array(r, dtype=np.float32, copy=False)

    np.savez(join(save_data_folder, 'binary_headers.npz'), dtype=[np.float32],
             shape_X_brain=[batch_size, frames_per_packet, full_matrix.shape[1]],
             shape_X_images=[batch_size, 112 // 2, 150 // 2, 3],
             shape_Y=[batch_size, 112 // 2, 150 // 2],
             r=r)

    print('\nStart frame = {}, End frame = {}'.format(r[0], r[-1]))
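
The shapes written to binary_headers.npz let a later process reopen the three memmapped buffers without hard-coding dimensions. A minimal reader sketch, assuming save_data_folder points at the same directory the writer used:

import numpy as np
from os.path import join

headers = np.load(join(save_data_folder, 'binary_headers.npz'), allow_pickle=True)

X_brain = np.memmap(join(save_data_folder, 'X_brain_buffer_whitened.npy'),
                    dtype=np.float32, mode='r',
                    shape=tuple(headers['shape_X_brain']))
X_images = np.memmap(join(save_data_folder, 'X_images_buffer.npy'),
                     dtype=np.float32, mode='r',
                     shape=tuple(headers['shape_X_images']))
Y = np.memmap(join(save_data_folder, 'Y_buffer.npy'),
              dtype=np.float32, mode='r',
              shape=tuple(headers['shape_Y']))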
Example #3
def load_data():
    import cv2
    from zca import ZCA
    zca = ZCA()
    # Assumes module-level: numpy as np, plus the globals dataset_name,
    # patch_size, count and a shuffling rng (e.g. np.random.default_rng()).

    # load dataset
    dataset = np.load('dataset/{}.npy'.format(dataset_name), allow_pickle=True).item()
    data = dataset['data']
    data_map = dataset['label']
    global nch, ncls
    nch = data.shape[2]
    ncls = len(np.unique(data_map)) - 1

    # partition the training and test data
    # coordinates of the training / test data; int16 because int8 would
    # overflow once the scene dimensions exceed 127
    train_coord = np.empty((0, 2)).astype(np.int16)
    test_coord = np.empty((0, 2)).astype(np.int16)
    for cls in range(ncls):
        coord_class = np.transpose(np.nonzero(data_map == cls + 1))
        rng.shuffle(coord_class)
        # count = int(np.round(len(coord_class) * percent))
        samples_per_class = count  # count: global number of training samples per class
        train_coord = np.concatenate(
            (train_coord, coord_class[:samples_per_class]))
        test_coord = np.concatenate(
            (test_coord, coord_class[samples_per_class:]))
    rng.shuffle(train_coord)
    rng.shuffle(test_coord)
    print(train_coord.shape, test_coord.shape)
    train_map = np.zeros_like(data_map)
    test_map = np.zeros_like(data_map)
    for i in range(train_coord.shape[0]):
        train_map[train_coord[i, 0], train_coord[i, 1]] = \
            data_map[train_coord[i, 0], train_coord[i, 1]]
    for i in range(test_coord.shape[0]):
        test_map[test_coord[i, 0], test_coord[i, 1]] = \
            data_map[test_coord[i, 0], test_coord[i, 1]]

    # data preprocessing: scale to [-1, 1] using statistics of the labelled
    # training pixels, then ZCA-whiten across the spectral channels
    dmin = np.min(data[train_map != 0])
    dmax = np.max(data[train_map != 0])
    data = ((data - dmin) / (dmax - dmin) - 0.5) * 2
    zca.fit(data[train_map != 0])
    data = zca.transform(data.reshape(-1, nch)).reshape(data.shape)

    # padding the HSI scene and the label map
    data = cv2.copyMakeBorder(data, patch_size // 2, patch_size // 2,
                              patch_size // 2, patch_size // 2,
                              cv2.BORDER_REPLICATE)
    train_map = cv2.copyMakeBorder(train_map, patch_size // 2, patch_size // 2,
                                   patch_size // 2, patch_size // 2,
                                   cv2.BORDER_REPLICATE)
    test_map = cv2.copyMakeBorder(test_map, patch_size // 2, patch_size // 2,
                                  patch_size // 2, patch_size // 2,
                                  cv2.BORDER_REPLICATE)

    train_coord += patch_size // 2
    test_coord += patch_size // 2
    return data, train_map, train_coord, test_map, test_coord
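
After the border padding, each shifted coordinate in train_coord / test_coord addresses the center of a full patch_size window inside the padded scene. A sketch of the patch extraction this sets up; extract_patch is a hypothetical helper, not part of the example:

def extract_patch(data, coord, patch_size):
    # coord is already shifted by patch_size // 2, so the window
    # below always stays inside the padded array
    r, c = coord
    half = patch_size // 2
    return data[r - half:r - half + patch_size,
                c - half:c - half + patch_size]

# e.g. one (patch_size, patch_size, nch) training patch:
# patch = extract_patch(data, train_coord[0], patch_size)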
Example #4
# Assumed module-level imports for this snippet:
import logging
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d
from skimage import img_as_ubyte
from skimage.color import rgb2gray
from skimage.io import imread
from zca import ZCA


class Sparsecode(BaseEstimator, TransformerMixin):
    def __init__(self, patch_file=None, patch_num=10000, patch_size=(16, 16),
                 n_components=384, alpha=1, n_iter=1000, batch_size=200):
        self.patch_num = patch_num
        self.patch_size = patch_size
        self.patch_file = patch_file
        
        self.n_components = n_components
        self.alpha = alpha  # sparsity-controlling parameter
        self.n_iter = n_iter
        self.batch_size = batch_size

    
    def fit(self, X=None, y=None):
        if self.patch_file is None:
            num = self.patch_num // X.size
            data = []
            for item in X:
                img = imread(str(item[0]))
                img = img_as_ubyte(rgb2gray(img))
                #img = self.binary(img)  # binarize
                tmp = extract_patches_2d(img, self.patch_size, max_patches=num,
                                         random_state=np.random.RandomState())
                data.append(tmp)
            
            data = np.vstack(data)
            data = data.reshape(data.shape[0], -1)
            data = np.asarray(data, 'float32')
        else:
            data = np.load(self.patch_file, mmap_mode='r+')  # load npy file; r+ mode because the data is modified later
        
        data = np.require(data, dtype=np.float32)
        
        # Standardization
        #logging.info("Pre-processing : Standardization...")
        #self.standard = StandardScaler()
        #data = self.standard.fit_transform(data)
            
        # whiten
        #logging.info("Pre-processing : PCA Whiten...")
        #self.pca = RandomizedPCA(copy=True, whiten=True)
        #data = self.pca.fit_transform(data)
        
        # whiten
        logging.info("Pre-processing : ZCA Whiten...")
        self.zca = ZCA()
        data = self.zca.fit_transform(data)
        
        # 0-1 scaling; all of this can be done with the preprocessing module
        #self.minmax = MinMaxScaler()
        #data = self.minmax.fit_transform(data)
        
        """k-means
        self.kmeans = MiniBatchKMeans(n_clusters=self.n_components, init='k-means++', \
                                    max_iter=self.n_iter, batch_size=self.batch_size, verbose=1,\
                                    tol=0.0, max_no_improvement=100,\
                                    init_size=None, n_init=3, random_state=np.random.RandomState(0),\
                                    reassignment_ratio=0.0001)
        logging.info("Sparse coding : Phase 1 - Codebook learning (K-means).")
        self.kmeans.fit(data)
        
        logging.info("Sparse coding : Phase 2 - Define coding method (omp,lars...).")
        self.coder = SparseCoder(dictionary=self.kmeans.cluster_centers_, 
                                 transform_n_nonzero_coefs=256,
                                 transform_alpha=None, 
                                 transform_algorithm='lasso_lars',
                                 n_jobs = 1)
        """
        # generic dictionary learning
        logging.info("Sparse coding...")
        self.coder = MiniBatchDictionaryLearning(n_components=self.n_components,
                                                 alpha=self.alpha, n_iter=self.n_iter,
                                                 batch_size=self.batch_size, verbose=True)
        self.coder.fit(data)
        self.coder.transform_algorithm = 'omp'
        self.coder.transform_alpha = 0.1  # under omp, this represents the reconstruction error
        return self
    
    def transform(self, X):
        #whiten
        #X_whiten = self.pca.transform(X)
        logging.info("Compute the sparse coding of X.")
        X = np.require(X, dtype=np.float32)
        
        # TODO: must fit always be called before transform?
        #X = self.minmax.fit_transform(X)
        
        # -mean/std and whiten
        #X = self.standard.transform(X)
        #X = self.pca.transform(X)
        
        # ZCA
        X = self.zca.transform(X)

        # MiniBatchDictionaryLearning
        # return self.dico.transform(X_whiten)
        
        # k-means
        # TODO: sparse coder method? problem...
        return self.coder.transform(X)
        
    
    def get_params(self, deep=True):
        return {"patch_num": self.patch_num,
                "patch_size": self.patch_size,
                "alpha": self.alpha,
                "n_components": self.n_components,
                "n_iter": self.n_iter,
                "batch_size": self.batch_size}
                
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
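
Because Sparsecode follows the BaseEstimator / TransformerMixin contract, it slots into ordinary scikit-learn workflows. A hedged usage sketch; 'patches.npy' is a placeholder for a real pre-extracted patch file of flattened 16x16 patches:

import numpy as np

coder = Sparsecode(patch_file='patches.npy', n_components=384,
                   alpha=1, n_iter=1000, batch_size=200)
coder.fit()  # learns the ZCA whitener, then the dictionary, from the patch file

new_patches = np.random.rand(10, 16 * 16).astype(np.float32)  # stand-in data
codes = coder.transform(new_patches)  # OMP sparse codes, shape (10, 384)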