Example #1
def test_dict_learning_online_positivity(transform_algorithm,
                                         positive_code,
                                         positive_dict):
    rng = np.random.RandomState(0)
    n_components = 8

    dico = MiniBatchDictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()

    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            alpha=1, random_state=rng,
                                            positive_dict=positive_dict,
                                            positive_code=positive_code)
    if positive_dict:
        assert (dictionary >= 0).all()
    else:
        assert (dictionary < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
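The test snippets in this collection reference module-level fixtures (X, n_features) and imports defined elsewhere in scikit-learn's test module. A minimal sketch of that shared setup, with assumed sizes:

import numpy as np
from sklearn.decomposition import (MiniBatchDictionaryLearning,
                                   dict_learning_online, sparse_encode)

# Assumed fixture: a small random data matrix shared by the tests.
# The sizes are illustrative, not taken from the original module.
rng_global = np.random.RandomState(0)
n_samples, n_features = 10, 8
X = rng_global.randn(n_samples, n_features)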
Example #2
def test_dict_learning_online_readonly_initialization():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    V.setflags(write=False)
    MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                random_state=0, shuffle=False).fit(X)
Example #3
def learn_sparse_components3(shapes,
                             n_components,
                             lmbda,
                             batch_size,
                             transform_n_nonzero_coefs,
                             fit_algorithm,
                             n_iter=5000):
    """Learn sparse components from a dataset of shapes."""
    n_shapes = len(shapes)
    # Learn sparse components and predict coefficients for the dataset
    dl = MiniBatchDictionaryLearning(
        n_components=n_components,
        alpha=lmbda,
        batch_size=batch_size,
        n_iter=n_iter,
        transform_n_nonzero_coefs=transform_n_nonzero_coefs,
        verbose=1,
        fit_algorithm=fit_algorithm,
        transform_algorithm='lasso_cd',
        positive_code=True)
    dl.coefficients = dl.fit_transform(shapes)
    # Compute frequency of activations and argsort
    # (but do not apply argsort as we would also need to sort coefficients and all inner
    # stats of the sklearn object)
    dl.frequencies = np.count_nonzero(dl.coefficients.T, axis=1) / n_shapes
    dl.argsort_freqs = np.argsort(-dl.frequencies)
    return dl
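A hypothetical call of the function above, assuming shapes is a nonnegative (n_shapes, n_features) array (positive_code=True suits nonnegative data); all values below are illustrative:

# Illustrative data and hyperparameters, not from the original project.
shapes = np.abs(np.random.RandomState(0).randn(200, 64))
dl = learn_sparse_components3(shapes, n_components=12, lmbda=1.0, batch_size=32,
                              transform_n_nonzero_coefs=5, fit_algorithm='cd',
                              n_iter=100)
print(dl.frequencies[dl.argsort_freqs])  # atoms ordered by activation frequency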
Example #4
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(
        n_components, batch_size=4, max_iter=5, random_state=0
    )
    dico.fit(X)
    assert dico.components_.shape == (n_components, n_features)
Example #5
 def fit(self, X, y=None):
     # compute the codes
     print('Extracting patches...')
     patches = []
     num = self.patch_num // X.size
     for x in X:
         img = imread(str(x[0]))
         tmp = extract_patches_2d(img, (self.patch_size, self.patch_size),
                                  max_patches=num, random_state=np.random.RandomState())
         patches.append(tmp)
     data = np.vstack(patches)
     data = data.reshape(data.shape[0], -1)

     data -= np.mean(data, axis=0)
     data = data / np.std(data, axis=0)

     print('Learning codebook...')
     if self.method == 'sc':
         self.dico = MiniBatchDictionaryLearning(n_components=self.codebook_size,
                                                 alpha=1, n_iter=100, batch_size=100, verbose=True)
         self.dico.fit(data)
     elif self.method == 'km':
         # self.dico = MiniBatchKMeans(n_clusters=self.codebook_size)
         pass

     return self
Example #6
def dict_learn(y, n_components=16, n_iter=n_iter):
    dico = MiniBatchDictionaryLearning(n_components=n_components,
                                       n_iter=n_iter,
                                       alpha=0.5,
                                       fit_algorithm=fit_algorithm,
                                       transform_algorithm=transform_algorithm,
                                       transform_n_nonzero_coefs=transform_n_nonzero_coefs)
    v = dico.fit(y).components_
    A, B = dico.inner_stats_  # running sufficient statistics of the online solver
    x = dico.transform(y)

    # collect the nonzero coefficients and their (row, column) indices;
    # list() is needed because zip() is a one-shot iterator in Python 3
    # and would be exhausted by the loop below before being returned
    a = []
    nz = np.nonzero(x)
    index = list(zip(nz[0], nz[1]))
    for i in index:
        a.append(x[i[0], i[1]])

    # mean per-sample reconstruction error
    res = np.dot(x, v)
    error = np.mean(np.linalg.norm(y - res, axis=1)) / 10.0
    return v, index, a, error
Example #7
def test_dict_learning_online_overcomplete():
    n_components = 12
    dico = MiniBatchDictionaryLearning(n_components,
                                       batch_size=4,
                                       max_iter=5,
                                       random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
Example #8
def main(games_path=None):

    if games_path is None:
        games_path = 'specmine/data/go_games/2010-01.pickle.gz'

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    boards = None  # numpy array nx9x9
    for game in games:
        if boards is None:
            boards = games[game].grids
        else:
            boards = numpy.vstack((boards, games[game].grids))

    print('boards shape: ', boards.shape)

    boards = boards.reshape((boards.shape[0], -1))

    print('boards reshaped: ', boards.shape)

    print('Learning the dictionary... ')
    t0 = time()
    # n_atoms was an older scikit-learn name for n_components
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
    V = dico.fit(boards).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    #pl.figure(figsize=(4.2, 4))
    for i, comp in enumerate(V[:100]):
        pl.subplot(10, 10, i + 1)
        # each atom is a flattened 9x9 board; reshape it for display
        pl.imshow(comp.reshape(9, 9), cmap=pl.cm.gray_r)  # interpolation='nearest')
        pl.xticks(())
        pl.yticks(())
Example #9
def generar_diccionario(s, number_atoms, number_samples, length):
    print('Generating the dictionary...')

    # trim the signals to 2048 samples
    s = cut_signals(s, number_samples)

    # build the matrix A with number_of_patches rows and length columns
    A = extract_patches_1D(s, length, number_atoms, number_samples)

    # normalize the patches
    for i in range(0, number_atoms):
        A[i] -= np.mean(A[i])
        A[i] /= np.std(A[i])

    # train the dictionary
    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=number_atoms,
                                       alpha=1.2 / np.sqrt(number_atoms),
                                       n_iter=1000,
                                       batch_size=4,
                                       fit_algorithm='lars',
                                       transform_algorithm='lasso_lars')

    V = dico.fit(A).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    # save the dictionary so it can be loaded directly later
    np.save("DICT", V)
    return (A, V)
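The saved dictionary can be reloaded later for sparse coding; a brief sketch (the file name comes from the snippet above, the encoding parameters are assumed):

# sparse_encode is available from sklearn.decomposition.
V = np.load("DICT.npy")
codes = sparse_encode(A, V, algorithm='lasso_lars', alpha=0.1)  # alpha assumed
reconstruction = np.dot(codes, V)  # approximate the patches from the codes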
Example #10
def learn_dictionary(X, n_filters, filter_size, n_sample=1000,
                     n_sample_patches=0, **kwargs):
    """
    learn a dictionary of n_filters atoms from n_sample images from X
    """

    n_channels = X.shape[1]

    # subsample n_sample images randomly
    rand_idx = np.random.choice(len(X), n_sample, replace=False)

    # extract patches
    patch_size = (filter_size, filter_size)
    patches = PatchExtractor(patch_size).transform(
        # (n, c, h, w) -> (n, h, w, c); a plain reshape would scramble the pixels
        X[rand_idx, ...].transpose(0, 2, 3, 1))
    patches = patches.reshape(patches.shape[0], -1)
    patches -= np.mean(patches, axis=0)
    patches /= np.std(patches, axis=0)

    if n_sample_patches > 0 and (n_sample_patches < len(patches)):
        np.random.shuffle(patches)
        patches = patches[:n_sample_patches, ...]

    # learn dictionary
    print('Learning dictionary for weight initialization...')

    dico = MiniBatchDictionaryLearning(n_components=n_filters, alpha=1, n_iter=1000, batch_size=10, shuffle=True,
                                       verbose=True, **kwargs)
    W = dico.fit(patches).components_
    W = W.reshape(n_filters, n_channels, filter_size, filter_size)

    print('Dictionary learned.')

    return W.astype(np.float32)
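A hypothetical call of learn_dictionary, assuming channels-first image data and that PatchExtractor comes from sklearn.feature_extraction.image:

# Illustrative data: 500 RGB images of 32x32 pixels.
X_imgs = np.random.rand(500, 3, 32, 32).astype(np.float32)
W = learn_dictionary(X_imgs, n_filters=64, filter_size=5, n_sample=200)
print(W.shape)  # (64, 3, 5, 5)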
Example #11
def test_dict_learning_online_initialization():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=0,
                                       dict_init=V, random_state=0).fit(X)
    assert_array_equal(dico.components_, V)
Example #12
File: dsc.py Project: XckCodeDD/nilmtk-dl
    def learn_dictionary(self, appliance_main, app_name):

        if appliance_main.size % self.shape != 0:
            extra_values = self.shape - (appliance_main.size) % (self.shape)
            appliance_main = list(
                appliance_main.values.flatten()) + [0] * extra_values
        appliance_main = np.array(appliance_main).reshape((-1, self.shape)).T
        self.power[app_name] = appliance_main

        if app_name not in self.dictionaries:
            print("Training First dictionary for ", app_name)
            model = MiniBatchDictionaryLearning(
                n_components=self.n_components,
                positive_code=True,
                positive_dict=True,
                transform_algorithm='lasso_lars',
                alpha=self.sparsity_coef)

        else:
            print("Re-training dictionary for ", app_name)
            model = self.dictionaries[app_name]
        model.fit(appliance_main.T)
        reconstruction = np.matmul(model.components_.T,
                                   model.transform(appliance_main.T).T)
        print("RMSE reconstruction for appliance %s is %s" %
              (app_name, mean_squared_error(reconstruction, appliance_main)
               **(.5)))
        self.dictionaries[app_name] = model
Example #13
def cluster_sk_mini_batch_dictionary_learning(content):
    """Fit MiniBatchDictionaryLearning with the request parameters and return the encoded result."""
    _config = MiniBatchDictionaryLearning(
        n_components=content['n_components'],
        alpha=content['alpha'],
        n_iter=content['n_iter'],
        fit_algorithm=content['fit_algorithm'],
        n_jobs=1,
        batch_size=content['batch_size'],
        shuffle=content['shuffle'],
        dict_init=None,
        transform_algorithm=content['transform_algorithm'],
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        verbose=False,
        split_sign=content['split_sign'],
        random_state=None)
    _result = _config.fit_transform(content['data'])
    # `ignore_nan=True` below is a simplejson option; the stdlib json.dumps
    # does not accept it, so `json` here is presumably simplejson.
    return httpWrapper(
        json.dumps(
            {
                'result': _result.tolist(),
                'components': _config.components_.tolist(),
                'iter': _config.n_iter_
            },
            ignore_nan=True))
Example #14
def test_minibatch_dictionary_learning_warns_and_ignore_n_iter():
    """Check that we always raise a warning when `n_iter` is set even if it is
    ignored if `max_iter` is set.
    """
    warn_msg = "'n_iter' is deprecated in version 1.1"
    with pytest.warns(FutureWarning, match=warn_msg):
        model = MiniBatchDictionaryLearning(batch_size=256, n_iter=2, max_iter=2).fit(X)
    assert model.n_iter_ == 2
Example #15
def test_batch_size_default_value_future_warning():
    # Check that a FutureWarning is raised if batch_size is left to its default value.
    # FIXME: remove in 1.3
    msg = "The default value of batch_size will change"
    with pytest.warns(FutureWarning, match=msg):
        dict_learning_online(X, n_components=2, random_state=0)

    with pytest.warns(FutureWarning, match=msg):
        MiniBatchDictionaryLearning(n_components=2, random_state=0).fit(X)
Example #16
 def init_core_model(self):
     if hasattr(self, 'model'):
         del self.model
         import gc
         gc.collect()
     self.model = MiniBatchDictionaryLearning(
         n_components=self.n_components, alpha=self.alpha, n_jobs=16,
         n_iter=self.n_iter, batch_size=self.batch_size,
         fit_algorithm='lars', transform_algorithm='omp', verbose=True)
Example #17
def test_dict_learning_online_partial_fit():
    # this test was not actually passing before!
    raise SkipTest
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dico1 = MiniBatchDictionaryLearning(n_components, n_iter=10, batch_size=1,
                                        shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dico2 = MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                        random_state=0)
    for ii, sample in enumerate(X):
        dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)
        # if ii == 1: break
    assert not np.all(sparse_encode(X, dico1.components_, alpha=100) == 0)
    assert_array_equal(dico1.components_, dico2.components_)
Example #18
 def extract_codes(self, X, standardize=False):
     self.standardize = standardize
     self._extract_data_patches(X)
     self.dico = MiniBatchDictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             n_iter=500)
     print "Dictionary learning from data..."
     self.D = self.dico.fit(self.data)
     return self
Example #19
def learn_dictionary(patch_size, step, plot_dictionary=False, *args):
    """
    Function that normalizes the patches, learns a dictionary on them and plots it
    
    Parameters
    ----------
    patch_size: (int, int), the size of the patches to be extracted from the images
    step: int, the step of the moving patches, overlap of patches = patch_size - step
    plot_dictionary: boolean, False by default, plots the dictionary if True

    Returns
    -------
    dico: a dictionary (a set of atoms) that can best be used to represent data using a sparse code
    V: array, [n_components, n_features], the components of the fitted data
    """

    argCount = len(args)
    assert argCount > 0, 'no image to extract the patches from'

    global initial_patch_size, all_patches
    print(f'Extracting reference patches from {argCount} images...')
    t0 = time()
    for image in args:
        patches = patchify(image, patch_size, step)
        initial_patch_size = patches.shape
        patches = patches.reshape(-1, patch_size[0] * patch_size[1])
        all_patches.append(patches)
    dt = time() - t0
    print('done in %.2fs.' % dt)

    all_patches = np.reshape(all_patches, (-1, patch_size[0] * patch_size[1]))
    all_patches -= np.mean(all_patches, axis=0)  # remove the mean
    all_patches /= np.std(all_patches, axis=0)  # normalize each patch

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=400)
    V = dico.fit(all_patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    if plot_dictionary:
        # plotting the dictionary
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(V[:100]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp.reshape(patch_size),
                       cmap=plt.cm.gray_r,
                       interpolation='nearest')
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from patches')
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    return dico, V
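A hypothetical call of the function above, assuming a patchify helper is in scope and initializing the module-level all_patches accumulator it relies on:

all_patches = []  # module-level list the function appends to
img1, img2 = np.random.rand(64, 64), np.random.rand(64, 64)  # illustrative images
dico, V = learn_dictionary((8, 8), 4, False, img1, img2)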
Example #20
def test_dict_learning_iter_offset():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    dict1 = MiniBatchDictionaryLearning(n_components,
                                        n_iter=10,
                                        dict_init=V,
                                        random_state=0,
                                        shuffle=False)
    dict2 = MiniBatchDictionaryLearning(n_components,
                                        n_iter=10,
                                        dict_init=V,
                                        random_state=0,
                                        shuffle=False)
    dict1.fit(X)
    for sample in X:
        dict2.partial_fit(sample[np.newaxis, :])

    assert dict1.iter_offset_ == dict2.iter_offset_
Example #21
def test_dict_learning_online_partial_fit():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10 * len(X),
                                        batch_size=1,
                                        alpha=1, shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dict2 = MiniBatchDictionaryLearning(n_components, alpha=1,
                                        n_iter=1, dict_init=V,
                                        random_state=0)
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])

    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_,
                              decimal=2)
Example #22
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from io import StringIO  # sklearn.externals.six was removed; io.StringIO is the Python 3 equivalent
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                       random_state=0)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                       random_state=0)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                         random_state=0)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                         random_state=0)
    sys.stdout = old_stdout
    assert dico.components_.shape == (n_components, n_features)
Example #23
def scskl_dico_learning(list_pickled_array, n_atoms, maxepoch=5, maxiter=100):
    D = None
    for e in range(maxepoch):
        for a in list_pickled_array:
            data = joblib.load(a)
            dico = MiniBatchDictionaryLearning(n_components=n_atoms,
                                               n_iter=maxiter,
                                               dict_init=D)
            D = dico.fit(data).components_.astype(np.float32)
    return D
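Passing the previous components as dict_init warm-starts each mini-batch fit, so the dictionary is refined across arrays and epochs instead of being relearned from scratch. A hypothetical invocation (file names illustrative; each file is assumed to hold a 2-D joblib-dumped sample array):

files = ['patches_0.jbl', 'patches_1.jbl']  # hypothetical joblib dumps
D = scskl_dico_learning(files, n_atoms=64, maxepoch=2, maxiter=50)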
Example #24
def test_minibatch_dict_learning_partial_fit_iter_offset_deprecated():
    # check the deprecation warning of iter_offset in partial_fit
    # FIXME: remove in 1.3
    depr_msg = (
        "'iter_offset' is deprecated in version 1.1 and will be removed in version 1.3"
    )
    est = MiniBatchDictionaryLearning(n_components=2, batch_size=4, random_state=0)

    with pytest.warns(FutureWarning, match=depr_msg):
        est.partial_fit(X, iter_offset=0)
Example #25
def to_sparse(X, dim):

    sparse_dict = MiniBatchDictionaryLearning(dim)
    sparse_dict.fit(X)
    sparse_vectors = sparse_encode(X, sparse_dict.components_)

    for i in sparse_vectors:
        print(i)

    return sparse_vectors
Example #26
def getOverallDict():
    classes = pickle.load(open(path_classes, "rb"))
    class_names = pickle.load(open(path_class_names, "rb"))
    i = 0
    X = np.zeros((0, patch_size))
    dictionary = MiniBatchDictionaryLearning(n_components=35,
                                             alpha=20,
                                             n_iter=700,
                                             n_jobs=3)
    for label in classes:
        class_name = class_names[label]
        X_class = []
        if class_name in ["artifacts", "artifacts_edge"]:
            continue
        for img_id in classes[label]:
            img = imread(path_train_cropped + "/" + img_id + ".jpg") / 255

            (h, w) = img.shape
            if h < patch_dim[0]:
                temp = np.ones((patch_dim[0], w))
                temp[:h, :] = img
                img = temp
                h = patch_dim[0]
            if w < patch_dim[1]:
                temp = np.ones((h, patch_dim[1]))
                temp[:, :w] = img
                img = temp

            # print(img.shape)

            img_patches = extract_patches_2d(img, patch_dim)
            # if img_patches.shape[0] < 10:
            # 	print(class_name+ "/" + file_name)
            # 	print(img.shape)
            # 	plt.imshow(img, cmap=cm.gray, interpolation='none')
            # 	plt.show()
            img_patches = img_patches.reshape(img_patches.shape[0], -1)
            old_shape = img_patches.shape
            img_patches = img_patches[~np.all(img_patches > 0.98, axis=1)]
            # if old_shape != img_patches.shape:
            # 	print((img_patches.shape,old_shape))
            # 	print("--------------------------")
            X_class += list(img_patches)
            i += 1
        label += 1
        if (len(X_class) > 33000):
            X_class = sample(X_class, 30000)
        # X_class = np.concatenate(X_class)
        print(str(label) + " --- " + str(len(X_class)), flush=True)
        X_class = np.array(X_class)
        X = np.concatenate([X, X_class], axis=0)
    print(X.shape)
    pickle.dump(X, open("../data/temp.dat", "wb"))
    dictionary.fit(X)
    return dictionary
Example #27
def ksvd(noisy_data):

    print('Updating Dictionary')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp,
                                       alpha=2,
                                       n_iter=n_iter)  # dict_init=D could warm-start here
    V = dico.fit(noisy_data).components_
    print('done in %.2fs.' % (time() - t0))  # moved after fit so the timing covers it
    return V, dico
Example #28
def loadDico(components_,
             transform_algorithm='lars',
             kwargs={'transform_n_nonzero_coefs': 5}):
    n_components = components_.shape[0]
    dico = MiniBatchDictionaryLearning(n_components=n_components,
                                       alpha=1,
                                       n_iter=500)
    dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
    dico.components_ = components_
    dico.n_components_ = n_components
    return dico
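A hedged usage sketch for loadDico: rebuild a coder from previously saved components and encode new data. Recent scikit-learn versions validate additional fitted attributes, so this attribute-injection trick may need extra fields there; all names below are assumptions:

components = np.load("DICT.npy")  # e.g. the dictionary saved in Example #9
coder = loadDico(components, transform_algorithm='omp',
                 kwargs={'transform_n_nonzero_coefs': 3})
new_signals = np.random.randn(10, components.shape[1])  # illustrative data
codes = coder.transform(new_signals)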
Example #29
def test_minibatch_dictionary_learning_lars(positive_dict):
    n_components = 8

    dico = MiniBatchDictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_dict=positive_dict, fit_algorithm='cd').fit(X)

    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
Example #30
def fit_and_save(n_iter, alpha, data):
    dico = MiniBatchDictionaryLearning(
        n_components=500, alpha=alpha, n_iter=n_iter, n_jobs=4
    )  # 500 took 7.5 mins, 5000 should be an hour, 10000 took 10 mins
    t = time.perf_counter()  # time.clock() was removed in Python 3.8
    print('fitting...', n_iter, alpha)
    V = dico.fit(data)
    print('took', time.perf_counter() - t, 'sec')
    np.save(
        'bases/bases_iters=' + str(n_iter) + '_alpha=' + str(alpha) + '.npy',
        V.components_)