예제 #1
0
def cluster_sk_mini_batch_dictionary_learning(content):
    """Run mini-batch dictionary learning on a request payload.

    ``content`` is indexed by key and must provide the estimator settings
    ``n_components``, ``alpha``, ``n_iter``, ``fit_algorithm``,
    ``batch_size``, ``shuffle``, ``transform_algorithm`` and ``split_sign``,
    plus the samples to fit under ``data``.  All other estimator options are
    pinned to fixed values in this function.

    Returns the value of ``httpWrapper`` applied to a JSON string with three
    entries: ``result`` (the transformed data), ``components`` (the fitted
    ``components_``) and ``iter`` (the fitted ``n_iter_``).
    """
    # Settings taken from the caller, looked up in the same order as the
    # estimator's keyword list so a missing key fails on the same entry.
    caller_keys = ('n_components', 'alpha', 'n_iter', 'fit_algorithm',
                   'batch_size', 'shuffle', 'transform_algorithm',
                   'split_sign')
    estimator_kwargs = {key: content[key] for key in caller_keys}

    # Settings that are not exposed to the caller.
    estimator_kwargs.update(
        n_jobs=1,
        dict_init=None,
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        verbose=False,
        random_state=None,
    )

    learner = MiniBatchDictionaryLearning(**estimator_kwargs)
    codes = learner.fit_transform(content['data'])

    payload = {
        'result': codes.tolist(),
        'components': learner.components_.tolist(),
        'iter': learner.n_iter_,
    }
    # NOTE(review): ``ignore_nan`` is not accepted by the standard-library
    # ``json.dumps``; presumably ``json`` is bound to simplejson in this
    # module -- confirm against the file's imports.
    return httpWrapper(json.dumps(payload, ignore_nan=True))
예제 #2
0
def learn_sparse_components3(shapes,
                             n_components,
                             lmbda,
                             batch_size,
                             transform_n_nonzero_coefs,
                             fit_algorithm,
                             n_iter=5000):
    """Fit a sparse dictionary on ``shapes`` and annotate the fitted estimator.

    The estimator is built with ``lmbda`` as its ``alpha``, the ``lasso_cd``
    transform algorithm, a positivity constraint on the code and verbose
    output enabled.

    Returns the fitted ``MiniBatchDictionaryLearning`` object with three
    extra attributes attached:

    * ``coefficients``  -- output of ``fit_transform(shapes)``
    * ``frequencies``   -- per-component count of non-zero coefficients
      divided by ``len(shapes)``
    * ``argsort_freqs`` -- component indices ordered by decreasing frequency
    """
    sample_total = len(shapes)

    learner = MiniBatchDictionaryLearning(
        n_components=n_components,
        alpha=lmbda,
        batch_size=batch_size,
        n_iter=n_iter,
        transform_n_nonzero_coefs=transform_n_nonzero_coefs,
        verbose=1,
        fit_algorithm=fit_algorithm,
        transform_algorithm='lasso_cd',
        positive_code=True,
    )

    # Learn the dictionary and encode the dataset in one pass.
    codes = learner.fit_transform(shapes)
    learner.coefficients = codes

    # Fraction of samples in which each component is active.  Only the
    # ordering is stored: actually reordering would also require permuting
    # the coefficients and the estimator's internal state.
    activation_share = np.count_nonzero(codes, axis=0) / sample_total
    learner.frequencies = activation_share
    learner.argsort_freqs = np.argsort(-activation_share)
    return learner
예제 #3
0
파일: mlml.py 프로젝트: hugingstar/ML
def ML_DL(X_train, n_components, alpha, batch_size, n_iter, random_state):
    """Encode ``X_train`` with mini-batch dictionary learning.

    A ``MiniBatchDictionaryLearning`` estimator is built from the given
    hyper-parameters, fitted on ``X_train``, and the transformed data is
    returned wrapped in a ``pandas.DataFrame`` with default index and columns.
    """
    from sklearn.decomposition import MiniBatchDictionaryLearning
    import pandas as pd

    settings = dict(
        n_components=n_components,
        alpha=alpha,
        batch_size=batch_size,
        n_iter=n_iter,
        random_state=random_state,
    )
    codes = MiniBatchDictionaryLearning(**settings).fit_transform(X_train)
    return pd.DataFrame(data=codes)
예제 #4
0
    def dictionary_learning(self,
                            train_data,
                            test_data,
                            components=100,
                            save_fig=True,
                            save_model=True,
                            save_name=''):
        """
        Fit a dictionary on the training images and encode both data sets.

        Each batch is reshaped to 2-D (samples x (axis1 * axis2)) before the
        estimator sees it.

        :param train_data: Image batch used for fitting; documented upstream as
            (x,y,z,1) grayscale format
        :param test_data: Image batch encoded with the fitted dictionary; same
            expected format as ``train_data``
        :param components: Number of dictionary atoms to learn
        :param save_fig: If true, 9 randomly chosen atoms are reshaped to image
            size and handed to ``self._save_image`` under the name
            "DL_components"
        :param save_model: If true, the fitted model and both encoded data sets
            are pickled under ``learning_output/``
        :param save_name: Prefix of the pickle file name if save_model=True
        :return: tuple (encoded train data, encoded test data)
        """
        print("[INFO] Starting Dictionary Learning")

        # Remember the image size so atoms can be turned back into images.
        height = train_data.shape[1]
        width = train_data.shape[2]

        # Flatten every image into one row.
        flat_train = train_data.reshape(train_data.shape[0], height * width)
        flat_test = test_data.reshape(
            test_data.shape[0], test_data.shape[1] * test_data.shape[2])

        dictionary = MiniBatchDictionaryLearning(n_components=components)
        train_data_dl = dictionary.fit_transform(flat_train)
        test_data_dl = dictionary.transform(flat_test)

        if save_model:
            bundle = {
                "model": dictionary,
                "train_data": train_data_dl,
                "test_data": test_data_dl,
            }
            target = "learning_output/" + save_name + "_dictionary_learning.pickle"
            with open(target, "wb") as handle:
                pickle.dump(bundle, handle)

        if save_fig:
            atoms = dictionary.components_
            # Sampling without replacement: needs at least 9 atoms.
            picked = np.random.choice(atoms.shape[0], 9, replace=False)
            atom_images = atoms[picked].reshape(9, height, width)
            self._save_image(atom_images, "DL_components")

        print("[INFO] Finished Dictionary Learning")
        return train_data_dl, test_data_dl
예제 #5
0
from sklearn.decomposition import MiniBatchDictionaryLearning

# Hyper-parameters of the mini-batch dictionary learner.
n_components = 50
alpha = 1
batch_size = 200
n_iter = 25
random_state = 2018

miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components,
                                                    alpha=alpha,
                                                    batch_size=batch_size,
                                                    n_iter=n_iter,
                                                    random_state=random_state)

# Learn the dictionary and encode the training set in a single pass.
# A separate ``fit`` on ``X_train.loc[:, :10000]`` (a *column* slice) used to
# run first; ``fit_transform`` refits from scratch with the same integer seed,
# so that earlier fit only cost time and its result was thrown away.
X_train_miniBatchDictLearning = miniBatchDictLearning.fit_transform(X_train)
X_train_miniBatchDictLearning = pd.DataFrame(
    data=X_train_miniBatchDictLearning, index=train_index)

# Encode the validation set with the dictionary learned on the training set.
X_validation_miniBatchDictLearning = miniBatchDictLearning.transform(
    X_validation)
X_validation_miniBatchDictLearning = pd.DataFrame(
    data=X_validation_miniBatchDictLearning, index=validation_index)

scatterPlot(X_train_miniBatchDictLearning, y_train,
            "Mini-batch Dictionary Learning")

# In[ ]:

# Independent Component Analysis
from sklearn.decomposition import FastICA
예제 #6
0
파일: srif.py 프로젝트: nmileva/srif
#    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
#               interpolation='nearest')
#    plt.xticks(())
#    plt.yticks(())
#
#plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
#
#plt.imshow(V)
#plt.gray()
#plt.show()

# Sparse-coding configurations to apply to ``dico``: (algorithm name, extra
# estimator parameters).  Only one configuration is listed.
transform_algorithms = [('omp', {'transform_n_nonzero_coefs': 3})]

# ``dico``, ``patches_l`` and ``t0`` are defined earlier in the script.
for transform_algorithm, kwargs in transform_algorithms:
    dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
    Q = dico.fit_transform(patches_l)  # sparse representation

# Report the time elapsed since the previously recorded ``t0``.
dt = time.time() - t0
print('done in %.2fs.' % dt)

# Dictionary learning on high resolution patches
print('Learning the high resolution dictionary...')
t0 = time.time()

# Closed-form least-squares fit: dictionary_h = P^T Q (Q^T Q)^+, i.e. the
# matrix D that best satisfies D Q^T ~= P^T, using the pseudo-inverse so a
# rank-deficient Q^T Q does not break the solve.
Q_t = np.transpose(Q)
P_t = np.transpose(patches_h)
dictionary_h = P_t @ Q @ (np.linalg.pinv(Q_t @ Q))

dt = time.time() - t0
print('done in %.2fs.' % dt)
예제 #7
0
                  cells_per_block=(1, 1))

    X_valid_final[i, :] = temp_fd

# In[19]:

# Notebook cell: display the shape of the training matrix.
np.shape(X_train)

# In[20]:

# Feature extraction using dictionary learning.
# Training data: fit the dictionary and encode the training set, timing the
# whole step.
print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=10, alpha=1, n_iter=100)
# NOTE(review): the estimator is given the transpose of X_train, so the
# columns of X_train act as its samples -- confirm this orientation is
# intended.
X_train_dict = dico.fit_transform(X_train.T)
# Bare expression in the middle of a cell: evaluated but not displayed.
np.shape(X_train_dict)
dt = time() - t0
print('done in %.2fs.' % dt)

# In[21]:

# Express test data in terms of the (dictionary) learned features.
X_test_dict = dico.transform(X_test.T)
np.shape(X_test_dict)

# In[22]:

# Express validation data in terms of the (dictionary) learned features.
X_valid_dict = dico.transform(X_valid.T)
np.shape(X_valid_dict)
예제 #8
0
파일: app.py 프로젝트: NeoVand/Flower
def preprocess(parser, args):
    """Load an EDF recording and its video and publish the prepared data.

    Reads ``args.edf`` and ``args.video``.  If either path does not exist,
    ``parser.error`` is called with a message naming the missing file and
    nothing else happens.  Otherwise the function:

    1. copies the video to ``./static/v.mp4`` and reads its frame rate;
    2. opens the EDF file, prints its metadata, and resolves the channel
       indices listed in the module-level ``natural_order`` / ``dual_order``;
    3. derives the EDF sample at which the video starts from the first
       annotation;
    4. reads every signal, keeps the first 32 rows, removes each channel's
       mean, re-references to the cross-channel average, scales each channel
       to unit standard deviation, and transposes to (samples, channels);
    5. fits a 3-component ``MiniBatchDictionaryLearning`` model on the result
       and rescales each output column to ``0.5 + 0.5 * z-score``.

    Results are stored in the module globals ``EDFPATH``, ``VIDEOPATH``,
    ``FR``, ``MONO``, ``DUAL``, ``SPS``, ``START_SAMPLE``, ``EDF`` and ``W``.
    The function returns ``None``.

    :param parser: object exposing ``error(message)`` (presumably an
        ``argparse.ArgumentParser`` -- confirm against the caller)
    :param args: object with ``edf`` and ``video`` path attributes
    """
    global EDFPATH, VIDEOPATH, FR, MONO, DUAL, SPS, START_SAMPLE, EDF, W

    separator = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    edf_path = args.edf
    video_path = args.video
    if not os.path.exists(edf_path):
        parser.error("The file %s does not exist!" % edf_path)
    elif not os.path.exists(video_path):
        parser.error("The file %s does not exist!" % video_path)
    else:
        EDFPATH = os.path.normpath(edf_path)
        VIDEOPATH = os.path.normpath(video_path)
        copyfile(VIDEOPATH, os.path.join(os.curdir, 'static', 'v.mp4'))

        # Only the frame rate is needed from the clip; release it right away.
        clip = e.VideoFileClip(VIDEOPATH)
        FR = clip.fps
        close_clip(clip)

        print(f"file {EDFPATH} is chosen")
        f = pyedflib.EdfReader(EDFPATH)
        try:
            equipment = f.getEquipment()
            print(f'equipment: {equipment}')

            print(separator)
            print('channel names:')
            channel_names = f.getSignalLabels()
            # Positions of the configured channels inside this recording;
            # ``list.index`` raises ValueError if a channel is absent.
            MONO = [channel_names.index(ch) for ch in natural_order]
            print(channel_names)

            DUAL = [channel_names.index(ch) for ch in dual_order]

            print(separator)
            # The first signal's sampling frequency is used for all channels.
            fs = f.getSampleFrequencies()[0]
            SPS = fs
            print(f'Sampling Frequency:{fs}')
            dur = f.getFileDuration()
            sample_count = f.getNSamples()[0]
            print(
                f"dur: {dur}, samples:{sample_count}, effective fs:{sample_count/dur}"
            )

            print(separator)
            # n = ceiling (sRate * recordStartTime) + 1
            # Each annotation becomes [sample index, number after '#'].
            # NOTE(review): anot[0] / 10000000 presumably converts 100 ns
            # ticks to seconds, and the number after '#' is presumably a video
            # frame index (it is divided by FR below) -- confirm.
            # ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
            a = f.read_annotation()
            a = [[
                np.ceil(fs * (anot[0] / 10000000)).astype(int) + 1,
                int(anot[2].split(b'#')[1])
            ] for anot in a]
            # Step back from the first annotated sample by the time the video
            # had already been running at that annotation.
            START_SAMPLE = a[0][0] - np.ceil(fs * (a[0][1] / FR)).astype(int)

            print('Annotations:')
            print(f'start: {a[0]}, end: {a[-1]}')
            print(f'video starts at sample {START_SAMPLE}')

            print(separator)
            print("Processing ... ")
            n = f.signals_in_file
            edf = np.zeros((n, f.getNSamples()[0]))
            for i in range(n):
                edf[i, :] = f.readSignal(i)
        finally:
            # Everything needed has been copied out of the reader; close the
            # file handle now instead of waiting for garbage collection.
            f.close()

        # Keep only the first 32 rows (channels).
        edf = edf[:32, :]
        channel_count, num_samples = edf.shape
        print(f"file is processed to an array of shape {edf.shape}")

        print(separator)
        print("Rereferencing and standardizing ...")
        # dc component of the signal
        edf = edf - np.mean(edf, axis=1).reshape(
            channel_count, 1)  # all channels shifted to avg=0
        # finding the average reference
        edf = edf - np.mean(edf, axis=0).reshape(
            1, num_samples)  # rereferencing all channels on average
        # standardizing
        edf = edf / (np.std(edf, axis=1).reshape(channel_count, 1))
        edf = edf.transpose()
        EDF = edf
        # Drop the local reference so only the global keeps the array alive.
        del edf
        gc.collect()
        print(separator)

        # (A disabled spline pre-computation step used to sit here; both
        # surrounding separators are still printed to keep the log identical.)
        print(separator)
        print("dimensionality reduction ... ")
        model = MiniBatchDictionaryLearning(n_components=3,
                                            alpha=0.1,
                                            n_iter=50,
                                            batch_size=30,
                                            random_state=0,
                                            positive_dict=True)
        # model = FastICA(n_components=3, random_state=0)
        W = model.fit_transform(EDF)

        # Standardise each of the 3 columns, then map to 0.5 + 0.5 * z.
        W = W - np.mean(W, axis=0).reshape(1, 3)
        W = 0.5 + 0.5 * W / np.std(W, axis=0).reshape(1, 3)
        print(
            f'min: {np.min(W)}, max: {np.max(W)}, mean: {np.mean(W)}, std: {np.std(W)}'
        )
        print('file processed. Data is ready to be served')