def cluster_sk_mini_batch_dictionary_learning(content):
    """Fit MiniBatchDictionaryLearning to the request payload and return the
    transformed data, the learned dictionary atoms, and the iteration count as JSON."""
    _config = MiniBatchDictionaryLearning(
        n_components=content['n_components'],
        alpha=content['alpha'],
        n_iter=content['n_iter'],
        fit_algorithm=content['fit_algorithm'],
        n_jobs=1,
        batch_size=content['batch_size'],
        shuffle=content['shuffle'],
        dict_init=None,
        transform_algorithm=content['transform_algorithm'],
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        verbose=False,
        split_sign=content['split_sign'],
        random_state=None)
    _result = _config.fit_transform(content['data'])
    return httpWrapper(
        json.dumps(
            {
                'result': _result.tolist(),
                'components': _config.components_.tolist(),
                'iter': _config.n_iter_
            },
            ignore_nan=True))
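# A minimal sketch of the payload this handler expects, assuming `content` is a
# plain dict whose keys match the ones read above; the values are illustrative,
# not from the original project. Note that `ignore_nan=True` is a simplejson
# option, so the surrounding module presumably imports `json` from simplejson,
# and `httpWrapper` must also be in scope.
import numpy as np

content = {
    'data': np.random.rand(100, 20).tolist(),  # 100 samples, 20 features (hypothetical)
    'n_components': 5,
    'alpha': 1.0,
    'n_iter': 100,
    'fit_algorithm': 'lars',
    'batch_size': 10,
    'shuffle': True,
    'transform_algorithm': 'omp',
    'split_sign': False,
}
response = cluster_sk_mini_batch_dictionary_learning(content)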
def learn_sparse_components3(shapes, n_components, lmbda, batch_size,
                             transform_n_nonzero_coefs, fit_algorithm,
                             n_iter=5000):
    """Learn sparse components from a dataset of shapes."""
    n_shapes = len(shapes)
    # Learn sparse components and predict coefficients for the dataset
    dl = MiniBatchDictionaryLearning(
        n_components=n_components,
        alpha=lmbda,
        batch_size=batch_size,
        n_iter=n_iter,
        transform_n_nonzero_coefs=transform_n_nonzero_coefs,
        verbose=1,
        fit_algorithm=fit_algorithm,
        transform_algorithm='lasso_cd',
        positive_code=True)
    dl.coefficients = dl.fit_transform(shapes)
    # Compute frequency of activations and argsort
    # (but do not apply argsort as we would also need to sort coefficients and all inner
    # stats of the sklearn object)
    dl.frequencies = np.count_nonzero(dl.coefficients.T, axis=1) / n_shapes
    dl.argsort_freqs = np.argsort(-dl.frequencies)
    return dl
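# A minimal usage sketch for learn_sparse_components3, assuming `shapes` is a
# 2-D array with one flattened shape per row; the data and parameter values
# below are made up for illustration.
import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning

shapes = np.abs(np.random.randn(200, 64))  # 200 hypothetical shapes, 64 dims each

dl = learn_sparse_components3(shapes,
                              n_components=10,
                              lmbda=1.0,
                              batch_size=20,
                              transform_n_nonzero_coefs=3,
                              fit_algorithm='cd',
                              n_iter=500)

print(dl.components_.shape)   # (10, 64) dictionary atoms
print(dl.coefficients.shape)  # (200, 10) sparse, non-negative codes
print(dl.argsort_freqs)       # atom indices ordered by activation frequency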
def ML_DL(X_train, n_components, alpha, batch_size, n_iter, random_state):
    from sklearn.decomposition import MiniBatchDictionaryLearning
    import pandas as pd
    dl = MiniBatchDictionaryLearning(n_components=n_components,
                                     alpha=alpha,
                                     batch_size=batch_size,
                                     n_iter=n_iter,
                                     random_state=random_state)
    # Dictionary-learning codes of the training data, returned as a DataFrame.
    X_train_DL = dl.fit_transform(X_train)
    X_train_DL = pd.DataFrame(data=X_train_DL)
    return X_train_DL
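# A minimal call sketch for ML_DL with made-up data. Note that these snippets
# use the older `n_iter` keyword of MiniBatchDictionaryLearning, which recent
# scikit-learn releases renamed to `max_iter`.
import numpy as np
import pandas as pd

X_train = pd.DataFrame(np.random.rand(500, 30))  # 500 hypothetical samples, 30 features

X_train_codes = ML_DL(X_train,
                      n_components=10,
                      alpha=1,
                      batch_size=100,
                      n_iter=200,
                      random_state=2018)
print(X_train_codes.shape)  # (500, 10)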
def dictionary_learning(self,
                        train_data,
                        test_data,
                        components=100,
                        save_fig=True,
                        save_model=True,
                        save_name=''):
    """
    Learns a dictionary from train data and applies it to train and test data.

    :param train_data: Image batch in (x,y,z,1) grayscale format (train)
    :param test_data: Image batch in (x,y,z,1) grayscale format (test)
    :param components: Number of atoms in dictionary to be extracted
    :param save_fig: If true 9 random components are plotted
    :param save_model: If true fitted dictionary model is saved as pickle file
    :param save_name: Name of the pickle file if save_model=True
    :return: returns transformed train and test data in (x,components) format
    """
    print("[INFO] Starting Dictionary Learning")
    height, width = train_data.shape[1], train_data.shape[2]
    train_data = train_data.reshape(train_data.shape[0],
                                    train_data.shape[1] * train_data.shape[2])
    test_data = test_data.reshape(test_data.shape[0],
                                  test_data.shape[1] * test_data.shape[2])
    dictionary = MiniBatchDictionaryLearning(n_components=components)
    train_data_dl = dictionary.fit_transform(train_data)
    test_data_dl = dictionary.transform(test_data)

    if save_model:
        dict_results = {}
        dict_results["model"] = dictionary
        dict_results["train_data"] = train_data_dl
        dict_results["test_data"] = test_data_dl
        save_path = "learning_output/" + save_name + "_dictionary_learning.pickle"
        with open(save_path, "wb") as output_file:
            pickle.dump(dict_results, output_file)

    if save_fig:
        components = dictionary.components_
        index = np.random.choice(components.shape[0], 9, replace=False)
        images_to_plot = components[index]
        images_to_plot = images_to_plot.reshape(9, height, width)
        self._save_image(images_to_plot, "DL_components")

    print("[INFO] Finished Dictionary Learning")
    return train_data_dl, test_data_dl
from sklearn.decomposition import MiniBatchDictionaryLearning

n_components = 50
alpha = 1
batch_size = 200
n_iter = 25
random_state = 2018

miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components,
                                                    alpha=alpha,
                                                    batch_size=batch_size,
                                                    n_iter=n_iter,
                                                    random_state=random_state)
miniBatchDictLearning.fit(X_train.loc[:, :10000])
# fit_transform refits the dictionary on the full training set and returns its codes.
X_train_miniBatchDictLearning = miniBatchDictLearning.fit_transform(X_train)
X_train_miniBatchDictLearning = pd.DataFrame(
    data=X_train_miniBatchDictLearning, index=train_index)

X_validation_miniBatchDictLearning = miniBatchDictLearning.transform(
    X_validation)
X_validation_miniBatchDictLearning = pd.DataFrame(
    data=X_validation_miniBatchDictLearning, index=validation_index)

scatterPlot(X_train_miniBatchDictLearning, y_train,
            "Mini-batch Dictionary Learning")

# In[ ]:

# Independent Component Analysis
from sklearn.decomposition import FastICA
#     plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
#                interpolation='nearest')
#     plt.xticks(())
#     plt.yticks(())
# #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
# #plt.imshow(V)
#plt.gray()
#plt.show()

transform_algorithms = [('omp', {'transform_n_nonzero_coefs': 3})]

for transform_algorithm, kwargs in transform_algorithms:
    dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
    Q = dico.fit_transform(patches_l)  # sparse representation
dt = time.time() - t0
print('done in %.2fs.' % dt)

# Dictionary learning on high resolution patches
print('Learning the high resolution dictionary...')
t0 = time.time()
Q_t = np.transpose(Q)
P_t = np.transpose(patches_h)
dictionary_h = P_t @ Q @ (np.linalg.pinv(Q_t @ Q))
dt = time.time() - t0
print('done in %.2fs.' % dt)
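# The closed-form step above fits the high-resolution dictionary by least
# squares: given the sparse codes Q of the low-resolution patches, it solves
# min_D ||patches_h - Q @ D.T||_F, whose solution is D = patches_h.T @ Q @ pinv(Q.T @ Q).
# A self-contained sketch with made-up patch matrices, checking that the pinv
# expression agrees with np.linalg.lstsq:
import numpy as np

rng = np.random.default_rng(0)
Q = rng.standard_normal((500, 32))           # hypothetical sparse codes (500 patches, 32 atoms)
patches_h = rng.standard_normal((500, 256))  # matching high-resolution patches (256 pixels each)

# Closed-form least-squares dictionary, as in the snippet above.
dictionary_h = patches_h.T @ Q @ np.linalg.pinv(Q.T @ Q)

# Same solution via lstsq: solve Q @ D.T ~= patches_h for D.T.
D_T, *_ = np.linalg.lstsq(Q, patches_h, rcond=None)
print(np.allclose(dictionary_h, D_T.T))  # True, up to numerical tolerance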
                                 cells_per_block=(1, 1))
    X_valid_final[i, :] = temp_fd

# In[19]:

np.shape(X_train)

# In[20]:

# Feature extraction using Dictionary Learning
# Training data
print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=10, alpha=1, n_iter=100)
X_train_dict = dico.fit_transform(X_train.T)
np.shape(X_train_dict)
dt = time() - t0
print('done in %.2fs.' % dt)

# In[21]:

# Express test data in terms of (Dictionary) learned features
X_test_dict = dico.transform(X_test.T)
np.shape(X_test_dict)

# In[22]:

# Express validation data in terms of (Dictionary) learned features
X_valid_dict = dico.transform(X_valid.T)
np.shape(X_valid_dict)
def preprocess(parser, args):
    edf_path = args.edf
    video_path = args.video
    if not os.path.exists(edf_path):
        parser.error("The file %s does not exist!" % edf_path)
    elif not os.path.exists(video_path):
        parser.error("The file %s does not exist!" % video_path)
    else:
        global EDFPATH
        EDFPATH = os.path.normpath(edf_path)
        global VIDEOPATH
        VIDEOPATH = os.path.normpath(video_path)
        copyfile(VIDEOPATH, os.path.join(os.curdir, 'static', 'v.mp4'))

        global FR
        clip = e.VideoFileClip(VIDEOPATH)
        # print('video frame rate: ', FR)
        FR = clip.fps
        close_clip(clip)

        print(f"file {EDFPATH} is chosen")
        f = pyedflib.EdfReader(EDFPATH)
        equipment = f.getEquipment()
        print(f'equipment: {equipment}')
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        print('channel names:')
        global MONO
        channel_names = f.getSignalLabels()
        MONO = [channel_names.index(ch) for ch in natural_order]
        print(channel_names)
        global DUAL
        DUAL = [channel_names.index(ch) for ch in dual_order]
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        global SPS
        fs = f.getSampleFrequencies()[0]
        SPS = fs
        print(f'Sampling Frequency:{fs}')
        dur = f.getFileDuration()
        sample_count = f.getNSamples()[0]
        print(f"dur: {dur}, samples:{sample_count}, effective fs:{sample_count/dur}")
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        # n = ceiling (sRate * recordStartTime) + 1
        a = f.read_annotation()
        a = [[
            np.ceil(fs * (anot[0] / 10000000)).astype(int) + 1,
            int(anot[2].split(b'#')[1])
        ] for anot in a]
        global START_SAMPLE
        START_SAMPLE = a[0][0] - np.ceil(fs * (a[0][1] / FR)).astype(int)
        print(f'Annotations:')
        print(f'start: {a[0]}, end: {a[-1]}')
        print(f'video starts at sample {START_SAMPLE}')
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        print("Processing ... ")
        n = f.signals_in_file
        edf = np.zeros((n, f.getNSamples()[0]))
        for i in np.arange(n):
            edf[i, :] = f.readSignal(i)
        edf = edf[:32, :]
        channel_count, num_samples = edf.shape
        print(f"file is processed to an array of shape {edf.shape}")
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        print("Rereferencing and standardizing ...")
        # dc component of the signal
        edf = edf - np.mean(edf, axis=1).reshape(
            channel_count, 1)  # all channels shifted to avg=0
        # finding the average reference
        edf = edf - np.mean(edf, axis=0).reshape(
            1, num_samples)  # rereferencing all channels on average
        # standardizing
        edf = edf / (np.std(edf, axis=1).reshape(channel_count, 1))
        edf = edf.transpose()
        global EDF
        EDF = edf
        del edf
        gc.collect()
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        # print("Precomputing Splines ... ")
") # mono_points = 500 # print(EDF.shape) # monosplines = np.zeros((EDF.shape[0],mono_points)) # def spliner(l,d=11,amp=1.5,dir=1,offset=0,res=500): # channels = len(l) # X=np.zeros(channels) # Y=np.zeros(channels) # for i,p in enumerate(l): # theta = (i/channels)*2*np.pi # r = 0.5+(d+amp*p) # X[i] = offset+dir*r*np.cos(theta) # Y[i] = r*np.sin(theta) # X = np.r_[X, X[0]] # Y = np.r_[Y, Y[0]] # tck, u = interpolate.splprep([X, Y], s=0, per=True) # X, Y = interpolate.splev(np.linspace(0, 1, res), tck) # Z=np.zeros(res) # return np.stack([X,Y,Z],axis=1) # monosplines = np.array([spliner(sample) for sample in EDF[:,MONO]]) # dualRsplines = [spliner(sample,d=5.5,offset=10) for sample in EDF[:,DUAL[:len(DUAL)//2]]] # dualLsplines = [spliner(sample,d=5.5,offset=-10, dir=-1) for sample in EDF[:,DUAL[len(DUAL)//2:]]] # print(f"the shape of the splines: {monosplines.shape}") print( "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" ) print("dimensionality reduction ... ") model = MiniBatchDictionaryLearning(n_components=3, alpha=0.1, n_iter=50, batch_size=30, random_state=0, positive_dict=True) # model = FastICA(n_components=3, random_state=0) global W W = model.fit_transform(EDF) W = W - np.mean(W, axis=0).reshape(1, 3) W = 0.5 + 0.5 * W / np.std(W, axis=0).reshape(1, 3) print( f'min: {np.min(W)}, max: {np.max(W)}, mean: {np.mean(W)}, std: {np.std(W)}' ) print('file processed. Data is ready to be served')