def test_FindWavKurt(self):
    N = 16
    fcut = 0.4
    level_index = 11
    freq_index = 24
    lev = self.level_w[level_index]

    c_dict = loadmat("test_data/c.mat")
    c_exp = c_dict['c']
    S_dict = loadmat("test_data/S.mat")
    S_exp = S_dict['S']

    # get bw and frequency (Hz)
    bw_hz, fc_hz, fi = getBandwidthAndFrequency(self.nlevel, self.Fs,
                                                self.level_w, self.freq_w,
                                                level_index, freq_index)

    # get basic filter parameters
    h, g, h1, h2, h3 = get_h_parameters(N, fcut)
    c, s, threshold, Bw, fc = Find_wav_kurt(self.x, h, g, h1, h2, h3,
                                            self.nlevel, lev, fi, self.Fs)
    S = getFTSquaredEnvelope(c)

    # do tests
    self.assertAlmostEqual(Bw * self.Fs, bw_hz)
    self.assertAlmostEqual(fc * self.Fs, fc_hz)
    np.testing.assert_allclose(c.flatten(), c_exp.flatten(), atol=1e-3)
    np.testing.assert_allclose(S.flatten(), S_exp.flatten(), atol=1e-6)

def test_FindWavKurt(self):
    from scipy.io.matlab import loadmat

    N = 16
    fcut = 0.4
    level_index = 11
    freq_index = 24
    lev = self.level_w[level_index]

    base_path = os.getenv("WAVELOC_PATH")
    matlab_file = os.path.join(base_path, "test_data", "c.mat")
    c_dict = loadmat(matlab_file)
    c_exp = c_dict["c"]

    matlab_file = os.path.join(base_path, "test_data", "S.mat")
    S_dict = loadmat(matlab_file)
    S_exp = S_dict["S"]

    # get bw and frequency (Hz)
    bw_hz, fc_hz, fi, l1 = getBandwidthAndFrequency(
        self.nlevel, self.Fs, self.level_w, self.freq_w,
        level_index, freq_index)

    # get basic filter parameters
    h, g, h1, h2, h3 = get_h_parameters(N, fcut)
    c, s, threshold, Bw, fc = Find_wav_kurt(self.x, h, g, h1, h2, h3,
                                            self.nlevel, lev, fi, Fs=self.Fs)
    S = getFTSquaredEnvelope(c)

    # do tests
    self.assertAlmostEqual(Bw * self.Fs, bw_hz)
    self.assertAlmostEqual(fc * self.Fs, fc_hz)
    np.testing.assert_allclose(c.flatten(), c_exp.flatten(), atol=1e-3)
    np.testing.assert_allclose(S.flatten(), S_exp.flatten(), atol=1e-6)

def __get_excit_wfm(filepath):
    """
    Returns the excitation BE waveform present in the more parms.mat file

    Parameters
    ------------
    filepath : String / unicode
        Absolute filepath of the .mat parameter file

    Returns
    -----------
    fft_full : 1D numpy complex array
        FFT of the BE waveform
    fft_full_rev : 1D numpy complex array or None
        FFT of the reversed BE waveform (None for more_parms.mat files)
    bin_inds : 1D numpy uint array or None
        Indices of the excited bins (None for more_parms.mat files)
    """
    if not path.exists(filepath):
        warn('BEPSndfTranslator - NO more_parms.mat file found')
        return np.zeros(1000, dtype=np.float32)

    if 'more_parms' in filepath:
        matread = loadmat(filepath, variable_names=['FFT_BE_wave'])
        fft_full = np.complex64(np.squeeze(matread['FFT_BE_wave']))
        bin_inds = None
        fft_full_rev = None
    else:
        matread = loadmat(filepath, variable_names=['FFT_BE_wave', 'FFT_BE_rev_wave', 'BE_bin_ind'])
        bin_inds = np.uint(np.squeeze(matread['BE_bin_ind'])) - 1
        fft_full = np.complex64(np.squeeze(matread['FFT_BE_wave']))
        fft_full_rev = np.complex64(np.squeeze(matread['FFT_BE_rev_wave']))

    return fft_full, fft_full_rev, bin_inds

def test_srmr_slow():
    fs = 16000
    s = loadmat("test/test.mat")["s"][:, 0]
    correct_ratios = loadmat("test/correct_ratios.mat")['correct_ratios'][0]
    srmr = SRMR(fs, fast=False)
    out = srmr.predict(s, s, s)
    ratio_slow, avg_energy_slow = out['p']['srmr'], out['avg_energy']
    assert np.allclose(ratio_slow, correct_ratios[0], rtol=1e-6, atol=1e-12)

def test_srmr_norm():
    fs = 16000
    s = loadmat("test/test.mat")["s"][:, 0]
    correct_ratios = loadmat("test/correct_ratios.mat")['correct_ratios'][0]
    srmr = SRMR(fs, fast=False, norm=True, max_cf=30)
    out = srmr.predict(s, s, s)
    ratio_norm, avg_energy_norm = out['p']['srmr'], out['avg_energy']
    assert np.allclose(ratio_norm, correct_ratios[3], rtol=1e-6, atol=1e-12)

def timeseries_design(subject_id, whatParadigm, onsets_dir):
    import scipy.signal
    import scipy.special as sp
    import numpy as np
    import math
    from nipype.interfaces.base import Bunch
    from copy import deepcopy
    from scipy.io.matlab import loadmat
    import glob
    import os
    # from Facematch import onsets_dir

    print("Entered timeseries_design once with arguments SUBID = " + subject_id +
          ", paradigm = " + whatParadigm + ", and onsets dir = " + onsets_dir + ".")
    output = []
    regressor_names = None
    regressors = None

    onsets_temp = os.path.join(onsets_dir, subject_id + '*onsets.mat')
    onsets_files = sorted(glob.glob(onsets_temp))

    testmat = loadmat(onsets_files[0], struct_as_record=False)
    testnames = testmat['names'][0]
    names_count_vec = np.zeros(len(testnames))

    for r in range(len(onsets_files)):
        mat = loadmat(onsets_files[r], struct_as_record=False)
        ons = mat['onsets'][0]
        nam = mat['names'][0]
        dur = mat['durations'][0]

        names = []
        durations = []
        run_onsets = []
        for condition in range(len(nam)):
            for onset in range(len(ons[condition][0])):
                names_count_vec[condition] += 1
                names.append(str(nam[condition][0]) + '_%d' % (names_count_vec[condition]))
                run_onsets.append([ons[condition][0][onset]])
                durations.append(dur[condition][0])

        print(run_onsets)
        print(names)
        print(durations)

        output.insert(r, Bunch(conditions=deepcopy(names),
                               onsets=deepcopy(run_onsets),
                               durations=deepcopy(durations),
                               amplitudes=None,
                               tmod=None,
                               pmod=None,
                               regressor_names=None,
                               regressors=regressors))
        # here is where we can do linear, quad, etc detrending
    return output

def main(argv):
    dim = 64
    imidx = 7

    # load unnormalized log-likelihood
    results = loadmat('results/vanhateren/poe/AIS_GibbsTrain_white_studentt_L=064_M=256_B=0100000_learner=PMPFdH1_20120523T112539.mat')
    loglik = -mean(results['E'][:, :10000]) - results['logZ']

    # load importance weights for partition function
    ais_weights = loadmat('results/vanhateren/poe/matlab_up=022150_T=10000000_ais.mat')['logweights']
    ais_weights.shape

    # number of samples to probe
    num_samples = 2**arange(0, ceil(log2(ais_weights.shape[0])) + 1, dtype='int32')
    num_samples[-1] = max([num_samples[-1], ais_weights.shape[0]])
    num_repetitions = ceil(2.**16 / num_samples)

    estimates = []

    print(loadmat('results/vanhateren/poe/matlab_up=022150_T=10000000_ais.mat')['t_range'][:, imidx],
          'intermediate distributions')

    logZ = logmeanexp(ais_weights[:, -1])

    for k in arange(len(num_samples)):
        estimates_ = []
        for _ in arange(num_repetitions[k]):
            # pick samples at random
            idx = permutation(ais_weights.shape[0])[:num_samples[k]]

            # estimate log-partf. using num_samples[k] samples
            loglik_ = loglik + (logZ - logmeanexp(ais_weights[idx, imidx]))

            # store estimate of log-likelihood
            estimates_.append(loglik_)
        estimates.append(mean(estimates_))

    gca().width = 5
    gca().height = 5
    # gca().ymin = 0.85
    # gca().ymax = 1.55
    # ytick([0.9, 1.1, 1.3, 1.5])

    semilogx(num_samples, estimates / log(2.) / dim, '.-')
    xlabel('number of AIS samples')
    ylabel('estimated log-likelihood')
    savefig('results/vanhateren/convergence_poe.tex')
    draw()

    return 0

def load_dataset(dataset):
    if dataset == 'umls':
        mat = loadmat('../data/%s/uml.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'nation':
        mat = loadmat('../data/%s/dnations.mat' % (dataset))
        T = np.array(mat['R'], np.float32)
    elif dataset == 'kinship':
        mat = loadmat('../data/%s/alyawarradata.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'wordnet':
        T = pickle.load(open('../data/%s/reduced_wordnet.pkl' % (dataset), 'rb'))

    T[np.isnan(T)] = 0
    return T

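A minimal usage sketch, assuming the relative ../data layout that load_dataset expects; 'kinship' is one of the dataset keys handled above, and the axis ordering is whatever the .mat file stores.

T = load_dataset('kinship')
print('tensor shape:', T.shape)
print('number of known triples:', int(T.sum()))
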
def preprocess_dataset(self, dataset, n_jobs=-1, verbosity=2):
    """
    :param dataset:
    :param n_jobs:
    :return:
    """
    if self.skip:
        return

    if verbosity > 1:
        print("  Loading masks from .mat file")

    data = loadmat(self.path)
    masks = data[self.var_name][0]
    if not self.invert:
        masks_probe = masks.take(range(0, masks.size, 2))
        masks_gallery = masks.take(range(1, masks.size, 2))
    else:
        masks_gallery = masks.take(range(1, masks.size, 2))
        masks_probe = masks.take(range(0, masks.size, 2))

    dataset.probe.masks_train = list(masks_probe[dataset.train_indexes])
    dataset.probe.masks_test = list(masks_probe[dataset.test_indexes])
    dataset.gallery.masks_train = list(masks_gallery[dataset.train_indexes])
    dataset.gallery.masks_test = list(masks_gallery[dataset.test_indexes])

def read_mat_profile_files(path, loc, var, dataSetName='test', dataSetType='ms'):
    """Reads generic time series from matlab file and converts data to
    python format"""
    varToChar = {'salt': 's', 'elev': 'e', 'temp': 't', 'u': 'u', 'v': 'v'}
    pattern = os.path.join(
        path,
        dataSetName + '.' + dataSetType + '.' + varToChar[var] + '.' + loc + '.mat')
    fList = sorted(glob.glob(pattern))
    if not fList:
        raise Exception('File not found: ' + pattern)
    f = fList[0]
    print('Reading', f)
    d = loadmat(f)
    t = d['t'].flatten()  # (1,nTime)
    z = d['z']            # (nVert,nTime)
    data = d['data']      # (nVert,nTime)
    # convert time from Matlab datenum (in PST) to epoch (UTC)
    time = datenumPSTToEpoch(t)
    # round to nearest minute
    time = np.round(time / 60.) * 60.
    print('  Loaded data range: ',
          str(timeArray.epochToDatetime(time[0])), ' -> ',
          str(timeArray.epochToDatetime(time[-1])))
    return time, z, data

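A hedged usage sketch for read_mat_profile_files; the directory, station name, and dataset tag below are hypothetical placeholders, not values from the original code.

time, z, data = read_mat_profile_files('/path/to/profiles', 'station01', 'salt',
                                       dataSetName='run1', dataSetType='ms')
print('time steps:', time.shape[0], 'vertical levels:', z.shape[0])
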
def test_rdop4_zero_rowscutoff(self):
    matfile = 'nastran_op4_data/r_c_rc.mat'
    filenames = glob('nastran_op4_data/*.op4')

    o4 = op4.OP4()
    o4._rowsCutoff = 0
    m = matlab.loadmat(matfile)

    for filename in filenames:
        if filename.find('badname') > -1:
            with assert_warns(RuntimeWarning) as cm:
                dct = o4.dctload(filename)
            the_warning = str(cm.warning)
            assert 0 == the_warning.find('Output4 file has matrix '
                                         'name: 1mat')
            with assert_warns(RuntimeWarning) as cm:
                names, mats, forms, mtypes = o4.listload(filename)
            the_warning = str(cm.warning)
            assert 0 == the_warning.find('Output4 file has matrix '
                                         'name: 1mat')
            with assert_warns(RuntimeWarning) as cm:
                names2, sizes, forms2, mtypes2 = o4.dir(filename, verbose=False)
            the_warning = str(cm.warning)
            assert 0 == the_warning.find('Output4 file has matrix '
                                         'name: 1mat')
        else:
            dct = o4.dctload(filename)
            names, mats, forms, mtypes = o4.listload(filename)
            names2, sizes, forms2, mtypes2 = o4.dir(filename, verbose=False)

        assert sorted(dct.keys()) == sorted(names)
        assert names == names2
        assert forms == forms2
        assert mtypes == mtypes2

        for mat, sz in zip(mats, sizes):
            assert mat.shape == sz

        for nm in dct:
            if nm[-1] == 's':
                matnm = nm[:-1]
            elif nm == '_1mat':
                matnm = 'rmat'
            else:
                matnm = nm
            assert np.allclose(m[matnm], dct[nm][0])
            pos = names.index(nm)
            assert np.allclose(m[matnm], mats[pos])
            assert dct[nm][1] == forms[pos]
            assert dct[nm][2] == mtypes[pos]

        nm2 = nm = 'rcmat'
        if filename.find('single') > -1:
            nm2 = 'rcmats'
        if filename.find('badname') > -1:
            with assert_warns(RuntimeWarning) as cm:
                dct = o4.dctload(filename, nm2)
                name, mat, *_ = o4.listload(filename, [nm2])
        else:
            dct = o4.dctload(filename, [nm2])
            name, mat, *_ = o4.listload(filename, nm2)
        assert np.allclose(m[nm], dct[nm2][0])
        assert np.allclose(m[nm], mat[0])

def subtract_background_from_stacks(scanfile, indir, outdir, scannumber=-1):
    """Subtract background from SAXS data in MAT-file stacks.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [scannumber]
    else:
        scannos = sorted(scans.keys())
    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except TypeError:
            print("No concentration for scan #%02d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)
        stackname = "s%03d" % scanno
        stack = loadmat(indir + '/' + stackname + '.mat')[stackname]
        subs = np.zeros_like(stack)
        (npos, nrep, _, _) = stack.shape
        for pos in range(npos):
            print(pos)
            buf = get_bg(indir, bufscan, pos)
            for rep in range(nrep):
                subs[pos, rep, ...] = errsubtract(stack[pos, rep, ...], buf)
                subs[pos, rep, 1:3, :] = subs[pos, rep, 1:3, :] / conc
        outname = "subs%03d" % scanno
        savemat(outdir + '/' + outname + ".mat", {outname: subs},
                do_compression=1, oned_as='row')

def read_training_data():
    """
    Returns a dictionary of features for the training data
    """
    filename = os.path.join('..', 'data/Piton', 'TrainingSet_2.mat')
    data_orig = loadmat(filename)

    # create a clean dictionary of data,
    # taking logarithms of the features for which
    # the test set also has logarithms (thanks Clement!)

    # for now only deal with the features that are ok in the two datasets
    data = {}
    data['KurtoEB'] = log(np.array(data_orig['KurtoEB'].flat))
    data['KurtoVT'] = log(np.array(data_orig['KurtoVT'].flat))
    data['AsDecVT'] = log(np.array(data_orig['AsDecVT'].flat))
    data['AsDecEB'] = log(np.array(data_orig['AsDecEB'].flat))
    data['RappMaxMeanEB'] = log(np.array(data_orig['RappMaxMeanEB'].flat))
    data['RappMaxMeanVT'] = log(np.array(data_orig['RappMaxMeanVT'].flat))
    data['DurVT'] = np.abs(np.array(data_orig['DurVT'].flat))
    data['DurEB'] = np.abs(np.array(data_orig['DurEB'].flat))
    data['EneEB'] = log(np.array(data_orig['EneFFTeB'].flat))
    data['EneVT'] = log(np.array(data_orig['EneFFTvT'].flat))

    return data

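A minimal usage sketch, assuming the ../data/Piton/TrainingSet_2.mat file referenced above is present; it just lists the feature arrays that were built.

data = read_training_data()
for key in sorted(data.keys()):
    print(key, data[key].shape)
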
def __readOldMatBEvecs(file_path):
    """
    Returns information about the excitation BE waveform present in the .mat file

    Inputs:
        filepath -- Absolute filepath of the .mat parameter file

    Outputs:
        Tuple -- (bin_inds, bin_w, bin_FFT, BE_wave, dc_amp_vec_full)\n
        bin_inds -- Bin indices\n
        bin_w -- Excitation bin Frequencies\n
        bin_FFT -- FFT of the BE waveform for the excited bins\n
        BE_wave -- Band Excitation waveform\n
        dc_amp_vec_full -- spectroscopic waveform.
        This information will be necessary for fixing the UDVS for AC modulation for example
    """
    matread = loadmat(file_path, squeeze_me=True)
    BE_wave = matread['BE_wave_1']
    bin_inds = matread['bin_ind_s'] - 1  # Python base 0. note also _s, for this case
    bin_w = matread['bin_w']
    dc_amp_vec_full = matread['dc_amp_vec_full']
    FFT_full = np.fft.fftshift(np.fft.fft(BE_wave))
    bin_FFT = np.conjugate(FFT_full[bin_inds])

    return bin_inds, bin_w, bin_FFT, BE_wave, dc_amp_vec_full

def test():
    """
    Test with Kinship dataset
    Use all positive triples and negative triples as a training set
    See how the reconstruction error is reduced during training
    """
    from scipy.io.matlab import loadmat
    mat = loadmat('../data/kinship/alyawarradata.mat')
    T = np.array(mat['Rs'], np.float32)
    T[T == 0] = -1  # set negative value to -1
    E, K = T.shape[0], T.shape[2]
    max_iter = E * E * K * 10

    n_dim = 10

    # p_idx = np.ravel_multi_index((T == 1).nonzero(), T.shape)   # raveled positive index
    # n_idx = np.ravel_multi_index((T == -1).nonzero(), T.shape)  # raveled negative index
    # model.fit(T, p_idx, n_idx, max_iter, e_gap=10000)

    training = np.random.binomial(1., 0.01, T.shape)
    testing = np.random.binomial(1., 0.5, T.shape)
    testing[training == 1] = 0

    model = AMDC(n_dim)
    model.population = True
    model.do_active_learning(T, training, 15000, testing)

def show_predictions(alpha="alpha", symbol="GE", xtn=".PNG"):
    if type(alpha) == str:
        print("Loading file named " + alpha + ".mat")
        # load a matlab style set of matrices from the file named by the string alpha
        a = mat.loadmat(alpha + ".mat", mat_dtype=False)
        if alpha in a:
            # get the variable with the name of the string in alpha
            alpha = a.get(alpha).reshape(-1)
        else:
            # get the first non-hidden key and reshape into a 1-D array
            alpha = a.get(list(a.keys())[2]).reshape(-1)
    print("Loading financial data for stock symbol", symbol)
    r = np.recfromcsv("/home/hobs/Desktop/References/quant/lyle/data/" + symbol + "_yahoo.csv",
                      skiprows=1)
    r.sort()
    r.high = r.high * r.adj_close / r.close  # adjust the high and low prices for stock splits
    r.low = r.low * r.adj_close / r.close    # adjust the high and low prices for stock splits
    daily_returns = r.adj_close[1:] / r.adj_close[0:-1] - 1
    predictions = lfilt(alpha, daily_returns)
    print("Plotting a scatter plot of", len(daily_returns), "returns vs",
          len(predictions), "predictions using a filter of length", len(alpha))
    (ax, fig) = plot(predictions, daily_returns[len(alpha):], s="bo", xtn=".PNG")
    ax.set_xlabel("Predicted Returns")
    ax.set_ylabel("Actual Returns")
    big_mask = np.abs(predictions) > np.std(predictions) * 1.2
    bigs = predictions[big_mask]
    true_bigs = daily_returns[big_mask]
    (ax, fig) = plot(bigs, true_bigs, s="r.", xtn=".PNG")
    fig.show()
    return (predictions, daily_returns, bigs, true_bigs, big_mask)

def test_srmr():
    fs = 16000
    s = loadmat("test/test.mat")["s"][:, 0]
    correct_ratios = loadmat("test/correct_ratios.mat")['correct_ratios'][0]

    ratio, avg_energy = srmr(s, fs)
    assert np.allclose(ratio, correct_ratios[1], rtol=1e-6, atol=1e-12)

    ratio_norm_fast, avg_energy_norm_fast = srmr(s, fs, fast=True, norm=True, max_cf=30)
    assert np.allclose(ratio_norm_fast, correct_ratios[2], rtol=1e-6, atol=1e-12)

    ratio_slow, avg_energy_slow = srmr(s, fs, fast=False)
    assert np.allclose(ratio_slow, correct_ratios[0], rtol=1e-6, atol=1e-12)

    ratio_norm, avg_energy_norm = srmr(s, fs, fast=False, norm=True, max_cf=30)
    assert np.allclose(ratio_norm, correct_ratios[3], rtol=1e-6, atol=1e-12)

def ReadDatasetFile(dataset_file_path):
    """Reads dataset file in Revisited Oxford/Paris ".mat" format.

    Args:
      dataset_file_path: Path to dataset file, in .mat format.

    Returns:
      query_list: List of query image names.
      index_list: List of index image names.
      ground_truth: List containing ground-truth information for dataset. Each
        entry is a dict corresponding to the ground-truth information for a
        query. The dict may have keys 'easy', 'hard', 'junk' or 'ok', mapping
        to a list of integers; additionally, it has a key 'bbx' mapping to a
        list of floats with bounding box coordinates.
    """
    with tf.gfile.GFile(dataset_file_path, 'r') as f:
        cfg = matlab.loadmat(f)

    # Parse outputs according to the specificities of the dataset file.
    query_list = [str(im_array[0]) for im_array in np.squeeze(cfg['qimlist'])]
    index_list = [str(im_array[0]) for im_array in np.squeeze(cfg['imlist'])]
    ground_truth_raw = np.squeeze(cfg['gnd'])
    ground_truth = []
    for query_ground_truth_raw in ground_truth_raw:
        query_ground_truth = {}
        for ground_truth_key in _GROUND_TRUTH_KEYS:
            if ground_truth_key in query_ground_truth_raw.dtype.names:
                adjusted_labels = query_ground_truth_raw[ground_truth_key] - 1
                query_ground_truth[ground_truth_key] = adjusted_labels.flatten()

        query_ground_truth['bbx'] = np.squeeze(query_ground_truth_raw['bbx'])
        ground_truth.append(query_ground_truth)

    return query_list, index_list, ground_truth

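A hedged usage sketch for ReadDatasetFile; the gnd_roxford5k.mat path is a placeholder, and _GROUND_TRUTH_KEYS is assumed to be defined in the surrounding module as in the snippet above.

query_list, index_list, ground_truth = ReadDatasetFile('/path/to/gnd_roxford5k.mat')
print('%d queries, %d index images' % (len(query_list), len(index_list)))
print('keys for first query:', sorted(ground_truth[0].keys()))
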
def convert(in_filename, out_filename=None, spacings=None):
    A = loadmat(in_filename, struct_as_record=False)

    # struct
    S = A['Save_data'][0, 0]

    # volume
    V = S.P

    # output filename
    if out_filename is None:
        out_filename = os.path.splitext(in_filename)[0] + '.nrrd'
    logger.debug('Output filename: %s', out_filename)

    logger.debug('Writing NRRD file.')

    # NRRD options
    options = {}
    if spacings is None:
        xs = float((S.xmax - S.xmin) / V.shape[0])
        ys = float((S.ymax - S.ymin) / V.shape[1])
        zs = float((S.zmax - S.zmin) / V.shape[2])
        options['spacings'] = [xs, ys, zs]
    else:
        options['spacings'] = eval(spacings)
    logger.debug('Setting spacings to: %s', options['spacings'])

    nrrd.write(out_filename, V, options)

def get_top_scores(self, i=100, force_num=True):
    fn_scores = os.path.join(self.ds.path, "cpmc", "MySegmentsMat", self.name, "scores.mat")
    sc = ml.loadmat(fn_scores)["scores"]
    scores = list(np.sort(sc.ravel())[-1:(-1 - i):-1])
    if len(scores) < i and force_num:
        scores = (list(scores) * 100)[:100]
    return scores

def __init__(self, directory, filename=None):
    self.images = []
    self.directory = directory
    if filename is not None:
        imagefiles = np.loadtxt(os.path.join(directory, filename), dtype=str)
        for f in imagefiles:
            self.images.append(GrazImageDataSP(directory, "%s.image" % f, self))
    # self.features = ['dense_color_sift_3_scales', 'dense_sift_4_scales']
    self.features = ['dense_sift_4_scales']
    self.codebooks = dict()
    self.codebooks['dense_sift_4_scales'] = loadmat(
        os.path.join(self.directory, "MyCodebooks/kmeans_dense_sift_4_scales_300_words.mat"))['codebook']
    self.codebooks['dense_color_sift_3_scales'] = loadmat(
        os.path.join(self.directory, "MyCodebooks/kmeans_dense_color_sift_3_scales_300_words.mat"))['codebook']
    self.num_features = len(self.features)
    self.images = self.images
    self.classes = ["cars", "bikes", "people"]
    self.train_classes = np.array([0, 1, 2])  # no sky and gras
    self.num_classes = len(self.train_classes)

def coastlines_read(path):
    coastlines = matlab.loadmat(path + '/coastlines.mat')['tmp']
    lon = coastlines[:, 0]
    lat = coastlines[:, 1]
    idx = np.isfinite(lon) & np.isfinite(lat)
    lon = lon[idx]
    lat = lat[idx]
    return lon, lat

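A small usage sketch (the data directory is a placeholder): plot the finite coastline points returned by coastlines_read.

import matplotlib.pyplot as plt

lon, lat = coastlines_read('/path/to/data')
plt.plot(lon, lat, 'k.', markersize=0.5)
plt.xlabel('longitude [deg]')
plt.ylabel('latitude [deg]')
plt.show()
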
def timeseries_design(subject_id, whatParadigm, onsets_dir):
    import scipy.signal
    import scipy.special as sp
    import numpy as np
    import math
    from nipype.interfaces.base import Bunch
    from copy import deepcopy
    from scipy.io.matlab import loadmat
    import glob
    import os
    # from Facematch import onsets_dir

    print("Entered timeseries_design once with arguments SUBID = " + subject_id +
          ", paradigm = " + whatParadigm + ", and onsets dir = " + onsets_dir + ".")
    output = []
    regressor_names = None
    regressors = None

    onsets_temp = os.path.join(onsets_dir, subject_id + '*onsets.mat')
    onsets_files = sorted(glob.glob(onsets_temp))
    print(onsets_files)

    for r in range(len(onsets_files)):
        print("Run %d" % (r))
        mat = loadmat(onsets_files[r], struct_as_record=False)
        ons = mat['onsets'][0]
        nam = mat['names'][0]
        dur = mat['durations'][0]

        # Paradigm-specifics
        if whatParadigm == 'WMSTAT':
            # 24 types...
            names = ['inst_2r', 'inst_4r', 'inst_dry', 'inst_4ry',
                     'stim_2r', 'stim_4r', 'stim_dry', 'stim_4ry',
                     'probe_2r', 'probe_4r', 'probe_dry', 'probe_4ry']
            durations = []
            run_onsets = []
            for i in range(len(names)):
                print(names[i] + ": ")
                durations.append([0])
                run_onsets.append(ons[i][0])
        else:
            names = []
            durations = []
            run_onsets = []
            for i in range(len(nam)):
                names.append(str(nam[i][0]))
                run_onsets.append(ons[i][0])
                durations.append(dur[i][0])

        # regressor_names.append(['Linear','Quadratic','Cubic'])
        # x = np.linspace(-1,1,numTRs)
        # regressors.append([list(sp.legendre(1)(x)),list(sp.legendre(2)(x)),list(sp.legendre(3)(x))])

        output.insert(r, Bunch(conditions=deepcopy(names),
                               onsets=deepcopy(run_onsets),
                               durations=deepcopy(durations),
                               amplitudes=None,
                               tmod=None,
                               pmod=None,
                               regressor_names=regressor_names,
                               regressors=regressors))
        # here is where we can do linear, quad, etc detrending
    return output

def draw_annotations(frame_n, annotation_path, drawn_image):
    annotation_file = annotation_path + '/' + frame_n + '_00.mat'
    if os.path.isfile(annotation_file):
        mat_file = loadmat(annotation_file)
        bnd_box = mat_file['box']
        cv2.rectangle(drawn_image,
                      tuple([int(bnd_box[0][0]), int(bnd_box[0][1])]),
                      tuple([int(bnd_box[0][2]), int(bnd_box[0][3])]),
                      color=(192, 192, 192), thickness=1)
    else:
        cv2.putText(drawn_image, 'no annotation', (50, 90),
                    cv2.FONT_HERSHEY_PLAIN, 2, (192, 192, 192), thickness=2)

def object_gt_hol_features(self, norm=1):
    fnfeat = os.path.join(self.ds.path, "cpmc", "MySegmentsMat", self.name,
                          "holistic_features_object_gt.mat")
    data = ml.loadmat(fnfeat)["holgtfeat"].ravel()
    featnames = ("csift_fg", "sift_fg", "csift_bg", "sift_bg",
                 "phog_contour", "phog_edges", "phog_edges_square")
    res = {}
    for i, n in enumerate(featnames):
        res[n] = data[i]
        if norm is not None:
            res[n] = _normalize(res[n], norm)
    return res

def readGmodeParms(parm_path):
    """
    Translates the parameters stored in a G-mode parms.mat file into a python dictionary

    Parameters
    ------------
    parm_path : String / unicode
        Absolute path of the parms.mat file

    Returns
    ----------
    parm_dict : Dictionary
        dictionary of relevant parameters neatly formatted with units
    """
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)

    parm_dict = dict()
    IO_parms = parm_data['IOparms']
    parm_dict['IO_samp_rate_[Hz]'] = np.int32(IO_parms['sampRate'].item())
    parm_dict['IO_down_samp_rate_[Hz]'] = np.int32(IO_parms['downSampRate'].item())
    parm_dict['IO_AO0_amp'] = np.int32(IO_parms['AO0_amp'].item())
    parm_dict['IO_AO1_amp'] = np.int32(IO_parms['AO1_amp'].item())
    parm_dict['IO_AI_chans'] = np.int32(parm_data['aiChans'])

    env_parms = parm_data['envParms']
    parm_dict['envelope_mode'] = np.int32(env_parms['envMode'].item())
    parm_dict['envelope_type'] = np.int32(env_parms['envType'].item())
    parm_dict['envelope_smoothing'] = np.int32(env_parms['smoothing'].item())

    forc_parms = parm_data['forcParms']
    parm_dict['FORC_V_high_1_[V]'] = float(forc_parms['vHigh1'].item())
    parm_dict['FORC_V_high_2_[V]'] = float(forc_parms['vHigh2'].item())
    parm_dict['FORC_V_low_1_[V]'] = float(forc_parms['vLow1'].item())
    parm_dict['FORC_V_low_2_[V]'] = float(forc_parms['vLow2'].item())

    gen_sig = parm_data['genSig']
    parm_dict['wfm_f_fast_[Hz]'] = float(gen_sig['fFast'].item())
    parm_dict['wfm_d_fast_[s]'] = float(gen_sig['dFast'].item())
    parm_dict['wfm_p_slow_[s]'] = float(gen_sig['pSlow'].item())
    parm_dict['wfm_n_cycles'] = np.int32(gen_sig['nCycles'].item())
    parm_dict['wfm_swap_mode'] = np.int32(gen_sig['swapMode'].item())
    parm_dict['wfm_reps'] = np.int32(gen_sig['mReps'].item())

    scl_parms = parm_data['sclParms']
    parm_dict['wfm_amp_tip_fast_[V]'] = float(scl_parms['ampTipFast'].item())
    parm_dict['wfm_off_tip_fast_[V]'] = float(scl_parms['offTipFast'].item())
    parm_dict['wfm_amp_tip_slow_[V]'] = float(scl_parms['ampTipSlow'].item())
    parm_dict['wfm_off_tip_slow_[V]'] = float(scl_parms['offTipSlow'].item())
    parm_dict['wfm_amp_BD_fast_[V]'] = float(scl_parms['ampBDfast'].item())
    parm_dict['wfm_off_BD_fast_[V]'] = float(scl_parms['offBDfast'].item())

    parm_dict['grid_num_rows'] = parm_data['numrows']
    parm_dict['grid_num_cols'] = parm_data['numcols']

    return parm_dict

def sub_names(subject_id, whatParadigm, onsets_dir):
    import scipy.signal
    import scipy.special as sp
    import numpy as np
    import math
    from nipype.interfaces.base import Bunch
    from copy import deepcopy
    from scipy.io.matlab import loadmat
    import glob
    import os
    # from Facematch import onsets_dir

    print("Entered sub_names once with arguments SUBID = " + subject_id +
          ", paradigm = " + whatParadigm + ", and onsets dir = " + onsets_dir + ".")

    onsets_temp = os.path.join(onsets_dir, subject_id + '*onsets.mat')
    onsets_files = sorted(glob.glob(onsets_temp))
    subs = []
    testmat = loadmat(onsets_files[0], struct_as_record=False)
    testnames = testmat['names'][0]
    names_count_vec = np.zeros(len(testnames))

    for r in range(len(onsets_files)):
        mat = loadmat(onsets_files[r], struct_as_record=False)
        ons = mat['onsets'][0]
        nam = mat['names'][0]
        dur = mat['durations'][0]

        names = []
        durations = []
        run_onsets = []
        for condition in range(len(nam)):
            for onset in range(len(ons[condition][0])):
                names_count_vec[condition] += 1
                names.append(str(nam[condition][0]) + '_%d' % (names_count_vec[condition]))
                run_onsets.append([ons[condition][0][onset]])
                durations.append(dur[condition][0])
                subs.append(('_estimate_model%d/pe%d.nii' % (r, condition * len(ons[condition][0]) + onset + 1),
                             str(nam[condition][0]) + '_%04d.nii' % (names_count_vec[condition])))
    return subs

def load_dataset(dataset):
    if dataset == 'umls':
        mat = loadmat('../data/%s/uml.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'nation':
        mat = loadmat('../data/%s/dnations.mat' % (dataset))
        T = np.array(mat['R'], np.float32)
    elif dataset == 'kinship':
        mat = loadmat('../data/%s/alyawarradata.mat' % (dataset))
        T = np.array(mat['Rs'], np.float32)
    elif dataset == 'wordnet':
        T = pickle.load(open('../data/%s/reduced_wordnet.pkl' % (dataset), 'rb'))
    elif dataset == 'freebase':
        T, _, _ = pickle.load(open('../data/freebase/subset_5000.pkl', 'rb'))

    if dataset == 'umls' or dataset == 'nation' or dataset == 'kinship':
        T = np.swapaxes(T, 1, 2)
        T = np.swapaxes(T, 0, 1)  # [relation, entity, entity]

    T[np.isnan(T)] = 0
    return T

def __readparms(parm_path):
    """
    Copies experimental parameters from the .mat file to a dictionary

    Parameters
    ------------
    parm_path : string / unicode
        Absolute path of the parameters file

    Returns
    --------
    (parm_dict, excit_wfm, spec_ind_mat) : tuple

    parm_dict : dictionary
        Dictionary containing all relevant parameters
    excit_wfm : 1d numpy float array
        Excitation waveform
    spec_ind_mat : 2D numpy float array
        Spectroscopic indices matrix
    """
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)

    parm_dict = dict()
    IO_parms = parm_data['IOparms']
    parm_dict['IO_samp_rate_[Hz]'] = np.int32(IO_parms['sampRate'].item())
    parm_dict['IO_down_samp_rate_[Hz]'] = np.int32(IO_parms['downSampRate'].item())
    parm_dict['IO_AO0_amp'] = np.int32(IO_parms['AO0_amp'].item())
    parm_dict['IO_AI_chans'] = np.int32(parm_data['aiChans'])

    parm_dict['grid_num_rows'] = parm_data['numrows']
    parm_dict['grid_num_cols'] = parm_data['numcols']

    sporc_parms = parm_data['sporcParms']
    parm_dict['SPORC_V_max_[V]'] = np.float32(sporc_parms['V_max'].item())
    parm_dict['SPORC_N_steps'] = np.int32(sporc_parms['N_steps'].item())
    parm_dict['SPORC_N_reps'] = np.int32(sporc_parms['N_reps'].item())
    parm_dict['SPORC_t_max_[sec]'] = np.float32(sporc_parms['t_max'])
    parm_dict['SPORC_f_cutoff_[Hz]'] = np.int32(sporc_parms['f_cutoff'])
    parm_dict['SPORC_f_rolloff_[Hz]'] = np.int32(sporc_parms['f_rolloff'])

    if 'FORC_vec' in parm_data.keys() and 'ind_vecs' in parm_data.keys():
        excit_wfm = np.squeeze(np.float32(parm_data['FORC_vec']))
        spec_ind_mat = np.transpose(np.float32(parm_data['ind_vecs']))
    else:
        # Look for a second parms file that contains these vectors:
        fold, basename = path.split(parm_path)
        second_path = path.join(fold, 'SPORC_wave.mat')
        h5_sporc_parms = h5py.File(second_path, 'r')  # Use this for v7.3 and beyond.
        excit_wfm = np.squeeze(h5_sporc_parms['FORC_vec'][()])
        spec_ind_mat = VALUES_DTYPE(h5_sporc_parms['ind_vecs'][()])
        h5_sporc_parms.close()

    return parm_dict, excit_wfm, spec_ind_mat

def get_out_vars(self):
    """
    Obtain all BECAS output variables and store into arrays
    """
    rst = spio.loadmat(self.utils_rst_filename, squeeze_me=True)

    # iterate over structured numpy array
    strc = rst['csprops']
    self.csprops = np.array([])
    for k in strc.dtype.names:
        if k == 'MassPerMaterial':
            # skipped because array needs to be flat
            pass
        else:
            v = strc[k]
            self.csprops = np.append(self.csprops, v)

    self.masspermaterial = rst['csprops']['MassPerMaterial']

    matmatrix = spio.loadmat(self.utils_rst_filename, squeeze_me=True, struct_as_record=False)
    self.k_matrix = matmatrix['constitutive'].Ks
    self.m_matrix = matmatrix['constitutive'].Ms

""" Converting matlab velocity files to raw binary files. """ import numpy as np from scipy.io import matlab import sys length = len(sys.argv) if (length == 1): length = 1 else: length = int(sys.argv[1]) for filenum in range(1, length + 1): matfile = matlab.loadmat('snapshot' + str(filenum) + '.mat') dx = matfile['DeltaX'] dy = matfile['DeltaY'] zc = matfile['ZC'] u = matfile['UCenter'] v = matfile['VCenter'] w = matfile['WCenter'] charac = np.where(np.isnan(u), -1.0, 1.0) print(dx, dy) print(zc.T) i = 487 j = 67 k = 19 print(u[i, j, k], v[i, j, k], w[i, j, k]) print(u.shape)
#!/usr/bin/env python

import logging
from scipy.io.matlab import loadmat
from sktensor import dtensor, cp_als

# Set logging to DEBUG to see CP-ALS information
logging.basicConfig(level=logging.DEBUG)

# Load Matlab data and convert it to dense tensor format
mat = loadmat('../data/sensory-bread/brod.mat')
T = dtensor(mat['X'])

# Decompose tensor using CP-ALS
P, fit, itr, exectimes = cp_als(T, 3, init='random')

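A short follow-up sketch, assuming the object returned by cp_als exposes factor matrices as P.U and component weights as P.lmbda (the sktensor ktensor convention): inspect the rank-3 decomposition.

for mode, factor in enumerate(P.U):
    print('mode-%d factor matrix shape: %s' % (mode, factor.shape))
print('component weights:', P.lmbda)
print('fit: %.4f after %d iterations' % (fit, itr))
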
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    (basename, parm_paths, data_paths) = self._parse_file_path(file_path)

    (folder_path, unused) = path.split(file_path)
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))

    # Need to take the complex conjugate if reading from a .mat file
    # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])
    self.points_per_pixel = len(be_wave)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    else:
        self.num_rows = int(self.num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # method 1 for calculating the correct excitation frequency:
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # method 2 for calculating the exact excitation frequency:
    """
    fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
    w_vec = np.linspace(-0.5 * samp_rate,
                        0.5 * samp_rate - 1.0 * samp_rate / self.points_per_pixel,
                        self.points_per_pixel)
    hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
    ex_freq_correct = w_vec[hot_bins[-1]]
    """

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    # First finish writing all global parameters, create the file too:
    h5_f = h5py.File(h5_path, 'w')
    global_parms = dict()
    global_parms['data_type'] = 'G_mode_line'
    global_parms['translator'] = 'G_mode_line'
    write_simple_attrs(h5_f, global_parms)

    meas_grp = create_indexed_group(h5_f, 'Measurement')
    write_simple_attrs(meas_grp, parm_dict)

    pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
    spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

    first_dat = True
    for key in data_paths.keys():
        # Now that the file has been created, go over each raw data file:
        # 1. write all ancillary data. Link data. 2. Write main data sequentially

        """
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        if first_dat:
            if len(data_paths) > 1:
                # All positions and spectra are shared between channels
                h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc,
                                                               is_spectral=False)
                h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc,
                                                                 is_spectral=True)
            elif len(data_paths) == 1:
                h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc,
                                                               is_spectral=False)
                h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc,
                                                                 is_spectral=True)

            first_dat = False
        else:
            pass

        h5_main = write_main_dataset(chan_grp,
                                     (self.num_rows, self.points_per_pixel * num_cols),
                                     'Raw_Data', 'Deflection', 'V',
                                     None, None,
                                     h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                     h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                     chunks=(1, self.points_per_pixel), dtype=np.float16)

        # Now transfer scan data in the dat file to the h5 file:
        self._read_data(data_paths[key], h5_main)

    h5_f.close()
    print('G-Line translation complete!')

    return h5_path

        Tc[mask_idx[2][i]][mask_idx[0][i], mask_idx[1][i]] = 0

    # predict unknown values
    P = predict_BE_als(Tc)
    P = normalize_predictions(P, e, k)

    # compute area under precision recall curve
    prec, recall, _ = precision_recall_curve(GROUND_TRUTH[target_idx], P[target_idx])
    return auc(recall, prec)


if __name__ == '__main__':
    # load data
    st = dt.datetime.now()
    mat = loadmat('E:/experiments/rescal-bilinear/data/countries_s3.mat')
    # mat = loadmat('F:/experiment/rescal-bilinear/data/umls.mat')
    # mat = loadmat('F:/experiment/rescal-bilinear/data/nations.mat')
    K = array(mat['Rs'], np.float32)
    # print(K[0], K.shape)
    K = Tensor2Matrix(K)
    # print(K[0], '\r', K.shape)
    K = HimmingDistance(K, -0.0005)
    # print(K)
    e, k = K.shape[0], K.shape[2]
    SZ = e * e * k

    # copy ground truth before preprocessing
    GROUND_TRUTH = K.copy()

    # construct array for rescal

def translate(self, parm_path):
    """
    Basic method that translates .mat data files to a single .h5 file

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    self.parm_path = path.abspath(parm_path)
    (folder_path, file_name) = path.split(parm_path)
    (file_name, base_name) = path.split(folder_path)
    h5_path = path.join(folder_path, base_name + '.h5')

    # Read parameters
    parm_dict = readGmodeParms(parm_path)

    # Add the w^2 specific parameters to this list
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
    freq_sweep_parms = parm_data['freqSweepParms']
    parm_dict['freq_sweep_delay'] = float(freq_sweep_parms['delay'].item())
    gen_sig = parm_data['genSig']
    parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
    freq_array = np.float32(parm_data['freqArray'])

    # prepare and write spectroscopic values
    samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
    num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)

    w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate, np.float32(samp_rate / num_bins))

    # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid
    spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE)
    spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
    spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

    spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32)
    spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
    spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

    num_rows = parm_dict['grid_num_rows']
    num_cols = parm_dict['grid_num_cols']
    parm_dict['data_type'] = 'GmodeW2'

    num_pix = num_rows * num_cols

    global_parms = dict()
    global_parms['grid_size_x'] = parm_dict['grid_num_cols']
    global_parms['grid_size_y'] = parm_dict['grid_num_rows']
    # assuming that the experiment was completed:
    global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
    global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
    global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
    global_parms['translator'] = 'W2'

    # Now start creating datasets and populating:
    if path.exists(h5_path):
        remove(h5_path)

    h5_f = h5py.File(h5_path, 'w')
    write_simple_attrs(h5_f, global_parms)

    meas_grp = create_indexed_group(h5_f, 'Measurement')
    chan_grp = create_indexed_group(meas_grp, 'Channel')
    write_simple_attrs(chan_grp, parm_dict)

    pos_dims = [Dimension('X', 'nm', num_rows),
                Dimension('Y', 'nm', num_cols)]
    spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                 Dimension('Excitation Frequency ', 'Hz', len(freq_array))]

    # Minimize file size to the extent possible.
    # DAQs are rated at 16 bit so float16 should be most appropriate.
    # For some reason, compression is more effective on time series data
    h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data',
                                 'Deflection', 'V',
                                 pos_dims, spec_dims,
                                 chunks=(1, num_bins), dtype=np.float32)

    h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
    h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

    # Now doing link_h5_objects_as_attrs:
    link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

    # Now read the raw data files:
    pos_ind = 0
    for row_ind in range(1, num_rows + 1):
        for col_ind in range(1, num_cols + 1):
            file_path = path.join(folder_path,
                                  'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
            print('Working on row {} col {}'.format(row_ind, col_ind))
            if path.exists(file_path):
                # Load data file
                pix_data = loadmat(file_path, squeeze_me=True)
                pix_mat = pix_data['AI_mat']
                # Take the inverse FFT on 2nd dimension
                pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                # Verified with Matlab - no conjugate required here.
                pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                h5_main[pos_ind, :] = np.float32(pix_vec)
                h5_f.flush()  # flush from memory!
            else:
                print('File not found for: row {} col {}'.format(row_ind, col_ind))

            pos_ind += 1
            if (100.0 * pos_ind / num_pix) % 10 == 0:
                print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

    h5_f.close()

    return h5_path

@jit
def uncorrelated_rayleigh_channel(in_cnt, out_cnt, realization_cnt=1,
                                  realization_index_first=True):
    # CN(0, 1) entries: the sqrt(0.5) factor scales both the real and the imaginary part
    res = np.sqrt(0.5) * (np.random.randn(in_cnt, out_cnt, realization_cnt) +
                          1j * np.random.randn(in_cnt, out_cnt, realization_cnt))
    if realization_index_first:
        res = np.transpose(res, axes=(2, 0, 1))
    return res


if test:
    data_matlab = loadmat('test/test_1_18.mat')
    H_NLoS_desired = data_matlab['H_NLoS_desired'].transpose(2, 0, 1).copy()
    H_NLoS_interfering = data_matlab['H_NLoS_interfering'].transpose(2, 0, 1).copy()
    SE_MMSE_NLoS_montecarlo_ref = data_matlab['SE_MMSE_NLoS_montecarlo']
    SE_MMSE_NLoS_nonlinear_ref = data_matlab['SE_MMSE_NLoS_nonlinear']
else:
    H_NLoS_desired = uncorrelated_rayleigh_channel(Mmax, Kmax, numberOfRealizations)
    H_NLoS_interfering = np.sqrt(betabar) * uncorrelated_rayleigh_channel(
        Mmax, Kmax, numberOfRealizations)

# Preallocate matrices for storing the simulation results
SE_MMSE_NLoS_montecarlo = np.zeros((len(K), len(C)))
SE_MMSE_NLoS_nonlinear = np.zeros((len(K), len(C)))

"""
-----------------------------------------

Here we create a structure that provides information about the experimental
paradigm. This is used by the :class:`nipype.interfaces.spm.SpecifyModel` to
create the information necessary to generate an SPM design matrix.
"""

from nipype.interfaces.base import Bunch

"""We're importing the onset times from a mat file (found on
http://www.fil.ion.ucl.ac.uk/spm/data/face_rep/)
"""

from scipy.io.matlab import loadmat

mat = loadmat(os.path.join(data_dir, "sots.mat"), struct_as_record=False)
sot = mat['sot'][0]
itemlag = mat['itemlag'][0]

subjectinfo = [Bunch(conditions=['N1', 'N2', 'F1', 'F2'],
                     onsets=[sot[0], sot[1], sot[2], sot[3]],
                     durations=[[0], [0], [0], [0]],
                     amplitudes=None,
                     tmod=None,
                     pmod=None,
                     regressor_names=None,
                     regressors=None)]

"""Setup the contrast structure that needs to be evaluated. This is a list of
lists. The inner list specifies the contrasts and has the following format -
[Name, Stat, [list of condition names], [weights on

def read_metadata_mat_file(meta_mat):
    """Read ILSVRC2012 metadata from the distributed MAT file.

    Parameters
    ----------
    meta_mat : str or file-like object
        The filename or file-handle for `meta.mat` from the
        ILSVRC2012 development kit.

    Returns
    -------
    synsets : ndarray, 1-dimensional, compound dtype
        A table containing ILSVRC2012 metadata for the "synonym sets"
        or "synsets" that comprise the classes and superclasses,
        including the following fields:
         * `ILSVRC2012_ID`: the integer ID used in the original
           competition data.
         * `WNID`: A string identifier that uniquely identifies
           a synset in ImageNet and WordNet.
         * `wordnet_height`: The length of the longest path to
           a leaf node in the FULL ImageNet/WordNet hierarchy
           (leaf nodes in the FULL ImageNet/WordNet hierarchy have
           `wordnet_height` 0).
         * `gloss`: A string representation of an English textual
           description of the concept represented by this synset.
         * `num_children`: The number of children in the hierarchy
           for this synset.
         * `words`: A string representation, comma separated, of
           different synonym words or phrases for the concept
           represented by this synset.
         * `children`: A vector of `ILSVRC2012_ID`s of children of
           this synset, padded with -1. Note that these refer to
           `ILSVRC2012_ID`s from the original data and *not* the
           zero-based index in the table.
         * `num_train_images`: The number of training images for
           this synset.
    """
    mat = loadmat(meta_mat, squeeze_me=True)
    synsets = mat['synsets']
    new_dtype = numpy.dtype([
        ('ILSVRC2012_ID', numpy.int16),
        ('WNID', ('S', max(map(len, synsets['WNID'])))),
        ('wordnet_height', numpy.int8),
        ('gloss', ('S', max(map(len, synsets['gloss'])))),
        ('num_children', numpy.int8),
        ('words', ('S', max(map(len, synsets['words'])))),
        ('children', (numpy.int8, max(synsets['num_children']))),
        ('num_train_images', numpy.uint16)
    ])
    new_synsets = numpy.empty(synsets.shape, dtype=new_dtype)
    for attr in ['ILSVRC2012_ID', 'WNID', 'wordnet_height', 'gloss',
                 'num_children', 'words', 'num_train_images']:
        new_synsets[attr] = synsets[attr]
    children = [numpy.atleast_1d(ch) for ch in synsets['children']]
    padded_children = [
        numpy.concatenate((c,
                           -numpy.ones(new_dtype['children'].shape[0] - len(c),
                                       dtype=numpy.int16)))
        for c in children
    ]
    new_synsets['children'] = padded_children
    return new_synsets

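A hedged usage sketch (the devkit path is a placeholder): build a WNID -> ILSVRC2012_ID lookup from the synset table returned above. The WNID field is a fixed-width bytes dtype, hence the decode.

synsets = read_metadata_mat_file('/path/to/ILSVRC2012_devkit_t12/data/meta.mat')
wnid_to_id = {wnid.decode('utf-8'): int(ilsvrc_id)
              for wnid, ilsvrc_id in zip(synsets['WNID'], synsets['ILSVRC2012_ID'])}
print(len(wnid_to_id), 'synsets indexed')
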
def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
                         download_if_missing=True, return_X_y=False):
    """Load the Olivetti faces data-set from AT&T (classification).

    Download it if necessary.

    =================   =====================
    Classes                                40
    Samples total                         400
    Dimensionality                       4096
    Features            real, between 0 and 1
    =================   =====================

    Read more in the :ref:`User Guide <olivetti_faces_dataset>`.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    shuffle : boolean, optional
        If True the order of the dataset is shuffled to avoid having
        images of the same person grouped.

    random_state : int, RandomState instance or None, default=0
        Determines random number generation for dataset shuffling. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    return_X_y : boolean, default=False.
        If True, returns `(data, target)` instead of a `Bunch` object. See
        below for more information about the `data` and `target` object.

        .. versionadded:: 0.22

    Returns
    -------
    bunch : Bunch object with the following attributes:
        - data: ndarray, shape (400, 4096). Each row corresponds to a ravelled
          face image of original size 64 x 64 pixels.
        - images : ndarray, shape (400, 64, 64). Each row is a face image
          corresponding to one of the 40 subjects of the dataset.
        - target : ndarray, shape (400,). Labels associated to each face image.
          Those labels are ranging from 0-39 and correspond to the
          Subject IDs.
        - DESCR : string. Description of the modified Olivetti Faces Dataset.

    (data, target) : tuple if `return_X_y=True`
        .. versionadded:: 0.22
    """
    data_home = get_data_home(data_home=data_home)
    if not exists(data_home):
        makedirs(data_home)
    filepath = _pkl_filepath(data_home, 'olivetti.pkz')
    if not exists(filepath):
        if not download_if_missing:
            raise IOError("Data not found and `download_if_missing` is False")

        print('downloading Olivetti faces from %s to %s'
              % (FACES.url, data_home))
        mat_path = _fetch_remote(FACES, dirname=data_home)
        mfile = loadmat(file_name=mat_path)
        # delete raw .mat data
        remove(mat_path)

        faces = mfile['faces'].T.copy()
        joblib.dump(faces, filepath, compress=6)
        del mfile
    else:
        faces = _refresh_cache([filepath], 6)
        # TODO: Revert to the following line in v0.23
        # faces = joblib.load(filepath)

    # We want floating point data, but float32 is enough (there is only
    # one byte of precision in the original uint8s anyway)
    faces = np.float32(faces)
    faces = faces - faces.min()
    faces /= faces.max()
    faces = faces.reshape((400, 64, 64)).transpose(0, 2, 1)
    # 10 images per class, 400 images total, each class is contiguous.
    target = np.array([i // 10 for i in range(400)])
    if shuffle:
        random_state = check_random_state(random_state)
        order = random_state.permutation(len(faces))
        faces = faces[order]
        target = target[order]
    faces_vectorized = faces.reshape(len(faces), -1)

    module_path = dirname(__file__)
    with open(join(module_path, 'descr', 'olivetti_faces.rst')) as rst_file:
        fdescr = rst_file.read()

    if return_X_y:
        return faces_vectorized, target

    return Bunch(data=faces_vectorized,
                 images=faces,
                 target=target,
                 DESCR=fdescr)

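For comparison, the public scikit-learn entry point can be exercised directly; the shapes follow the docstring above.

from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces(shuffle=True, random_state=42)
print(faces.data.shape)    # (400, 4096)
print(faces.images.shape)  # (400, 64, 64)
print(faces.target[:10])   # subject IDs in [0, 39]
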