import copy

import numpy as np
import matplotlib.pyplot as plt
import librosa as lib
from statsmodels.tsa import stattools

import preprocessing
import feature_extraction as fe

# ex must point to an audio file; example paths:
# ex = '..\\..\\boy_and_girl\\class1\\arctic_a0012.wav'
# ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'

time_series, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
time_series = preprocessing.avoid_overlap(time_series, N=100, f=500, fs=fs, plot=False)
time_series = preprocessing.downsample(time_series, fs, 4410)
print(fs)

frames = preprocessing.frame(time_series, int(0.03 * fs), int(0.015 * fs))
for i in range(frames.shape[1]):
    acf1 = stattools.acf(frames[:, i], nlags=100)
    fft, _ = fe.fft_singleside(frames[:, i], 4410, 8096)
    plt.figure()
    plt.subplot(211)
    plt.plot(np.abs(fft))
    plt.subplot(212)
    plt.stem(acf1)
    plt.show()


def acf_fundamental_freq(x, fmin, fmax, fs):
    y = copy.copy(x)
    y = preprocessing.avoid_overlap(y, N=100, f=fmax + 100, fs=fs,
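# The excerpt above ends just as acf_fundamental_freq begins. For orientation, here is a
# minimal, self-contained sketch of the same idea: estimate the fundamental frequency of
# one frame from the location of its autocorrelation peak within the lag range allowed
# by fmin/fmax. The helper name and the direct use of numpy.correlate are illustrative
# assumptions, not the project's preprocessing/feature_extraction API.
import numpy as np


def acf_f0_sketch(frame, fmin, fmax, fs):
    """Illustrative ACF-based pitch estimate for a single frame."""
    frame = frame - np.mean(frame)
    # full autocorrelation, keep non-negative lags only
    acf = np.correlate(frame, frame, mode='full')[len(frame) - 1:]
    lag_min = int(fs / fmax)                      # smallest lag = highest admissible f0
    lag_max = min(int(fs / fmin), len(acf) - 1)   # largest lag = lowest admissible f0
    if lag_min >= lag_max:
        return 0.0
    peak_lag = lag_min + np.argmax(acf[lag_min:lag_max + 1])
    return fs / peak_lag                          # ACF peak lag -> f0 in Hz


# rough check on a synthetic 200 Hz tone
fs_demo = 4410
tone = np.sin(2 * np.pi * 200 * np.arange(int(0.03 * fs_demo)) / fs_demo)
print(acf_f0_sketch(tone, fmin=50, fmax=500, fs=fs_demo))   # ~200 Hz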
# `signal`, `silence_remove`, `sample_rate`, `frame_length` and `frame_overlap`
# are defined earlier in the original script and are not shown in this excerpt.
params = {
    'nccf_thresh1': 0.3,
    'nccf_thresh2': 0.9,
    'nccf_maxcands': 3,
    'nccf_pwidth': 5,  # 5
    'merit_boost': 5,
    'merit_pivot': 0.20,
    'merit_extra': 0.4,
    'median_value': 7,
    'dp_w1': 0.15,
    'dp_w2': 0.5,
    'dp_w3': 100,
    'dp_w4': 0.9
}
pitch = pYAAPT.yaapt(signal, **params)

frames = preprocessing.frame(silence_remove, frame_length, frame_overlap)
f, t, stft = fea.stft(silence_remove, pic=None, fs=sample_rate,
                      nperseg=frame_length, noverlap=frame_overlap,
                      nfft=8192, padded=True, boundary=None)
f, t, stft = scipy.signal.stft(x=silence_remove, fs=sample_rate, window='hann',
                               nperseg=frame_length, noverlap=frame_overlap,
                               nfft=8192, detrend=False, return_onesided=True,
                               boundary='zeros', padded=True, axis=-1)
print(pitch.samp_values.shape[0], frames.shape[1])

for i in range(min(pitch.samp_values.shape[0], frames.shape[1])):
    plt.figure()
    plt.subplot(211)
    X, _ = np.abs(fea.fft_singleside(x=frames[:, i], fs=sample_rate, n=8192, pic=None))
    plt.plot(np.arange(0, 8192 / 2 + 1), np.abs(stft[:, i]), 'y')
    plt.axvline(pitch.samp_interp[i], c='b')
    plt.axvline(pitch.samp_values[i], c='g')
    plt.subplot(212)
    plt.plot(np.arange(0, 8192 / 2 + 1), X, 'r')
    plt.axvline(pitch.samp_interp[i], c='b')
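# Note on the plot above: the spectrum is drawn against FFT bin index
# (np.arange(0, 8192/2 + 1)), while pitch.samp_values / pitch.samp_interp from pYAAPT
# are in Hz, so the vertical pitch markers only line up with spectral peaks after
# converting Hz to bins. A small sketch of that conversion, assuming the same
# nfft=8192 and an example sample_rate of 44100:
import numpy as np

nfft = 8192
sample_rate = 44100   # assumed here; use the script's actual sample_rate


def hz_to_bin(f_hz, nfft=nfft, fs=sample_rate):
    """Map a frequency in Hz to the nearest one-sided FFT bin index."""
    return int(round(f_hz * nfft / fs))


def bin_to_hz(k, nfft=nfft, fs=sample_rate):
    """Map a one-sided FFT bin index back to Hz."""
    return k * fs / nfft


print(hz_to_bin(440.0))   # 440 Hz -> bin 82 at fs=44100
print(bin_to_hz(82))      # ~441.4 Hz
# e.g. plt.axvline(hz_to_bin(pitch.samp_values[i])) would place the pitch marker
# on the same bin axis as the magnitude spectrum.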
def reload_and_feature(picall, feature_type, average, nmel, order_frft, nmfcc,
                       saveprojectpath, savedata, savepic, savetestdata,
                       savepreprocess, savefeature, path, downsample_rate,
                       frame_time, frame_length, frame_overlap, test_rate):
    '''
    Feature indices accepted in feature_type:
        fe.stft,                      # 0
        fe.zero_crossing_rate,        # 1
        fe.energy,                    # 2
        fe.entropy_of_energy,         # 3
        fe.spectral_centroid_spread,  # 4
        fe.spectral_entropy,          # 5
        fe.spectral_flux,             # 6
        fe.spectral_rolloff,          # 7
        fe.bandwidth,                 # 8
        fe.mfccs,                     # 9
        fe.rms,                       # 10
        fe.stfrft,                    # 11
        fe.frft_mfcc,                 # 12
        fe.fundalmental_freq,         # 13
        fe.chroma_stft,               # 14
        fe.log_attack_time,           # 15
        fe.temoporal_centroid,        # 16
        timbral harmonic spectral descriptors (HSC/HSD/HSS/HSV),  # 17
        fe.pitches_mag_CDSV,          # 18
        delta MFCC (order 1),         # 19
        delta MFCC (order 2)          # 20
    '''
    labelname = os.listdir(path)  # subfolder names under the dataset path
    if not os.path.exists(savefeature):
        os.mkdir(savefeature)  # create the directory that holds the feature results
    for i in range(len(labelname)):
        if not os.path.exists(savefeature + '\\' + labelname[i]):
            os.mkdir(savefeature + '\\' + labelname[i])

    datafile = open(savepreprocess, encoding='utf-8')  # read the preprocessing results
    csv_reader = csv.reader(datafile)  # reading the file this way yields an iterator
    feature_set = []  # when statistics are used as features, every sample's features are
                      # cached here so they can be normalized together

    for row in csv_reader:  # the elements of row are strings
        time_series = np.array(row[2:]).astype('float32')  # row[0] is the label, row[1] the file index
        #######################################################################
        frames = preprocessing.frame(time_series, frame_length, frame_overlap)  # split into frames
        f, t, stft = fe.stft(time_series, pic=None, fs=downsample_rate,
                             nperseg=frame_length,
                             noverlap=frame_length - frame_overlap,
                             nfft=8192, boundary=None, padded=False)
        # if stft.shape[1] != frames.shape[1]:  # guard against the STFT column count differing from the frame count
        #     dim = min(stft.shape[1], frames.shape[1])
        #     stft = stft[:, 0:dim]
        #     frames = frames[:, 0:dim]
        # Mel = lib.feature.melspectrogram(S=np.abs(stft), sr=downsample_rate, n_fft=2*(stft.shape[0]-1), n_mels=512)
        feature_list = []  # one entry per requested feature type; each frame ends up with one concatenated vector
        if picall:  # plotting switch
            pic = savepic + '\\' + row[0] + '_' + row[1]
        else:
            pic = None

        for i in feature_type:
            if i == 0:
                feature0 = np.abs(stft)
                feature_list.append(feature0)
            elif i == 1:
                feature1 = fe.zero_crossing_rate(frames, pic=pic)
                feature_list.append(feature1)
            elif i == 2:
                feature2 = fe.energy(frames, pic=pic)
                feature_list.append(feature2)
            elif i == 3:
                feature3 = fe.entropy_of_energy(frames, pic=pic)
                feature_list.append(feature3)
            elif i == 4:
                feature4, feature41 = fe.spectral_centroid_spread(stft, downsample_rate, pic=pic)
                feature_list.append(feature4)
                feature_list.append(feature41)
            elif i == 5:
                feature5 = fe.spectral_entropy(stft, pic=pic)
                feature_list.append(feature5)
            elif i == 6:
                feature6 = fe.spectral_flux(stft, pic=pic)
                feature_list.append(feature6)
            elif i == 7:
                feature7 = fe.spectral_rolloff(stft, 0.85, downsample_rate, pic=pic)
                feature_list.append(feature7)
            elif i == 8:
                feature8 = fe.bandwidth(stft, f, pic=pic)
                feature_list.append(feature8)
            elif i == 9:
                feature9 = fe.mfccs(X=stft, fs=downsample_rate,
                                    # nfft=2*(stft.shape[0]-1),
                                    nfft=8192, n_mels=nmel, n_mfcc=nmfcc, pic=pic)
                feature_list.append(feature9)
            elif i == 10:
                feature10 = fe.rms(stft, pic=pic)
                feature_list.append(feature10)
            elif i == 11:
                feature11 = fe.stfrft(frames, p=order_frft[int(row[0])], pic=pic)
                feature_list.append(feature11)
            elif i == 12:
                tmp = fe.stfrft(frames, p=order_frft[int(row[0])])
                feature12 = fe.frft_MFCC(S=tmp, fs=downsample_rate, n_mfcc=nmfcc, n_mels=nmel, pic=pic)
                feature_list.append(feature12)
            elif i == 13:
                feature13, feature13_ = fe.fundalmental_freq(frames=frames, fs=downsample_rate, pic=pic)
                feature_list.append(feature13)
            elif i == 14:
                feature14 = fe.chroma_stft(S=stft, n_chroma=12, A440=440.0, ctroct=5.0,
                                           octwidth=2, base_c=True, norm=2)
                feature_list.append(feature14)
            elif i == 15:
                feature15 = fe.log_attack_time(x=time_series, lower_ratio=0.02,
                                               upper_ratio=0.99, fs=downsample_rate,
                                               n=frames.shape[1])
                feature_list.append(feature15)
            elif i == 16:
                feature16 = fe.temoporal_centroid(S=stft, hop_length=frame_overlap, fs=downsample_rate)
                feature_list.append(feature16)
            elif i == 17:
                # harm_freq, harm_mag = fe.harmonics(nfft=8192, nht=0.15, f=f, S=stft, fs=downsample_rate,
                #                                    fmin=50, fmax=500, threshold=0.2)
                # hsc = fe.harmonic_spectral_centroid(harm_freq, harm_mag)
                # hsd = fe.harmonic_spectral_deviation(harm_mag)
                # hss = fe.harmonic_spectral_spread(hsc, harm_freq, harm_mag)
                # hsv = fe.harmonic_spectral_variation(harm_mag)
                # feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                # feature_list.append(feature17)
                harm_freq, harm_mag = timbral.harmonics(frames=frames, fs=downsample_rate, S=stft,
                                                        f=f, nfft=8192, fmin=50, fmax=500, nht=0.15)
                hsc = timbral.harmonic_spectral_centroid(harm_freq, harm_mag)
                hsd = timbral.harmonic_spectral_deviation(harm_mag)
                hss = timbral.harmonic_spectral_spread(hsc, harm_freq, harm_mag)
                hsv = timbral.harmonic_spectral_variation(harm_mag)
                feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                feature_list.append(feature17)
            elif i == 18:
                feature18 = fe.pitches_mag_CDSV(f=f, S=stft, fs=downsample_rate,
                                                fmin=50, fmax=downsample_rate / 2, threshold=0.2)
                feature_list.append(feature18)
            elif i == 19:
                feature19 = fe.delta_features(feature9, order=1)
                feature_list.append(feature19)
            elif i == 20:
                feature20 = fe.delta_features(feature9, order=2)
                feature_list.append(feature20)

        features = np.concatenate([j for j in feature_list], axis=0)  # stack every feature type into one matrix
        long = list(range(features.shape[1]))  # drop frames that contain NaN
        for t in long[::-1]:
            if np.isnan(features[:, t]).any():
                features = np.delete(features, t, 1)

        if average:  # use statistics as features
            mean = np.mean(features, axis=1).reshape(1, features.shape[0])  # per-frame vectors are columns; turn the statistic into a row vector
            var = np.var(features, axis=1).reshape(1, features.shape[0])
            # std = np.std(features, axis=1).reshape(1, features.shape[0])
            # ske = np.zeros((1, features.shape[0]))
            # kur = np.zeros((1, features.shape[0]))
            # for n in range(features.shape[0]):
            #     ske[0, i] = sts.skewness(features[i, :])
            #     kur[0, i] = sts.kurtosis(features[i, :])
            features = np.concatenate([mean, var,
                                       np.array([int(row[0]), int(row[1])]).reshape(1, 2)],
                                      axis=1)  # replace the per-frame features with their statistics
            feature_set.append(features)
        else:
            scale = StandardScaler().fit(features)
            features = scale.transform(features)  # normalize
            csv_path = savefeature + '\\' + labelname[int(row[0])] + '\\' + row[0] + '_' + row[1] + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                buffer = np.concatenate([features.T,
                                         int(row[0]) * np.ones((features.shape[1], 1)),
                                         int(row[1]) * np.ones((features.shape[1], 1))],
                                        axis=1)
                csv_writer.writerows(buffer)
        print('featuring:', row[0], row[1])
    datafile.close()  # close the file to avoid stray errors

    if average:  # use statistics as features
        features = np.concatenate([k for k in feature_set], axis=0)  # rows are samples, columns are features
        tmp = features[:, -2:]  # keep the label columns out of the normalization
        features = features[:, 0:-2]
        scale = StandardScaler().fit(features)
        features = scale.transform(features)  # normalize
        features = np.concatenate([features, tmp], axis=1)  # re-attach the labels to the normalized features
        for k in range(features.shape[0]):
            csv_path = savefeature + '\\' + labelname[int(features[k, -2])] + \
                       '\\' + str(int(features[k, -2])) + '_' + str(int(features[k, -1])) + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)  # each audio file gets a single feature vector in its own csv file
                csv_writer.writerow(features[k, :])  # a single row, hence writerow (not writerows)
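# The per-file CSVs written above share one layout: in the per-frame branch each row is
# a frame's normalized feature vector followed by the class label and the file index,
# and in the `average` branch each file holds a single row of mean/variance statistics
# followed by the same two columns. A minimal sketch of reading such a file back
# (numpy only; the path below is a hypothetical example of the naming scheme):
import numpy as np


def load_feature_csv(csv_path):
    """Split one feature csv into (features, labels, file_index)."""
    data = np.loadtxt(csv_path, delimiter=',', ndmin=2)   # shape (n_rows, n_features + 2)
    features = data[:, :-2]             # per-frame vectors, or one statistics row
    labels = data[:, -2].astype(int)    # class label (repeated on every row)
    file_idx = data[:, -1].astype(int)  # index of the source audio file
    return features, labels, file_idx


X, y, idx = load_feature_csv('..\\features\\cello\\0_3.csv')   # hypothetical file
print(X.shape, y[0], idx[0])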
import numpy as np
import librosa as lib
import matplotlib.pyplot as plt

import preprocessing
import feature_extraction as fe
import visualization as visual
import timbral_feature as timbral

# ex = '..\\..\\suhao.wav'
# ex = '..\\..\\boy_and_girl\\class1\\arctic_a0012.wav'
# ex = '..\\..\\数据集2\\pre2012\\bflute\\BassFlute.mf.C4B4.aiff'
# ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'
ex = '..\\..\\数据集2\\post2012\\cello\\Cello.arco.ff.sulA.A5.stereo.aiff'

data, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
frame_length = int(0.03 * fs)
frame_lap = int(0.015 * fs)
# data = data + np.random.randn(len(data))
frames = preprocessing.frame(data, frame_length, frame_lap)
f, t, stft = fe.stft(data, pic=None, fs=fs, nperseg=frame_length,
                     noverlap=frame_length - frame_lap, nfft=8192,
                     boundary=None, padded=False)
stft = np.abs(stft)
harm_freq, harm_mag = timbral.harmonics(frames, fs, stft, f, nfft=8192,
                                        fmin=50, fmax=500, nht=0.15)

# plot the harmonics together with the spectrum of one frame
i = 20
y2 = harm_freq[i]
# y2 = y2[0: 10]
visual.picfftandpitch(f, stft[:, i], y2, title='harmonic extraction',
                      xlabel='freq(Hz)', ylabel='mag', pic=None)
plt.figure()
plt.plot(frames[:, i])
def generate_data():
    ''' Generates the polygon dataset

    Builds random-polygon train/valid/test sets, adds black blocks,
    embeds each 28x28 image in a 56x56 frame and returns the three
    sets as Theano shared variables.
    '''

    #############
    # LOAD DATA #
    #############

    print('... loading data')

    train_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 50000, [5, 10], changeratio=[1.0])
    valid_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 10000, [5, 10], changeratio=[1.0])
    test_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 10000, [5, 10], changeratio=[1.0])

    train_set_frame_x = numpy.zeros((len(train_set[1]), 56 * 56))
    train_set_frame_y = train_set[1]
    valid_set_frame_x = numpy.zeros((len(valid_set[1]), 56 * 56))
    valid_set_frame_y = valid_set[1]
    test_set_frame_x = numpy.zeros((len(test_set[1]), 56 * 56))
    test_set_frame_y = test_set[1]

    # add black blocks
    for i in range(len(train_set[1])):
        preprocessing.addblock(train_set[0][i].reshape(28, 28))
    for i in range(len(valid_set[1])):
        # the validation and test sets have the same size, so one loop handles both
        preprocessing.addblock(valid_set[0][i].reshape(28, 28))
        preprocessing.addblock(test_set[0][i].reshape(28, 28))

    # put digits into frame
    for i in range(len(train_set[1])):
        train_set_frame_x[i] = preprocessing.frame(train_set[0][i], offset=5)
    for i in range(len(valid_set[1])):
        valid_set_frame_x[i] = preprocessing.frame(valid_set[0][i], offset=5)
        test_set_frame_x[i] = preprocessing.frame(test_set[0][i], offset=5)

    # do not save the images
    # sio.savemat('sets.mat', {'trainsetx': train_set_frame_x,
    #                          'validsetx': valid_set_frame_x,
    #                          'testsetx': test_set_frame_x,
    #                          'trainsety': train_set_frame_y,
    #                          'validsety': valid_set_frame_y,
    #                          'testsety': test_set_frame_y})

    train_set = train_set_frame_x, train_set_frame_y
    valid_set = valid_set_frame_x, valid_set_frame_y
    test_set = test_set_frame_x, test_set_frame_y

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore we store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as indices, and if they are
        # floats that doesn't make sense), therefore instead of returning
        # ``shared_y`` we cast it to int. This little hack lets us get
        # around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
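# Since generate_data() returns Theano shared variables, a training function would
# typically slice minibatches out of them with `givens` instead of feeding numpy arrays.
# A minimal sketch of that wiring; the toy softmax classifier below is an assumption
# standing in for whatever model is actually trained on this data:
import numpy
import theano
import theano.tensor as T

batch_size = 500
datasets = generate_data()
train_set_x, train_set_y = datasets[0]

index = T.lscalar('index')   # minibatch index
x = T.matrix('x')            # flattened 56*56 images
y = T.ivector('y')           # int32 labels, as returned by shared_dataset

# toy softmax classifier so the sketch has a differentiable cost
n_in = 56 * 56
n_out = int(train_set_y.eval().max()) + 1   # number of classes inferred from the labels
W = theano.shared(numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros((n_out,), dtype=theano.config.floatX), name='b')
p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
cost = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
updates = [(p, p - 0.1 * T.grad(cost, p)) for p in (W, b)]

# `givens` slices each minibatch directly out of the GPU-resident shared variables
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size:(index + 1) * batch_size],
        y: train_set_y[index * batch_size:(index + 1) * batch_size],
    },
)

for minibatch_index in range(50000 // batch_size):
    minibatch_cost = train_model(minibatch_index)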
def load_data(dataset, preprocess=False):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)

    :type preprocess: bool
    :param preprocess: if True, add black blocks to the digits and embed
                       each 28x28 image in a 56x56 frame
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(os.path.split(__file__)[0], "..", "data", dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print('Downloading data from %s' % origin)
        urllib.urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) in which each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.
    # It gives the target for the example with the same index in the input.

    if preprocess:
        train_set_frame_x = numpy.zeros((50000, 56 * 56))
        train_set_frame_y = train_set[1]
        valid_set_frame_x = numpy.zeros((10000, 56 * 56))
        valid_set_frame_y = valid_set[1]
        test_set_frame_x = numpy.zeros((10000, 56 * 56))
        test_set_frame_y = test_set[1]

        # add black blocks
        for i in range(50000):
            preprocessing.addblock(train_set[0][i].reshape(28, 28))
        for i in range(10000):
            # the validation and test sets have the same size, so one loop handles both
            preprocessing.addblock(valid_set[0][i].reshape(28, 28))
            preprocessing.addblock(test_set[0][i].reshape(28, 28))

        # put digits into frame
        for i in range(50000):
            train_set_frame_x[i] = preprocessing.frame(train_set[0][i], offset=5)
        for i in range(10000):
            valid_set_frame_x[i] = preprocessing.frame(valid_set[0][i], offset=5)
            test_set_frame_x[i] = preprocessing.frame(test_set[0][i], offset=5)

        train_set = train_set_frame_x, train_set_frame_y
        valid_set = valid_set_frame_x, valid_set_frame_y
        test_set = test_set_frame_x, test_set_frame_y

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore we store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as indices, and if they are
        # floats that doesn't make sense), therefore instead of returning
        # ``shared_y`` we cast it to int. This little hack lets us get
        # around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
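# Usage note for load_data: because it returns Theano shared variables rather than
# numpy arrays, the number of minibatches is usually derived via get_value(borrow=True)
# so the data is not copied back from the GPU just to read its shape. A minimal sketch,
# with batch_size as an assumed hyperparameter:
batch_size = 20

datasets = load_data('mnist.pkl.gz', preprocess=True)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

# borrow=True reads the underlying array without forcing a copy
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

print(n_train_batches, n_valid_batches, n_test_batches)   # 2500 500 500 for MNIST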
    # (tail of the block-wise STFT helper that is called as mystft below)
    # how many columns can we fit within MAX_MEM_BLOCK?
    n_columns = int(util.MAX_MEM_BLOCK / (stft_matrix.shape[0] *
                                          stft_matrix.itemsize))

    # fill the STFT matrix block by block so the windowed FFT never
    # allocates more than MAX_MEM_BLOCK bytes at a time
    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])
        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window * y_frames[:, bl_s:bl_t],
                                            axis=0)[:stft_matrix.shape[0]]

    # one-sided frequency axis: evenly spaced from 0 to the Nyquist frequency fs/2
    f = np.linspace(0, np.pi, stft_matrix.shape[0], endpoint=True) * fs / np.pi / 2
    return stft_matrix, f


def stft_specgram(f, t, zxx, picname=None):
    plt.figure()
    plt.pcolormesh(t, f, np.abs(zxx))
    plt.colorbar()
    plt.title('STFT Magnitude')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.tight_layout()


# quick comparison of the custom STFT against the scipy-style one on a two-tone test signal
t = np.arange(0, 1, 0.0001)
x = np.sin(2 * np.pi * 200 * t) + np.sin(2 * np.pi * 50 * t)
frames = frame(x, 1000, 100)
f1, _, S1 = stft(x, pic=None, fs=10000, nperseg=1000, noverlap=1000 - 100,
                 nfft=8192, boundary=None, padded=False)
S2, f2 = mystft(frames, 10000, 8192)
# S3, f3 = libstft(x, fs=10000, n_fft=8192, hop_length=100, win_length=1000,
#                  center=False, dtype=np.complex64, pad_mode='reflect')
stft_specgram(f2, np.arange(0, S2.shape[1]), S2, picname=None)
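# The frequency axis above is built as np.linspace(0, np.pi, n_bins) * fs / np.pi / 2,
# which is just a roundabout way of spacing n_bins points evenly from 0 Hz to the
# Nyquist frequency fs/2. A quick, self-contained check of that equivalence with the
# same fs/nfft values as the test signal above (numpy only):
import numpy as np

fs = 10000
nfft = 8192
n_bins = nfft // 2 + 1   # one-sided spectrum size, i.e. stft_matrix.shape[0]

f_a = np.linspace(0, np.pi, n_bins, endpoint=True) * fs / np.pi / 2
f_b = np.linspace(0, fs / 2, n_bins, endpoint=True)

print(np.allclose(f_a, f_b))               # True: the two axes are identical
print(f_a[0], f_a[-1], f_a[1] - f_a[0])    # 0.0 Hz, 5000.0 Hz, ~1.22 Hz per bin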