def test_spec_augment_layer(data_format, atol=1e-4):
    """
    Tests the complete layer, checking if the parameter `training` has the
    expected behaviour.

    With `training=True` only the output shape is checked against the layer's
    declared output shape. With the default call (no `training` argument) the
    output must equal the input within the absolute tolerance `atol`.
    """
    batch_src, input_shape = get_spectrogram(data_format)

    model = tf.keras.Sequential()
    spec_augment = SpecAugment(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )
    model.add(spec_augment)

    # First, enforce training to True and check the shapes
    spec_augmented = model(batch_src, training=True)
    np.testing.assert_equal(model.layers[0].output_shape[1:], spec_augmented[0].shape)

    # Second, check that it doesn't change anything in default
    spec_augmented = model(batch_src)
    # `atol` has to be given by keyword: the third positional parameter of
    # assert_allclose is `rtol`, so passing it positionally silently checked
    # a relative tolerance instead of the absolute one this test advertises.
    np.testing.assert_allclose(spec_augmented, batch_src, atol=atol)
def check():
    """Print the label vector and mask vector built for the first test entry.

    Reads the 'test' wav/label list, builds the two `hp.lab_size`-long vectors
    for its first line, prints them and stops.
    """
    wav_label_list = get_wav_label_list(mode='test')
    label_dic = get_label_dic()
    max_entries = 1  # only the first entry is inspected

    for seen, line in enumerate(tqdm(wav_label_list), start=1):
        _, wav_name, label_field = line.strip().split('\t')
        labels = label_field.split('|')

        # The spectrogram itself is not inspected; the call is kept so the
        # audio file is still read exactly as before.
        train_x = get_spectrogram(os.path.join(hp.wavs_dir, wav_name))

        train_y = np.zeros(hp.lab_size)
        train_mask = np.zeros(hp.lab_size)

        # Every label of the sample is switched on in both vectors.
        for label in labels:
            own_index = int(label_dic[label][0])
            train_y[own_index] = 1
            train_mask[own_index] = 1

        # Entries after the first in label_dic[label] name further labels
        # whose positions are switched on in the mask only.
        for label in labels:
            entry = label_dic[label]
            if len(entry) > 1:
                for reverse_label in entry[1:]:
                    train_mask[int(label_dic[reverse_label][0])] = 1

        print(train_y)
        print(train_mask)

        if seen >= max_entries:
            break
def get_spectrogram_and_text(_inputs):
    '''Build the (spectrogram, text) pair for one example.

    Args:
      _inputs: A pair `(sound_fpath, text)`. `sound_fpath` is handed to
        `get_spectrogram`; `text` is a raw byte buffer that is reinterpreted
        as int32 values (expected to be bytes-like -- confirm against the
        producer of these inputs).

    Returns:
      A tuple `(spectrogram, text)` where `spectrogram` is the result of
      `reduce_frames(get_spectrogram(sound_fpath), hp.r)` and `text` is a
      1-D int32 array.
    '''
    sound_fpath, text = _inputs
    spectrogram = get_spectrogram(sound_fpath)
    spectrogram = reduce_frames(spectrogram, hp.r)
    # The binary mode of np.fromstring is deprecated in favour of
    # np.frombuffer. frombuffer returns a read-only view of the input, so
    # copy to hand back an independent, writable array as fromstring did.
    text = np.frombuffer(text, np.int32).copy()
    return spectrogram, text
def test_spec_augment_depth_exception():
    """
    Expects a RuntimeError when a SpecAugment layer is built and run in
    training mode on a spectrogram batch generated with `n_ch=4`.
    """
    fmt = "default"

    # The whole build-and-run sequence lives inside the `raises` block, so
    # the error may come from any of these steps.
    with pytest.raises(RuntimeError):
        spectrograms, shape = get_spectrogram(data_format=fmt, n_ch=4)

        net = tf.keras.Sequential()
        net.add(
            SpecAugment(
                input_shape=shape,
                freq_mask_param=5,
                time_mask_param=10,
                data_format=fmt,
            )
        )

        augmented = net(spectrograms, training=True)
        _ = augmented[0]
def thread_process(args):
    """Serialize one shard of the dataset into a TFRecord file.

    Args:
        args: A pair `(tfid, split_dataset)`. `tfid` names the output file
            `<hp.TF_DIR>/<tfid>.tfrecord`. `split_dataset` is an iterable of
            items whose element 0 is passed to `get_spectrogram` and whose
            element 1 is the target (must expose `reshape`).

    Each item becomes one `tf.train.Example` with float features 'x'
    (flattened spectrogram) and 'y' (flattened target).
    """
    tfid, split_dataset = args
    record_path = os.path.join(hp.TF_DIR, f'{tfid}.tfrecord')

    # Using the writer as a context manager guarantees it is closed (and its
    # buffered records flushed) even when an item fails part-way through.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for item in tqdm(split_dataset):
            fpath = item[0]
            y = item[1]
            x = get_spectrogram(fpath)
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'x': tf.train.Feature(float_list=tf.train.FloatList(
                        value=x.reshape(-1))),
                    'y': tf.train.Feature(float_list=tf.train.FloatList(
                        value=y.reshape(-1))),
                }))
            writer.write(example.SerializeToString())
def test_save_load_spec_augment(data_format, save_format):
    """
    Passes a configured SpecAugment layer and a spectrogram batch to
    `save_load_compare`, using `np.testing.assert_allclose` as the comparison
    function and `training=None`.
    """
    batch_src, input_shape = get_spectrogram(data_format=data_format)

    layer_kwargs = dict(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )
    layer = SpecAugment(**layer_kwargs)

    save_load_compare(
        layer,
        batch_src,
        np.testing.assert_allclose,
        save_format=save_format,
        layer_class=SpecAugment,
        training=None,
    )
def load_eval_data():
    """We evaluate on the last mini-batch.

    Loads `(sound_fpaths, texts)` from 'data/eval.pkl', converts every sound
    file to a frame-reduced spectrogram and zero-pads them to a common length.

    Returns:
        X: float32 array of shape `(n_samples, maxlen, hp.n_mels*hp.r)`, where
            `maxlen` is the longest reduced spectrogram; shorter samples are
            zero-padded at the end.
        texts: The second object stored in the pickle, returned unchanged.
    """
    from utils import get_spectrogram, reduce_frames

    # NOTE: pickle executes arbitrary code on load; this file must only ever
    # come from a trusted source.
    # `with` closes the file handle, which the bare open() call leaked.
    with open('data/eval.pkl', 'rb') as eval_file:
        sound_fpaths, texts = pickle.load(eval_file)

    # NOTE(review): the vocabulary maps are not used below; the call is kept
    # in case load_vocab has side effects -- confirm and drop if it has none.
    char2idx, idx2char = load_vocab()

    # Extract spectrogram from sound_fpaths
    xs = [reduce_frames(get_spectrogram(sound_fpath), hp.r)
          for sound_fpath in sound_fpaths]
    maxlen = max(len(x) for x in xs) if xs else 0

    # Set the length of samples in X to the maximum among them.
    X = np.zeros(shape=(len(xs), maxlen, hp.n_mels*hp.r), dtype=np.float32)
    for i, x in enumerate(xs):
        X[i, :len(x), :] = x

    return X, texts  # 3d array, list of str
def test_spec_augment_apply_masks_to_axis(inputs):
    """
    Exercises `SpecAugment._apply_masks_to_axis` for one parameter tuple
    `(data_format, axis, mask_param, n_masks)`.

    Three outcomes are expected depending on the tuple: a NotImplementedError
    for an axis outside 0..2, a ValueError for a `mask_param` other than 5,
    and otherwise an output whose size along `axis` matches the input shape.
    """
    data_format, axis, mask_param, n_masks = inputs
    batch_src, input_shape = get_spectrogram(data_format)

    layer = SpecAugment(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )

    # Unsupported axis: the full batch (not batch_src[0]) is passed on purpose
    # to simulate a 4D spectrogram.
    if axis not in [0, 1, 2]:
        with pytest.raises(NotImplementedError):
            layer._apply_masks_to_axis(batch_src, axis, mask_param, n_masks)
        return

    # Any mask_param other than 5 is expected to be rejected.
    if mask_param != 5:
        with pytest.raises(ValueError):
            layer._apply_masks_to_axis(batch_src[0], axis, mask_param, n_masks)
        return

    # Valid arguments: the transformation must keep the size of the axis.
    masked = layer._apply_masks_to_axis(batch_src[0], axis, mask_param, n_masks)
    np.testing.assert_equal(masked.shape[axis], input_shape[axis])
def process(args):
    """Convert one shard of the wav/label list into a TFRecord file.

    Args:
        args: A tuple `(wav_label_list, label_dic, cpu_id, mode)`.
            wav_label_list: iterable of tab-separated lines whose second
                field is a wav file name (relative to `hp.wavs_dir`) and
                whose third field is a '|'-separated list of labels.
            label_dic: mapping from a label to a sequence whose first element
                is the label's index; any further elements are labels whose
                indices are additionally switched on in the mask.
            cpu_id: used as the output file name, `<cpu_id>.tfrecord`.
            mode: 'train' or 'eval' select `hp.train_dir` / `hp.eval_dir`;
                any other value writes to `hp.test_dir`.

    Every line becomes one `tf.train.Example` holding the flattened values
    and the shapes of the spectrogram ('x'), label vector ('y') and mask
    vector ('mask').
    """
    wav_label_list, label_dic, cpu_id, mode = args

    if mode == 'train':
        out_dir = hp.train_dir
    elif mode == 'eval':
        out_dir = hp.eval_dir
    else:  # test
        out_dir = hp.test_dir
    record_path = os.path.join(out_dir, '{}.tfrecord'.format(cpu_id))

    def _values(array):
        # Flattened array contents as a float feature.
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=array.reshape(-1)))

    def _shape(array):
        # Array shape as an int64 feature, so readers can restore the layout.
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=array.shape))

    # The context manager closes the writer even if a line fails to process,
    # so records already written are not lost with an unclosed file.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for line in tqdm(wav_label_list):
            _, wav_name, labels = line.strip().split('\t')
            labels = labels.split('|')
            wav_path = os.path.join(hp.wavs_dir, wav_name)

            train_x = get_spectrogram(wav_path)
            train_y = np.zeros(shape=[hp.lab_size])
            train_mask = np.zeros(shape=[hp.lab_size])

            # The sample's own labels are set in both the target and the mask.
            for label in labels:
                label_index = int(label_dic[label][0])
                train_y[label_index] = 1
                train_mask[label_index] = 1

            # Labels listed after the index in label_dic[label] are set in
            # the mask only.
            for label in labels:
                if len(label_dic[label]) <= 1:
                    continue
                for reverse_label in label_dic[label][1:]:
                    train_mask[int(label_dic[reverse_label][0])] = 1

            features = {
                'x': _values(train_x),
                'x_shape': _shape(train_x),
                'y': _values(train_y),
                'y_shape': _shape(train_y),
                'mask': _values(train_mask),
                'mask_shape': _shape(train_mask),
            }
            tf_example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())
def _make_example(self, wav_name, text):
    """Build one example dict from a wav file name and its text.

    The audio is loaded from `<self.wav_dir>/<wav_name>.wav`, passed to
    `get_spectrogram`, and the two values it returns are stored under the
    keys 'mel' and 'mag' next to the given 'text'.
    """
    audio = load_audio(os.path.join(self.wav_dir, wav_name + '.wav'))
    mel, mag = get_spectrogram(audio)
    return dict(text=text, mel=mel, mag=mag)
def audio2npys(input_file, config):
    """Cut an audio file into frames and save one feature stack per frame.

    The signal is walked in steps of `config['audio_samples_hop_length']`
    samples; each frame is `config['audio_samples_frame_size']` samples long
    and zero-padded when it runs past the end of the signal. For every frame
    and every window length returned by `cal_num_channels`, a spectrogram and
    (depending on the `use_ceps`, `use_d_spec` and `use_spec_enve` flags)
    further features derived from it are written into consecutive channels of
    the output array. The array is saved with `np.save` under
    `specpath + <song>_<frame number>.npy`, and `plot_figure` is called with
    the matching `.png` name under `imgpath`.
    """
    # Name of the song: last path component with its last 4 characters removed.
    song_name = input_file.split('/')[-1][:-4]
    print('!song_name = {}!'.format(song_name))

    y, sr = read_via_scipy(input_file)
    print("dtype={}, sampling rate={}, len_samples={}".format(
        y.dtype, sr, len(y)))

    num_ch, mul_win_len = cal_num_channels(config)
    print('num_ch = {}, mul_win_len={}'.format(num_ch, mul_win_len))

    total_samples = y.shape[0]
    frame_size = config['audio_samples_frame_size']
    hop_length = config['audio_samples_hop_length']

    frame_no = 0
    start = 0
    while start < total_samples:
        # Clamp the frame end to the signal; the remainder stays zero.
        stop = min(start + frame_size, total_samples)
        data = np.zeros(frame_size, dtype='float32')
        data[:stop - start] = y[start:stop]

        out_var = np.zeros(
            (num_ch, config['output_hei'], config['output_wid']),
            dtype='float32')

        list_spec, list_ceps, list_d_spec, list_spec_enve = [], [], [], []

        # Next free channel of out_var.
        channel = 0
        for w_len in mul_win_len:
            spec = get_spectrogram(data, config, w_len)
            list_spec.append(spec)
            out_var[channel] = spec
            channel += 1

            if config['use_ceps']:
                ceps = get_cepstrogram(spec, config, w_len)
                list_ceps.append(ceps)
                out_var[channel] = ceps
                channel += 1

            if config['use_d_spec']:
                # mode: all, decay, or attack
                d_spec = get_diff_spectrogram(spec, mode=config['d_spec_type'])
                list_d_spec.append(d_spec)
                out_var[channel] = d_spec
                channel += 1

            if config['use_spec_enve']:
                spec_enve = get_spectral_envelope(spec, config)
                list_spec_enve.append(spec_enve)
                out_var[channel] = spec_enve
                channel += 1

        # One .npy per frame, numbered with a zero-padded counter.
        frame_tag = song_name + '_' + str(frame_no).zfill(config['num_digit'])
        np.save(specpath + frame_tag + '.npy', out_var)

        # All feature lists of the frame are plotted into a single image file.
        plot_figure(imgpath + frame_tag + '.png', list_spec, list_ceps,
                    list_d_spec, list_spec_enve, config)

        frame_no += 1
        start += hop_length
lines = codecs.open(hp.label_info, 'r').readlines()[1:] for line in lines: label_id, label_name, _ = line.strip().split('\t') id_label_dic[int(label_id)] = label_name return id_label_dic if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( '--wav_path', '-i', type=str, help='The path of music passed through model, only supported wav.') parser.add_argument('--threshold', '-t', type=float, help='The threshold for class type.') parser.set_defaults(wav_path=None) parser.set_defaults(threshold=0.5) args = parser.parse_args() fpath = args.wav_path threshold = args.threshold if os.path.isfile(fpath) and os.path.basename(fpath)[-3:] == 'wav': id_label_dic = get_id_label_dic() input_x = get_spectrogram(fpath) y = pass_model(input_x, threshold) label_list = [id_label_dic[i] for i in range(len(y)) if y[i] == 1] print('该首歌标签为:') print(label_list)