Ejemplo n.º 1
0
def test_spec_augment_layer(data_format, atol=1e-4):
    """
    Tests the complete layer, checking if the parameter `training` has the expected behaviour.

    With `training=True` only the output shape is checked. With the default call
    (no `training` argument) the output must equal the input within `atol`.
    """

    batch_src, input_shape = get_spectrogram(data_format)

    model = tf.keras.Sequential()
    spec_augment = SpecAugment(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )

    model.add(spec_augment)

    # First, enforce training to True and check the shapes
    spec_augmented = model(batch_src, training=True)
    np.testing.assert_equal(model.layers[0].output_shape[1:], spec_augmented[0].shape)

    # Second, check that it doesn't change anything in default
    spec_augmented = model(batch_src)
    # `atol` has to be passed by keyword: the third positional parameter of
    # `assert_allclose` is `rtol`, so a positional value sets the wrong tolerance.
    np.testing.assert_allclose(spec_augmented, batch_src, atol=atol)
Ejemplo n.º 2
0
def check():
    """Debug helper: print the label vector and the mask of the first 'test' entry.

    Each entry is a tab-separated line whose second field is the wav file name and
    whose third field is a '|'-separated list of label names. Only the first entry
    is processed; the function then stops.
    """
    entries = get_wav_label_list(mode='test')
    label_dic = get_label_dic()
    for entry in tqdm(entries):
        _, wav_name, label_field = entry.strip().split('\t')
        label_names = label_field.split('|')
        # The spectrogram is computed but not used by this helper.
        get_spectrogram(os.path.join(hp.wavs_dir, wav_name))

        train_y = np.zeros(hp.lab_size)
        train_mask = np.zeros(hp.lab_size)

        # Every label of the entry is a positive target and is unmasked.
        for name in label_names:
            position = int(label_dic[name][0])
            train_y[position] = 1
            train_mask[position] = 1

        # Labels listed after the first element of a label's record are
        # unmasked as well (an empty tail simply contributes nothing).
        for name in label_names:
            for reverse_label in label_dic[name][1:]:
                train_mask[int(label_dic[reverse_label][0])] = 1

        print(train_y)
        print(train_mask)
        # Only the first entry is inspected.
        break
Ejemplo n.º 3
0
def get_spectrogram_and_text(_inputs):
    '''Build the (spectrogram, text) pair for one `(sound_fpath, text)` item.

    Args:
      _inputs: A `(sound_fpath, text)` pair. `text` is expected to be a raw
        byte buffer holding int32 values (presumably produced upstream by
        serializing an int32 array; verify against the caller).

    Returns:
      A tuple `(spectrogram, text)`: the spectrogram of `sound_fpath` after
      `reduce_frames(..., hp.r)`, and `text` decoded as a 1-D int32 array.
    '''
    sound_fpath, text = _inputs
    spectrogram = get_spectrogram(sound_fpath)
    spectrogram = reduce_frames(spectrogram, hp.r)

    # The binary mode of `np.fromstring` is deprecated in favour of
    # `np.frombuffer`. `.copy()` keeps the result writable and independent of
    # the input buffer, as `fromstring` did.
    text = np.frombuffer(text, dtype=np.int32).copy()
    return spectrogram, text
Ejemplo n.º 4
0
def test_spec_augment_depth_exception():
    """
    Checks that SpecAugments fails if Spectrogram has depth greater than 1.
    """

    data_format = "default"

    # Build the input outside of `pytest.raises`, so that a RuntimeError raised by
    # the test helper itself cannot make this test pass by accident.
    batch_src, input_shape = get_spectrogram(data_format=data_format, n_ch=4)

    # Only the layer construction and the forward pass are expected to fail.
    with pytest.raises(RuntimeError):
        model = tf.keras.Sequential()
        spec_augment = SpecAugment(
            input_shape=input_shape, freq_mask_param=5, time_mask_param=10, data_format=data_format
        )
        model.add(spec_augment)
        _ = model(batch_src, training=True)[0]
Ejemplo n.º 5
0
def thread_process(args):
    """Serialize one shard of the dataset into `<hp.TF_DIR>/<tfid>.tfrecord`.

    Args:
        args: A `(tfid, split_dataset)` tuple. `tfid` names the output file and
            `split_dataset` is an iterable of items whose element 0 is an audio
            file path and whose element 1 is a target array (it must support
            `reshape`).

    Each item becomes one `tf.train.Example` with two float features: 'x', the
    flattened spectrogram of the file, and 'y', the flattened target.
    """
    (tfid, split_dataset) = args
    record_path = os.path.join(hp.TF_DIR, f'{tfid}.tfrecord')
    # The context manager closes the writer even when an item fails to convert,
    # so the file handle is not leaked.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for item in tqdm(split_dataset):
            fpath, y = item[0], item[1]
            x = get_spectrogram(fpath)
            feature = {
                'x': tf.train.Feature(
                    float_list=tf.train.FloatList(value=x.reshape(-1))),
                'y': tf.train.Feature(
                    float_list=tf.train.FloatList(value=y.reshape(-1))),
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
Ejemplo n.º 6
0
def test_save_load_spec_augment(data_format, save_format):
    """
    Builds a SpecAugment layer and hands it to `save_load_compare` together with a
    spectrogram batch, using `np.testing.assert_allclose` as the comparison function
    and `training=None`.
    """
    batch_src, input_shape = get_spectrogram(data_format=data_format)

    layer_kwargs = dict(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )
    layer = SpecAugment(**layer_kwargs)

    save_load_compare(
        layer,
        batch_src,
        np.testing.assert_allclose,
        save_format=save_format,
        layer_class=SpecAugment,
        training=None,
    )
Ejemplo n.º 7
0
def load_eval_data():
    """We evaluate on the last mini-batch.

    Loads `(sound_fpaths, texts)` from 'data/eval.pkl', computes the
    frame-reduced spectrogram of every sound file and zero-pads them to a
    common length.

    Returns:
        A tuple `(X, texts)`. `X` is a float32 array of shape
        `(number of files, maxlen, hp.n_mels * hp.r)`, where `maxlen` is the
        longest reduced spectrogram. `texts` is returned as loaded from the
        pickle.
    """
    from utils import get_spectrogram, reduce_frames

    # NOTE(security): pickle can execute arbitrary code while loading; only use
    # this with an eval file that was produced locally / by a trusted source.
    with open('data/eval.pkl', 'rb') as eval_file:
        sound_fpaths, texts = pickle.load(eval_file)

    # NOTE(review): the vocabulary is not used in this function; the call is
    # kept in case load_vocab() has side effects - confirm and remove if not.
    load_vocab()

    # Extract spectrogram from sound_fpaths
    xs, maxlen = [], 0
    for sound_fpath in sound_fpaths:
        spectrogram = get_spectrogram(sound_fpath)
        x = reduce_frames(spectrogram, hp.r)
        maxlen = max(maxlen, len(x))
        xs.append(x)

    # Set the length of samples in X to the maximum among them.
    X = np.zeros(shape=(len(xs), maxlen, hp.n_mels*hp.r), dtype=np.float32)
    for i, x in enumerate(xs):
        X[i, :len(x), :] = x

    return X, texts  # 3d array, texts as stored in the pickle
Ejemplo n.º 8
0
def test_spec_augment_apply_masks_to_axis(inputs):
    """
    Exercises `SpecAugment._apply_masks_to_axis` with the parametrized
    `(data_format, axis, mask_param, n_masks)` tuple: unsupported axes must raise
    NotImplementedError, unexpected mask parameters must raise ValueError, and a
    valid call must keep the size of the masked axis.
    """

    data_format, axis, mask_param, n_masks = inputs
    batch_src, input_shape = get_spectrogram(data_format)

    layer = SpecAugment(
        input_shape=input_shape,
        freq_mask_param=5,
        time_mask_param=10,
        n_freq_masks=4,
        n_time_masks=3,
        mask_value=0.0,
        data_format=data_format,
    )

    # Case 1: an axis outside 0..2 has to be rejected.
    if axis not in (0, 1, 2):
        with pytest.raises(NotImplementedError):
            # The whole batch (not batch_src[0]) is passed to simulate a 4D spectrogram
            layer._apply_masks_to_axis(batch_src, axis, mask_param, n_masks)
        return

    # Case 2: any mask_param other than 5 is expected to trigger a ValueError.
    if mask_param != 5:
        with pytest.raises(ValueError):
            layer._apply_masks_to_axis(batch_src[0], axis, mask_param, n_masks)
        return

    # Case 3: the inputs are valid, so only check that the axis size is preserved.
    masked = layer._apply_masks_to_axis(batch_src[0], axis, mask_param, n_masks)
    np.testing.assert_equal(masked.shape[axis], input_shape[axis])
Ejemplo n.º 9
0
def process(args):
    """Convert a list of labelled wav entries into one TFRecord file.

    Args:
        args: A `(wav_label_list, label_dic, cpu_id, mode)` tuple.
            wav_label_list: iterable of tab-separated lines; the second field is
                the wav file name (relative to `hp.wavs_dir`) and the third is a
                '|'-separated list of label names.
            label_dic: maps a label name to a sequence whose first element is the
                label index and whose remaining elements are further label names
                that are unmasked together with it.
            cpu_id: used as the output file name, `<cpu_id>.tfrecord`.
            mode: 'train' or 'eval' select `hp.train_dir` / `hp.eval_dir`; any
                other value writes to `hp.test_dir`.

    Every record stores the flattened spectrogram ('x'), the multi-hot target
    ('y') and the mask ('mask'), each with a companion '*_shape' feature.
    """
    (wav_label_list, label_dic, cpu_id, mode) = args
    if mode == 'train':
        out_dir = hp.train_dir
    elif mode == 'eval':
        out_dir = hp.eval_dir
    else:  # test
        out_dir = hp.test_dir
    record_path = os.path.join(out_dir, '{}.tfrecord'.format(cpu_id))

    def float_feature(array):
        # Flattened float values of `array`.
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=array.reshape(-1)))

    def shape_feature(array):
        # Shape of `array`, so that a reader can restore the dimensions.
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=array.shape))

    # The context manager closes the writer even if an entry fails to convert.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for line in tqdm(wav_label_list):
            _, wav_name, labels = line.strip().split('\t')
            labels = labels.split('|')
            wav_path = os.path.join(hp.wavs_dir, wav_name)
            train_x = get_spectrogram(wav_path)
            train_y = np.zeros(shape=[hp.lab_size])
            train_mask = np.zeros(shape=[hp.lab_size])

            # Every label of the entry is a positive target and is unmasked.
            for label in labels:
                index = int(label_dic[label][0])
                train_y[index] = 1
                train_mask[index] = 1

            # Labels listed after the index in a label's record are unmasked too
            # (an empty tail contributes nothing).
            for label in labels:
                for reverse_label in label_dic[label][1:]:
                    train_mask[int(label_dic[reverse_label][0])] = 1

            # --------write into tf record file----------#
            features = {
                'x': float_feature(train_x),
                'x_shape': shape_feature(train_x),
                'y': float_feature(train_y),
                'y_shape': shape_feature(train_y),
                'mask': float_feature(train_mask),
                'mask_shape': shape_feature(train_mask),
            }
            tf_example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(tf_example.SerializeToString())
Ejemplo n.º 10
0
 def _make_example(self, wav_name, text):
     """Return a dict with `text` and the 'mel' / 'mag' outputs of
     `get_spectrogram` for the audio file `<self.wav_dir>/<wav_name>.wav`."""
     audio = load_audio(os.path.join(self.wav_dir, wav_name + '.wav'))
     mel, mag = get_spectrogram(audio)
     return dict(text=text, mel=mel, mag=mag)
def audio2npys(input_file, config):
    """Cut an audio file into frames and save the features of every frame.

    The samples are split into windows of `config['audio_samples_frame_size']`
    samples, advancing by `config['audio_samples_hop_length']`. The last window
    is zero-padded. For every window and every window length returned by
    `cal_num_channels(config)`, a spectrogram is computed and, depending on
    `config['use_ceps']`, `config['use_d_spec']` and `config['use_spec_enve']`,
    a cepstrogram, a differential spectrogram and a spectral envelope. All of
    them are stacked as channels of one float32 array, which is saved as
    `<specpath><song>_<index>.npy`; a plot is written to
    `<imgpath><song>_<index>.png`.

    Args:
        input_file: path of the audio file to read.
        config: dict with the keys used above plus 'output_hei', 'output_wid',
            'd_spec_type' and 'num_digit'.

    Raises:
        ValueError: if the hop length is not positive (the loop would never end).
    """
    import os  # local import so the block does not rely on a module-level one

    # Derive the song name from the file name without directory and extension.
    # basename/splitext handle platform separators and extensions of any length.
    song_name = os.path.splitext(os.path.basename(input_file))[0]
    print('!song_name = {}!'.format(song_name))

    y, sr = read_via_scipy(input_file)
    print("dtype={}, sampling rate={}, len_samples={}".format(
        y.dtype, sr, len(y)))
    num_ch, mul_win_len = cal_num_channels(config)
    print('num_ch = {}, mul_win_len={}'.format(num_ch, mul_win_len))

    frame_size = config['audio_samples_frame_size']
    hop_length = config['audio_samples_hop_length']
    if hop_length <= 0:
        raise ValueError(
            'audio_samples_hop_length must be positive, got {}'.format(
                hop_length))

    Len = y.shape[0]
    cnt = 0
    st_idx = 0
    while st_idx < Len:
        # Clamp the window to the end of the signal; the rest stays zero.
        ed_idx = min(st_idx + frame_size, Len)
        data = np.zeros(frame_size, dtype='float32')
        data[:ed_idx - st_idx] = y[st_idx:ed_idx]

        out_var = np.zeros(
            (num_ch, config['output_hei'], config['output_wid']),
            dtype='float32')

        list_spec = []
        list_ceps = []
        list_d_spec = []
        list_spec_enve = []
        channel_anchor = 0  # next free channel index in out_var[:, hei, wid]
        for w_len in mul_win_len:
            # config['is_multi'] is decided by "current for-loop"
            spec = get_spectrogram(data, config, w_len)
            list_spec.append(spec)
            out_var[channel_anchor] = spec
            channel_anchor += 1
            if config['use_ceps']:
                ceps = get_cepstrogram(spec, config, w_len)
                list_ceps.append(ceps)
                out_var[channel_anchor] = ceps
                channel_anchor += 1
            if config['use_d_spec']:
                # mode: all, decay, or attack
                d_spec = get_diff_spectrogram(spec,
                                              mode=config['d_spec_type'])
                list_d_spec.append(d_spec)
                out_var[channel_anchor] = d_spec
                channel_anchor += 1
            if config['use_spec_enve']:
                spec_enve = get_spectral_envelope(spec, config)
                list_spec_enve.append(spec_enve)
                out_var[channel_anchor] = spec_enve
                channel_anchor += 1

        # Zero-padded running index shared by the .npy and the .png file.
        suffix = '_' + str(cnt).zfill(config['num_digit'])
        npy_name = specpath + song_name + suffix + '.npy'
        np.save(npy_name, out_var)

        img_name = imgpath + song_name + suffix + '.png'
        # plots: 1. spec 2. ceps (all in single file)
        plot_figure(img_name, list_spec, list_ceps, list_d_spec,
                    list_spec_enve, config)

        cnt += 1
        st_idx += hop_length
Ejemplo n.º 12
0
    lines = codecs.open(hp.label_info, 'r').readlines()[1:]
    for line in lines:
        label_id, label_name, _ = line.strip().split('\t')
        id_label_dic[int(label_id)] = label_name
    return id_label_dic


if __name__ == '__main__':
    # Command-line entry point: run the model on one wav file and print the
    # names of all labels whose prediction equals 1.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--wav_path',
        '-i',
        type=str,
        default=None,
        help='The path of music passed through model, only supported wav.')
    parser.add_argument('--threshold',
                        '-t',
                        type=float,
                        default=0.5,
                        help='The threshold for class type.')
    args = parser.parse_args()
    fpath = args.wav_path
    threshold = args.threshold

    # Without this check a missing -i reaches os.path.isfile(None), which raises
    # TypeError; report a usage error instead.
    if fpath is None:
        parser.error('--wav_path/-i is required.')
    # Report unusable input instead of exiting silently.
    if (not os.path.isfile(fpath)
            or os.path.splitext(fpath)[1].lower() != '.wav'):
        parser.error('{!r} is not an existing .wav file.'.format(fpath))

    id_label_dic = get_id_label_dic()
    input_x = get_spectrogram(fpath)
    y = pass_model(input_x, threshold)
    label_list = [id_label_dic[i] for i, flag in enumerate(y) if flag == 1]
    # The message below reads "Labels of this song:".
    print('该首歌标签为:')
    print(label_list)