예제 #1
0
def test_melFilterbankLayer():
    """Check the ``mel_filterbank`` layer output for an all-ones input.

    Builds a network that consists of a single ``mel_filterbank`` layer
    (``fft_size`` 512, three filters), feeds one frame of ``n_in`` ones
    and compares the three resulting values against stored references.
    """
    with make_scope() as session:
        n_in, n_out = 257, 3
        layer_name = "mel_filterbank_layer"
        config = Config()
        config.update({
            "num_outputs": n_out,
            "num_inputs": n_in,
            "network": {
                layer_name: {
                    "class": "mel_filterbank",
                    "fft_size": 512,
                    "nr_of_filters": n_out,
                    "n_out": n_out,
                    "is_output_layer": True
                }
            }
        })
        network = TFNetwork(config=config, train_flag=True)
        network.construct_from_dict(config.typed_value("network"))
        layer = network.layers[layer_name]
        test_out = session.run(layer.output.placeholder,
                               feed_dict={
                                   network.get_extern_data('data').placeholder:
                                   np.ones((1, 1, n_in))
                               })
        expected = np.asarray(
            [28.27923584, 53.10634232, 99.71585846], dtype=np.float32)
        # Compare element-wise instead of summing signed differences: a
        # signed sum passes for any output below the reference and lets
        # errors of opposite sign cancel each other out.
        assert np.allclose(test_out, expected, rtol=1e-5, atol=1e-5)
예제 #2
0
def test_complexLinearProjectionLayer():
  """Check one output value of the ``complex_linear_projection`` layer.

  The input frame interleaves a real part of 1.0 and an imaginary part of
  0.5 for each of the ``n_in // 2`` bins. The layer's ``_clp_kernel`` is fed
  with all ones, and the first output value is compared with a stored
  reference.
  """
  with make_scope() as session:
    n_in, n_out = 514, 128
    n_bins = n_in // 2
    layer_name = "clp_layer"
    config = Config()
    config.update({
      "num_outputs": n_out,
      "num_inputs": n_in,
      "network": {
        layer_name: {
          "class": "complex_linear_projection", "nr_of_filters": n_out, "n_out": n_out, "is_output_layer": True}
      }})
    network = TFNetwork(config=config, train_flag=True)
    network.construct_from_dict(config.typed_value("network"))
    layer = network.layers[layer_name]
    assert isinstance(layer, ComplexLinearProjectionLayer)
    i_r = np.ones((1, n_bins))
    i_i = np.ones((1, n_bins)) * 0.5
    # [r0..rN, i0..iN] -> (1, 2, N) -> (1, N, 2) -> [r0, i0, r1, i1, ...],
    # then a leading axis is added so the fed array has shape (1, 1, n_in).
    test_input = np.expand_dims(np.reshape(np.transpose(
      np.reshape(np.concatenate([i_r, i_i], axis=1), (1, 2, n_bins)), [0, 2, 1]), (1, n_in)), 0)
    test_clp_kernel = np.ones((2, n_bins, n_out))
    test_clp_output = session.run(
      layer.output.placeholder,
      feed_dict={network.get_extern_data('data').placeholder: test_input, layer._clp_kernel: test_clp_kernel})
    # NOTE(review): the reference is consistent with
    # log(|n_bins * (1 + 0.5j) * (1 + 1j)|) -- confirm against the layer.
    # abs() is required: without it any output below the reference passes.
    assert abs(test_clp_output[0, 0, 0] - 6.00722122) < 1e-5
예제 #3
0
 def test_rfftStftConfig_01():
   """Compare the first frame of ``multichannel_stft_layer`` with a reference.

   A two-channel all-ones signal is run through the layer with
   ``use_rfft`` enabled. The first ``fft_size // 2 + 1`` values of frame 0
   are compared with the output of ``_get_ref_output``.
   """
   with make_scope() as session:
     layer_name = "stft_layer"
     fft_size = 400
     frame_size = 400
     frame_shift = 160
     window = "hanning"
     test_input = np.ones((1, 32000, 2), dtype=np.float32)
     nr_of_channels = test_input.shape[2]
     nr_of_bins = fft_size // 2 + 1
     config = Config()
     config.update({
       # The bin count is multiplied by the channel count as a whole (402
       # here); without parentheses `*` binds tighter than `+` and only
       # the literal 1 would be scaled.
       "num_outputs": nr_of_bins * nr_of_channels,
       "num_inputs": nr_of_channels,
       "network": {
         layer_name: {
           "class": "multichannel_stft_layer", "frame_shift": frame_shift, "frame_size": frame_size, "window": window, "fft_size": fft_size, "use_rfft": True, "nr_of_channels": nr_of_channels, "is_output_layer": True}
       }})
     network = TFNetwork(config=config, train_flag=True)
     network.construct_from_dict(config.typed_value("network"))
     layer = network.layers[layer_name]
     test_output = session.run(layer.output.placeholder, {network.get_extern_data('data').placeholder: test_input})
     ref0 = _get_ref_output(test_input, fft_size, frame_size, frame_shift, window, 0, 0)
     # np.fft.rfft and tensorflow.python.ops.rfft differ a little bit in
     # their results, thus an error margin is allowed in the result.
     result_diff = np.abs(test_output[0, 0, 0:nr_of_bins] - ref0)
     assert np.mean(result_diff) < 0.02
     assert np.max(result_diff) < 1
예제 #4
0
 def test_rfftStftConfig_01():
   """Compare the first frame of ``multichannel_stft_layer`` with a reference.

   A two-channel all-ones signal is run through the layer with
   ``use_rfft`` enabled. The first ``fft_size // 2 + 1`` values of frame 0
   are compared with the output of ``_get_ref_output``.
   """
   with make_scope() as session:
     layer_name = "stft_layer"
     fft_size = 400
     frame_size = 400
     frame_shift = 160
     window = "hanning"
     test_input = np.ones((1, 32000, 2), dtype=np.float32)
     nr_of_channels = test_input.shape[2]
     nr_of_bins = fft_size // 2 + 1
     config = Config()
     config.update({
       # The bin count is multiplied by the channel count as a whole (402
       # here); without parentheses `*` binds tighter than `+` and only
       # the literal 1 would be scaled.
       "num_outputs": nr_of_bins * nr_of_channels,
       "num_inputs": nr_of_channels,
       "network": {
         layer_name: {
           "class": "multichannel_stft_layer", "frame_shift": frame_shift, "frame_size": frame_size, "window": window, "fft_size": fft_size, "use_rfft": True, "nr_of_channels": nr_of_channels, "is_output_layer": True}
       }})
     network = TFNetwork(config=config, train_flag=True)
     network.construct_from_dict(config.typed_value("network"))
     layer = network.layers[layer_name]
     test_output = session.run(layer.output.placeholder, {network.get_extern_data('data').placeholder: test_input})
     ref0 = _get_ref_output(test_input, fft_size, frame_size, frame_shift, window, 0, 0)
     # np.fft.rfft and tensorflow.python.ops.rfft differ a little bit in
     # their results, thus an error margin is allowed in the result.
     result_diff = np.abs(test_output[0, 0, 0:nr_of_bins] - ref0)
     assert np.mean(result_diff) < 0.02
     assert np.max(result_diff) < 1
예제 #5
0
def test_complexLinearProjectionLayer():
  """Check one output value of the ``complex_linear_projection`` layer.

  The input frame interleaves a real part of 1.0 and an imaginary part of
  0.5 for each of the ``n_in // 2`` bins. The layer's ``_clp_kernel`` is fed
  with all ones, and the first output value is compared with a stored
  reference.
  """
  with make_scope() as session:
    n_in, n_out = 514, 128
    n_bins = n_in // 2
    layer_name = "clp_layer"
    config = Config()
    config.update({
      "num_outputs": n_out,
      "num_inputs": n_in,
      "network": {
        layer_name: {
          "class": "complex_linear_projection", "nr_of_filters": n_out, "n_out": n_out, "is_output_layer": True}
      }})
    network = TFNetwork(config=config, train_flag=True)
    network.construct_from_dict(config.typed_value("network"))
    layer = network.layers[layer_name]
    assert isinstance(layer, ComplexLinearProjectionLayer)
    i_r = np.ones((1, n_bins))
    i_i = np.ones((1, n_bins)) * 0.5
    # [r0..rN, i0..iN] -> (1, 2, N) -> (1, N, 2) -> [r0, i0, r1, i1, ...],
    # then a leading axis is added so the fed array has shape (1, 1, n_in).
    test_input = np.expand_dims(np.reshape(np.transpose(
      np.reshape(np.concatenate([i_r, i_i], axis=1), (1, 2, n_bins)), [0, 2, 1]), (1, n_in)), 0)
    test_clp_kernel = np.ones((2, n_bins, n_out))
    test_clp_output = session.run(
      layer.output.placeholder,
      feed_dict={network.get_extern_data('data').placeholder: test_input, layer._clp_kernel: test_clp_kernel})
    # NOTE(review): the reference is consistent with
    # log(|n_bins * (1 + 0.5j) * (1 + 1j)|) -- confirm against the layer.
    # abs() is required: without it any output below the reference passes.
    assert abs(test_clp_output[0, 0, 0] - 6.00722122) < 1e-5
예제 #6
0
 def test_stftConfig_multi_res_02():
   """Compare one frame of the multi-resolution STFT layer with a reference.

   A two-channel random signal is run through
   ``multichannel_multiresolution_stft_layer`` with three FFT/frame sizes.
   The reference for frame ``comparison_frame`` is the concatenation of
   ``_get_ref_output_single_res`` for every resolution and, within each
   resolution, for every channel.
   """
   with make_scope() as session:
     layer_name = "stft_layer"
     fft_sizes = [400, 200, 800]
     frame_sizes = [400, 200, 800]
     frame_shift = 160
     window = "hanning"
     # Seeded generator: the input (and therefore a failure) is reproducible
     # and the global NumPy random state is left untouched.
     rng = np.random.RandomState(42)
     test_input = rng.normal(0, 0.6, (1, 3200, 2))
     nr_of_channels = test_input.shape[2]
     num_outputs = sum((fft_size // 2 + 1) * nr_of_channels for fft_size in fft_sizes)
     config = Config()
     config.update({
       "num_outputs": num_outputs,
       "num_inputs": nr_of_channels,
       "network": {
         layer_name: {
           "class": "multichannel_multiresolution_stft_layer", "frame_shift": frame_shift, "frame_sizes": frame_sizes, "window": window, "fft_sizes": fft_sizes, "use_rfft": True, "nr_of_channels": nr_of_channels, "is_output_layer": True}
       }})
     network = TFNetwork(config=config, train_flag=True)
     network.construct_from_dict(config.typed_value("network"))
     layer = network.layers[layer_name]
     test_output = session.run(layer.output.placeholder, {network.get_extern_data('data').placeholder: test_input})
     assert test_output.shape[2] == num_outputs
     comparison_frame = 6
     # Order matters: resolution-major, channel-minor, i.e.
     # (res0, ch0), (res0, ch1), (res1, ch0), ...
     ref_parts = [
       _get_ref_output_single_res(test_input, fft_size, frame_size, frame_shift, window, comparison_frame, channel)
       for fft_size, frame_size in zip(fft_sizes, frame_sizes)
       for channel in range(nr_of_channels)]
     ref = np.concatenate(ref_parts, axis=0)
     result_diff = np.abs(test_output[0, comparison_frame, :] - ref)
     assert np.mean(result_diff) < 0.06
     assert np.max(result_diff) < 1
예제 #7
0
 def test_stftConfig_single_res_01():
   """Run the multi-resolution STFT layer with a single resolution.

   Feeds a two-channel all-ones signal and compares the first
   ``int(fft_sizes[0] / 2) + 1`` values of frame 0 with
   ``_get_ref_output_single_res``; also checks the size of the last
   output axis.
   """
   with make_scope() as session:
     stft_name = "stft_layer"
     fft_sizes = [400]
     frame_sizes = [400]
     shift = 160
     window_type = "hanning"
     signal = np.ones((1, 32000, 2), dtype=np.float32)
     bins = int(fft_sizes[0] / 2) + 1
     num_outputs = bins * signal.shape[2]
     stft_opts = {
       "class": "multichannel_multiresolution_stft_layer",
       "frame_shift": shift,
       "frame_sizes": frame_sizes,
       "window": window_type,
       "fft_sizes": fft_sizes,
       "use_rfft": True,
       "nr_of_channels": 2,
       "is_output_layer": True,
     }
     config = Config()
     config.update({
       "num_outputs": num_outputs,
       "num_inputs": signal.shape[2],
       "network": {stft_name: stft_opts},
     })
     network = TFNetwork(config=config, train_flag=True)
     network.construct_from_dict(config.typed_value("network"))
     stft_layer = network.layers[stft_name]
     feed = {network.get_extern_data('data').placeholder: signal}
     stft_out = session.run(stft_layer.output.placeholder, feed)
     reference = _get_ref_output_single_res(signal, fft_sizes[0], frame_sizes[0], shift, window_type, 0, 0)
     deviation = np.abs(stft_out[0, 0, 0:bins] - reference)
     assert stft_out.shape[2] == num_outputs
     assert np.mean(deviation) < 0.02
     assert np.max(deviation) < 1
예제 #8
0
def test_melFilterbankLayer():
  """Check the ``mel_filterbank`` layer output for an all-ones input.

  Builds a network that consists of a single ``mel_filterbank`` layer
  (``fft_size`` 512, three filters), feeds one frame of ``n_in`` ones and
  compares the three resulting values against stored references.
  """
  with make_scope() as session:
    n_in, n_out = 257, 3
    layer_name = "mel_filterbank_layer"
    config = Config()
    config.update({
      "num_outputs": n_out,
      "num_inputs": n_in,
      "network": {
        layer_name: {
          "class": "mel_filterbank", "fft_size": 512, "nr_of_filters": n_out, "n_out": n_out, "is_output_layer": True}
      }})
    network = TFNetwork(config=config, train_flag=True)
    network.construct_from_dict(config.typed_value("network"))
    layer = network.layers[layer_name]
    test_out = session.run(
      layer.output.placeholder,
      feed_dict={network.get_extern_data('data').placeholder: np.ones((1, 1, n_in))})
    expected = np.asarray([28.27923584, 53.10634232, 99.71585846], dtype=np.float32)
    # Compare element-wise instead of summing signed differences: a signed
    # sum passes for any output below the reference and lets errors of
    # opposite sign cancel each other out.
    assert np.allclose(test_out, expected, rtol=1e-5, atol=1e-5)