def input_block(net, config, melspec=False, verbose=True):
    """
    """
    # load scaler
    sclr = joblib.load(config.paths.preproc.scaler)

    net['input'] = L.InputLayer(shape=get_in_shape(config), name='input')
    sigma = theano.shared(np.array(0., dtype=np.float32),
                          name='noise_controller')
    net['noise'] = L.GaussianNoiseLayer(net['input'],
                                        sigma=sigma,
                                        name='input_corruption')

    if config.hyper_parameters.input == "melspec":

        net['sclr'] = L.standardize(net['noise'],
                                    offset=sclr.mean_.astype(np.float32),
                                    scale=sclr.scale_.astype(np.float32),
                                    shared_axes=(0, 1, 2))
    else:
        net['stft'] = STFTLayer(L.ReshapeLayer(net['noise'],
                                               ([0], [1], [2], 1),
                                               name='reshape'),
                                n_fft=config.hyper_parameters.n_fft,
                                hop_size=config.hyper_parameters.hop_size)

        if melspec:
            # NOTE: MelSpecLayer is assumed here to take the incoming STFT
            # layer as its first argument, like any other Lasagne layer; the
            # original call omitted it, leaving the layer disconnected.
            net['melspec'] = MelSpecLayer(
                net['stft'],
                sr=config.hyper_parameters.sample_rate,
                n_fft=config.hyper_parameters.n_fft,
                n_mels=128,
                log_amplitude=True)

            net['sclr'] = L.standardize(net['melspec'],
                                        offset=sclr.mean_.astype(np.float32),
                                        scale=sclr.scale_.astype(np.float32),
                                        shared_axes=(0, 1, 2))

        else:
            net['sclr'] = L.standardize(net['stft'],
                                        offset=sclr.mean_.astype(np.float32),
                                        scale=sclr.scale_.astype(np.float32),
                                        shared_axes=(0, 1, 2))

            # only pooling freq domain
            net['stft.pl'] = L.MaxPool2DLayer(net['sclr'],
                                              pool_size=(2, 1),
                                              name='stft.pl')

    if verbose:
        print(net['input'].output_shape)
        # if melspec:
        #     print(net['melspec'].output_shape)
        # else:
        #     print(net['stft'].output_shape)
        #     print(net['stft.pl'].output_shape)
        print(net['sclr'].output_shape)

    return net, sigma
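
# A minimal usage sketch for input_block (hypothetical: `config` only needs the
# attributes referenced above, and config.paths.preproc.scaler is assumed to be
# a joblib-dumped, fitted sklearn StandardScaler).
def _demo_input_block(config):
    net, sigma = input_block({}, config)
    # sigma is a shared variable: its value at call time controls the amount of
    # Gaussian corruption applied by the 'noise' layer
    sigma.set_value(np.float32(0.1))
    standardized = L.get_output(net['sclr'])
    return net, sigma, standardized
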
def pons_cnn(params):
    """"""
    layers = L.InputLayer((None, 1, params['dur'], 128))
    print layers.output_shape

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    layers_timbre = L.GlobalPoolLayer(
        L.batch_norm(L.Conv2DLayer(layers, 64, (1, 96))))

    layers_rhythm = L.GlobalPoolLayer(
        L.batch_norm(L.Conv2DLayer(layers, 64, (params['dur'] - 10, 1))))

    layers = L.ConcatLayer([layers_rhythm, layers_timbre], axis=-1)

    layers = L.DenseLayer(layers, 64, nonlinearity=nl.rectify)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
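
# Usage sketch (hypothetical values): build the network and compile a
# prediction function. params['scaler'] is assumed to point at a joblib-dumped,
# fitted sklearn StandardScaler, as in the other builders in this file.
def _demo_pons_cnn():
    params = {'dur': 43, 'scaler': 'scaler.joblib'}  # placeholder values
    out_layer = pons_cnn(params)
    x_in = L.get_all_layers(out_layer)[0].input_var  # the InputLayer's variable
    predict = theano.function(
        [x_in], L.get_output(out_layer, deterministic=True))
    return predict
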
def test_standardize():
    # Simple example
    X = np.random.standard_normal((1000, 20)).astype(theano.config.floatX)
    l_in = InputLayer((None, 20))
    l_std = standardize(
        l_in, X.min(axis=0), (X.max(axis=0) - X.min(axis=0)), shared_axes=0)
    out = get_output(l_std).eval({l_in.input_var: X})
    assert np.allclose(out.max(axis=0), 1.)
    assert np.allclose(out.min(axis=0), 0.)
    # More complicated example
    X = np.random.standard_normal(
        (50, 3, 100, 10)).astype(theano.config.floatX)
    mean = X.mean(axis=(0, 2))
    std = X.std(axis=(0, 2))
    l_in = InputLayer((None, 3, None, 10))
    l_std = standardize(l_in, mean, std, shared_axes=(0, 2))
    out = get_output(l_std).eval({l_in.input_var: X})
    assert np.allclose(out.mean(axis=(0, 2)), 0., atol=1e-5)
    assert np.allclose(out.std((0, 2)), 1., atol=1e-5)
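
# For reference, a sketch of the arithmetic the test above exercises:
# standardize applies a fixed bias and scale, i.e. (x - offset) / scale,
# broadcast over the shared axes. The NumPy equivalent of the second example:
def _numpy_standardize(X, mean, std):
    # mean/std have shape (3, 10); axes 0 and 2 are the shared (broadcast) axes
    return (X - mean[None, :, None, :]) / std[None, :, None, :]
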
def shallow_cnn_2d_vanilla(params):
    """"""
    layers = L.InputLayer((None, 1, params['dur'], 128))
    print layers.output_shape

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [8, 16, 16, 32]  # l
    filter_sz = [(5, 5), (5, 5), (1, 1), (5, 5)]  # m
    pool_sz = [(3, 3), (3, 3), None, (3, 3)]  # n
    pool_strd = [None, None, None, None]  # s
    batch_norm = [False, False, False, False]  # b
    conv_spec = zip(n_filter, filter_sz, pool_sz, pool_strd, batch_norm)

    for l, m, n, s, b in conv_spec:
        if b:
            layers = L.batch_norm(
                L.Conv2DLayer(layers, l, m, nonlinearity=nl.rectify))
        else:
            layers = L.Conv2DLayer(layers, l, m, nonlinearity=nl.rectify)
        if n is not None:
            layers = L.MaxPool2DLayer(layers, pool_size=n, stride=s)
        print(layers.output_shape)

    layers = L.DenseLayer(layers, 64, nonlinearity=nl.rectify)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
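
# Training-step sketch (hypothetical; the same pattern applies to the other
# classifier builders in this file). Assumes integer class labels in `y` and a
# params dict like the one passed to shallow_cnn_2d_vanilla above.
def _demo_train_fn(params):
    out_layer = shallow_cnn_2d_vanilla(params)
    x_in = L.get_all_layers(out_layer)[0].input_var
    y = theano.tensor.ivector('y')
    pred = L.get_output(out_layer)
    loss = lasagne.objectives.categorical_crossentropy(pred, y).mean()
    trainable = L.get_all_params(out_layer, trainable=True)
    updates = lasagne.updates.adam(loss, trainable)
    return theano.function([x_in, y], loss, updates=updates)
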
def define_net():
    """Build a U-Net-style encoder-decoder generator with skip connections."""
    net = {}

    print("Generator layer shapes:")
    net['input'] = ll.InputLayer(shape=(None, 3, IMAGE_SHAPE[0],
                                        IMAGE_SHAPE[1]))

    leaky_relu = lasagne.nonlinearities.LeakyRectify(0.2)

    # net['stand'] = ll.standardize(net['input'], offset=np.array([0, 0, 0], dtype='float32'),
    #                             scale=np.array([128.0, 128.0, 128.0], dtype='float32'))

    net['conv_1'] = Conv2DLayer(net['input'],
                                num_filters=64,
                                stride=(2, 2),
                                filter_size=(4, 4),
                                nonlinearity=leaky_relu)
    print(lasagne.layers.get_output_shape(net['conv_1']))
    net['conv_2'] = batch_norm(
        Conv2DLayer(net['conv_1'],
                    num_filters=128,
                    stride=(2, 2),
                    filter_size=(4, 4),
                    nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_2']))
    net['conv_3'] = batch_norm(
        Conv2DLayer(net['conv_2'],
                    num_filters=256,
                    stride=(2, 2),
                    filter_size=(4, 4),
                    nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_3']))
    net['conv_4'] = batch_norm(
        Conv2DLayer(net['conv_3'],
                    num_filters=512,
                    stride=(2, 2),
                    filter_size=(4, 4),
                    nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_4']))
    net['conv_5'] = batch_norm(
        Conv2DLayer(net['conv_4'],
                    num_filters=512,
                    stride=(2, 2),
                    filter_size=(4, 4),
                    nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_5']))
    net['conv_6'] = batch_norm(
        Conv2DLayer(net['conv_5'],
                    num_filters=512,
                    stride=(2, 2),
                    filter_size=(4, 4),
                    nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_6']))

    # dropout on the bottleneck (reuses the 'conv_6' key)
    net['conv_6'] = DropoutLayer(net['conv_6'])
    net['unconv_1'] = ll.batch_norm(
        ll.TransposedConv2DLayer(net['conv_6'],
                                 num_filters=512,
                                 stride=(2, 2),
                                 filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_1']))

    concat = DropoutLayer(
        ll.ConcatLayer([net['unconv_1'], net['conv_5']], axis=1))
    net['unconv_2'] = (ll.batch_norm(
        ll.TransposedConv2DLayer(concat,
                                 num_filters=512,
                                 stride=(2, 2),
                                 filter_size=(4, 4))))
    print(lasagne.layers.get_output_shape(net['unconv_2']))

    concat = DropoutLayer(
        ll.ConcatLayer([net['unconv_2'], net['conv_4']], axis=1))
    net['unconv_3'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat,
                                 num_filters=256,
                                 stride=(2, 2),
                                 filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_3']))

    concat = ll.ConcatLayer([net['unconv_3'], net['conv_3']], axis=1)
    net['unconv_4'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat,
                                 num_filters=128,
                                 stride=(2, 2),
                                 filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_4']))

    concat = ll.ConcatLayer([net['unconv_4'], net['conv_2']], axis=1)
    net['unconv_5'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat,
                                 num_filters=64,
                                 stride=(2, 2),
                                 filter_size=(5, 5)))
    print(lasagne.layers.get_output_shape(net['unconv_5']))

    concat = ll.ConcatLayer([net['unconv_5'], net['conv_1']], axis=1)
    net['unconv_6'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat,
                                 num_filters=32,
                                 stride=(2, 2),
                                 filter_size=(4, 4)))

    print(lasagne.layers.get_output_shape(net['unconv_6']))
    net['pre_out'] = batch_norm(
        Conv2DLayer(net['unconv_6'],
                    num_filters=3,
                    filter_size=(3, 3),
                    nonlinearity=lasagne.nonlinearities.tanh,
                    pad='same'))
    print(lasagne.layers.get_output_shape(net['pre_out']))
    # standardize computes (x - offset) / scale, so a scale of 1/128 multiplies
    # the tanh output by 128, mapping it back to roughly [-128, 128]
    net['out'] = ll.standardize(net['pre_out'],
                                offset=np.array([0, 0, 0], dtype='float32'),
                                scale=np.array(
                                    [1 / 128.0, 1 / 128.0, 1 / 128.0],
                                    dtype='float32'))

    print(lasagne.layers.get_output_shape(net['out']))

    return net
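
# Usage sketch (hypothetical): compile the generator's forward pass. IMAGE_SHAPE
# is assumed to be defined at module level, as the code above requires.
def _demo_generator_forward():
    net = define_net()
    x_in = net['input'].input_var
    generate = theano.function(
        [x_in], ll.get_output(net['out'], deterministic=True))
    return generate
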
def deep_cnn_2d(params):
    """"""
    nonlin = nl.elu

    layers = L.InputLayer((None, 1, params['dur'], 128))
    print layers.output_shape

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [16, 32, 64, 64, 128, 256, 256]  # l
    filter_sz = [(5, 5), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (1, 1)]  # m
    if params['dur'] > 50:
        conv_strd = [(2, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1),
                     (1, 1)]  # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2), None, None]  # n
    else:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1),
                     (1, 1)]  # c
        pool_sz = [(1, 2), (2, 2), (2, 2), (2, 2), (2, 2), None, None]  # n
    pool_strd = [None, None, None, None, None, None, None]  # s
    batch_norm = [False, True, False, True, False, False, False]  # b
    dropout = [True, True, False, True, False, False, False]  # d # added
    conv_spec = zip(n_filter, filter_sz, conv_strd, pool_sz, pool_strd,
                    batch_norm, dropout)

    for l, m, c, n, s, b, d in conv_spec:
        if b:
            layers = L.batch_norm(
                L.Conv2DLayer(layers,
                              l,
                              m,
                              stride=c,
                              pad='same',
                              nonlinearity=nonlin))
        else:
            layers = L.Conv2DLayer(layers,
                                   l,
                                   m,
                                   stride=c,
                                   pad='same',
                                   nonlinearity=nonlin)
        if n is not None:
            layers = L.MaxPool2DLayer(layers, pool_size=n, stride=s)

        if d:
            layers = L.dropout(layers, p=0.1)

        print(layers.output_shape)

    layers = L.batch_norm(L.GlobalPoolLayer(layers))
    layers = L.dropout(layers)  # added
    print(layers.output_shape)

    layers = L.batch_norm(L.DenseLayer(layers, 256, nonlinearity=nonlin))
    layers = L.dropout(layers)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
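
# Sketch of how the scaler consumed by these builders could be produced
# (assumption: a sklearn StandardScaler fitted on (n_frames, 128) spectrogram
# frames and dumped with joblib, which matches the per-bin .mean_/.scale_ use
# and shared_axes=(0, 1, 2) above).
def _demo_fit_scaler(train_frames, path='scaler.joblib'):
    from sklearn.preprocessing import StandardScaler
    sclr = StandardScaler().fit(train_frames)  # train_frames: (n_frames, 128)
    joblib.dump(sclr, path)
    return path
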
def deep_cnn_2d_mtl_at_2(params):
    """"""
    assert 'targets' in params
    nonlin = nl.elu

    layers = L.InputLayer((None, 1, params['dur'], 128))
    print layers.output_shape

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [32, 64, 64, 128, 256, 256]  # l
    if 'kernel_multiplier' in params:
        n_filter = [x * params['kernel_multiplier'] for x in n_filter]
    filter_sz = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (1, 1)]  # m
    if params['dur'] > 50:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]  # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), None, None]  # n
    else:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]  # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), None, None]  # n
    pool_strd = [None, None, None, None, None, None]  # s
    batch_norm = [True, False, True, False, False, False]  # b
    dropout = [True, False, True, False, False, False]  # d # added
    # materialize as a list: conv_spec is re-iterated once per target below
    conv_spec = list(zip(n_filter, filter_sz, conv_strd, pool_sz, pool_strd,
                         batch_norm, dropout))

    # Shared first layer
    if 'kernel_multiplier' in params:
        n_1st_ker = 16 * params['kernel_multiplier']
    else:
        n_1st_ker = 16
    layers = L.Conv2DLayer(layers,
                           n_1st_ker, (5, 5),
                           stride=(1, 1),
                           pad='same',
                           nonlinearity=nonlin)
    layers = L.MaxPool2DLayer(layers, pool_size=(1, 2))
    layers = L.dropout(layers, p=0.1)

    layer_heads = OrderedDict()
    for target in params['targets']:
        # each head branches off from the shared representation
        layer_heads[target['name']] = layers
        for l, m, c, n, s, b, d in conv_spec:
            if b:
                layer_heads[target['name']] = L.batch_norm(
                    L.Conv2DLayer(layer_heads[target['name']],
                                  l,
                                  m,
                                  stride=c,
                                  pad='same',
                                  nonlinearity=nonlin))
            else:
                layer_heads[target['name']] = L.Conv2DLayer(
                    layer_heads[target['name']],
                    l,
                    m,
                    stride=c,
                    pad='same',
                    nonlinearity=nonlin)
            if n is not None:
                layer_heads[target['name']] = L.MaxPool2DLayer(
                    layer_heads[target['name']], pool_size=n, stride=s)

            if d:
                layer_heads[target['name']] = L.dropout(
                    layer_heads[target['name']], p=0.1)
            print(layer_heads[target['name']].output_shape)

        layer_heads[target['name']] = L.batch_norm(
            L.GlobalPoolLayer(layer_heads[target['name']]))
        layer_heads[target['name']] = L.dropout(
            layer_heads[target['name']])  # added
        print(layer_heads[target['name']].output_shape)

        if 'kernel_multiplier' in params:
            n_hid = 256 * params['kernel_multiplier']
        else:
            n_hid = 256
        layer_heads[target['name']] = L.batch_norm(
            L.DenseLayer(layer_heads[target['name']],
                         n_hid,
                         nonlinearity=nonlin))
        layer_heads[target['name']] = L.dropout(layer_heads[target['name']])
        print(layer_heads[target['name']].output_shape)

        layer_heads[target['name']] = L.DenseLayer(layer_heads[target['name']],
                                                   target['n_out'],
                                                   nonlinearity=nl.softmax)
        print(target['name'], layer_heads[target['name']].output_shape)

    return layer_heads
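
# Usage sketch (hypothetical target spec): every entry in params['targets']
# needs a 'name' and an 'n_out'; the builder returns one softmax output layer
# per target name.
def _demo_mtl_heads():
    params = {'dur': 43, 'scaler': 'scaler.joblib',
              'targets': [{'name': 'genre', 'n_out': 16},
                          {'name': 'mood', 'n_out': 8}]}
    heads = deep_cnn_2d_mtl_at_2(params)
    x_in = L.get_all_layers(heads['genre'])[0].input_var
    predict = theano.function(
        [x_in], [L.get_output(l, deterministic=True) for l in heads.values()])
    return predict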