# Imports assumed by the snippets below; STFTLayer, MelSpecLayer and get_in_shape
# are project-local helpers, not part of Lasagne.
from collections import OrderedDict

import numpy as np
import joblib
import theano
import lasagne
from lasagne import layers as L
from lasagne import nonlinearities as nl


def input_block(net, config, melspec=False, verbose=True):
    """Shared input block: input layer, Gaussian-noise corruption, optional
    STFT/mel front end and standardization. Returns the layer dict and the
    shared variable controlling the noise level."""
    # load scaler
    sclr = joblib.load(config.paths.preproc.scaler)

    net['input'] = L.InputLayer(shape=get_in_shape(config), name='input')
    sigma = theano.shared(np.array(0., dtype=np.float32),
                          name='noise_controller')
    net['noise'] = L.GaussianNoiseLayer(net['input'], sigma=sigma,
                                        name='input_corruption')

    if config.hyper_parameters.input == "melspec":
        net['sclr'] = L.standardize(net['noise'],
                                    offset=sclr.mean_.astype(np.float32),
                                    scale=sclr.scale_.astype(np.float32),
                                    shared_axes=(0, 1, 2))
    else:
        net['stft'] = STFTLayer(
            L.ReshapeLayer(net['noise'], ([0], [1], [2], 1), name='reshape'),
            n_fft=config.hyper_parameters.n_fft,
            hop_size=config.hyper_parameters.hop_size)

        if melspec:
            net['melspec'] = MelSpecLayer(
                sr=config.hyper_parameters.sample_rate,
                n_fft=config.hyper_parameters.n_fft,
                n_mels=128, log_amplitude=True)
            net['sclr'] = L.standardize(net['melspec'],
                                        offset=sclr.mean_.astype(np.float32),
                                        scale=sclr.scale_.astype(np.float32),
                                        shared_axes=(0, 1, 2))
        else:
            net['sclr'] = L.standardize(net['stft'],
                                        offset=sclr.mean_.astype(np.float32),
                                        scale=sclr.scale_.astype(np.float32),
                                        shared_axes=(0, 1, 2))

        # only pooling freq domain
        net['stft.pl'] = L.MaxPool2DLayer(net['sclr'], pool_size=(2, 1),
                                          name='stft.pl')

    if verbose:
        print(net['input'].output_shape)
        # if melspec:
        #     print(net['melspec'].output_shape)
        # else:
        #     print(net['stft'].output_shape)
        # print(net['stft.pl'].output_shape)
        print(net['sclr'].output_shape)

    return net, sigma
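# Usage sketch (not part of the original code): `input_block` returns the Theano shared
# variable controlling the input-corruption level, so a training script can anneal it and
# silence it at evaluation time. `config`, `n_epochs` and `run_training_epoch` are
# hypothetical placeholders; the schedule itself is only illustrative.
def example_noise_schedule(config, n_epochs=100):
    net, sigma = input_block({}, config, verbose=False)
    for epoch in range(n_epochs):
        # linearly decay the Gaussian-noise std from 0.3 towards 0
        sigma.set_value(np.array(0.3 * (1.0 - epoch / float(n_epochs)),
                                 dtype=np.float32))
        run_training_epoch(net)  # hypothetical training step
    sigma.set_value(np.array(0., dtype=np.float32))  # no corruption at inference
    return net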
def pons_cnn(params):
    """Pons-style two-branch CNN: a frequency-spanning 'timbre' branch and a
    time-spanning 'rhythm' branch, globally pooled and concatenated."""
    layers = L.InputLayer((None, 1, params['dur'], 128))
    print(layers.output_shape)

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    layers_timbre = L.GlobalPoolLayer(
        L.batch_norm(L.Conv2DLayer(layers, 64, (1, 96))))
    layers_rhythm = L.GlobalPoolLayer(
        L.batch_norm(L.Conv2DLayer(layers, 64, (params['dur'] - 10, 1))))

    layers = L.ConcatLayer([layers_rhythm, layers_timbre], axis=-1)
    layers = L.DenseLayer(layers, 64, nonlinearity=nl.rectify)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
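# Usage sketch (the `params` values are illustrative, not from the original code): build
# the Pons-style net and compile a deterministic forward pass with standard Lasagne/Theano calls.
def example_pons_predict():
    params = {'dur': 43, 'scaler': '/path/to/scaler.pkl'}  # hypothetical settings
    out_layer = pons_cnn(params)
    x_in = L.get_all_layers(out_layer)[0].input_var        # the InputLayer's variable
    y_hat = L.get_output(out_layer, deterministic=True)
    predict_fn = theano.function([x_in], y_hat)
    # predict_fn(batch) -> (n, 16) softmax posteriors for a batch of shape (n, 1, dur, 128)
    return predict_fn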
from lasagne.layers import InputLayer, get_output, standardize


def test_standardize():
    # Simple example
    X = np.random.standard_normal((1000, 20)).astype(theano.config.floatX)
    l_in = InputLayer((None, 20))
    l_std = standardize(
        l_in, X.min(axis=0), (X.max(axis=0) - X.min(axis=0)), shared_axes=0)
    out = get_output(l_std).eval({l_in.input_var: X})
    assert np.allclose(out.max(axis=0), 1.)
    assert np.allclose(out.min(axis=0), 0.)

    # More complicated example
    X = np.random.standard_normal(
        (50, 3, 100, 10)).astype(theano.config.floatX)
    mean = X.mean(axis=(0, 2))
    std = X.std(axis=(0, 2))
    l_in = InputLayer((None, 3, None, 10))
    l_std = standardize(l_in, mean, std, shared_axes=(0, 2))
    out = get_output(l_std).eval({l_in.input_var: X})
    assert np.allclose(out.mean(axis=(0, 2)), 0., atol=1e-5)
    assert np.allclose(out.std((0, 2)), 1., atol=1e-5)
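# For reference (an equivalent plain-NumPy restatement, not an additional test case):
# standardize(l_in, offset, scale, ...) computes (x - offset) / scale, so the first
# assertion block above amounts to min-max scaling into [0, 1].
def example_standardize_numpy_equivalent():
    X = np.random.standard_normal((1000, 20)).astype(np.float32)
    offset, scale = X.min(axis=0), X.max(axis=0) - X.min(axis=0)
    out = (X - offset) / scale
    assert np.allclose(out.max(axis=0), 1.)
    assert np.allclose(out.min(axis=0), 0.)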
def shallow_cnn_2d_vanilla(params):
    """Shallow four-layer 2-D CNN baseline over (dur x 128) spectrogram patches."""
    layers = L.InputLayer((None, 1, params['dur'], 128))
    print(layers.output_shape)

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [8, 16, 16, 32]                    # l
    filter_sz = [(5, 5), (5, 5), (1, 1), (5, 5)]  # m
    pool_sz = [(3, 3), (3, 3), None, (3, 3)]      # n
    pool_strd = [None, None, None, None]          # s
    batch_norm = [False, False, False, False]     # b

    conv_spec = zip(n_filter, filter_sz, pool_sz, pool_strd, batch_norm)
    for l, m, n, s, b in conv_spec:
        if b:
            layers = L.batch_norm(
                L.Conv2DLayer(layers, l, m, nonlinearity=nl.rectify))
        else:
            layers = L.Conv2DLayer(layers, l, m, nonlinearity=nl.rectify)
        if n is not None:
            layers = L.MaxPool2DLayer(layers, pool_size=n, stride=s)
        print(layers.output_shape)

    layers = L.DenseLayer(layers, 64, nonlinearity=nl.rectify)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
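# Inspection sketch (the `params` values are illustrative): Lasagne's `count_params`
# and `get_all_layers` give a quick size/shape overview of the model built above.
def example_inspect_shallow_cnn():
    params = {'dur': 43, 'scaler': '/path/to/scaler.pkl'}  # hypothetical settings
    net = shallow_cnn_2d_vanilla(params)
    print(L.count_params(net, trainable=True))             # total trainable parameters
    for layer in L.get_all_layers(net):                    # layer-by-layer shapes
        print(type(layer).__name__, layer.output_shape)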
import lasagne.layers as ll
from lasagne.layers import Conv2DLayer, DropoutLayer, batch_norm

# IMAGE_SHAPE is assumed to be a module-level constant, e.g. (256, 256).


def define_net():
    """U-Net-style generator: six strided-conv encoder blocks with skip
    connections into six transposed-conv decoder blocks."""
    net = {}
    print("Generator layer shapes:")
    net['input'] = ll.InputLayer(shape=(None, 3, IMAGE_SHAPE[0], IMAGE_SHAPE[1]))
    leaky_relu = lasagne.nonlinearities.LeakyRectify(0.2)
    # net['stand'] = ll.standardize(net['input'],
    #                               offset=np.array([0, 0, 0], dtype='float32'),
    #                               scale=np.array([128.0, 128.0, 128.0], dtype='float32'))
    net['conv_1'] = Conv2DLayer(net['input'], num_filters=64, stride=(2, 2),
                                filter_size=(4, 4), nonlinearity=leaky_relu)
    print(lasagne.layers.get_output_shape(net['conv_1']))
    net['conv_2'] = batch_norm(
        Conv2DLayer(net['conv_1'], num_filters=128, stride=(2, 2),
                    filter_size=(4, 4), nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_2']))
    net['conv_3'] = batch_norm(
        Conv2DLayer(net['conv_2'], num_filters=256, stride=(2, 2),
                    filter_size=(4, 4), nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_3']))
    net['conv_4'] = batch_norm(
        Conv2DLayer(net['conv_3'], num_filters=512, stride=(2, 2),
                    filter_size=(4, 4), nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_4']))
    net['conv_5'] = batch_norm(
        Conv2DLayer(net['conv_4'], num_filters=512, stride=(2, 2),
                    filter_size=(4, 4), nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_5']))
    net['conv_6'] = batch_norm(
        Conv2DLayer(net['conv_5'], num_filters=512, stride=(2, 2),
                    filter_size=(4, 4), nonlinearity=leaky_relu))
    print(lasagne.layers.get_output_shape(net['conv_6']))
    net['conv_6'] = DropoutLayer(net['conv_6'])

    net['unconv_1'] = ll.batch_norm(
        ll.TransposedConv2DLayer(net['conv_6'], num_filters=512,
                                 stride=(2, 2), filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_1']))
    concat = DropoutLayer(
        ll.ConcatLayer([net['unconv_1'], net['conv_5']], axis=1))
    net['unconv_2'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat, num_filters=512,
                                 stride=(2, 2), filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_2']))
    concat = DropoutLayer(
        ll.ConcatLayer([net['unconv_2'], net['conv_4']], axis=1))
    net['unconv_3'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat, num_filters=256,
                                 stride=(2, 2), filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_3']))
    concat = ll.ConcatLayer([net['unconv_3'], net['conv_3']], axis=1)
    net['unconv_4'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat, num_filters=128,
                                 stride=(2, 2), filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_4']))
    concat = ll.ConcatLayer([net['unconv_4'], net['conv_2']], axis=1)
    net['unconv_5'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat, num_filters=64,
                                 stride=(2, 2), filter_size=(5, 5)))
    print(lasagne.layers.get_output_shape(net['unconv_5']))
    concat = ll.ConcatLayer([net['unconv_5'], net['conv_1']], axis=1)
    net['unconv_6'] = ll.batch_norm(
        ll.TransposedConv2DLayer(concat, num_filters=32,
                                 stride=(2, 2), filter_size=(4, 4)))
    print(lasagne.layers.get_output_shape(net['unconv_6']))

    net['pre_out'] = batch_norm(
        Conv2DLayer(net['unconv_6'], num_filters=3, filter_size=(3, 3),
                    nonlinearity=lasagne.nonlinearities.tanh, pad='same'))
    print(lasagne.layers.get_output_shape(net['pre_out']))
    net['out'] = ll.standardize(net['pre_out'],
                                offset=np.array([0, 0, 0], dtype='float32'),
                                scale=np.array([1 / 128.0, 1 / 128.0, 1 / 128.0],
                                               dtype='float32'))
    print(lasagne.layers.get_output_shape(net['out']))
    return net
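# Usage sketch (not part of the original training code): compile the generator's forward
# pass; dropout is disabled via `deterministic=True`.
def example_generate():
    net = define_net()
    x_in = net['input'].input_var
    g_out = ll.get_output(net['out'], deterministic=True)
    generate = theano.function([x_in], g_out)
    # generate(batch) expects float32 input of shape (n, 3, IMAGE_SHAPE[0], IMAGE_SHAPE[1])
    return generate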
def deep_cnn_2d(params):
    """Deep seven-layer 2-D CNN over (dur x 128) spectrogram patches."""
    nonlin = nl.elu

    layers = L.InputLayer((None, 1, params['dur'], 128))
    print(layers.output_shape)

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [16, 32, 64, 64, 128, 256, 256]                                # l
    filter_sz = [(5, 5), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (1, 1)]      # m
    if params['dur'] > 50:
        conv_strd = [(2, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]  # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), (2, 2), None, None]        # n
    else:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]  # c
        pool_sz = [(1, 2), (2, 2), (2, 2), (2, 2), (2, 2), None, None]        # n
    pool_strd = [None, None, None, None, None, None, None]                    # s
    batch_norm = [False, True, False, True, False, False, False]              # b
    dropout = [True, True, False, True, False, False, False]                  # d  # added

    conv_spec = zip(n_filter, filter_sz, conv_strd, pool_sz,
                    pool_strd, batch_norm, dropout)
    for l, m, c, n, s, b, d in conv_spec:
        if b:
            layers = L.batch_norm(
                L.Conv2DLayer(layers, l, m, stride=c, pad='same',
                              nonlinearity=nonlin))
        else:
            layers = L.Conv2DLayer(layers, l, m, stride=c, pad='same',
                                   nonlinearity=nonlin)
        if n is not None:
            layers = L.MaxPool2DLayer(layers, pool_size=n, stride=s)
        if d:
            layers = L.dropout(layers, p=0.1)
        print(layers.output_shape)

    layers = L.batch_norm(L.GlobalPoolLayer(layers))
    layers = L.dropout(layers)  # added
    print(layers.output_shape)

    layers = L.batch_norm(L.DenseLayer(layers, 256, nonlinearity=nonlin))
    layers = L.dropout(layers)
    print(layers.output_shape)

    layers = L.DenseLayer(layers, 16, nonlinearity=nl.softmax)
    print(layers.output_shape)

    return layers
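# Training-compile sketch (learning rate and `params` values are assumptions, not the
# original training code): standard Lasagne categorical cross-entropy with Adam updates
# for the classifier above.
def example_train_fn_deep_cnn():
    import theano.tensor as T
    params = {'dur': 43, 'scaler': '/path/to/scaler.pkl'}  # hypothetical settings
    net = deep_cnn_2d(params)
    x_in = L.get_all_layers(net)[0].input_var
    y = T.ivector('y')
    y_hat = L.get_output(net)                              # stochastic pass (dropout active)
    loss = lasagne.objectives.categorical_crossentropy(y_hat, y).mean()
    updates = lasagne.updates.adam(
        loss, L.get_all_params(net, trainable=True), learning_rate=1e-3)
    return theano.function([x_in, y], loss, updates=updates)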
def deep_cnn_2d_mtl_at_2(params):
    """Multi-task variant of `deep_cnn_2d`: one shared first conv block, then a
    separate convolutional head per entry in `params['targets']`."""
    assert 'targets' in params
    nonlin = nl.elu

    layers = L.InputLayer((None, 1, params['dur'], 128))
    print(layers.output_shape)

    sclr = joblib.load(params['scaler'])
    layers = L.standardize(layers,
                           sclr.mean_.astype(np.float32),
                           sclr.scale_.astype(np.float32),
                           shared_axes=(0, 1, 2))
    print(layers.output_shape)

    n_filter = [32, 64, 64, 128, 256, 256]                           # l
    if 'kernel_multiplier' in params:
        n_filter = [x * params['kernel_multiplier'] for x in n_filter]
    filter_sz = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (1, 1)]     # m
    if params['dur'] > 50:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)] # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), None, None]       # n
    else:
        conv_strd = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)] # c
        pool_sz = [(2, 2), (2, 2), (2, 2), (2, 2), None, None]       # n
    pool_strd = [None, None, None, None, None, None]                 # s
    batch_norm = [True, False, True, False, False, False]            # b
    dropout = [True, False, True, False, False, False]               # d  # added
    # materialize the spec, since it is iterated once per target below
    conv_spec = list(zip(n_filter, filter_sz, conv_strd, pool_sz,
                         pool_strd, batch_norm, dropout))

    # Shared first layer
    if 'kernel_multiplier' in params:
        n_1st_ker = 16 * params['kernel_multiplier']
    else:
        n_1st_ker = 16
    layers = L.Conv2DLayer(layers, n_1st_ker, (5, 5), stride=(1, 1),
                           pad='same', nonlinearity=nonlin)
    layers = L.MaxPool2DLayer(layers, pool_size=(1, 2))
    layers = L.dropout(layers, p=0.1)

    layer_heads = OrderedDict()
    for target in params['targets']:
        first_trgt_spec_layer = True  # n_layer checker
        for l, m, c, n, s, b, d in conv_spec:
            if first_trgt_spec_layer:
                layer_heads[target['name']] = layers
                first_trgt_spec_layer = False

            if b:
                layer_heads[target['name']] = L.batch_norm(
                    L.Conv2DLayer(layer_heads[target['name']], l, m, stride=c,
                                  pad='same', nonlinearity=nonlin))
            else:
                layer_heads[target['name']] = L.Conv2DLayer(
                    layer_heads[target['name']], l, m, stride=c,
                    pad='same', nonlinearity=nonlin)
            if n is not None:
                layer_heads[target['name']] = L.MaxPool2DLayer(
                    layer_heads[target['name']], pool_size=n, stride=s)
            if d:
                layer_heads[target['name']] = L.dropout(
                    layer_heads[target['name']], p=0.1)
            print(layer_heads[target['name']].output_shape)

        layer_heads[target['name']] = L.batch_norm(
            L.GlobalPoolLayer(layer_heads[target['name']]))
        layer_heads[target['name']] = L.dropout(
            layer_heads[target['name']])  # added
        print(layer_heads[target['name']].output_shape)

        if 'kernel_multiplier' in params:
            n_hid = 256 * params['kernel_multiplier']
        else:
            n_hid = 256
        layer_heads[target['name']] = L.batch_norm(
            L.DenseLayer(layer_heads[target['name']], n_hid,
                         nonlinearity=nonlin))
        layer_heads[target['name']] = L.dropout(layer_heads[target['name']])
        print(layer_heads[target['name']].output_shape)

        layer_heads[target['name']] = L.DenseLayer(layer_heads[target['name']],
                                                   target['n_out'],
                                                   nonlinearity=nl.softmax)
        print(target['name'], layer_heads[target['name']].output_shape)

    return layer_heads
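# Sketch of the `targets` structure this builder expects and one way (an assumption,
# not the original training code) to combine the per-head losses into a single
# objective. The target names, sizes and paths below are illustrative only.
def example_train_fn_mtl():
    import theano.tensor as T
    params = {
        'dur': 43,
        'scaler': '/path/to/scaler.pkl',                    # hypothetical path
        'targets': [{'name': 'genre', 'n_out': 16},
                    {'name': 'mood', 'n_out': 8}],          # illustrative tasks
    }
    heads = deep_cnn_2d_mtl_at_2(params)
    # all heads share the same InputLayer, so take its variable from the first head
    x_in = L.get_all_layers(heads[params['targets'][0]['name']])[0].input_var

    losses, labels = [], []
    for t in params['targets']:
        y_t = T.ivector(t['name'])
        p_t = L.get_output(heads[t['name']])
        losses.append(lasagne.objectives.categorical_crossentropy(p_t, y_t).mean())
        labels.append(y_t)

    total_loss = sum(losses)                                # unweighted sum over tasks
    updates = lasagne.updates.adam(
        total_loss, L.get_all_params(list(heads.values()), trainable=True),
        learning_rate=1e-3)
    return theano.function([x_in] + labels, total_loss, updates=updates)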