Code example #1
import numpy as np
import theano.tensor as T
import lasagne
from lasagne.nonlinearities import very_leaky_rectify

from userconfig import *
import util
from layers_custom import *

###################################################################################################################
# create Theano variables for input minibatch
input_var = T.tensor4('X')
# note that in general, the main data tensors will have these axes:
#   - minibatchsize
#   - numchannels (always 1 for us, since spectrograms)
#   - numfilts (or specbinnum for input)
#   - numtimebins
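
# purely illustrative (not in the original script): a dummy minibatch with the
# axis layout described above; batch size 16 and 128 time bins are arbitrary,
# and specbinnum is assumed to be defined in userconfig
dummy_batch = np.zeros((16, 1, specbinnum, 128), dtype=np.float32)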

if example_is_audio:
	# load our example audio file as a specgram
	examplegram = util.standard_specgram(util.load_soundfile(examplewavpath, 0))
	print("examplegram is of shape %s" % str(np.shape(examplegram)))

###################################################################################################################
# here we define our "semi-convolutional" autoencoder
# NOTE: lasagne assumes pooling is on the TRAILING axis of the tensor, so we always use time as the trailing axis

def make_custom_convlayer(network, in_num_chans, out_num_chans):
	"Applies our special padding and reshaping to do 1D convolution on 2D data"
	network = lasagne.layers.PadLayer(network, width=(featframe_len - 1) // 2, batch_ndim=3) # NOTE: batch_ndim=3 stops the first three axes (batch, channels, frequency) from being padded, so only the trailing time axis is padded
	print("shape after pad layer: %s" % str(network.output_shape))
	network = lasagne.layers.Conv2DLayer(network, out_num_chans, (in_num_chans, featframe_len), stride=(1,1), pad=0, nonlinearity=very_leaky_rectify, W=lasagne.init.Orthogonal()) # we pad "manually" in order to do it in one dimension only
	filters = network.W
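	# the conv above collapses the frequency axis to length 1, so the reshape below
	# is a safe relabelling from (batch, out_num_chans, 1, numtimebins)
	# to (batch, 1, out_num_chans, numtimebins)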
	network = lasagne.layers.ReshapeLayer(network, ([0], [2], [1], [3])) # reinterpret channels as rows
	print("shape after conv layer: %s" % str(network.output_shape))
	return network, filters
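
# A minimal usage sketch (an illustration, not from the original source):
# stack the custom layer on an input layer. specbinnum and numtimebins are
# assumed to come from userconfig, and numfilters (the number of learned
# feature detectors) is a hypothetical name used only for this sketch.
network = lasagne.layers.InputLayer((None, 1, specbinnum, numtimebins), input_var)
network, filters = make_custom_convlayer(network, in_num_chans=specbinnum, out_num_chans=numfilters)
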
Code example #2
import numpy as np
import theano.tensor as T
import lasagne
from lasagne.nonlinearities import very_leaky_rectify

from userconfig import *
import util
from layers_custom import *

###################################################################################################################
# create Theano variables for input minibatch
input_var = T.tensor4('X')
# note that in general, the main data tensors will have these axes:
#   - minibatchsize
#   - numchannels (always 1 for us, since spectrograms)
#   - numfilts (or specbinnum for input)
#   - numtimebins

if example_is_audio:
    # load our example audio file as a specgram
    foregroundgram = util.standard_specgram(
        util.load_soundfile(foregroundwavpath, 0))
    backgroundgram = util.standard_specgram(
        util.load_soundfile(backgroundwavpath, 0))
    print("foregroundgram is of shape %s" % str(np.shape(foregroundgram)))
    print("backgroundgram is of shape %s" % str(np.shape(backgroundgram)))

###################################################################################################################
# here we define our "semi-convolutional" autoencoder
# NOTE: lasagne assumes pooling is on the TRAILING axis of the tensor, so we always use time as the trailing axis


def make_custom_convlayer(network, in_num_chans, out_num_chans):
    "Applies our special padding and reshaping to do 1D convolution on 2D data"
    network = lasagne.layers.PadLayer(
        network, width=(featframe_len - 1) // 2, batch_ndim=3
    )  # NOTE: batch_ndim=3 stops the first three axes (batch, channels, frequency) from being padded, so only the trailing time axis is padded
    print("shape after pad layer: %s" % str(network.output_shape))
    network = lasagne.layers.Conv2DLayer(
        network, out_num_chans, (in_num_chans, featframe_len), stride=(1, 1),
        pad=0, nonlinearity=very_leaky_rectify, W=lasagne.init.Orthogonal()
    )  # we pad "manually" above so that padding is applied along the time axis only
    filters = network.W
    network = lasagne.layers.ReshapeLayer(network, ([0], [2], [1], [3]))  # reinterpret channels as rows
    print("shape after conv layer: %s" % str(network.output_shape))
    return network, filters
Code example #3
import numpy as np
import theano.tensor as T
import lasagne
from lasagne.nonlinearities import very_leaky_rectify

from userconfig import *
import util
from layers_custom import *

###################################################################################################################
# create Theano variables for input minibatch
input_var = T.tensor4('X')
# note that in general, the main data tensors will have these axes:
#   - minibatchsize
#   - numchannels (always 1 for us, since spectrograms)
#   - numfilts (or specbinnum for input)
#   - numtimebins

if example_is_audio:
	# load our example audio file as a specgram
	foregroundgram = util.standard_specgram(util.load_soundfile(foregroundwavpath, 0))
	backgroundgram = util.standard_specgram(util.load_soundfile(backgroundwavpath, 0))
	print("foregroundgram is of shape %s" % str(np.shape(foregroundgram)))
	print("backgroundgram is of shape %s" % str(np.shape(backgroundgram)))


###################################################################################################################
# here we define our "semi-convolutional" autoencoder
# NOTE: lasagne assumes pooling is on the TRAILING axis of the tensor, so we always use time as the trailing axis

def make_custom_convlayer(network, in_num_chans, out_num_chans):
	"Applies our special padding and reshaping to do 1D convolution on 2D data"
	network = lasagne.layers.PadLayer(network, width=(featframe_len - 1) // 2, batch_ndim=3) # NOTE: batch_ndim=3 stops the first three axes (batch, channels, frequency) from being padded, so only the trailing time axis is padded
	print("shape after pad layer: %s" % str(network.output_shape))
	network = lasagne.layers.Conv2DLayer(network, out_num_chans, (in_num_chans, featframe_len), stride=(1,1), pad=0, nonlinearity=very_leaky_rectify, W=lasagne.init.Orthogonal()) # we pad "manually" in order to do it in one dimension only
	filters = network.W
	network = lasagne.layers.ReshapeLayer(network, ([0], [2], [1], [3])) # reinterpret channels as rows
	print("shape after conv layer: %s" % str(network.output_shape))
	return network, filters
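
# Design note (an observation, not from the original source): because the conv
# collapses the frequency axis to length 1, the ReshapeLayer relabelling above is
# equivalent to an explicit axis permutation such as
# lasagne.layers.DimshuffleLayer(network, (0, 2, 1, 3)), which some readers may find clearer.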