def generator(options):
    """Build an encoder/decoder generator network (SEGAN-style) with skip
    connections and an optional latent-noise input at the bottleneck.

    Args:
        options: dict of hyper-parameters. Keys read here:
            'window_length', 'feat_dim'      -- input audio tensor shape
            'generator_encoder_num_kernels'  -- kernels per encoder layer
            'generator_decoder_num_kernels'  -- kernels per decoder layer
            'filter_length', 'strides', 'padding', 'use_bias'
            'initializer_std_dev'            -- stddev for the weight init
            'z_dim'                          -- shape of the noise input z
            'z_in_use'                       -- concatenate z at the bottleneck?
            'show_summary'                   -- print the Keras model summary?

    Returns:
        A Keras ``Model``. Inputs are ``[audio, z]`` when ``z_in_use`` is
        truthy, otherwise ``[audio]``; the output is the decoded signal.
    """
    skips = []
    num_layers = len(options['generator_encoder_num_kernels'])
    audio_shape = (options['window_length'], options['feat_dim'])
    generator_encoder_num_kernels = options['generator_encoder_num_kernels']
    generator_decoder_num_kernels = options['generator_decoder_num_kernels']
    filter_length = options['filter_length']
    strides = options['strides']
    padding = options['padding']
    use_bias = options['use_bias']
    std_dev = options['initializer_std_dev']
    show_summary = options['show_summary']
    z_in_use = options['z_in_use']

    ## Define the encoder
    encoder_in = Input(shape=audio_shape)
    encoder_out = encoder_in
    for layer_i, num_kernels in enumerate(generator_encoder_num_kernels):
        # Add convolution layer
        encoder_out = Conv1D(
            num_kernels,
            filter_length,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=std_dev))(encoder_out)
        # Save pre-activation features for decoder skip connections
        # (every layer except the bottleneck).
        if layer_i < num_layers - 1:
            skips.append(encoder_out)
        # Apply PReLU
        encoder_out = PReLU(alpha_initializer='zeros',
                            weights=None)(encoder_out)

    ## Define the intermediate noise layer z
    # NOTE: z is always created, but it is only wired into the graph (and
    # listed as a model input) when z_in_use is truthy.
    z_dim = options['z_dim']
    z = Input(shape=z_dim)

    ## Define the decoder
    if z_in_use:
        decoder_out = keras.layers.concatenate([encoder_out, z])
    else:
        decoder_out = encoder_out

    # Row count of the decoder feature map; multiplied by `strides` each
    # layer to mirror the encoder's downsampling.
    n_rows = z_dim[0]
    for layer_i, num_kernels in enumerate(generator_decoder_num_kernels):
        shape_in = decoder_out.get_shape().as_list()
        # Conv2DTranspose needs a 3D (rows, 1, channels) input, so lift the
        # 2D tensor into an image with a dummy width axis first.
        new_shape = (shape_in[1], 1, shape_in[2])
        decoder_out = Reshape(new_shape)(decoder_out)
        decoder_out = Conv2DTranspose(
            num_kernels,
            [filter_length, 1],
            strides=[strides, 1],
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=std_dev))(decoder_out)
        # Reshape back to 2D. Conv2DTranspose loses the static shape, so
        # pin it explicitly before squeezing out the dummy axis.
        n_rows = strides * n_rows
        n_cols = num_kernels
        decoder_out.set_shape([None, n_rows, 1, n_cols])
        # FIX: the original branched on the last layer here, but both
        # branches performed the identical Reshape — collapsed to one call.
        decoder_out = Reshape((n_rows, n_cols))(decoder_out)
        if layer_i < num_layers - 1:
            # Apply PReLU
            decoder_out = PReLU(alpha_initializer='zeros',
                                weights=None)(decoder_out)
            # Add the mirror-image skip connection from the encoder.
            skips_dec = skips[-(layer_i + 1)]
            decoder_out = keras.layers.concatenate([decoder_out, skips_dec])

    ## Create the model graph
    if z_in_use:
        G = Model(inputs=[encoder_in, z], outputs=decoder_out)
    else:
        G = Model(inputs=[encoder_in], outputs=decoder_out)
    if show_summary:
        G.summary()
    return G
def generator(opts):
    """Build the generator network: a Conv1D encoder / Conv2DTranspose
    decoder with skip connections, plus several optional front/back ends.

    NOTE(review): this redefines ``generator`` from earlier in the file;
    at import time this later definition wins.

    Options read from ``opts``:
        'filterlength', 'strides'             -- conv kernel width / stride
        'g_enc_numkernels', 'g_dec_numkernels' -- kernels per enc/dec layer
        'window_length', 'featdim'            -- input waveform shape
        'GT_init_G', 'gt', 'gt_fixed'         -- gammatone init of layer 0
        'preemph_G', 'preemph_init', 'preemph_stride' -- learned pre-emphasis
        'applyprelu', 'leakyrelualpha'        -- activation choice
        'z_off'                               -- disable the noise input z
        'Gtanh'                               -- tanh on the final output
        'show_summary'                        -- print the model summary

    Returns:
        A Keras ``Model``: inputs ``[wav_in]`` (plus ``z`` unless 'z_off'),
        output named "g_output" (optionally tanh-squashed).
    """
    kwidth = opts['filterlength']
    strides = opts['strides']
    pool = strides
    g_enc_numkernels = opts['g_enc_numkernels']
    g_dec_numkernels = opts['g_dec_numkernels']
    window_length = opts['window_length']
    featdim = opts['featdim']
    # FIX: removed unused local `batch_size` (read but never used).

    # Optional gammatone (GT) filterbank weights for the first encoder layer.
    if opts['GT_init_G']:
        gt = np.expand_dims(opts['gt'], axis=1)
        num_gt_filters = gt.shape[2]
        gt_filterlength = gt.shape[0]
        gt_bias = np.zeros((num_gt_filters, ))
    # Optional fixed weights for the pre-emphasis front-end layer.
    if opts['preemph_G']:
        preemph_init = np.array(opts['preemph_init']).T
        preemph_init = np.expand_dims(preemph_init, axis=1)

    use_bias = True
    skips = []
    kernel_init = 'glorot_uniform'

    wav_in = Input(shape=(window_length, featdim))
    if opts['preemph_G']:
        # Learned pre-emphasis: a width-2 conv acting as a first-order filter.
        enc_out = Conv1D(1, 2, kernel_initializer=kernel_init,
                         strides=opts['preemph_stride'], padding="same",
                         use_bias=False, name="G_preemphlayer")(wav_in)
    else:
        enc_out = wav_in

    # Defining the encoder
    for layernum, numkernels in enumerate(g_enc_numkernels):
        if layernum == 0 and opts['GT_init_G']:
            # First layer gets the gammatone shape/size; weights set below.
            enc_out = Conv1D(num_gt_filters, gt_filterlength,
                             kernel_initializer=kernel_init, strides=pool,
                             padding="same", use_bias=use_bias,
                             name="G_gtlayer")(enc_out)
        else:
            enc_out = Conv1D(numkernels, kwidth, strides=pool,
                             kernel_initializer=kernel_init, padding="same",
                             use_bias=use_bias)(enc_out)
        # Keep pre-activation features for the decoder skip connections
        # (all layers except the bottleneck).
        if layernum < len(g_enc_numkernels) - 1:
            skips.append(enc_out)
        if opts['applyprelu']:
            enc_out = PReLU(alpha_initializer='zero', weights=None)(enc_out)
        else:
            enc_out = LeakyReLU(alpha=opts['leakyrelualpha'])(enc_out)

    # Bottleneck spatial size after num_enc_layers strided convolutions.
    num_enc_layers = len(g_enc_numkernels)
    z_rows = int(window_length / (pool**num_enc_layers))
    z_cols = g_enc_numkernels[-1]

    # Adding the intermediate noise layer z
    if not opts['z_off']:
        z = Input(shape=(z_rows, z_cols), name='noise_input')
        dec_out = keras.layers.concatenate([enc_out, z])
    else:
        dec_out = enc_out

    # Now the decoder.  `nrows` tracks the growing time dimension; columns
    # are always the current layer's kernel count.
    nrows = z_rows
    for declayernum, decnumkernels in enumerate(g_dec_numkernels):
        # Reshape for the Conv2DTranspose layer, which needs a 3D input.
        indim = dec_out.get_shape().as_list()
        newshape = (indim[1], 1, indim[2])
        dec_out = Reshape(newshape)(dec_out)
        # Add the Conv2DTranspose (upsampling) layer.
        dec_out = Conv2DTranspose(decnumkernels, [kwidth, 1],
                                  strides=[strides, 1],
                                  kernel_initializer=kernel_init,
                                  padding="same", use_bias=use_bias)(dec_out)
        # Reshape back to 2D.
        nrows *= strides          # rows are multiplied by the stride
        ncols = decnumkernels     # cols equal the number of kernels
        # Pin the static shape lost by Conv2DTranspose before reshaping.
        dec_out.set_shape([None, nrows, 1, ncols])
        newshape = (nrows, ncols)
        if declayernum == len(g_dec_numkernels) - 1:
            # Name the final output "g_output" so callers can look it up.
            dec_out = Reshape(newshape, name="g_output")(dec_out)
        else:
            dec_out = Reshape(newshape)(dec_out)
        # Add skip connection and activation up to the second-last layer.
        if declayernum < len(g_dec_numkernels) - 1:
            if opts['applyprelu']:
                dec_out = PReLU(alpha_initializer='zero',
                                weights=None)(dec_out)
            else:
                dec_out = LeakyReLU(alpha=opts['leakyrelualpha'])(dec_out)
            # Mirror-image skip connection from the encoder.
            skip_ = skips[-(declayernum + 1)]
            dec_out = keras.layers.concatenate([dec_out, skip_])

    # Add tanh if G uses a tanh output activation.
    if opts['Gtanh']:
        dec_out = Activation('tanh')(dec_out)

    # Create the model graph
    if opts['z_off']:
        G = Model(inputs=[wav_in], outputs=[dec_out])
    else:
        G = Model(inputs=[wav_in, z], outputs=[dec_out])

    # Apply the gammatone initialization
    if opts['GT_init_G']:
        G.get_layer("G_gtlayer").set_weights([gt, gt_bias])
        # Optionally freeze the gammatone layer.
        if opts['gt_fixed']:
            G.get_layer("G_gtlayer").trainable = False
    # Apply the pre-emphasis initialization
    if opts['preemph_G']:
        G.get_layer("G_preemphlayer").set_weights([preemph_init])

    if opts['show_summary']:
        G.summary()
    return G