def conv_3d(inputs, filter_size, num_filters, layer_name, stride=1, is_train=True,
            add_batch_norm=False, add_reg=False, activation=tf.identity):
    """
    Create a 3D convolution layer
    :param inputs: input array
    :param filter_size: size of the filter
    :param num_filters: number of filters (or output feature maps)
    :param layer_name: layer name
    :param stride: convolution filter stride
    :param is_train: boolean to differentiate train and test (useful when applying batch normalization)
    :param add_batch_norm: boolean to use batch norm (or not)
    :param add_reg: boolean to add norm-2 regularization (or not)
    :param activation: type of activation to be applied
    :return: The output array
    """
    num_in_channel = get_num_channels(inputs)
    with tf.variable_scope(layer_name):
        shape = [filter_size, filter_size, filter_size, num_in_channel, num_filters]
        weights = weight_variable(layer_name, shape=shape)
        tf.summary.histogram('W', weights)
        layer = tf.nn.conv3d(input=inputs,
                             filter=weights,
                             strides=[1, stride, stride, stride, 1],
                             padding="SAME")
        # print('{}: {}'.format(layer_name, layer.get_shape()))
        if add_batch_norm:
            layer = batch_norm(layer, is_train)
        else:
            biases = bias_variable(layer_name, [num_filters])
            layer += biases
        layer = activation(layer)
        if add_reg:
            tf.add_to_collection('weights', weights)
    return layer
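# A minimal usage sketch of conv_3d (TensorFlow 1.x). The input shape and
# layer settings below are illustrative assumptions, and the helpers conv_3d
# relies on (get_num_channels, weight_variable, bias_variable, batch_norm)
# are assumed to be defined elsewhere in this module.
def _conv_3d_usage_example():
    volumes = tf.placeholder(tf.float32, shape=[None, 32, 32, 32, 1])  # [batch, D, H, W, C]
    features = conv_3d(inputs=volumes, filter_size=3, num_filters=16,
                       layer_name='conv1', stride=1, is_train=True,
                       add_batch_norm=True, activation=tf.nn.relu)
    # "SAME" padding with stride 1 preserves spatial dims: (None, 32, 32, 32, 16)
    return features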
def BN_Relu_conv_3d(inputs, filter_size, num_filters, layer_name, stride=1, is_train=True,
                    add_batch_norm=True, use_relu=True, add_reg=False):
    """
    Create a BN, ReLU, and 3D convolution layer
    :param inputs: input array
    :param filter_size: size of the filter
    :param num_filters: number of filters (or output feature maps)
    :param layer_name: layer name
    :param stride: convolution filter stride
    :param is_train: boolean to differentiate train and test (useful when applying batch normalization)
    :param add_batch_norm: boolean to use batch norm (or not)
    :param add_reg: boolean to add norm-2 regularization (or not)
    :param use_relu: boolean to apply ReLU activation (or not)
    :return: The output array
    """
    num_in_channel = get_num_channels(inputs)
    with tf.variable_scope(layer_name):
        if add_batch_norm:
            inputs = batch_norm(inputs, is_train)
        if use_relu:
            inputs = tf.nn.relu(inputs)
        shape = [filter_size, filter_size, filter_size, num_in_channel, num_filters]
        weights = weight_variable(layer_name, shape=shape)
        layer = tf.nn.conv3d(input=inputs,
                             filter=weights,
                             strides=[1, stride, stride, stride, 1],
                             padding="SAME")
        if add_reg:
            tf.add_to_collection('weights', weights)
    return layer
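# BN_Relu_conv_3d uses the "pre-activation" ordering (BN -> ReLU -> conv)
# familiar from pre-activation ResNets and DenseNet. A hedged sketch of
# stacking two such layers; the argument values are illustrative assumptions.
def _bn_relu_conv_3d_usage_example(volumes, is_train):
    x = BN_Relu_conv_3d(inputs=volumes, filter_size=3, num_filters=16,
                        layer_name='pre_act_1', is_train=is_train)
    x = BN_Relu_conv_3d(inputs=x, filter_size=3, num_filters=32,
                        layer_name='pre_act_2', is_train=is_train)
    return x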
def main_denoising(wav_files, output_dir, verbose=False, **kwargs):
    """Perform speech enhancement for the WAV files in ``wav_files``.

    Parameters
    ----------
    wav_files : list of str
        Paths to WAV files to enhance.

    output_dir : str
        Path to output directory for enhanced WAV files.

    verbose : bool, optional
        If True, print full error output to STDERR for files with errors.

    kwargs
        Keyword arguments to pass to ``denoise_wav``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load global MVN statistics.
    global_mean_var = sio.loadmat(GLOBAL_MEAN_VAR_MATF)
    global_mean = global_mean_var['global_mean']
    global_var = global_mean_var['global_var']

    # Perform speech enhancement.
    for src_wav_file in wav_files:
        # Perform basic checks of input WAV.
        if not os.path.exists(src_wav_file):
            utils.error('File "%s" does not exist. Skipping.' % src_wav_file)
            continue
        if not utils.is_wav(src_wav_file):
            utils.error('File "%s" is not WAV. Skipping.' % src_wav_file)
            continue
        if utils.get_sr(src_wav_file) != SR:
            utils.error('Sample rate of file "%s" is not %d Hz. Skipping.' %
                        (src_wav_file, SR))
            continue
        if utils.get_num_channels(src_wav_file) != NUM_CHANNELS:
            utils.error('File "%s" is not monochannel. Skipping.' % src_wav_file)
            continue
        if utils.get_bitdepth(src_wav_file) != BITDEPTH:
            utils.error('Bitdepth of file "%s" is not %d. Skipping.' %
                        (src_wav_file, BITDEPTH))
            continue

        # Denoise.
        try:
            bn = os.path.basename(src_wav_file)
            dest_wav_file = os.path.join(output_dir, bn)
            denoise_wav(src_wav_file, dest_wav_file, global_mean, global_var, **kwargs)
            print('Finished processing file "%s".' % src_wav_file)
        except Exception as e:
            msg = 'Problem encountered while processing file "%s". Skipping.' % src_wav_file
            if verbose:
                msg = '%s Full error output:\n%s' % (msg, e)
            utils.error(msg)
            continue
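# A hedged invocation sketch: the glob pattern and output path are
# hypothetical placeholders, and the module-level constants used by
# main_denoising (SR, NUM_CHANNELS, BITDEPTH, GLOBAL_MEAN_VAR_MATF) are
# assumed to be configured.
def _main_denoising_example():
    from glob import glob
    wav_files = sorted(glob('data/noisy/*.wav'))
    main_denoising(wav_files, 'data/enhanced', verbose=True)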
def down_conv(self, x):
    # Halve the spatial resolution with a strided convolution while doubling the channels.
    num_out_channels = get_num_channels(x) * 2
    x = conv_3d(inputs=x,
                filter_size=2,
                num_filters=num_out_channels,
                layer_name='conv_down',
                stride=2,
                add_batch_norm=self.conf.use_BN,
                is_train=self.is_training,
                activation=self.act_fcn)
    return x
def up_conv(self, x, out_shape):
    # Double the spatial resolution with a strided transposed convolution while halving the channels.
    num_out_channels = get_num_channels(x) // 2
    x = deconv_3d(inputs=x,
                  filter_size=2,
                  num_filters=num_out_channels,
                  layer_name='conv_up',
                  stride=2,
                  add_batch_norm=self.conf.use_BN,
                  is_train=self.is_training,
                  out_shape=out_shape,
                  activation=self.act_fcn)
    return x
def __init__(self, input_shape, num_actions, env, key=None, folder=None):
    # TODO: clean up the constructor and init signature
    super(ConvPolicyNet, self).__init__(input_shape, num_actions, env, key, folder)
    self.conv = nn.Conv2d(in_channels=get_num_channels(), out_channels=128, kernel_size=2)
    o = conv_output_size(input_shape[1], 2, 0, 1)
    self.fc = nn.Linear(128 * o * o, num_actions)
    self.input_shape = input_shape
    self.optimizer = optim.Adam(self.parameters(), lr=10**-5)
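# conv_output_size is not shown in these snippets. A standard definition,
# consistent with the call conv_output_size(n, kernel_size, padding, stride)
# above, would be the usual convolution output-size formula
# o = floor((n + 2p - k) / s) + 1; this is an assumption, not necessarily
# the project's actual helper.
def conv_output_size(n, kernel_size, padding, stride):
    # e.g. conv_output_size(84, 2, 0, 1) == 83 for a 2x2 kernel, no padding, stride 1
    return (n + 2 * padding - kernel_size) // stride + 1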
def down_conv(self, x):
    # 1x1 convolution (channel count unchanged), then dropout and max-pooling.
    num_out_channels = get_num_channels(x)
    x = BN_Relu_conv_2d(inputs=x,
                        filter_size=1,
                        num_filters=num_out_channels,
                        layer_name='conv_down',
                        stride=1,
                        add_batch_norm=self.conf.use_BN,
                        add_reg=self.conf.use_reg,
                        is_train=self.is_training,
                        use_relu=True)
    x = tf.nn.dropout(x, self.keep_prob)
    x = max_pool(x, self.conf.pool_filter_size, name='maxpool')
    return x
def conv_block_up(self, layer_input, fine_grained_features, num_convolutions):
    # Concatenate the upsampled features with the skip-connection features,
    # then apply a residual stack of convolutions (V-Net style).
    x = tf.concat((layer_input, fine_grained_features), axis=-1)
    n_channels = get_num_channels(layer_input)
    for i in range(num_convolutions):
        x = conv_3d(inputs=x,
                    filter_size=self.k_size,
                    num_filters=n_channels,
                    layer_name='conv_' + str(i + 1),
                    add_batch_norm=self.conf.use_BN,
                    is_train=self.is_training)
        if i == num_convolutions - 1:
            x = x + layer_input  # residual connection
        x = self.act_fcn(x, name='prelu_' + str(i + 1))
        x = tf.nn.dropout(x, self.keep_prob)
    return x
def __init__(self, input_shape, lr=1e-3, folder=None):
    super(ConvRewardNet, self).__init__(input_shape, lr, folder)
    self.input_shape = input_shape
    # simple net with: 2D convolutional layer -> activation layer -> fully connected layer
    self.conv = nn.Conv2d(in_channels=get_num_channels(), out_channels=64, kernel_size=2)
    o = conv_output_size(input_shape[1], 2, 0, 1)
    self.fc = nn.Linear(64 * o * o + 1, 1)
    self.step_weight = nn.Linear(1, 1)
    # regularization
    self.optimizer = optim.Adam(self.parameters(), lr=lr)
    self.lambda_abs_rewards = 10**-4  # penalty for rewards regularization
def conv_block_down(self, layer_input, num_convolutions):
    x = layer_input
    n_channels = get_num_channels(x)
    if n_channels == 1:
        n_channels = self.conf.start_channel_num
    for i in range(num_convolutions):
        x = conv_3d(inputs=x,
                    filter_size=self.k_size,
                    num_filters=n_channels,
                    layer_name='conv_' + str(i + 1),
                    add_batch_norm=self.conf.use_BN,
                    is_train=self.is_training)
        if i == num_convolutions - 1:
            x = x + layer_input  # residual connection
        x = self.act_fcn(x, name='prelu_' + str(i + 1))
        x = tf.nn.dropout(x, self.keep_prob)
    return x
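# Note on the residual add above: when the block input is the raw volume
# with a single channel while the block output has start_channel_num
# channels, `x + layer_input` still works because TensorFlow broadcasts
# the trailing channel dimension of size 1. A minimal sketch (TF 1.x);
# the shapes are illustrative assumptions.
def _residual_broadcast_example():
    a = tf.zeros([2, 8, 8, 8, 16])  # block output, 16 channels
    b = tf.zeros([2, 8, 8, 8, 1])   # single-channel block input
    return a + b                    # broadcasts to shape (2, 8, 8, 8, 16)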