コード例 #1
0
    def prediction(self):
        """Build one focus-conditioned embedding network per speaker slot.

        For each of the ``self.S`` slots, the focus vector selected by
        ``self.I[:, slot]`` is repeated along the second-to-last axis of
        ``self.X`` and concatenated to ``self.X`` on the last axis.  That
        input is passed through ``self.nb_layers`` BLSTM layers, a Conv1D
        layer, a Reshape to ``[B, shape(X)[1], F, embedding_size]`` and
        ``Normalize(3)``.

        Returns:
            list: the ``f_props`` output of each slot, in slot order.
        """
        # Focus network
        input_shape = tf.shape(self.X)
        outputs = []

        for slot in range(self.S):
            # Focus vector of the speaker assigned to this slot: [B, focus_dim]
            focus = tf.gather(self.speaker_focus_vector, self.I[:, slot])

            # Insert a singleton axis, then repeat the vector along the
            # second-to-last axis of X so both can be concatenated.
            focus = tf.reshape(focus, [-1, 1, self.focus_dim])
            focus = tf.tile(focus, [1, tf.shape(self.X)[-2], 1])

            # Every slot owns its own layer stack; names carry the slot index.
            stack = []
            for depth in range(self.nb_layers):
                stack.append(
                    BLSTM(self.layer_size,
                          'BLSTM_{}_{}'.format(slot, depth)))

            stack.extend([
                Conv1D([1, self.layer_size, self.embedding_size * self.F]),
                Reshape([self.B, input_shape[1], self.F,
                         self.embedding_size]),
                Normalize(3),
            ])

            conditioned = tf.concat([self.X, focus], -1)
            outputs.append(f_props(stack, conditioned))

        return outputs
コード例 #2
0
    def prediction(self):
        """Build the BLSTM embedding network on top of ``self.X``.

        Side effect: stores ``1.0 + self.y`` in ``self.true_masks``.

        The input goes through ``self.nb_layers`` BLSTM layers, a Conv1D
        layer and a Reshape to ``[B, shape(X)[1], F, embedding_size]``;
        ``Normalize(3)`` is appended when ``self.normalize`` is truthy.

        Returns:
            The output of ``f_props`` applied to the layer stack.
        """
        # L41 network
        input_shape = tf.shape(self.X)
        self.true_masks = 1.0 + self.y
        net_input = tf.identity(self.X)

        stack = []
        for depth in range(self.nb_layers):
            stack.append(BLSTM(self.layer_size, 'BLSTM_{}'.format(depth)))

        stack.append(
            Conv1D([1, self.layer_size, self.embedding_size * self.F]))
        stack.append(
            Reshape([self.B, input_shape[1], self.F, self.embedding_size]))

        # Optional normalization layer at the very end of the stack
        if self.normalize:
            stack.append(Normalize(3))

        return f_props(stack, net_input)
コード例 #3
0
    def prediction(self):
        """Build the dilated-convolution + BLSTM embedding network.

        Side effects: stores ``1.0 + self.y`` in ``self.true_masks``; when
        ``self.normalize_input == '01'`` also stores the per-example minimum
        and maximum in ``self.min_`` / ``self.max_``.

        Pipeline:
            1. Optional ``tf.abs`` of the input (``self.abs_input``).
            2. Optional input scaling over axes 1 and 2: min/max scaling
               (``'01'``) or mean/std standardization (``'meanstd'``).
            3. Reshape to ``[-1, 80, 256, 1]`` -- the sizes 80 and 256 are
               hard-coded here (presumably frames x frequency bins; confirm
               against the caller).
            4. Twelve 128-filter ``conv2d`` layers with growing dilation
               rates, then one 4-filter ``conv2d``.
            5. Three 400-unit BLSTM layers and a Conv1D projection.

        Returns:
            Tensor reshaped to ``[self.B, 80, self.F, self.embedding_size]``,
            L2-normalized along axis 3 when ``self.normalize`` is truthy.
        """
        # L41 network
        self.true_masks = 1.0 + self.y

        X_in = tf.identity(self.X)
        if self.abs_input:
            X_in = tf.abs(X_in)

        if self.normalize_input == '01':
            self.min_ = tf.reduce_min(X_in, axis=[1, 2], keep_dims=True)
            self.max_ = tf.reduce_max(X_in, axis=[1, 2], keep_dims=True)
            X_in = (X_in - self.min_) / (self.max_ - self.min_)
        elif self.normalize_input == 'meanstd':
            mean, var = tf.nn.moments(X_in, axes=[1, 2], keep_dims=True)
            X_in = (X_in - mean) / tf.sqrt(var)

        f = 128
        X_in = tf.expand_dims(X_in, 3)
        X_in = tf.reshape(X_in, [-1, 80, 256, 1])

        # (kernel size, dilation rate) of each 128-filter layer, in order.
        # The first group dilates only along axis 1, the second along both
        # spatial axes.
        conv_specs = [
            ([1, 7], [1, 1]),
            ([7, 1], [1, 1]),
            ([5, 5], [4, 1]),
            ([5, 5], [8, 1]),
            ([5, 5], [16, 1]),
            ([5, 5], [32, 1]),
            ([5, 5], [1, 1]),
            ([5, 5], [2, 2]),
            ([5, 5], [4, 4]),
            ([5, 5], [8, 8]),
            ([5, 5], [16, 16]),
            ([5, 5], [32, 32]),
        ]

        y = X_in
        for kernel, rate in conv_specs:
            y = tf.contrib.layers.conv2d(y, f, kernel, rate=rate)
        y = tf.contrib.layers.conv2d(y, 4, [5, 5], rate=[1, 1])

        # Fold the 4 output channels into the last axis for the BLSTMs
        y = tf.reshape(y, [self.B, 80, 256 * 4])

        y = BLSTM(400, 'BLSTM_1').f_prop(y)
        y = BLSTM(400, 'BLSTM_2').f_prop(y)
        y = BLSTM(400, 'BLSTM_3').f_prop(y)
        y = Conv1D([1, 400, self.embedding_size * self.F]).f_prop(y)
        y = tf.reshape(y, [self.B, 80, self.F, self.embedding_size])

        if self.normalize:
            # Normalize each embedding vector (axis 3) to unit L2 norm.
            # The previous `y += tf.nn.l2_normalize(y)` added a normalized
            # copy to y instead of replacing it and gave no axis; the other
            # prediction networks in this file normalize along axis 3.
            y = tf.nn.l2_normalize(y, 3)

        return y
コード例 #4
0
    def enhance(self):
        """Build the enhancement network that refines the separated signals.

        ``self.separate`` (viewed as ``[B, S, T, F]``) is concatenated with
        the input ``self.X_input`` repeated S times, optionally standardized,
        and passed through ``nb_layers_enhance`` BLSTM layers plus a Conv1D
        projection to ``F`` outputs.  The result is turned into masks with
        the configured nonlinearity and multiplied with the input.

        Side effects:
            * ``self.enhanced_masks``: masks, laid out ``[B, TF, S]``.
            * ``self.cost_in``: masked input, laid out ``[B, TF, S]``.
            * ``self.separated``: masked input, laid out ``[B, S, TF]``.
            * two image summaries, ``mask/predicted/enhanced`` and
              ``mask/predicted/enhanced_soft``.

        Returns:
            The masked input reshaped to ``[B * S, -1, F, 1]``.
        """
        # [B, S, T, F]
        sep = tf.reshape(self.separate, [self.B, self.S, -1, self.F])

        # X is [B, T, F].  A new axis is inserted at position 1 and tiled
        # there, so the S copies of an example are adjacent:
        # [a, b, c] -> [a, a, b, b, c, c], not [a, b, c, a, b, c].
        mixture = tf.expand_dims(self.X_input, 1)
        mixture = tf.tile(mixture, [1, self.S, 1, 1])
        mixture = tf.reshape(mixture, [self.B, self.S, -1, self.F])

        # Stack separated signal and mixture on the last axis, then fold the
        # source axis into the batch axis.
        net_input = tf.concat([sep, mixture], axis=3)
        net_input = tf.reshape(net_input, [self.B * self.S, -1, 2 * self.F])

        if self.args['normalize_enhance']:
            # Standardize over axes 1 and 2 of every folded example
            mean, var = tf.nn.moments(net_input, axes=[1, 2], keep_dims=True)
            centered = net_input - mean
            net_input = centered / tf.sqrt(var)

        stack = []
        for depth in range(self.args['nb_layers_enhance']):
            stack.append(
                BLSTM(self.args['layer_size_enhance'],
                      name='BLSTM_{}'.format(depth),
                      drop_val=self.args["recurrent_dropout_enhance"]))
        stack.append(Conv1D([1, self.args['layer_size_enhance'], self.F]))

        masks = f_props(stack, net_input)
        masks = tf.reshape(masks, [self.B, self.S, -1])  # [B, S, TF]

        raw_image = tf.reshape(masks, [self.B * self.S, -1, self.F, 1])
        tf.summary.image('mask/predicted/enhanced', raw_image)

        masks = tf.transpose(masks, [0, 2, 1])  # [B, TF, S]

        # Both nonlinearities are called with their default arguments;
        # any other setting leaves the raw network output untouched.
        nonlinearity = self.args['nonlinearity']
        if nonlinearity == 'softmax':
            masks = tf.nn.softmax(masks)
        elif nonlinearity == 'tanh':
            masks = tf.nn.tanh(masks)

        self.enhanced_masks = tf.identity(masks, name='enhanced_masks')

        soft_image = tf.reshape(tf.transpose(masks, [0, 2, 1]),
                                [self.B * self.S, -1, self.F, 1])
        tf.summary.image('mask/predicted/enhanced_soft', soft_image)

        # Apply the masks: the input is flattened to [B, TF, 1] so that it
        # broadcasts across the source axis of the [B, TF, S] masks.
        flat_input = tf.reshape(self.X_input, [self.B, -1, 1])
        enhanced = masks * flat_input

        self.cost_in = enhanced

        enhanced = tf.transpose(enhanced, [0, 2, 1])  # [B, S, TF]
        self.separated = enhanced

        return tf.reshape(enhanced, [self.B * self.S, -1, self.F, 1])
コード例 #5
0
	def prediction(self):
		"""Build the deep-clustering embedding network on top of ``self.X``.

		The input goes through ``self.nb_layers`` BLSTM layers, a Conv1D
		layer, a Reshape to ``[B, shape(X)[1], F, embedding_size]`` and
		``Normalize(3)``.

		Returns:
			The output of ``f_props`` applied to the layer stack.
		"""
		# DPCL network
		input_shape = tf.shape(self.X)

		stack = []
		for depth in range(self.nb_layers):
			stack.append(BLSTM(self.layer_size, 'BLSTM_{}'.format(depth)))

		# Projection, reshape and normalization applied after the BLSTMs
		stack.extend([
			Conv1D([1, self.layer_size, self.embedding_size * self.F]),
			Reshape([self.B, input_shape[1], self.F, self.embedding_size]),
			Normalize(3),
		])

		return f_props(stack, self.X)
コード例 #6
0
    def enhance(self):
        """Build the enhancement network that refines the separated signals.

        ``self.separate`` (viewed as ``[B, S, T, F]``) is min/max scaled over
        axes 1 and 2, concatenated with the input ``self.X`` repeated S
        times, and passed through ``nb_layers_enhance`` BLSTM layers and a
        dense layer with ``F`` units.

        With the ``'softmax'`` or ``'tanh'`` nonlinearity the activated
        output is multiplied with the flattened input; with any other
        setting the raw network output is used unchanged.

        Side effect: stores the ``[B, TF, S]`` result in ``self.cost_in``.

        Returns:
            The result reshaped to ``[B * S, -1, F, 1]``.
        """
        # [B, S, T, F]
        sep = tf.reshape(self.separate, [self.B, self.S, -1, self.F])

        # Min/max scaling; the extrema are reduced over axes 1 and 2
        low = tf.reduce_min(sep, axis=[1, 2], keep_dims=True)
        high = tf.reduce_max(sep, axis=[1, 2], keep_dims=True)
        sep = (sep - low) / (high - low)

        # X is [B, T, F].  A new axis is inserted at position 1 and tiled
        # there, so the S copies of an example are adjacent:
        # [a, b, c] -> [a, a, b, b, c, c], not [a, b, c, a, b, c].
        mixture = tf.expand_dims(self.X, 1)
        mixture = tf.tile(mixture, [1, self.S, 1, 1])
        mixture = tf.reshape(mixture, [self.B, self.S, -1, self.F])

        # Stack scaled separated signal and mixture on the last axis, then
        # fold the source axis into the batch axis.
        net_input = tf.concat([sep, mixture], axis=3)
        net_input = tf.reshape(net_input, [self.B * self.S, -1, 2 * self.F])

        stack = []
        for depth in range(self.args['nb_layers_enhance']):
            stack.append(
                BLSTM(self.args['layer_size_enhance'],
                      'BLSTM_{}'.format(depth)))

        out = f_props(stack, net_input)
        out = tf.layers.dense(out, self.F)

        out = tf.reshape(out, [self.B, self.S, -1])  # [B, S, TF]
        out = tf.transpose(out, [0, 2, 1])  # [B, TF, S]

        nonlinearity = self.args['nonlinearity']
        if nonlinearity == 'softmax':
            activated = tf.nn.softmax(out)
        elif nonlinearity == 'tanh':
            activated = tf.nn.tanh(out)
        else:
            activated = None

        if activated is not None:
            # Apply the masks: the input is flattened to [B, TF, 1] so that
            # it broadcasts across the source axis of the [B, TF, S] masks.
            out = activated * tf.reshape(self.X, [self.B, -1, 1])

        self.cost_in = out

        out = tf.transpose(out, [0, 2, 1])  # [B, S, TF]
        return tf.reshape(out, [self.B * self.S, -1, self.F, 1])
コード例 #7
0
    def prediction(self):
        """Build the BLSTM embedding network with recurrent dropout.

        Side effect: stores ``1.0 + self.y`` in ``self.true_masks``.

        The input goes through ``self.nb_layers`` BLSTM layers (each created
        with ``drop_val=self.rdropout``), a Conv1D layer and a Reshape to
        ``[B, shape(X)[1], F, embedding_size]``.  No normalization layer is
        appended here.

        Returns:
            The output of ``f_props`` applied to the layer stack.
        """
        # L41 network
        input_shape = tf.shape(self.X)
        self.true_masks = 1.0 + self.y
        net_input = tf.identity(self.X)

        stack = []
        for depth in range(self.nb_layers):
            stack.append(
                BLSTM(self.layer_size,
                      drop_val=self.rdropout,
                      name='BLSTM_{}'.format(depth)))

        stack.append(
            Conv1D([1, self.layer_size, self.embedding_size * self.F]))
        stack.append(
            Reshape([self.B, input_shape[1], self.F, self.embedding_size]))

        return f_props(stack, net_input)
コード例 #8
0
    def prediction(self):
        """Build the BLSTM embedding network with optional input scaling.

        Side effects: stores ``1.0 + self.y`` in ``self.true_masks``; when
        ``self.normalize_input == '01'`` also stores the per-example minimum
        and maximum in ``self.min_`` / ``self.max_``.

        Input preparation:
            * ``tf.abs`` when ``self.abs_input`` is truthy;
            * ``'01'``: min/max scaling over axes 1 and 2;
            * ``'meanstd'``: mean/std standardization over axes 1 and 2;
            * any other value: input used as is.

        The prepared input goes through ``self.nb_layers`` BLSTM layers, a
        Conv1D layer and a Reshape to ``[B, shape(X)[1], F, embedding_size]``;
        ``Normalize(3)`` is appended when ``self.normalize`` is truthy.

        Returns:
            The output of ``f_props`` applied to the layer stack.
        """
        # L41 network
        input_shape = tf.shape(self.X)
        self.true_masks = 1.0 + self.y

        net_input = tf.identity(self.X)
        if self.abs_input:
            net_input = tf.abs(net_input)

        scaling = self.normalize_input
        if scaling == '01':
            # Extrema are kept on the instance after scaling
            self.min_ = tf.reduce_min(net_input, axis=[1, 2], keep_dims=True)
            self.max_ = tf.reduce_max(net_input, axis=[1, 2], keep_dims=True)
            shifted = net_input - self.min_
            net_input = shifted / (self.max_ - self.min_)
        elif scaling == 'meanstd':
            mean, var = tf.nn.moments(net_input, axes=[1, 2], keep_dims=True)
            centered = net_input - mean
            net_input = centered / tf.sqrt(var)

        stack = []
        for depth in range(self.nb_layers):
            stack.append(BLSTM(self.layer_size, 'BLSTM_{}'.format(depth)))

        stack.append(
            Conv1D([1, self.layer_size, self.embedding_size * self.F]))
        stack.append(
            Reshape([self.B, input_shape[1], self.F, self.embedding_size]))

        # Optional normalization layer at the very end of the stack
        if self.normalize:
            stack.append(Normalize(3))

        return f_props(stack, net_input)