Exemple #1
0
    def build_img_G(self):
        """Build the fully-connected image generator.

        The model has two fixed-batch-size inputs: an audio embedding of
        width ``self.audio_emb_dim`` and a noise vector of width
        ``self.noise_dim``. They are concatenated and passed through three
        Dense -> LeakyReLU -> BatchNormalization stages (256, 512 and 1024
        units). A sigmoid Dense layer then produces
        ``np.prod(self.img_shape)`` values, which are reshaped to
        ``self.img_shape`` by the layer named ``'generated_img'``. The
        generated image is finally passed through ``Encoding_layer``.

        Returns:
            A ``Model`` whose inputs are ``[embedding, noise]`` and whose
            output is the ``Encoding_layer`` result, not the image itself.
        """
        embedding_in = Input(batch_shape=(self.batch_size, self.audio_emb_dim))
        noise_in = Input(batch_shape=(self.batch_size, self.noise_dim))

        hidden = Concatenate()([embedding_in, noise_in])

        # Three identical stages that only differ in width.
        for units in (256, 512, 1024):
            hidden = Dense(units)(hidden)
            hidden = LeakyReLU(alpha=0.2)(hidden)
            hidden = BatchNormalization(momentum=0.8)(hidden)

        # Sigmoid keeps every generated pixel value in [0, 1].
        flat_pixels = Dense(np.prod(self.img_shape), activation='sigmoid')(hidden)
        generated = Reshape(self.img_shape, name='generated_img')(flat_pixels)

        # NOTE(review): Encoding_layer is defined elsewhere; presumably it
        # converts the image into an audio signal -- confirm.
        return Model(inputs=[embedding_in, noise_in],
                     outputs=Encoding_layer()(generated))
Exemple #2
0
    def build_img_G_conv_full(self):
        """Build the convolutional image generator.

        The audio embedding (width ``self.audio_emb_dim``) and the noise
        vector (width ``self.noise_dim``) are concatenated, projected by a
        ReLU Dense layer to ``128 * 7 * 7`` values and reshaped to a
        ``(7, 7, 128)`` feature map. Two UpSampling2D -> Conv2D(5x5) ->
        BatchNormalization -> ReLU stages follow, with 128 and 64 filters.
        A final 3x3 convolution with ``self.channels`` filters and a sigmoid
        activation (layer name ``'generated_img'``) produces the image,
        which is then passed through ``Encoding_layer``.

        Returns:
            A ``Model`` whose inputs are ``[embedding, noise]`` and whose
            output is the ``Encoding_layer`` result, not the image itself.
        """
        embedding_in = Input(batch_shape=(self.batch_size, self.audio_emb_dim))
        noise_in = Input(batch_shape=(self.batch_size, self.noise_dim))

        latent = Concatenate()([embedding_in, noise_in])
        feature_map = Dense(128*7*7, activation='relu')(latent)
        feature_map = Reshape((7, 7, 128))(feature_map)

        # NOTE(review): UpSampling2D() is used with its default factor;
        # assuming that is 2x, the 7x7 map grows to 14x14 and then 28x28.
        for filters in (128, 64):
            feature_map = UpSampling2D()(feature_map)
            feature_map = Conv2D(filters, kernel_size=5, padding="same")(feature_map)
            feature_map = BatchNormalization(momentum=0.8)(feature_map)
            feature_map = Activation("relu")(feature_map)

        pixels = Conv2D(self.channels, kernel_size=3, padding="same")(feature_map)
        generated = Activation(activation='sigmoid', name='generated_img')(pixels)

        # NOTE(review): Encoding_layer is defined elsewhere; presumably it
        # converts the image into an audio signal -- confirm.
        return Model(inputs=[embedding_in, noise_in],
                     outputs=Encoding_layer()(generated))
Exemple #3
0
    def build_audio_encoder(self):
        """Build a model that applies only ``Encoding_layer`` to an image batch.

        The input (named ``'img_input'``) has the fixed batch shape
        ``(batch_size, img_rows, img_cols, channels)``; the single layer is
        an ``Encoding_layer`` named ``'vOICe'``.

        Returns:
            A ``Model`` mapping the image batch to the ``Encoding_layer``
            output.
        """
        image_batch_shape = (self.batch_size, self.img_rows,
                             self.img_cols, self.channels)
        images = Input(batch_shape=image_batch_shape, name='img_input')

        # NOTE(review): Encoding_layer is defined elsewhere; presumably it
        # turns the image into an audio signal -- confirm.
        encoded = Encoding_layer(name='vOICe')(images)

        return Model(inputs=images, outputs=encoded)
    def build_audio_C(self):
        """Build the classifier that operates on the encoded image.

        Pipeline, in order:

        1. ``Encoding_layer`` (``'vOICe'``) applied to the image batch.
        2. ``logMelSpectrogram`` (``'logSpectrogram'``) applied to that output.
        3. Four convolutional stages of 3x3 ReLU convolutions (64, 128,
           256x2, 512x2 filters), each closed by a 2x2 max-pooling with
           stride 2.
        4. Flatten, a 4096-unit ReLU Dense layer (``'fc1'``), a ReLU Dense
           layer of width ``self.audio_emb_dim`` (``'embeddings'``) and a
           softmax Dense layer over ``self.classes`` (``'prediction'``).

        Returns:
            A ``Model`` mapping the image batch to the softmax predictions.
            The ``'embeddings'`` layer is part of the graph but is not a
            model output.
        """
        conv_kwargs = dict(strides=(1, 1), activation='relu', padding='same')
        pool_kwargs = dict(strides=(2, 2), padding='same')

        # (filters, names of the conv layers in the stage, pooling layer name)
        stages = (
            (64, ('conv1',), 'pool1'),
            (128, ('conv2',), 'pool2'),
            (256, ('conv3/conv3_1', 'conv3/conv3_2'), 'pool3'),
            (512, ('conv4/conv4_1', 'conv4/conv4_2'), 'pool4'),
        )

        image_batch_shape = (self.batch_size, self.img_rows,
                             self.img_cols, self.channels)
        images = Input(batch_shape=image_batch_shape, name='img_input')

        # NOTE(review): both layers below are defined elsewhere; presumably
        # image -> audio signal -> log-mel spectrogram -- confirm.
        encoded = Encoding_layer(name='vOICe')(images)
        features = logMelSpectrogram(name='logSpectrogram')(encoded)

        for filters, conv_names, pool_name in stages:
            for conv_name in conv_names:
                features = Conv2D(filters, (3, 3), name=conv_name,
                                  **conv_kwargs)(features)
            features = MaxPooling2D((2, 2), name=pool_name,
                                    **pool_kwargs)(features)

        flat = Flatten(name='flatten_')(features)
        hidden = Dense(4096, activation='relu', name='fc1')(flat)
        embeddings = Dense(self.audio_emb_dim, activation='relu',
                           name='embeddings')(hidden)
        class_probs = Dense(self.classes, activation='softmax',
                            name='prediction')(embeddings)

        return Model(inputs=images, outputs=class_probs)