def create_model(self):
            """Assemble the CNN classifier and store it on ``self.model``.

            Everything the network needs (training data, input shape, class
            list) is read from ``self.network_instance``. The finished model is
            printed with ``summary()``; it is not compiled here.
            """
            net = self.network_instance

            # Fit the normalisation statistics on the training inputs only; the
            # label of every (input, label) pair is dropped by the lambda.
            normalizer = preprocessing.Normalization()
            normalizer.adapt(net.train_data.map(lambda x, _: x))

            # Original author's note gives (124, 129, 1) as the input shape.
            model_input = layers.Input(shape=net.input_shape)

            # Thread a single tensor through the layer stack.
            x = preprocessing.Resizing(32, 32)(model_input)
            x = normalizer(x)
            x = layers.Conv2D(32, 3, activation='relu')(x)
            x = layers.MaxPooling2D()(x)
            x = layers.Conv2D(64, 3, activation='relu')(x)
            x = layers.MaxPooling2D()(x)
            x = layers.Dropout(0.25)(x)
            x = layers.Flatten()(x)
            x = layers.Dense(128, activation='relu')(x)
            x = layers.Dropout(0.5)(x)

            # One output unit per class; no activation is applied here.
            class_scores = layers.Dense(len(net.classes))(x)

            self.model = tf.keras.Model(inputs=model_input,
                                        outputs=class_scores)
            self.model.summary()
    def __call__(self, inputs, **kwarg):
        """Map raw waveforms to class probabilities with a small CNN.

        The waveform is converted to a magnitude STFT, given a trailing
        channel axis, resized to 32x32, normalised and passed through two
        convolutions and a dense head ending in a softmax layer.

        Args:
            inputs: Waveform tensor handed to ``tf.signal.stft``.
            **kwarg: ``num_of_classes`` (required) is the width of the softmax
                output. ``input_shape`` is still accepted so existing callers
                keep working, but it is not used.

        Returns:
            The output tensor of the final softmax ``Dense`` layer.

        Raises:
            TypeError: If ``num_of_classes`` is not supplied.
        """
        # Fail early with a clear message. Previously a missing key only
        # surfaced as an UnboundLocalError after the whole stack had run.
        if 'num_of_classes' not in kwarg:
            raise TypeError(
                "__call__() missing required keyword argument "
                "'num_of_classes'")
        number_of_classes = kwarg['num_of_classes']

        spectrogram = tf.signal.stft(inputs, frame_length=255, frame_step=128)
        spectrogram = tf.abs(spectrogram)
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.stack(spectrogram)
        print("\n******************************\n")
        print(spectrogram.shape, spectrogram)
        print("\n******************************\n")

        # NOTE(review): every layer below is instantiated inside this method,
        # so each invocation creates a brand-new set of layer objects.
        x = preprocessing.Resizing(32, 32)(spectrogram)
        x = preprocessing.Normalization()(x)
        x = layers.Conv2D(32, 3, activation='relu')(x)
        x = layers.Conv2D(64, 3, activation='relu')(x)
        x = layers.MaxPooling2D()(x)
        x = layers.Dropout(0.25)(x)
        x = layers.Flatten()(x)
        x = layers.Dense(128, activation='relu')(x)
        x = layers.Dropout(0.5)(x)
        answer = layers.Dense(number_of_classes, activation='softmax')(x)

        return answer
def return_resizing_data(data):
    """Resize ``data`` to 32x32 and return it without a channel axis.

    A trailing axis is appended so the ``Resizing`` layer can be applied, and
    the same axis is squeezed away again before returning.
    """
    with_channel = np.expand_dims(data, axis=-1)
    resized = preprocessing.Resizing(32, 32)(with_channel)
    return np.squeeze(resized, axis=-1)
def MinimumCNN(spectrogram_ds, num_labels=2):
    """Build and compile a small CNN classifier for spectrogram inputs.

    Args:
        spectrogram_ds: Dataset of spectrogram tensors (no labels). The first
            element supplies the model's input shape and the whole dataset is
            used to adapt the normalisation layer.
        num_labels: Number of output units. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        A compiled ``Sequential`` model that outputs logits.

    Raises:
        ValueError: If ``spectrogram_ds`` yields no elements.
    """
    input_shape = None
    for spec in spectrogram_ds.take(1):
        input_shape = spec.shape
    if input_shape is None:
        # Without this guard an empty dataset left ``input_shape`` unbound.
        raise ValueError(
            'spectrogram_ds is empty; cannot infer the model input shape')
    print('Input shape:', input_shape)

    # The elements are already bare tensors, so the dataset can be adapted
    # directly; the former identity ``map`` added nothing.
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(spectrogram_ds)

    model = models.Sequential([
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        norm_layer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        # No activation: the loss below is configured with from_logits=True.
        layers.Dense(num_labels),
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model
Example #5
0
def build_cnn(resizing=(32, 32),
              conv2d1=32,
              conv2d2=64,
              dropout1=0.25,
              dropout2=0.5,
              dense=128,
              learning_rate=0.0001):
    """Build and compile a CNN with tunable layer sizes.

    The input shape and the number of output units are taken from the
    module-level ``X_train`` and ``y_train`` rather than from arguments.

    Args:
        resizing: ``(height, width)`` passed to the ``Resizing`` layer.
        conv2d1: Filter count of the first convolution.
        conv2d2: Filter count of the second convolution.
        dropout1: Dropout rate after pooling.
        dropout2: Dropout rate after the dense layer.
        dense: Width of the hidden dense layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model with a softmax output.
    """
    from tensorflow import keras
    from tensorflow.keras.layers.experimental import preprocessing

    data_shape = X_train.shape
    input_shape = (data_shape[1], data_shape[2], data_shape[3])
    # One output unit per distinct label value.
    outputs = len(set(y_train))

    model = keras.Sequential([
        keras.layers.Input(shape=input_shape),
        preprocessing.Resizing(resizing[0], resizing[1]),
        keras.layers.Conv2D(conv2d1, 3, activation='relu'),
        keras.layers.Conv2D(conv2d2, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(dropout1),
        keras.layers.Flatten(),
        keras.layers.Dense(dense, activation='relu'),
        keras.layers.Dropout(dropout2),
        keras.layers.Dense(outputs, activation='softmax'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
Example #6
0
    def __call__(self, inputs, **kwarg):
        """Extract convolutional features from raw waveforms.

        The waveform becomes a magnitude STFT with a trailing channel axis,
        is resized to 32x32 and batch-normalised, then runs through four
        identical stages (3x3 'same' convolution, 2x2 'same' max-pooling,
        batch normalisation) with 64, 128, 192 and 256 filters.

        Returns:
            The output tensor of the last batch-normalisation layer.
        """
        stft = tf.signal.stft(inputs, frame_length=255, frame_step=128)
        features = tf.expand_dims(tf.abs(stft), -1)

        features = preprocessing.Resizing(32, 32)(features)
        features = layers.BatchNormalization()(features)

        for n_filters in (64, 128, 192, 256):
            features = layers.Conv2D(n_filters, (3, 3),
                                     padding='same',
                                     activation='relu')(features)
            features = layers.MaxPooling2D((2, 2), padding='same')(features)
            features = layers.BatchNormalization()(features)

        return features
    def call(self, inputs, **kwargs):
        """Resize ``inputs`` by one scale factor derived from the side limits.

        Height and width are read from axes 1 and 2 of ``inputs``. The scale
        is chosen as follows:

        * ``min_side`` and ``max_side`` both set: the smaller of
          ``max_side / longer side`` and ``min_side / shorter side``;
        * only ``min_side`` set: ``min_side / shorter side``;
        * otherwise: ``max_side / longer side``.

        Both dimensions are multiplied by the scale and cast to int32.

        Returns:
            The output of a ``Resizing`` layer created for the new size with
            ``self.interpolation``.
        """
        dims = tf.shape(inputs)
        height = tf.cast(dims[1], tf.float32)
        width = tf.cast(dims[2], tf.float32)

        has_min = self.min_side is not None
        has_max = has_min and self.max_side is not None

        if has_max:
            shorter = tf.minimum(width, height)
            min_limit = tf.cast(self.min_side, tf.float32)
            longer = tf.maximum(width, height)
            max_limit = tf.cast(self.max_side, tf.float32)
            scale = tf.minimum(max_limit / longer, min_limit / shorter)
        elif has_min:
            shorter = tf.minimum(width, height)
            min_limit = tf.cast(self.min_side, tf.float32)
            scale = min_limit / shorter
        else:
            # Reached whenever min_side is None; max_side is used as-is.
            longer = tf.maximum(width, height)
            max_limit = tf.cast(self.max_side, tf.float32)
            scale = max_limit / longer

        target_height = tf.cast(height * scale, tf.int32)
        target_width = tf.cast(width * scale, tf.int32)

        resizer = preprocessing.Resizing(height=target_height,
                                         width=target_width,
                                         interpolation=self.interpolation)
        return resizer(inputs)
Example #8
0
def build_model(train_ds):
    """
    Build the ML model by stacking the desired layers.

    The returned tf.keras model is not compiled by this function.

    :param train_ds: The training dataset used for normalisation and for
        determining the input shape
    :return: The assembled Sequential model

    """

    # The model's input shape is the first element's shape without its
    # leading axis.
    for features, _ in train_ds.take(1):
        input_shape = features.shape[1:]
    print(f'Input shape: {input_shape}')

    # Adapt the normalisation statistics on the first 30 dataset elements,
    # keeping only the inputs of each (input, label) pair.
    normalizer = preprocessing.Normalization()
    normalizer.adapt(train_ds.take(30).map(lambda x, _: x))

    # Model layout: resize, normalise, one convolution, dense head.
    model = models.Sequential()
    model.add(layers.InputLayer(input_shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    model.add(normalizer)
    model.add(layers.Conv2D(60, 3, activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(30, activation='relu'))
    # Two output units with no activation.
    model.add(layers.Dense(2))
    return model
def build_cnn(documents, classes, flat=False):
    """Build and compile a small CNN sized from the supplied data.

    Args:
        documents: Array-like whose ``shape[1:4]`` gives the model input shape.
        classes: Iterable of labels; the number of distinct values sets the
            width of the softmax output.
        flat: Accepted but not used by this function.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    from tensorflow import keras
    from tensorflow.keras.layers.experimental import preprocessing

    doc_shape = documents.shape
    input_shape = (doc_shape[1], doc_shape[2], doc_shape[3])
    outputs = len(set(classes))

    model = keras.Sequential([
        keras.layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        keras.layers.Conv2D(32, 3, activation='relu'),
        keras.layers.Conv2D(64, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(outputs, activation='softmax'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
 def _get_data_augment_layer(self):
     """Return a Sequential model that applies random image augmentations.

     The pipeline flips horizontally, rotates, zooms and changes the height
     at random, then resizes to ``self.IMAGE_DIM`` (height first, width
     second) so the output size is fixed again.
     """
     augmentation_steps = [
         KerasPreprocessing.RandomFlip('horizontal'),
         KerasPreprocessing.RandomRotation(0.2),
         KerasPreprocessing.RandomZoom(0.2),
         KerasPreprocessing.RandomHeight(0.2),
         KerasPreprocessing.Resizing(height=self.IMAGE_DIM[0],
                                     width=self.IMAGE_DIM[1]),
     ]
     return tf.keras.models.Sequential(augmentation_steps)
    def stft_function(self, input_x):
        """Convert a waveform into a batch-normalised magnitude spectrogram.

        Args:
            input_x: Waveform tensor handed to ``tf.signal.stft``.

        Returns:
            The magnitude STFT with a trailing channel axis, resized with
            ``Resizing(RESIZE_X, RESIZE_Y)`` when ``self.resize_bool`` is
            truthy, then passed through a ``BatchNormalization`` layer.
        """
        x = tf.signal.stft(input_x, frame_length=255, frame_step=128)
        x = tf.abs(x)
        x = tf.expand_dims(x, -1)

        # Plain truthiness test instead of comparing a flag with ``== True``.
        if self.resize_bool:
            x = preprocessing.Resizing(RESIZE_X, RESIZE_Y)(x)

        # NOTE(review): a new BatchNormalization layer is created on every
        # call, so nothing is shared between invocations.
        x = layers.BatchNormalization()(x)

        return x
Example #12
0
def define_model(trial, input_shape, ds_train, num_labels):
    """Build and compile a CNN whose architecture is sampled from ``trial``.

    Args:
        trial: Object providing ``suggest_int`` / ``suggest_float``; it picks
            the number of convolution blocks, their filter counts, the dropout
            rates, the optional pooling layers and the dense width.
        input_shape: Shape passed to the model's ``Input`` layer.
        ds_train: Dataset of ``(input, label)`` pairs; the inputs are used to
            adapt the normalisation layer.
        num_labels: Number of output units (classes).

    Returns:
        A compiled ``Sequential`` model that outputs logits.
    """
    n_layers = trial.suggest_int("n_layers", low=3, high=10, step=1)

    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(ds_train.map(lambda x, _: x))

    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    model.add(norm_layer)

    last_index = n_layers - 1
    for i in range(n_layers):
        filter_size = trial.suggest_int("n_units_l{}".format(i),
                                        low=32,
                                        high=512)
        model.add(
            layers.Conv2D(filter_size, 3, activation='relu', padding='same'))

        dropout_value = trial.suggest_float("dropout_{}".format(i), 0.0, 0.5)
        model.add(layers.Dropout(dropout_value))

        # A max-pooling layer always follows this loop, so the final block
        # never samples an optional one (that would pool twice in a row).
        if i < last_index:
            add_max_pooling = trial.suggest_int("maxpool_{}".format(i), 0, 1)
            if add_max_pooling == 1:
                model.add(layers.MaxPooling2D(padding='same'))

    model.add(layers.MaxPooling2D(padding='same'))

    dropout_value = trial.suggest_float("dropout_beforelast", 0.1, 0.5)
    model.add(layers.Dropout(dropout_value))

    model.add(layers.Flatten())

    dense_size = trial.suggest_int("n_units_last", low=64, high=512)
    model.add(layers.Dense(dense_size, activation='relu'))

    dropout_value = trial.suggest_float("dropout_last", 0.1, 0.5)
    model.add(layers.Dropout(dropout_value))

    # The loss is configured with from_logits=True, so the output layer must
    # emit unbounded logits. The former relu clipped every negative logit to
    # zero before the loss applied its softmax.
    model.add(layers.Dense(num_labels))

    model.summary()

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return model
Example #13
0
    def build(self, hp):
        """Build and compile an EfficientNet-based classifier from ``hp``.

        The hyperparameters select the EfficientNet version (B0-B7), the
        global pooling type and the dropout rate of the rebuilt top. The
        input is optionally passed through ``self.augmentation_model`` and is
        always resized to the image size of the chosen version.

        Returns:
            The model after it has been handed to ``self._compile``.
        """
        if self.input_tensor is None:
            inputs = layers.Input(shape=self.input_shape)
            x = inputs
        else:
            inputs = tf.keras.utils.get_source_inputs(self.input_tensor)
            x = self.input_tensor

        augmenter = self.augmentation_model
        if augmenter:
            # NOTE(review): if the augmenter is neither a HyperModel nor a
            # keras Model, ``augmentation_model`` stays unbound and the call
            # below raises.
            if isinstance(augmenter, hypermodel.HyperModel):
                augmentation_model = augmenter.build(hp)
            elif isinstance(augmenter, keras.models.Model):
                augmentation_model = augmenter

            x = augmentation_model(x)

        # Pick one of the pre-trained EfficientNets as the feature extractor.
        versions = [f"B{i}" for i in range(8)]
        version = hp.Choice("version", versions, default="B0")
        img_size = EFFICIENTNET_IMG_SIZE[version]

        x = preprocessing.Resizing(
            img_size, img_size, interpolation="bilinear")(x)
        backbone = EFFICIENTNET_MODELS[version](include_top=False,
                                                input_tensor=x)

        # Rebuild the top of the network on the backbone's output.
        x = backbone.output

        pooling = hp.Choice("pooling", ["avg", "max"], default="avg")
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

        top_dropout_rate = hp.Float(
            "top_dropout_rate",
            min_value=0.2,
            max_value=0.8,
            step=0.2,
            default=0.2,
        )
        x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
        x = layers.Dense(self.classes, activation="softmax", name="probs")(x)

        model = keras.Model(inputs, x, name="EfficientNet")
        self._compile(model, hp)
        return model
Example #14
0
    def _build_model(self) -> tf.keras.Model:
        """
        Prepare the datasets and build the compiled model for training.

        Besides returning the model, this stores ``_input_shape``,
        ``_train_ds``, ``_val_ds`` and ``_test_ds`` on the instance.
        """
        # Split the samples, then turn each split into a preprocessed dataset.
        splits = split_train_test_validate(self.samples)
        train_ds, val_ds, test_ds = [
            preprocess_dataset(files, self.commands) for files in splits
        ]

        # The model's input shape comes from the first training example,
        # read before any batching is applied.
        first_example = next(iter(train_ds.take(1)))
        self._input_shape = first_example[0].shape

        # Batch train/validation data; the test split is stored unbatched.
        train_ds = train_ds.batch(self.batch_size)
        val_ds = val_ds.batch(self.batch_size)
        self._test_ds = test_ds
        self._train_ds = train_ds.cache().prefetch(AUTOTUNE)
        self._val_ds = val_ds.cache().prefetch(AUTOTUNE)

        num_labels = len(self.commands)

        # Adapt the normaliser on the batched training inputs (labels dropped).
        normalizer = preprocessing.Normalization()
        normalizer.adapt(train_ds.map(lambda x, _: x))

        model = models.Sequential()
        model.add(layers.InputLayer(input_shape=self._input_shape))
        model.add(preprocessing.Resizing(32, 32))
        model.add(normalizer)
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(64, 3, activation="relu"))
        model.add(layers.MaxPooling2D())
        model.add(layers.Dropout(0.25))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation="relu"))
        model.add(layers.Dropout(0.5))
        # Logits output, matching from_logits=True in the loss below.
        model.add(layers.Dense(num_labels))

        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss=loss_fn,
            metrics=["accuracy"],
        )

        return model
Example #15
0
    def basic_CNN(self, data_set, input_shape, num_labels):
        '''
        Build a simple convolutional neural network (CNN) for audio that has
        been converted to spectrogram images. In addition to the convolutional
        stack, the model contains two preprocessing layers:

            A Resizing layer that downsamples the input so the model trains
            faster.
            A Normalization layer that normalises each pixel of the image
            using its mean and standard deviation.

        The Normalization layer's adapt method is called on the supplied
        data first so that the aggregate statistics (mean and standard
        deviation) are computed before the layer is used.

        Parameters
        ----------
        data_set : tf.data.Dataset.from_tensor_slices
            data used to adapt the normalisation layer; elements are
            (input, label) pairs and only the inputs are used
        input_shape : Tensor
            Tensor indicating the shape of the input
        num_labels : int
            number of different labels

        Returns
        -------
        Keras Sequential Model (not compiled by this method)

        '''

        normalizer = preprocessing.Normalization()
        normalizer.adapt(data_set.map(lambda x, _: x))

        model = models.Sequential()
        model.add(layers.Input(shape=input_shape))
        model.add(preprocessing.Resizing(32, 32))
        model.add(normalizer)
        model.add(layers.Conv2D(32, 3, activation='relu'))
        model.add(layers.Conv2D(64, 3, activation='relu'))
        model.add(layers.MaxPooling2D())
        model.add(layers.Dropout(0.25))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation='relu'))
        model.add(layers.Dropout(0.5))
        # One unit per label, no activation.
        model.add(layers.Dense(num_labels))

        model.summary()

        return model
Example #16
0
def create_sequential_model(input_shape, norm_layer, num_labels):
    """Assemble the (uncompiled) Sequential CNN used for experimentation.

    Args:
        input_shape: Shape passed to the ``Input`` layer.
        norm_layer: Currently unused; the normalisation step was disabled by
            the original author while chasing an issue.
        num_labels: Number of units in the final ``Dense`` layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    # NOTE(review): norm_layer is intentionally not added here. The original
    # author was unsure whether it was causing an issue.
    model.add(layers.Conv2D(32, 3, activation='relu'))
    model.add(layers.Conv2D(64, 3, activation='relu'))
    model.add(layers.MaxPooling2D())
    model.add(layers.Dropout(0.25))
    # NOTE(review): no Flatten precedes the dense layers (the author was
    # considering a Reshape instead), so they operate on the last axis of
    # the un-flattened feature map. Confirm this is intended.
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_labels))

    return model
Example #17
0
def build_model(input_shape, spectrogram_ds, num_labels):
    """Build the default (uncompiled) audio CNN.

    Args:
        input_shape: Shape passed to the ``Input`` layer.
        spectrogram_ds: Dataset of ``(spectrogram, label)`` pairs; the
            spectrograms are used to adapt the normalisation layer.
        num_labels: Number of units in the output layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    # Normalise incoming data using statistics of the original spectrograms.
    normalizer = preprocessing.Normalization()
    normalizer.adapt(spectrogram_ds.map(lambda x, _: x))

    # Layer stack follows the original author's reference guide.
    stack = [
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        normalizer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_labels),
    ]
    return models.Sequential(stack)
Example #18
0
def define_model(input_shape, ds_train, num_labels, layer_sizes: List[int],
                 dropouts: List[float]):
    """Build and compile a CNN with a configurable convolution stack.

    Args:
        input_shape: Shape passed to the model's ``Input`` layer.
        ds_train: Dataset of ``(input, label)`` pairs; the inputs are used to
            adapt the normalisation layer.
        num_labels: Number of output units (classes).
        layer_sizes: Filter count for each convolution, in order.
        dropouts: Dropout rate applied after the convolution at the same
            index; must be at least as long as ``layer_sizes``.

    Returns:
        A compiled ``Sequential`` model that outputs logits.

    Raises:
        IndexError: If ``dropouts`` is shorter than ``layer_sizes``.
    """
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(ds_train.map(lambda x, _: x))

    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    model.add(norm_layer)

    for i, n_filters in enumerate(layer_sizes):
        model.add(
            layers.Conv2D(n_filters,
                          kernel_size=3,
                          activation='relu',
                          padding='same'))
        model.add(layers.Dropout(dropouts[i]))

    model.add(layers.MaxPooling2D(padding='same'))

    model.add(layers.Flatten())

    model.add(layers.Dense(128, activation="relu"))

    # The loss is configured with from_logits=True, so the output layer must
    # emit unbounded logits. The former relu clipped every negative logit to
    # zero before the loss applied its softmax.
    model.add(layers.Dense(num_labels))

    model.summary()

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)

    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model
Example #19
0
# Batch the validation data, then cache and prefetch both pipelines.
val_ds = val_ds.batch(batch_size)

train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

# Use the first spectrogram of the dataset to determine the model input shape.
for spectrogram, _ in spectrogram_ds.take(1):
    input_shape = spectrogram.shape
print('Input shape:', input_shape)
# One output unit per entry in ``commands``.
num_labels = len(commands)

# Adapt the normalisation layer on the spectrograms only (labels dropped).
norm_layer = preprocessing.Normalization()
norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))

# Resize -> normalise -> two convolutions -> dense head without activation.
model = models.Sequential([
    layers.Input(shape=input_shape),
    preprocessing.Resizing(32, 32),
    norm_layer,
    layers.Conv2D(32, 3, activation='relu'),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_labels),
])
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
Example #20
0
    # result = Lambda(stft_func, arguments={'frame_size':255, 'delay_size':128})(one_data)
    train_data.append(result)
    print("\r{}th file is done...".format(i + 1), end='')

# Stack the collected per-file results into one array.
train_data = np.array(train_data)

# result = np.expand_dims(train_data, -1)
# Append a trailing channel axis.
result = tf.expand_dims(train_data, -1)
print(result.shape)

# Wrap the tensor in a dataset that yields one example per element.
input_data = tf.data.Dataset.from_tensor_slices(result)
print(input_data)

# Functional-API model with a fixed (504, 127, 1) input.
input_sig = keras.Input(shape=(504, 127, 1))

x = preprocessing.Resizing(32, 32)(input_sig)
print(x.shape)
# NOTE(review): no adapt() call for this Normalization layer is visible in
# this fragment; confirm whether it is adapted elsewhere.
x = preprocessing.Normalization()(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.25)(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# Seven-way softmax output.
answer = layers.Dense(7, activation='softmax')(x)

model = keras.Model(inputs=input_sig, outputs=answer)

model.summary()
class DataLoader():
    """Load the mini speech-commands audio files as spectrogram datasets.

    Construction lists the audio files under ``data/mini_speech_commands``
    (downloading the archive when the directory is missing), shuffles them
    and exposes ``train_set``, ``val_set`` and ``test_set`` pipelines that
    yield ``(spectrogram, label_id)`` pairs.
    """

    # Layer instances shared by the whole class. Within this class they are
    # only referenced from the commented-out lines in ``get_spectrogram``.
    resize = preprocessing.Resizing(32, 32)
    norm = preprocessing.Normalization()

    def __init__(self, n_training=6400, n_test=800) -> None:
        """Discover the audio files and build the three dataset pipelines.

        Args:
            n_training: Maximum number of examples taken for ``train_set``.
            n_test: Maximum number of examples taken for ``test_set``.
        """

        data_dir = pathlib.Path('data/mini_speech_commands')
        if not data_dir.exists():
            tf.keras.utils.get_file(
                'mini_speech_commands.zip',
                origin=
                "http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
                extract=True,
                cache_dir='.',
                cache_subdir='data')

        # Every directory entry except the README is treated as a command.
        commands = np.array(tf.io.gfile.listdir(str(data_dir)))
        self.commands = commands[commands != 'README.md']
        print('Commands:', self.commands)

        # Collect all files one level below the command directories and
        # shuffle them with a fixed seed.
        filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
        filenames = tf.random.shuffle(filenames, seed=123)
        num_samples = len(filenames)
        print('Number of total examples:', num_samples)
        print('Example file tensor:', filenames[0])

        # NOTE(review): the split sizes are hard-coded; n_training / n_test
        # only cap the datasets through .take() further down.
        train_files = filenames[:6400]
        val_files = filenames[6400:6400 + 800]
        test_files = filenames[-800:]

        print('Training set size', len(train_files))
        print('Validation set size', len(val_files))
        print('Test set size', len(test_files))

        # Each split follows the same pipeline:
        # file path -> (waveform, label) -> (spectrogram, label_id).
        AUTOTUNE = tf.data.AUTOTUNE
        files_ds = tf.data.Dataset.from_tensor_slices(train_files)
        waveform_ds = files_ds.map(self.get_waveform_and_label,
                                   num_parallel_calls=AUTOTUNE)
        self.train_set = waveform_ds.map(
            self.get_spectrogram_and_label_id,
            num_parallel_calls=AUTOTUNE).take(n_training)

        # The validation split is not capped with take().
        files_ds = tf.data.Dataset.from_tensor_slices(val_files)
        waveform_ds = files_ds.map(self.get_waveform_and_label,
                                   num_parallel_calls=AUTOTUNE)
        self.val_set = waveform_ds.map(self.get_spectrogram_and_label_id,
                                       num_parallel_calls=AUTOTUNE)

        files_ds = tf.data.Dataset.from_tensor_slices(test_files)
        waveform_ds = files_ds.map(self.get_waveform_and_label,
                                   num_parallel_calls=AUTOTUNE)
        self.test_set = waveform_ds.map(
            self.get_spectrogram_and_label_id,
            num_parallel_calls=AUTOTUNE).take(n_test)

    @staticmethod
    def decode_audio(audio_binary):
        """Decode WAV bytes and drop the last axis of the decoded audio."""
        audio, _ = tf.audio.decode_wav(audio_binary)
        return tf.squeeze(audio, axis=-1)

    @staticmethod
    def get_label(file_path):
        """Return the name of the directory that contains ``file_path``."""
        parts = tf.strings.split(file_path, os.path.sep)
        # Second-to-last path component, i.e. the parent directory.
        return parts[-2]

    @staticmethod
    def get_waveform_and_label(file_path):
        """Read one audio file and return ``(waveform, label)``."""
        label = DataLoader.get_label(file_path)
        audio_binary = tf.io.read_file(file_path)
        waveform = DataLoader.decode_audio(audio_binary)
        return waveform, label

    @classmethod
    def get_spectrogram(cls, waveform):
        """Convert a waveform into a compressed magnitude spectrogram.

        The waveform is zero-padded to 16000 samples, transformed with an
        STFT, reduced to magnitudes, raised to the power 0.2 and given a
        trailing axis.
        """
        # Pad with zeros so every clip is 16000 samples long.
        zero_padding = tf.zeros([16000] - tf.shape(waveform), dtype=tf.float32)
        equal_length = tf.concat([waveform, zero_padding], 0)

        spectrogram = tf.signal.stft(equal_length,
                                     frame_length=255,
                                     frame_step=128)
        spectrogram = tf.math.abs(spectrogram)

        # Power-law compression of the magnitudes.
        spectrogram = tf.math.pow(spectrogram, 0.2)
        spectrogram = tf.expand_dims(spectrogram, -1)

        #spectrogram = DataLoader.resize(spectrogram)
        #spectrogram = DataLoader.norm(spectrogram)

        return spectrogram

    def get_spectrogram_and_label_id(self, audio, label):
        """Map ``(waveform, label)`` to ``(spectrogram, label_id)``.

        ``label_id`` is the index of ``label`` within ``self.commands``.
        """
        spectrogram = DataLoader.get_spectrogram(audio)
        # NOTE(review): get_spectrogram already appends a trailing axis, so
        # this adds a second one. Confirm the extra axis is intended.
        spectrogram = tf.expand_dims(spectrogram, -1)
        label_id = tf.argmax(label == self.commands)
        return spectrogram, label_id

    def visualize(self):
        """Print and plot the first example of the training set."""
        for spectrogram, label_id in self.train_set.take(1):

            print('Label:', label_id)
            print('Spectrogram shape:', spectrogram.shape)

            fig, axes = plt.subplots(1, figsize=(12, 8))
            self.plot_spectrogram(spectrogram.numpy(), axes)
            axes.set_title('Spectrogram')
            plt.show()

    @staticmethod
    def plot_spectrogram(spectrogram, ax):
        """Draw the log of ``spectrogram`` on ``ax`` with ``pcolormesh``."""
        # Take the log of the values and transpose so that time is
        # represented on the x-axis (columns).
        # NOTE(review): index 0 is taken on the third axis only; with the two
        # trailing axes added upstream the result may not be 2-D. Verify.
        spectrogram = spectrogram[:, :, 0]

        log_spec = np.log(spectrogram.T)
        height = log_spec.shape[0]
        width = log_spec.shape[1]
        X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
        Y = range(height)
        ax.pcolormesh(X, Y, log_spec)
Example #22
0
    return np.array(train_data_np)


# Apply stft_func to every item of train_data_1 and collect the results.
train_data_x = list()

for i, one_data in enumerate(train_data_1):

    result = stft_func(one_data)
    train_data_x.append(result)

    # "\r" with end='' rewrites the same console line as a progress counter.
    print("\r{}th file is done...".format(i + 1), end='')

# The STFT results become the targets: stacked, given a trailing channel
# axis and resized to 32x32.
train_label = np.array(train_data_x)
train_label = np.expand_dims(train_label, axis=-1)
train_label = preprocessing.Resizing(32, 32)(train_label)
# print(train_label)

# Convert the resized tensor back to a NumPy array.
train_label = train_label.numpy()

# train_data_1 = tf.data.Dataset.from_tensor_slices(train_data_1)
# train_label = tf.data.Dataset.from_tensor_slices(train_label)

# Pair each raw input with its resized STFT, shuffle with a 5000-element
# buffer and batch in groups of 4.
train_data = tf.data.Dataset.from_tensor_slices(
    (train_data_1, train_label)).shuffle(5000).batch(4)

# print(train_data)

import time

# Model input: one vector of 64000 values per example.
input_sig = keras.Input(shape=(64000, ))
Example #23
0
def run_whole_thing(out_dir):
    """Train and evaluate a small CNN on the mini speech commands dataset.

    Creates ``out_dir`` if needed, downloads the dataset into ``./data`` when
    ``data/mini_speech_commands`` is missing, converts each clip to an STFT
    magnitude spectrogram, trains a convolutional classifier with early
    stopping, and prints the accuracy on the held-out test files.

    Args:
        out_dir: Directory created (if absent) before the run. Nothing else
            in this function writes to it.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Set seed for experiment reproducibility
    seed = 55
    tf.random.set_seed(seed)
    np.random.seed(seed)

    data_dir = pathlib.Path("data/mini_speech_commands")

    if not data_dir.exists():
        # Download and extract under ./data so the archive contents land at
        # data_dir. Without cache_dir/cache_subdir, get_file stores the file
        # in the default Keras cache and data_dir would still be missing.
        tf.keras.utils.get_file(
            'mini_speech_commands.zip',
            origin=
            "http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
            extract=True,
            cache_dir='.',
            cache_subdir='data')

    # Convert the binary audio file to a tensor
    def decode_audio(audio_binary):
        audio, _ = tf.audio.decode_wav(audio_binary)

        # Drop the trailing channel axis.
        return tf.squeeze(audio, axis=-1)

    # Get the label (yes, no, up, down, etc) for an audio file.
    def get_label(file_path):
        parts = tf.strings.split(file_path, os.path.sep)

        # Note: You'll use indexing here instead of tuple unpacking to enable
        # this to work in a TensorFlow graph. The label is the name of the
        # directory that contains the file.
        return parts[-2]

    # Create a tuple that has the labeled audio files
    def get_waveform_and_label(file_path):
        label = get_label(file_path)
        audio_binary = tf.io.read_file(file_path)
        waveform = decode_audio(audio_binary)

        return waveform, label

    # Convert audio files to images
    def get_spectrogram(waveform):
        input_len = 16000

        # Clip longer recordings first; otherwise the padding length computed
        # below would be negative and tf.zeros would fail.
        waveform = waveform[:input_len]

        # Padding for files with less than 16000 samples
        zero_padding = tf.zeros([input_len] - tf.shape(waveform),
                                dtype=tf.float32)

        # Concatenate audio with padding so that all audio clips will be of
        # the same length
        waveform = tf.cast(waveform, tf.float32)
        equal_length = tf.concat([waveform, zero_padding], 0)
        spectrogram = tf.signal.stft(equal_length,
                                     frame_length=255,
                                     frame_step=128)

        # Keep only the magnitude of the complex STFT output.
        return tf.abs(spectrogram)

    # Label the images created from the audio files and return a tuple
    def get_spectrogram_and_label_id(audio, label):
        spectrogram = get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        # `commands` is the array assigned further down in the enclosing
        # function; the label id is the index of the matching entry.
        label_id = tf.argmax(label == commands)
        return spectrogram, label_id

    # Preprocess any audio files
    def preprocess_dataset(files, autotune, commands):
        # Creates the dataset
        files_ds = tf.data.Dataset.from_tensor_slices(files)

        # Matches audio files with correct labels
        output_ds = files_ds.map(get_waveform_and_label,
                                 num_parallel_calls=autotune)

        # Matches audio file images to the correct labels
        output_ds = output_ds.map(get_spectrogram_and_label_id,
                                  num_parallel_calls=autotune)

        return output_ds

    # Get all of the commands for the audio files
    commands = np.array(tf.io.gfile.listdir(str(data_dir)))
    commands = commands[commands != 'README.md']

    # Get a list of all the files in the directory
    filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')

    # Shuffle the file names so that random bunches can be used as the
    # training, testing, and validation sets
    filenames = tf.random.shuffle(filenames)

    # NOTE(review): the fixed split sizes below assume at least 8000 files;
    # with fewer, the test slice overlaps the other splits.
    # Create the list of files for training data
    train_files = filenames[:6400]
    # Create the list of files for validation data
    validation_files = filenames[6400:6400 + 800]
    # Create the list of files for test data
    test_files = filenames[-800:]

    autotune = tf.data.AUTOTUNE

    # Preprocess the training, test, and validation datasets. The unbatched
    # training spectrograms are also used to read the input shape and to
    # adapt the normalization layer.
    spectrogram_ds = preprocess_dataset(train_files, autotune, commands)
    validation_ds = preprocess_dataset(validation_files, autotune, commands)
    test_ds = preprocess_dataset(test_files, autotune, commands)

    # Batch datasets for training and validation
    batch_size = 64
    train_ds = spectrogram_ds.batch(batch_size)
    validation_ds = validation_ds.batch(batch_size)

    # Reduce latency while training
    train_ds = train_ds.cache().prefetch(autotune)
    validation_ds = validation_ds.cache().prefetch(autotune)

    # Build model
    for spectrogram, _ in spectrogram_ds.take(1):
        input_shape = spectrogram.shape

    num_labels = len(commands)

    # Compute normalization statistics from the training spectrograms only.
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))

    model = models.Sequential([
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        norm_layer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        # No activation: the loss below is configured with from_logits=True.
        layers.Dense(num_labels),
    ])

    model.summary()

    # Configure built model with losses and metrics
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Finally train the model and return info about each epoch
    EPOCHS = 10
    model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS,
        callbacks=[tf.keras.callbacks.EarlyStopping(verbose=1, patience=2)],
    )

    # Test the model
    test_audio = []
    test_labels = []

    for audio, label in test_ds:
        test_audio.append(audio.numpy())
        test_labels.append(label.numpy())

    test_audio = np.array(test_audio)
    test_labels = np.array(test_labels)

    # See how accurate the model is when making predictions on the test
    # dataset
    y_pred = np.argmax(model.predict(test_audio), axis=1)
    y_true = test_labels

    # Fraction of test clips whose predicted class matches the true class.
    test_acc = np.mean(y_pred == y_true)

    print(f'Test set accuracy: {test_acc:.0%}')
Example #24
0
    temp_size += tensor_gap_size
    x7 = tf.slice(x,
                  begin=[0, temp_size, 0],
                  size=[-1, tmp_shape[-2] - temp_size, tmp_shape[-1]])

    x0 = tf.expand_dims(x0, -1)
    x1 = tf.expand_dims(x1, -1)
    x2 = tf.expand_dims(x2, -1)
    x3 = tf.expand_dims(x3, -1)
    x4 = tf.expand_dims(x4, -1)
    x5 = tf.expand_dims(x5, -1)
    x6 = tf.expand_dims(x6, -1)
    x7 = tf.expand_dims(x7, -1)

    x0 = preprocessing.Resizing(32, 32)(x0)
    x1 = preprocessing.Resizing(32, 32)(x1)
    x2 = preprocessing.Resizing(32, 32)(x2)
    x3 = preprocessing.Resizing(32, 32)(x3)
    x4 = preprocessing.Resizing(32, 32)(x4)
    x5 = preprocessing.Resizing(32, 32)(x5)
    x6 = preprocessing.Resizing(32, 32)(x6)
    x7 = preprocessing.Resizing(32, 32)(x7)

    cnn_block_0 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_1 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_2 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_3 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_4 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_5 = CNN_block(channel_size=cnn_chan_size)
    cnn_block_6 = CNN_block(channel_size=cnn_chan_size)
                batch_y_train[current_size] = partsClear[k*voice_max_length:(k+1)*voice_max_length]
                current_size+=1
                if current_size>=batch_size:
                    break
        return batch_x_train, batch_y_train

print('Build model...')

if os.path.exists(model_name):
    # Reuse a previously saved model instead of rebuilding it.
    print("Load: " + model_name)
    model = load_model(model_name)
else:
    # Input: voice_max_length steps, each image_width x int(frame_length/2+1).
    main_input = Input(shape=(voice_max_length, image_width, int(frame_length/2+1)), name='main_input')
    x = main_input
    # Per step: add a channel axis, halve both spatial dimensions, then run a
    # small conv stack and flatten to one feature vector.
    x = TimeDistributed(Reshape((image_width, int(frame_length/2+1), 1)))(x)
    x = TimeDistributed(preprocessing.Resizing(image_width//2, int(frame_length/2+1)//2))(x)
    # NOTE(review): 34 filters here versus 64 below; possibly meant 32 - confirm.
    x = TimeDistributed(Conv2D(34, 3, activation='relu'))(x)
    x = TimeDistributed(Conv2D(64, 3, activation='relu'))(x)
    x = TimeDistributed(MaxPooling2D())(x)
    x = TimeDistributed(Dropout(0.1))(x)
    x = TimeDistributed(Flatten())(x)
    # return_sequences=True keeps one LSTM output per step.
    x = LSTM(256, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x)
    # Sigmoid output in (0, 1), int(frame_length/2+1) values per step; it is
    # reshaped with a size-1 axis and multiplied into the input as a mask.
    x = Dense(int(frame_length/2+1), activation='sigmoid')(x)
    x = Reshape((voice_max_length, 1, int(frame_length/2+1)))(x)
    x = Multiply()([x, main_input])
    model = Model(inputs=main_input, outputs=x)
    tf.keras.utils.plot_model(model, to_file='model_lstm_image.png', show_shapes=True)
# `metrics` is documented as a list; a bare string is rejected by newer Keras.
model.compile(loss='mse', metrics=['mse'], optimizer='adam')#Adam, SGD, Adagrad

print('Train...')
history = model.fit(MySequence(x_train, x_train_count, batch_size), epochs=epochs, steps_per_epoch=x_train_count//batch_size)
                if current_size >= batch_size:
                    break
        return batch_x_train, batch_y_train


print('Build model...')

if os.path.exists(model_name):
    # Reuse a previously saved model instead of rebuilding it.
    print("Load: " + model_name)
    model = load_model(model_name)
else:
    # Input: one image_width x int(frame_length / 2 + 1) array per example.
    main_input = Input(shape=(image_width, int(frame_length / 2 + 1)),
                       name='main_input')
    x = main_input
    # Add a channel axis, halve both spatial dimensions, then run a small
    # conv stack and flatten to one feature vector.
    x = Reshape((image_width, int(frame_length / 2 + 1), 1))(x)
    x = preprocessing.Resizing(image_width // 2,
                               int(frame_length / 2 + 1) // 2)(x)
    # NOTE(review): 34 filters here versus 64 below; possibly meant 32 -
    # confirm.
    x = Conv2D(34, 3, activation='relu')(x)
    x = Conv2D(64, 3, activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Dropout(0.1)(x)
    x = Flatten()(x)
    # Sigmoid output in (0, 1) with int(frame_length / 2 + 1) values, which
    # is multiplied into the input as a mask.
    # NOTE(review): the mask has one fewer axis than main_input; this relies
    # on the Multiply layer broadcasting it - confirm the output shape.
    x = Dense(int(frame_length / 2 + 1), activation='sigmoid')(x)
    x = Multiply()([x, main_input])
    model = Model(inputs=main_input, outputs=x)
    tf.keras.utils.plot_model(model,
                              to_file='model_dense_image.png',
                              show_shapes=True)
# `metrics` is documented as a list; a bare string is rejected by newer Keras.
model.compile(loss='mse', metrics=['mse'], optimizer='adam')

print('Train...')
history = model.fit(MySequence(x_train, x_train_count, batch_size),
# Batch the validation set, then cache and prefetch both datasets.
val_ds = val_ds.batch(batch_size)

train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

# Read the shape of one spectrogram to size the model input.
for spectrogram, _ in spectrogram_ds.take(1):
    input_shape = spectrogram.shape
print('Input shape:', input_shape)
num_labels = len(labels)

# Normalization statistics are computed from the spectrograms alone; the
# lambda drops the label element of each pair.
norm_layer = preprocessing.Normalization()
norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))

# Resize to 64x64, normalize, two conv layers (5x5 then 3x3 kernels), pool,
# and classify through a 128-unit dense layer.
model = models.Sequential([
    layers.Input(shape=input_shape),
    preprocessing.Resizing(64, 64),
    norm_layer,
    layers.Conv2D(64, 5, activation='relu'),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    # No activation on the output layer: one raw score per label.
    layers.Dense(num_labels),
])

model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
Example #28
0
#     result = tf.expand_dims(result, 0)

#     if i == 0:
#         a = tf.zeros(result.shape)
#         train_data = tf.raw_ops.Add(x=a, y=result)
#     else:
#         train_data = tf.concat([train_data, result], axis=0)

#     print("\r{}th file is done...".format(i+1), end='')

# Append a trailing axis to train_data before the image-style layers.
result = np.expand_dims(train_data, -1)
# result = tf.raw_ops.ExpandDims(train_data, -1)

print(result.shape)

# Resize to 32x32, then run the conv/dense stack.
x = preprocessing.Resizing(32, 32)(result)
print(x.shape)
# NOTE(review): this Normalization layer is created and called without a
# visible adapt() call - confirm the statistics it uses are intended.
x = preprocessing.Normalization()(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.25)(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# Softmax over 7 output units.
answer = layers.Dense(7, activation='softmax')(x)

# NOTE(review): input_sigss is defined outside this excerpt, while `answer`
# is computed from `result` above - verify the two are actually connected.
model = keras.Model(inputs=input_sigss, outputs=answer)

model.summary()
                     compression='gzip')
    f.create_dataset('spectr_test',data=spectr_test,
                     compression='gzip')
    f.create_dataset('label_test',data=label_test,
                     compression='gzip')
    f.close()
# Element 6 of the os.stat result tuple is st_size, the file size in bytes.
print('file size: %s'%list(os.stat(h5f))[6])

# Read the shape of one element of train_ds to size the model input.
# NOTE(review): assumes train_ds is unbatched here, so the shape is that of
# a single example - TODO confirm.
for spectrogram,_ in train_ds.take(1):
  input_shape=spectrogram.shape
num_labels=len(names)
# Normalization statistics are computed from the inputs alone; the lambda
# drops the label element of each pair.
norm_layer=tkp.Normalization()
norm_layer.adapt(train_ds.map(lambda x,_:x))
# Resize to 32x32, normalize, two 3x3 conv layers, pool, and classify
# through a 256-unit dense layer.
model=tkm.Sequential([
    tkl.InputLayer(input_shape=input_shape),
    tkp.Resizing(32,32), 
    norm_layer,
    tkl.Conv2D(32,3,activation='relu'),
    tkl.Conv2D(96,3,activation='relu'),
    tkl.MaxPooling2D(),
    tkl.Dropout(.25),
    tkl.Flatten(),
    tkl.Dense(256,activation='relu'),
    tkl.Dropout(.5),
    # No activation on the output layer: one raw score per label.
    tkl.Dense(num_labels),
])
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
Example #30
0
#
#print("\n******************************\n")
#print(spectrogram.shape)
#print(spectrogram)
#print("\n******************************\n")

# x = tf.abs(input_vec)
# x = tf.expand_dims(x, -1)
# x = layers.Conv1D(32, 3, activation='relu')(x)
# x = layers.Conv1D(64, 3, activation='relu')(x)

# spectrogram = tf.expand_dims(x, -1)

# input_vec = tf.keras.Input(shape=(spectrogram.shape[1], spectrogram.shape[2], 1))
# x = preprocessing.Resizing(64, 64)(input_vec)
# Resize the spectrogram to 32x32, then run the conv/dense stack.
x = preprocessing.Resizing(32, 32)(spectrogram)
# NOTE(review): this Normalization layer is created and called without a
# visible adapt() call - confirm the statistics it uses are intended.
x = preprocessing.Normalization()(x)
# x = preprocessing.Normalization()(x)
# x = layers.Conv2D(32, 3, activation='relu')(input_vec)
x = layers.Conv2D(32, 3, activation='relu')(x)
# x = layers.Conv2D(32, 3, activation='relu')(x)
# x = layers.MaxPooling2D()(x)
# x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.25)(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# Softmax over 6 output units.
answer = layers.Dense(6, activation='softmax')(x)