Ejemplo n.º 1
0
    def transform_data(self, ids_temp: List[str], labels_temp,
                       batch_size: int) -> Tuple[np.ndarray, np.ndarray]:
        """Load each song in the batch and split it into fixed-size segments.

        Args:
            ids_temp: Song ids for this batch; each id names an ``.npz``
                archive under ``../sdb/data/<dataset>/``.
            labels_temp: One label vector per song id, in the same order.
            batch_size: Number of songs in this batch.

        Returns:
            Tuple ``(x, y)`` where ``x`` holds one row per segment
            (``batch_size * num_segments`` rows of shape
            ``(*self.dimension, self.n_channels)``) and ``y`` repeats each
            song's label once per segment.
        """
        num_segments = calculate_num_segments(self.input_dim)
        new_batch_size = batch_size * num_segments

        # Pre-allocate the outputs: one row per (song, segment) pair.
        x = np.empty((new_batch_size, *self.dimension, self.n_channels),
                     dtype='float32')
        y = np.empty((new_batch_size, len(labels_temp[0])))

        count = 0
        # Generate data
        for i, song_id in enumerate(ids_temp):
            song = np.load("../sdb/data/%s/%s.npz" % (self.dataset, song_id))

            song_temp = None
            try:
                song_temp = song['arr_0']
            except KeyError:
                # Narrowed from a bare ``except:``: a missing 'arr_0' member
                # is the only expected failure here.  The offending id is
                # reported; split_song will then fail loudly on None rather
                # than the error being silently masked.
                print(song_id)

            # Convert the song into num_segments equal-length sub-signals.
            sub_signals = self.split_song(song_temp, num_segments)

            for sub_song in sub_signals:
                # Each segment becomes a (length, 1) column vector.
                sub_song = sub_song.reshape((-1, 1))
                x[count] = sub_song
                y[count] = labels_temp[i]

                count += 1

        return x, y
Ejemplo n.º 2
0
def predict(base_model, model, x_test: List[str]):
    """Score each song in *x_test* by averaging per-segment predictions.

    Every song is cut into consecutive fixed-length segments, the model
    scores all segments at once, and the song-level prediction is the mean
    over its segments.
    """
    sample_length = base_model.dimension[0]
    num_segments = utils.calculate_num_segments(sample_length)

    # Reusable buffer for the segments of the current song.
    segment_buffer = np.zeros((num_segments, sample_length, 1))
    predictions = np.zeros((len(x_test), base_model.n_labels))

    for idx, song_id in enumerate(x_test):
        signal = np.load(base_model.path % (base_model.dataset, song_id))['arr_0']

        for seg in range(num_segments):
            start = seg * sample_length
            segment_buffer[seg] = signal[start:start + sample_length].reshape((-1, 1))

        predictions[idx] = np.mean(model.predict(segment_buffer), axis=0)

    return predictions
Ejemplo n.º 3
0
    def on_epoch_end(self, epoch, logs=None):
        """Evaluate and print mean ROC-AUC over the validation set.

        Args:
            epoch: Index of the epoch that just finished (unused here).
            logs: Metric dict supplied by Keras; unused.  Defaults to
                ``None`` instead of the original mutable ``{}`` default
                (shared-dict anti-pattern); Keras always passes a dict, so
                this is backward-compatible.
        """
        num_segments = utils.calculate_num_segments(self.sample_length)

        # Reusable buffer holding every segment of the current song.
        x_val_temp = np.zeros((num_segments, self.sample_length, 1))
        x_pred = np.zeros((len(self.x_val), self.num_labels))

        for i, song_id in enumerate(self.x_val):
            song = np.load(self.path % (self.dataset, song_id))['arr_0']

            # Slice the song into consecutive, non-overlapping segments.
            for segment in range(0, num_segments):
                x_val_temp[segment] = song[segment * self.sample_length:
                                           segment * self.sample_length + self.sample_length].reshape((-1, 1))

            # Song-level prediction is the mean of its segment predictions.
            x_pred[i] = np.mean(self.model.predict(x_val_temp), axis=0)

        auc = evaluator.mean_roc_auc(x_pred, self.y_val)

        print('\r roc-auc_val: %s' % (str(np.mean(auc))), end=100 * ' ' + '\n')
        return
Ejemplo n.º 4
0
    def train(self, train_x, train_y, valid_x, valid_y, epoch_size, lr,
              weight_name):
        """Compile and train the model with SGD, checkpointing best weights.

        Args:
            train_x / train_y: Training song ids and label vectors.
            valid_x / valid_y: Validation song ids and label vectors.
            epoch_size: Number of epochs to train.
            lr: Learning rate for SGD.
            weight_name: Path the ModelCheckpoint saves best weights to.

        Returns:
            The trained (possibly multi-GPU-wrapped) Keras model.
        """
        # Persist the architecture once per (model, dataset, lr) combination.
        json_name = 'model_architecture_%s_%s_%s.6f.json' % (self.model_name,
                                                             self.dataset, lr)
        if not os.path.isfile(json_name):
            # Context manager instead of open(...).write(...), which leaked
            # the file handle in the original.
            with open(json_name, 'w') as json_file:
                json_file.write(self.model.to_json())

        train_model = self.model
        if self.gpu:
            try:
                os.environ["CUDA_VISIBLE_DEVICES"] = ', '.join(self.gpu)
                train_model = multi_gpu_model(self.model, gpus=len(self.gpu))
            except Exception:
                # Best-effort multi-GPU setup: fall back to the plain model.
                # Narrowed from a bare except so KeyboardInterrupt/SystemExit
                # still propagate.  (The original also set an unused
                # use_multiprocessing flag here; fit_generator below is
                # called with use_multiprocessing=False regardless.)
                pass

        train_model.compile(loss=keras.losses.binary_crossentropy,
                            optimizer=keras.optimizers.SGD(lr=lr,
                                                           decay=1e-6,
                                                           momentum=0.9,
                                                           nesterov=True),
                            metrics=['accuracy'])

        train_gen = utils.train_generator(train_x, train_y, self.batch_size,
                                          25, self.dimension[0], self.n_labels,
                                          self.dataset, self.path)

        val_gen = DataGenerator(self.transform_data,
                                valid_x,
                                valid_y,
                                batch_size=self.batch_size,
                                n_channels=1,
                                dim=self.dimension,
                                n_classes=self.n_labels)

        # Save only the best weights (by validation loss).
        check_pointer = ModelCheckpoint(weight_name,
                                        monitor='val_loss',
                                        verbose=0,
                                        save_best_only=True,
                                        mode='auto',
                                        save_weights_only=True)
        self.callbacks.append(check_pointer)
        self.callbacks.append(
            ROCAUCCallback(valid_x, valid_y, self.dimension[0], self.n_labels,
                           self.dataset, self.path))

        history = train_model.fit_generator(
            train_gen,
            callbacks=self.callbacks,
            # The training generator yields one batch per segment, hence the
            # multiplication by the number of segments per song.
            steps_per_epoch=len(train_x) // self.batch_size *
            utils.calculate_num_segments(self.dimension[0]),
            validation_data=val_gen,
            validation_steps=len(valid_x) // self.batch_size,
            epochs=epoch_size,
            workers=self.workers,
            use_multiprocessing=False,
        )

        self._plot_training(history, lr)

        return train_model
Ejemplo n.º 5
0
    def retrain(self, train_x, train_y, valid_x, valid_y, epoch_size, lr,
                lr_prev, weight_name):
        """Resume training from the weights saved at a previous learning rate.

        Args:
            train_x / train_y: Training song ids and label vectors.
            valid_x / valid_y: Validation song ids and label vectors.
            epoch_size: Number of epochs to train.
            lr: New learning rate for SGD.
            lr_prev: Learning rate of the previous run; used to locate the
                previously saved weight file.
            weight_name: Path the ModelCheckpoint saves best weights to.

        Returns:
            The retrained (possibly multi-GPU-wrapped) Keras model.
        """
        train_model = self.model
        if self.gpu:
            try:
                os.environ["CUDA_VISIBLE_DEVICES"] = ', '.join(self.gpu)
                train_model = multi_gpu_model(self.model, gpus=len(self.gpu))
            except Exception:
                # Best-effort multi-GPU setup: fall back to the plain model.
                # Narrowed from a bare except; the unused use_multiprocessing
                # flag from the original is dropped (fit_generator below is
                # called with use_multiprocessing=False regardless).
                pass

        # Load the weights produced by the run at lr_prev: the weight file
        # name differs from weight_name only in its trailing lr component.
        splitted_weight_name = weight_name.split("_")
        splitted_weight_name[-1] = str(lr_prev)
        train_model.load_weights("_".join(splitted_weight_name) + ".hdf5")

        train_model.compile(loss=keras.losses.binary_crossentropy,
                            optimizer=keras.optimizers.SGD(lr=lr,
                                                           decay=1e-6,
                                                           momentum=0.9,
                                                           nesterov=True),
                            metrics=['accuracy'])

        train_gen = utils.train_generator(train_x, train_y, self.batch_size,
                                          25, self.dimension[0], self.n_labels,
                                          self.dataset, self.path)

        val_gen = DataGenerator(self.transform_data,
                                valid_x,
                                valid_y,
                                batch_size=self.batch_size,
                                n_channels=1,
                                dim=self.dimension,
                                n_classes=self.n_labels)

        # Save only the best weights (by validation loss).
        check_pointer = ModelCheckpoint(weight_name,
                                        monitor='val_loss',
                                        verbose=0,
                                        save_best_only=True,
                                        mode='auto',
                                        save_weights_only=True)
        self.callbacks.append(check_pointer)
        self.callbacks.append(
            ROCAUCCallback(valid_x, valid_y, self.dimension[0], self.n_labels,
                           self.dataset, self.path))

        history = train_model.fit_generator(
            train_gen,
            callbacks=self.callbacks,
            # One batch per segment per song, hence the multiplication.
            steps_per_epoch=len(train_x) // self.batch_size *
            utils.calculate_num_segments(self.dimension[0]),
            validation_data=val_gen,
            validation_steps=len(valid_x) // self.batch_size,
            epochs=epoch_size,
            workers=self.workers,
            use_multiprocessing=False,
        )

        self._plot_training(history, lr)

        return train_model