import os
from typing import List, Tuple

import numpy as np
import keras
from keras.callbacks import ModelCheckpoint
from keras.utils import multi_gpu_model

# utils, evaluator, DataGenerator and ROCAUCCallback are project-local modules
# that the snippets below assume to be importable.


def transform_data(self, ids_temp: List[str], labels_temp, batch_size: int) -> Tuple[np.ndarray, np.ndarray]:
    # Each song is cut into num_segments sub-signals, so one batch of song ids
    # expands into batch_size * num_segments training samples.
    num_segments = calculate_num_segments(self.input_dim)
    new_batch_size = batch_size * num_segments

    # Initialization
    x = np.empty((new_batch_size, *self.dimension, self.n_channels), dtype='float32')
    y = np.empty((new_batch_size, len(labels_temp[0])))
    count = 0

    # Generate data
    for i, song_id in enumerate(ids_temp):
        song = np.load("../sdb/data/%s/%s.npz" % (self.dataset, song_id))
        try:
            song_temp = song['arr_0']
        except KeyError:
            # Skip archives missing the expected array instead of passing
            # None on to split_song.
            print(song_id)
            continue

        # Convert the song into its fixed-length sub-signals
        sub_signals = self.split_song(song_temp, num_segments)
        for sub_song in sub_signals:
            # Add a channel axis: (sample_length,) -> (sample_length, 1)
            x[count] = sub_song.reshape((-1, 1))
            y[count] = labels_temp[i]
            count += 1
    return x, y
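
# calculate_num_segments and split_song are project helpers that are not part
# of this listing. A minimal sketch of what they could look like, assuming each
# stored song is a 1-D signal cut into consecutive, non-overlapping windows of
# sample_length samples; the names and the fixed song length below are
# assumptions for illustration, not the project's actual code.
SONG_LENGTH = 465984  # assumed samples per stored song; depends on preprocessing


def calculate_num_segments_sketch(sample_length: int) -> int:
    # How many whole windows of sample_length fit into one song.
    return SONG_LENGTH // sample_length


def split_song_sketch(song: np.ndarray, num_segments: int) -> List[np.ndarray]:
    # Cut the signal into num_segments equally long, consecutive windows.
    sample_length = len(song) // num_segments
    return [song[s * sample_length:(s + 1) * sample_length]
            for s in range(num_segments)]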
def predict(base_model, model, x_test: List[str]) -> np.ndarray:
    sample_length = base_model.dimension[0]
    num_segments = utils.calculate_num_segments(sample_length)
    x_test_temp = np.zeros((num_segments, sample_length, 1))
    x_pred = np.zeros((len(x_test), base_model.n_labels))

    for i, song_id in enumerate(x_test):
        song = np.load(base_model.path % (base_model.dataset, song_id))['arr_0']
        # Stack all fixed-length segments of the song into one batch
        for segment in range(num_segments):
            x_test_temp[segment] = song[segment * sample_length:
                                        (segment + 1) * sample_length].reshape((-1, 1))
        # The song-level prediction is the mean over its segment predictions
        x_pred[i] = np.mean(model.predict(x_test_temp), axis=0)
    return x_pred
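
# A hypothetical call site for predict(); the song ids and the checkpoint
# filename are placeholders, not files that ship with the project.
def predict_demo(base_model, model):
    model.load_weights('weights_demo.hdf5')  # hypothetical checkpoint
    scores = predict(base_model, model, ['song_0001', 'song_0002'])
    # One row per song, one column per label, averaged over all segments.
    assert scores.shape == (2, base_model.n_labels)
    return scores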
def on_epoch_end(self, epoch, logs=None):
    # Score the validation songs the same way predict() does: run the model
    # on every fixed-length segment and average the per-segment predictions.
    num_segments = utils.calculate_num_segments(self.sample_length)
    x_val_temp = np.zeros((num_segments, self.sample_length, 1))
    x_pred = np.zeros((len(self.x_val), self.num_labels))

    for i, song_id in enumerate(self.x_val):
        song = np.load(self.path % (self.dataset, song_id))['arr_0']
        for segment in range(num_segments):
            x_val_temp[segment] = song[segment * self.sample_length:
                                       (segment + 1) * self.sample_length].reshape((-1, 1))
        x_pred[i] = np.mean(self.model.predict(x_val_temp), axis=0)

    auc = evaluator.mean_roc_auc(x_pred, self.y_val)
    print('\r roc-auc_val: %s' % str(np.mean(auc)), end=100 * ' ' + '\n')
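
# evaluator.mean_roc_auc is project code that is not shown in this listing.
# A plausible sketch, assuming it returns one ROC AUC per output label in the
# (predictions, ground truth) argument order used above; sklearn is used here
# purely for illustration.
from sklearn.metrics import roc_auc_score


def mean_roc_auc_sketch(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    # One ROC AUC per label column; labels with a single class are skipped
    # because AUC is undefined for them.
    aucs = []
    for label in range(y_true.shape[1]):
        if len(np.unique(y_true[:, label])) > 1:
            aucs.append(roc_auc_score(y_true[:, label], y_pred[:, label]))
    return np.asarray(aucs)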
def train(self, train_x, train_y, valid_x, valid_y, epoch_size, lr, weight_name):
    # Save the model architecture once per model/dataset/learning-rate combination
    json_name = 'model_architecture_%s_%s_%.6f.json' % (self.model_name, self.dataset, lr)
    if not os.path.isfile(json_name):
        with open(json_name, 'w') as json_file:
            json_file.write(self.model.to_json())

    use_multiprocessing = False
    train_model = self.model
    if self.gpu:
        try:
            # CUDA_VISIBLE_DEVICES is comma-separated, without spaces
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(self.gpu)
            train_model = multi_gpu_model(self.model, gpus=len(self.gpu))
            use_multiprocessing = True
        except ValueError:
            # Fall back to single-device training if the model cannot be replicated
            pass

    train_model.compile(loss=keras.losses.binary_crossentropy,
                        optimizer=keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9,
                                                       nesterov=True),
                        metrics=['accuracy'])

    train_gen = utils.train_generator(train_x, train_y, self.batch_size, 25,
                                      self.dimension[0], self.n_labels, self.dataset, self.path)
    val_gen = DataGenerator(self.transform_data, valid_x, valid_y, batch_size=self.batch_size,
                            n_channels=1, dim=self.dimension, n_classes=self.n_labels)

    check_pointer = ModelCheckpoint(weight_name, monitor='val_loss', verbose=0,
                                    save_best_only=True, mode='auto', save_weights_only=True)
    self.callbacks.append(check_pointer)
    self.callbacks.append(
        ROCAUCCallback(valid_x, valid_y, self.dimension[0], self.n_labels, self.dataset, self.path))

    history = train_model.fit_generator(
        train_gen,
        callbacks=self.callbacks,
        # Each song contributes num_segments samples, hence the multiplier
        steps_per_epoch=(len(train_x) // self.batch_size) * utils.calculate_num_segments(self.dimension[0]),
        # steps_per_epoch=10,  # Used for testing
        validation_data=val_gen,
        validation_steps=len(valid_x) // self.batch_size,
        # validation_steps=10,  # Used for testing
        epochs=epoch_size,
        workers=self.workers,
        use_multiprocessing=use_multiprocessing,
    )
    self._plot_training(history, lr)
    return train_model
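
# utils.train_generator is project code not shown in this listing. A minimal
# sketch of an infinite Keras-style generator, assuming it draws one random
# fixed-length window per song per step; the names, the shuffling scheme and
# the reduced signature are assumptions (the real helper takes additional
# parameters, e.g. the literal 25 above, whose meaning is not shown here).
def train_generator_sketch(ids, labels, batch_size, sample_length, dataset, path):
    while True:  # Keras generators must loop forever
        order = np.random.permutation(len(ids))
        for start in range(0, len(ids) - batch_size + 1, batch_size):
            batch, x, y = order[start:start + batch_size], [], []
            for j in batch:
                song = np.load(path % (dataset, ids[j]))['arr_0']
                # Draw one random window of sample_length samples from the song
                offset = np.random.randint(0, len(song) - sample_length + 1)
                x.append(song[offset:offset + sample_length].reshape(-1, 1))
                y.append(labels[j])
            yield np.asarray(x, dtype='float32'), np.asarray(y)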
def retrain(self, train_x, train_y, valid_x, valid_y, epoch_size, lr, lr_prev, weight_name):
    use_multiprocessing = False
    train_model = self.model
    if self.gpu:
        try:
            # CUDA_VISIBLE_DEVICES is comma-separated, without spaces
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(self.gpu)
            train_model = multi_gpu_model(self.model, gpus=len(self.gpu))
            use_multiprocessing = True
        except ValueError:
            # Fall back to single-device training if the model cannot be replicated
            pass

    # Load the best weights of the previous stage: swap the trailing
    # "<lr>.hdf5" token of weight_name for the previous learning rate
    splitted_weight_name = weight_name.split("_")
    splitted_weight_name[-1] = str(lr_prev)
    train_model.load_weights("_".join(splitted_weight_name) + ".hdf5")

    train_model.compile(loss=keras.losses.binary_crossentropy,
                        optimizer=keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9,
                                                       nesterov=True),
                        metrics=['accuracy'])

    train_gen = utils.train_generator(train_x, train_y, self.batch_size, 25,
                                      self.dimension[0], self.n_labels, self.dataset, self.path)
    val_gen = DataGenerator(self.transform_data, valid_x, valid_y, batch_size=self.batch_size,
                            n_channels=1, dim=self.dimension, n_classes=self.n_labels)

    check_pointer = ModelCheckpoint(weight_name, monitor='val_loss', verbose=0,
                                    save_best_only=True, mode='auto', save_weights_only=True)
    self.callbacks.append(check_pointer)
    self.callbacks.append(
        ROCAUCCallback(valid_x, valid_y, self.dimension[0], self.n_labels, self.dataset, self.path))

    history = train_model.fit_generator(
        train_gen,
        callbacks=self.callbacks,
        steps_per_epoch=(len(train_x) // self.batch_size) * utils.calculate_num_segments(self.dimension[0]),
        validation_data=val_gen,
        validation_steps=len(valid_x) // self.batch_size,
        epochs=epoch_size,
        workers=self.workers,
        use_multiprocessing=use_multiprocessing,
    )
    self._plot_training(history, lr)
    return train_model
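
# A hypothetical staged learning-rate schedule: train once at the highest rate,
# then retrain at progressively smaller rates, each stage reloading the best
# checkpoint of the previous one. The rates, epoch count and naming scheme are
# assumptions for illustration.
def run_lr_schedule(model_wrapper, train_x, train_y, valid_x, valid_y):
    rates = [0.01, 0.002, 0.0004]

    def weight_for(lr):
        # Must end in "_<lr>.hdf5" so retrain() can rebuild the previous
        # stage's filename from lr_prev.
        return 'weights_%s_%s_%s.hdf5' % (model_wrapper.model_name,
                                          model_wrapper.dataset, lr)

    model_wrapper.train(train_x, train_y, valid_x, valid_y,
                        epoch_size=100, lr=rates[0], weight_name=weight_for(rates[0]))
    for lr_prev, lr in zip(rates, rates[1:]):
        model_wrapper.retrain(train_x, train_y, valid_x, valid_y,
                              epoch_size=100, lr=lr, lr_prev=lr_prev,
                              weight_name=weight_for(lr))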