def _run_train(self, x, y, epochs, batch_size, validation_data):
    train_gen = BatchGenerator(x, y, batch_size)
    steps_per_epoch = np.ceil(train_gen.length / batch_size).astype(int)
    self._sess.run(tf.global_variables_initializer())
    for e in range(1, epochs + 1):
        print('Epoch {}/{}'.format(e, epochs))
        pbar = utils.Progbar(steps_per_epoch)
        for step, batch in enumerate(train_gen.next(), 1):
            users = batch[0][:, 0]
            items = batch[0][:, 1]
            ratings = batch[1]
            self._sess.run(self._optimizer, feed_dict={
                self._users: users,
                self._items: items,
                self._ratings: ratings
            })
            pred = self.predict(batch[0])
            update_values = [('rmse', rmse(ratings, pred)),
                             ('mae', mae(ratings, pred))]
            if validation_data is not None and step == steps_per_epoch:
                valid_x, valid_y = validation_data
                valid_pred = self.predict(valid_x)
                update_values += [('val_rmse', rmse(valid_y, valid_pred)),
                                  ('val_mae', mae(valid_y, valid_pred))]
            # report the running metrics on the progress bar
            pbar.update(step, values=update_values)
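# The rmse and mae helpers used above are not defined in this snippet. A minimal
# sketch, assuming they take array-likes of true and predicted ratings and return
# scalar floats:
import numpy as np

def rmse(y_true, y_pred):
    """Root mean squared error between true and predicted ratings."""
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))

def mae(y_true, y_pred):
    """Mean absolute error between true and predicted ratings."""
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    return float(np.mean(np.abs(y_true - y_pred)))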
def train(self): """Executes the training for the WGAN model.""" self._generate_and_save_examples(0) for epoch in range(self.completed_epochs, self.epochs): pb_i = keras_utils.Progbar(len(self.raw_dataset)) start = time.time() for i, x_batch in enumerate(self._get_training_dataset()): self._train_step(x_batch, train_generator=False, train_discriminator=True) if (i + 1) % self.discriminator_training_ratio == 0: self._train_step(x_batch, train_generator=True, train_discriminator=False) pb_i.add(self.batch_size) if self.checkpoint_prefix and (epoch + 1) % self.epochs_per_save == 0: self.checkpoint.save(file_prefix=self.checkpoint_prefix) print('\nTime for epoch {} is {} minutes'.format( epoch + 1, (time.time() - start) / 60)) self._generate_and_save_examples(epoch + 1)
def class_build_dataset(malfiles, cleanfiles, seqlen, tiled=False):
    dataset = []
    ite = 0
    prog = utils.Progbar(len(cleanfiles) + len(malfiles))
    # quick hack: in our current tests generated malware files are 500 tokens long...
    if not tiled:
        for sample in malfiles:
            with open(sample, 'r') as s:
                lst = list(map(int, s.read().split()))
            dataset.append({'seq': lst, 'label': 1})
            ite += 1
            prog.update(ite)
    else:
        for sample in malfiles:
            sequences = get_tiled_list(get_token_list(sample), seqlen)
            for seq in sequences:
                dataset.append({'seq': seq, 'label': 1})
            ite += 1
            prog.update(ite)
    # while cleanware token files are 2500 tokens long...
    for sample in cleanfiles:
        sequences = get_tiled_list(get_token_list(sample), seqlen)
        for seq in sequences:
            dataset.append({'seq': seq, 'label': 0})
        ite += 1
        prog.update(ite)
    return dataset
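# get_token_list and get_tiled_list are assumed helpers that are not shown in this
# snippet. A minimal sketch consistent with how they are called above: get_token_list
# reads a file of whitespace-separated integer tokens, and get_tiled_list cuts that
# list into consecutive, non-overlapping windows of length seqlen. The real helpers
# may use a different stride.
def get_token_list(path):
    """Read a whitespace-separated token file into a list of ints."""
    with open(path, 'r') as f:
        return list(map(int, f.read().split()))

def get_tiled_list(tokens, seqlen):
    """Tile a token list into non-overlapping sequences of length seqlen."""
    return [tokens[i:i + seqlen] for i in range(0, len(tokens) - seqlen + 1, seqlen)]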
def fit(self, x, y, epochs=1, steps_per_epoch=1):
    """Trains the model for a given number of epochs (iterations on a dataset).

    Arguments:
      x: Private tensor of training data
      y: Private tensor of target (label) data
      epochs: Integer. Number of epochs to train the model.
      steps_per_epoch: Integer. Total number of steps (batches of samples)
        before declaring one epoch finished and starting the next epoch.
    """
    assert isinstance(x, PondPrivateTensor), type(x)
    assert isinstance(y, PondPrivateTensor), type(y)

    # Initialize variables before starting to train
    sess = KE.get_session()
    sess.run(tf.global_variables_initializer())

    for e in range(epochs):
        print("Epoch {}/{}".format(e + 1, epochs))
        batch_size = x.shape.as_list()[0]
        progbar = utils.Progbar(batch_size * steps_per_epoch)
        for _ in range(steps_per_epoch):
            self.fit_batch(x, y)
            progbar.add(batch_size, values=[("loss", self._current_loss)])
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    print('Num GPUs Available: ', len(tf.config.experimental.list_physical_devices('GPU')))

    raw_dataset, magnitude_stats, phase_stats =\
        waveform_dataset.get_magnitude_phase_dataset(
            DATASET_PATH, FFT_FRAME_LENGTH, FFT_FRAME_STEP, LOG_MAGNITUDE,
            INSTANTANEOUS_FREQUENCY
        )

    normalized_raw_dataset = []
    pb_i = utils.Progbar(len(raw_dataset))
    for spectogram in raw_dataset:
        norm_mag = waveform_dataset.normalize(spectogram[:, :, 0], *magnitude_stats)
        norm_phase = waveform_dataset.normalize(spectogram[:, :, 1], *phase_stats)
        norm = np.concatenate([np.expand_dims(norm_mag, axis=2),
                               np.expand_dims(norm_phase, axis=2)], axis=-1)
        normalized_raw_dataset.append(norm)
        pb_i.add(1)
    normalized_raw_dataset = np.array(normalized_raw_dataset)

    generator = spec_gan.Generator(channels=2, activation=activations.tanh, in_shape=Z_IN_SHAPE)
    discriminator = spec_gan.Discriminator(input_shape=SPECTOGRAM_IMAGE_SHAPE)

    generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5, beta_2=0.9)
    discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5, beta_2=0.9)

    get_waveform = lambda spectogram:\
        save_helper.get_waveform_from_normalized_spectogram(
            spectogram, [magnitude_stats, phase_stats], FFT_FRAME_LENGTH,
            FFT_FRAME_STEP, LOG_MAGNITUDE, INSTANTANEOUS_FREQUENCY
        )

    save_examples = lambda epoch, real, generated:\
        save_helper.save_wav_data(
            epoch, real, generated, SAMPLING_RATE, RESULT_DIR, get_waveform
        )

    spec_phase_gan_model = wgan.WGAN(
        normalized_raw_dataset, generator, [discriminator], Z_DIM,
        generator_optimizer, discriminator_optimizer,
        discriminator_training_ratio=D_UPDATES_PER_G,
        batch_size=BATCH_SIZE, epochs=EPOCHS, checkpoint_dir=CHECKPOINT_DIR,
        fn_save_examples=save_examples
    )

    spec_phase_gan_model.train()
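# waveform_dataset.normalize and the *_stats values come from the project's dataset
# module and are not shown here. A minimal sketch of one plausible contract, assuming
# the statistics are a (mean, std) pair computed over the dataset; the project may use
# different statistics:
import numpy as np

def normalize(data, mean, std, epsilon=1e-8):
    """Standardize an array given precomputed mean and standard deviation."""
    return (np.asarray(data, dtype=np.float32) - mean) / (std + epsilon)

def un_normalize(data, mean, std, epsilon=1e-8):
    """Invert normalize() given the same statistics."""
    return np.asarray(data, dtype=np.float32) * (std + epsilon) + mean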
def train(self):
    train_gen = BatchGenerator(self.x_train, self.y_train, self.batch_size)
    steps_per_epoch = np.ceil(train_gen.length / self.batch_size).astype(int)
    self._sess.run(tf.compat.v1.global_variables_initializer())
    for e in range(1, self.epochs + 1):
        print('Epoch {}/{}'.format(e, self.epochs))
        pbar = utils.Progbar(steps_per_epoch)
        for step, batch in enumerate(train_gen.next(), 1):
            users = batch[0][:, 0]
            items = batch[0][:, 1]
            ratings = batch[1]
            self._sess.run(self._optimizer, feed_dict={
                self._users: users,
                self._items: items,
                self._ratings: ratings
            })
            pred = self.predict(batch[0])
            update_values = [('rmse', rmse(ratings, pred)),
                             ('mae', mae(ratings, pred)),
                             ('hr', hr(ratings, pred))]
            if self.x_valid is not None and step == steps_per_epoch:
                valid_pred = self.predict(self.x_valid)
                update_values += [('val_rmse', rmse(self.y_valid, valid_pred)),
                                  ('val_mae', mae(self.y_valid, valid_pred)),
                                  ('val_hr', hr(self.y_valid, valid_pred))]
            pbar.update(step, values=update_values)
    y_pred = self.predict(self.x_test)
    print('rmse: {}, mae: {}, hr: {}'.format(rmse(self.y_test, y_pred),
                                             mae(self.y_test, y_pred),
                                             hr(self.y_test, y_pred)))
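# BatchGenerator is assumed rather than shown. A minimal sketch matching how it is
# used above: it exposes a `length` attribute and a `next()` generator that yields
# (features, labels) mini-batches, where features is an array of [user, item] pairs.
# The project's actual class may shuffle or batch differently.
import numpy as np

class BatchGenerator:
    """Yield shuffled (x, y) mini-batches from in-memory arrays."""

    def __init__(self, x, y, batch_size):
        self.x = np.asarray(x)
        self.y = np.asarray(y)
        self.batch_size = batch_size
        self.length = len(self.x)

    def next(self):
        indices = np.random.permutation(self.length)
        for start in range(0, self.length, self.batch_size):
            batch_idx = indices[start:start + self.batch_size]
            yield self.x[batch_idx], self.y[batch_idx]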
def train(self): """The main training loop for the model.""" for epoch in range(self.completed_epochs, self.epochs): pb_i = utils.Progbar(self.dataset_length) start = time.time() for step, x_batch in enumerate(self.dataset): if self.train_step(step, x_batch): pb_i.add(self.batch_size) self.save_audio(x_batch, epoch) if self.checkpoint_prefix and (epoch + 1) % 10 == 0: self.checkpoint.save(file_prefix=self.checkpoint_prefix) print('\nTime for epoch {} is {} minutes'.format( epoch + 1, (time.time() - start) / 60))
best_loss = np.Inf
last_epoch = 0

# summary
summary_writer = tf.summary.create_file_writer(
    join(output_dir, 'summary', 'train'))

# start training
start_time = time.time()

# train on GPU if available
with tf.device(device):
    for epoch in range(options.epochs):
        progbar = utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch + 1, options.epochs))
        for idx, (A, B) in enumerate(train_dataset):
            ## train discriminators
            # translate images to the opposite domain
            fake_B = gen_A2B.model.predict(A)
            fake_A = gen_B2A.model.predict(B)
            # train discriminators (original image = real / translated = fake)
            A_real_loss = disc_a.model.train_on_batch(A, valid)
            A_fake_loss = disc_a.model.train_on_batch(fake_A, fake)
            A_loss = 0.5 * tf.math.add(A_real_loss, A_fake_loss)
def generate_from_seq(self, save_name=None, prt=False, nSeqPrt=10, nsequence=10,
                      seed=None, save_path=None, progress=False):
    global _gen_save
    global _args
    global _dir
    if save_path is None:
        save_path = f'{_gen_save}{_dir}{_args.name}_E{save_name}.str'
    else:
        save_path = f'{save_path}{_args.name}_{save_name}.str'
    print(30 * "=")
    full_prediction = []
    seeded = "NOISE"
    if seed is None:
        predicted = list(np.random.randint(0, self.svocab, self.seqlen))
    else:
        predicted = [neg_denormalize(num, self.svocab) for num in seed]
        seeded = "SEED"
    seed = list(map(round, predicted))
    seed = list(map(int, seed))
    seed = self.translate_sequence(seed)
    prog = None
    if progress:
        prog = utils.Progbar(nsequence)
    else:
        print("/" * 30)
        print(f"[{seeded}]:\n{seed}")
        print("\\" * 30)
    for i in range(nsequence):
        x = np.reshape(predicted, (int(len(predicted) / self.seqlen), self.seqlen, 1))
        x = neg_normalize(x, self.svocab)
        prediction = self.G.predict(x, verbose=0)
        if not progress:
            print("\n" + "-" * 20 + f"\nRound: {i + 1}")
        generated_sequence = neg_denormalize(prediction[0, :, 0], self.svocab).tolist()
        tokens = list(map(round, generated_sequence))
        tokens = list(map(int, tokens))
        generated = ""
        # take only the newly generated tokens
        newtoks = tokens[-self.stride:]
        if _args.int:
            for t in newtoks:
                tok = t if t <= (self.svocab - 1) else (self.svocab - 1)
                generated += f"{tok} "
        else:
            for t in newtoks:
                # safer: clamp out-of-vocabulary indices to the last token
                tok = t if t <= (self.svocab - 1) else (self.svocab - 1)
                generated += f"{self.toks2txts[tok]} "
        if prt:
            if i < nSeqPrt:
                print(generated, end=" ")
            else:
                prt = False
                print("...")
        if progress:
            prog.update(i + 1)
        # feed the full generated sequence back as the generator's next input
        predicted = tokens
        full_prediction.append(generated)
    if save_name is not None:
        with open(save_path, 'w') as genFile:
            genFile.write(" ".join(full_prediction))
    print()
    print(30 * "=")
def generate_from_noise(self, save_name=None, prt=False, nSeqPrt=10, nsequence=10,
                        save_path=None, progress=False):
    global _gen_save
    global _args
    global _dir
    if save_path is None:
        save_path = f'{_gen_save}{_dir}{_args.name}_E{save_name}.str'
    else:
        save_path = f'{save_path}{_args.name}_{save_name}.str'
    print(30 * "=")
    full_prediction = []
    predicted = list(np.random.randint(0, self.svocab, self.seqlen))
    prog = None
    if progress:
        prog = utils.Progbar(nsequence)
    for i in range(nsequence):
        x = np.reshape(predicted, (int(len(predicted) / self.seqlen), self.seqlen, 1))
        # x = x / float(self.svocab)  # should use the normalize function for consistency
        x = neg_normalize(x, self.svocab)
        prediction = self.G.predict(x, verbose=0)
        # generated_sequence = (prediction[0, :, 0] * self.svocab).tolist()
        generated_sequence = neg_denormalize(prediction[0, :, 0], self.svocab).tolist()
        tokens = list(map(round, generated_sequence))
        tokens = list(map(int, tokens))
        # generated = " ".join(self.toks2txts[tok] for tok in tokens)
        generated = ""
        if _args.int:
            for t in tokens:
                tok = t if t <= (self.svocab - 1) else (self.svocab - 1)
                generated += f"{tok} "
        else:
            for t in tokens:
                # safer: clamp out-of-vocabulary indices to the last token
                tok = t if t <= (self.svocab - 1) else (self.svocab - 1)
                generated += f"{self.toks2txts[tok]} "
        if prt:
            if i < nSeqPrt:
                print(generated, end=" ")
            else:
                prt = False
                print("...")
        # predicted += tokens
        predicted = list(np.random.randint(0, self.svocab, self.seqlen))
        full_prediction.append(generated)
        if progress:
            prog.update(i + 1)
    if save_name is not None:
        with open(save_path, 'w') as genFile:
            genFile.write(" ".join(full_prediction))
    print()
    print(30 * "=")
start_time = time.time()
best_loss = np.Inf
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')

# vis = True

for epoch_num in range(num_epochs):
    progbar = utils.Progbar(epoch_length)  # use the Keras progress bar
    print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
    while True:
        # try:
        # report the mean number of overlapping bboxes
        if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
            mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
            rpn_accuracy_rpn_monitor = []
            print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                mean_overlapping_bboxes, epoch_length))
            if mean_overlapping_bboxes == 0:
                print('RPN is not producing bounding boxes that overlap the ground truth boxes. '
                      'Check RPN settings or keep training.')

        # fetch X, Y and the image data from the data generator
        X, Y, img_data = next(data_gen_train)
def train_batch_by_batch(exp, batch_size, start_epoch, end_epoch,
                         save_every_n_epochs, test_every_n_epochs,
                         tbw, file_stdout_logger, file_logger,
                         run_args, early_stopping_eps, run_metadata=None):
    max_n_batch_per_epoch = 1000  # limits each epoch to batch_size * 1000 examples, which seems OK
    n_batch_per_epoch_train = min(
        max_n_batch_per_epoch,
        int(np.ceil(exp.get_n_train() / float(batch_size))))
    print(exp.get_n_train())

    max_printed_examples = 8
    print_every = 100000  # set this to be really high at first
    print_atleast_every = 100
    print_atmost = max(1, max_printed_examples / batch_size)

    # let's say we want 1 new result image every minute
    print_every_n_seconds = run_args.print_every

    # save a new model every 20 minutes; seems reasonable
    auto_save_every_n_epochs = 100
    auto_test_every_n_epochs = 100
    min_save_every_n_epochs = 10
    save_every_n_seconds = 20 * 60

    # print_n_batches_per_epoch = max(1, max_printed_examples / batch_size)  # we don't want more than 64 images printed per epoch
    # print_every = int(np.floor(
    #     ((n_batch_per_epoch_train - 1) / print_n_batches_per_epoch) / 2)) * 2 + 1  # make this odd so we can print augmentations

    start_time = time.time()

    # do this once here to flush any setup information to the file
    exp._reopen_log_file()

    for e in range(start_epoch, end_epoch + 1):
        file_stdout_logger.debug('{} training epoch {}/{}'.format(
            exp.model_name, e, end_epoch + 1))

        if e < end_epoch:
            exp.update_epoch_count(e)

        pb = utils.Progbar(n_batch_per_epoch_train)
        printed_count = 0
        for bi in range(n_batch_per_epoch_train):
            joint_loss, joint_loss_names = exp.train_on_batch()
            batch_count = e * n_batch_per_epoch_train + bi

            # only log to file on the last batch of training, otherwise we'll have too many messages
            training_logger = None
            if bi == n_batch_per_epoch_train - 1:
                training_logger = file_logger

            log_losses(pb, tbw, training_logger, joint_loss_names,
                       joint_loss, batch_count)

            # time how long it takes to do 5 batches
            if batch_count - start_epoch * n_batch_per_epoch_train == 5:
                s_per_batch = (time.time() - start_time) / 5.
                # make this an odd integer in case our experiment is doing
                # different things on alternating batches, so that we can visualize both
                print_every = int(
                    np.ceil(print_every_n_seconds / s_per_batch / 2.)) * 2 + 1

                auto_save_every_n_epochs = save_every_n_seconds / s_per_batch / n_batch_per_epoch_train
                if auto_save_every_n_epochs > 50:
                    # if the interval is big enough, adjust to multiples of 50
                    auto_save_every_n_epochs = max(
                        1, int(np.floor(save_every_n_epochs / 50))) * 50
                else:
                    auto_save_every_n_epochs = max(
                        1, int(np.floor(save_every_n_epochs / min_save_every_n_epochs))) \
                        * min_save_every_n_epochs

            if (batch_count % print_every == 0 or batch_count % print_atleast_every == 0) \
                    and printed_count < print_atmost:
                results_im = exp.make_train_results_im()
                cv2.imwrite(
                    os.path.join(exp.figures_dir,
                                 'train_epoch{}_batch{}.jpg'.format(e, bi)),
                    results_im)
                printed_count += 1

            if batch_count >= 10:
                file_stdout_logger.debug('Printing every {} batches, '
                                         'saving every {} and {} epochs, '
                                         'testing every {}'.format(
                                             print_every,
                                             auto_save_every_n_epochs,
                                             save_every_n_epochs,
                                             test_every_n_epochs,
                                         ))

        if (e > 0 and e % auto_save_every_n_epochs == 0 and e > start_epoch) \
                or e == end_epoch \
                or (e > 0 and e % save_every_n_epochs == 0 and e > start_epoch):
            exp.save_models(e, iter_count=e * n_batch_per_epoch_train)

            # TODO: figure out how to flush the log file without closing it
            file_stdout_logger.handlers[0].close()  # flush our .log file
            lfh = logging.FileHandler(
                filename=os.path.join(exp.exp_dir, 'training.log'))
            file_stdout_logger.handlers[0] = lfh

            if exp.logger is not None:
                exp.logger.handlers[0].close()
                exp._reopen_log_file()

            # save to disk and then open a new file so that we can read into tensorboard more easily
            tbw.close()
            tbw.reopen()

        if e % auto_test_every_n_epochs == 0 or e % test_every_n_epochs == 0:
            file_stdout_logger.debug('{} testing'.format(exp.model_name))
            pbt = utils.Progbar(1)
            test_loss, test_loss_names = exp.test_batches()
            log_losses(pbt, None, file_logger, test_loss_names, test_loss,
                       e * n_batch_per_epoch_train + bi)
            results_im = exp.make_test_results_im()
            if results_im is not None:
                cv2.imwrite(
                    os.path.join(exp.figures_dir,
                                 'test_epoch{}_batch{}.jpg'.format(e, bi)),
                    results_im)
            log_losses(None, tbw, file_logger, test_loss_names, test_loss,
                       e * n_batch_per_epoch_train + bi)
        print('\n\n')
def main():
    # Set allowed GPUs.
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

    # Build and load MODELS from checkpoints
    for model_name in MODELS:
        MODELS[model_name]['loaded'] = True
        if 'data' in MODELS[model_name] and MODELS[model_name]['data']:
            continue
        try:
            checkpoint = tf.train.Checkpoint(generator=MODELS[model_name]['generator'])
            checkpoint.restore(MODELS[model_name]['checkpoint_path']).expect_partial()
            print('Loaded ', model_name)
        except:
            print(model_name, ' not found')
            MODELS[model_name]['loaded'] = False

    maestro = maestro_dataset.get_maestro_waveform_dataset(MAESTRO_PATH)

    magnitude_stastics = []
    phase_stastics = []
    for frame_length, frame_step in zip(FFT_FRAME_LENGTHS, FFT_FRAME_STEPS):
        _, magnitude_stastic, phase_stastic =\
            maestro_dataset.get_maestro_magnitude_phase_dataset(
                MAESTRO_PATH, frame_length, frame_step, LOG_MAGNITUDE,
                INSTANTANEOUS_FREQUENCY
            )
        magnitude_stastics.append(magnitude_stastic)
        phase_stastics.append(phase_stastic)

    maestro = maestro[np.random.randint(
        low=0, high=len(maestro), size=GENERATION_LENGTH * N_GENERATIONS
    )]

    z_gen = tf.random.uniform((N_GENERATIONS, GENERATION_LENGTH, Z_DIM), -1, 1, tf.float32)

    pb_i = utils.Progbar(N_GENERATIONS)
    for i in range(N_GENERATIONS):
        z_in = tf.reshape(z_gen[i], (GENERATION_LENGTH, Z_DIM))
        for model_name in MODELS:
            if not MODELS[model_name]['loaded']:
                continue

            # If the model is a generator then produce a random generation,
            # otherwise take the current data point.
            if 'data' in MODELS[model_name] and MODELS[model_name]['data']:
                generation = maestro[i:i + GENERATION_LENGTH]
            else:
                generation = MODELS[model_name]['generator'](z_in)
            generation = np.squeeze(generation)

            if MODELS[model_name]['preprocess']['unnormalize_magnitude']:
                fft_config = MODELS[model_name]['fft_config']
                generation = maestro_dataset.un_normalize(
                    generation, *magnitude_stastics[fft_config]
                )
            elif MODELS[model_name]['preprocess']['unnormalize_spectogram']:
                fft_config = MODELS[model_name]['fft_config']
                generation = maestro_dataset.un_normalize_spectogram(
                    generation, magnitude_stastics[fft_config], phase_stastics[fft_config]
                )

            # Apply pre-defined transform to waveform.
            generation = np.squeeze(generation)
            waveform = MODELS[model_name]['generate_fn'](generation)

            # Clip waveform to desired length and save
            waveform = waveform[:, 0:WAVEFORM_LENGTH]
            MODELS[model_name]['waveform'].append(waveform)
        pb_i.add(1)

    for model_name in MODELS:
        if not MODELS[model_name]['loaded']:
            continue

        path = os.path.join(RESULTS_PATH, model_name)
        mkdir(path)
        for i, generation in enumerate(MODELS[model_name]['waveform']):
            generation = np.pad(generation, [[0, 0], [0, SILENCE_PADDING]])
            if 'clip_beginning' in MODELS[model_name]:
                generation = generation[:, MODELS[model_name]['clip_beginning']:]
            wav = np.reshape(generation, (-1))
            sf.write(os.path.join(path, model_name + '_{}.wav'.format(i)), wav, SAMPLING_RATE)
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

train_data = MAMLDataLoader(args.train_data_dir, args.batch_size)
val_data = MAMLDataLoader(args.val_data_dir, args.val_batch_size)

inner_optimizer = optimizers.Adam(args.inner_lr)
outer_optimizer = optimizers.Adam(args.outer_lr)

maml = MAML(args.input_shape, args.n_way)

# Fewer validation steps are enough; there is no need to run the full set every time.
val_data.steps = 10

for e in range(args.epochs):
    train_progbar = utils.Progbar(train_data.steps)
    val_progbar = utils.Progbar(val_data.steps)
    print('\nEpoch {}/{}'.format(e + 1, args.epochs))

    train_meta_loss = []
    train_meta_acc = []
    val_meta_loss = []
    val_meta_acc = []

    for i in range(train_data.steps):
        batch_train_loss, acc = maml.train_on_batch(
            train_data.get_one_batch(),
            inner_optimizer,
            inner_step=1,
            outer_optimizer=outer_optimizer)
def main(fft_window_size, fft_window_step):
    """Generates the audio and PESQ vs. SNR graphs for a given STFT setup.

    Saves the graphs and generated audio files to disk.

    Args:
        fft_window_size: The FFT window size.
        fft_window_step: The FFT window step.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    print(fft_window_size, ' ', fft_window_step, ' ', (fft_window_size // 2))

    origonal_audio, _ = sf.read(WAVEFORM_PATH)
    origonal_audio = origonal_audio.astype(np.float32)

    for representation in REPRESENTATIONS:
        REPRESENTATIONS[representation]['perceptual_errors'] = []
        REPRESENTATIONS[representation]['waveforms'] = []

    for snr in SNRS:
        pb_i = utils.Progbar(len(REPRESENTATIONS) * N_REPEATS)
        print('SNR: ', snr)
        for representation in REPRESENTATIONS:
            all_perceptual_errors = []
            for _ in range(N_REPEATS):
                perceptual_errors, audio_hats = process_representation_at_snr(
                    representation, origonal_audio, snr, fft_window_size, fft_window_step)
                all_perceptual_errors.append(perceptual_errors)
                pb_i.add(1)
            print(' ', representation, ' -> ', np.mean(all_perceptual_errors, 0))
            REPRESENTATIONS[representation]['perceptual_errors'].append(
                np.mean(all_perceptual_errors, 0))
            REPRESENTATIONS[representation]['waveforms'].append(audio_hats)

    # Plot the graph
    for representation in REPRESENTATIONS:
        perceptual_errors = REPRESENTATIONS[representation]['perceptual_errors']
        perceptual_errors = np.array(perceptual_errors)
        plot = plt.plot(SNRS, perceptual_errors[:, 0], label=representation)
        for i in range(perceptual_errors.shape[-1] - 1):
            plt.plot(SNRS, perceptual_errors[:, i + 1],
                     color=plot[0].get_color(), linestyle=LINE_STYLES[i])

    plt.xlabel('SNR')
    plt.ylabel('PESQ')
    plt.legend()
    file_name = 'pesq_vs_snr__{}ws_{}s'.format(fft_window_size, fft_window_step)
    plt.savefig(os.path.join(RESULTS_PATH, file_name), bbox_inches='tight', dpi=920)
    plt.clf()

    # Save the audio files
    setup = 'audio_{}ws_{}s'.format(fft_window_size, fft_window_step)
    base_audio_dir = os.path.join(RESULTS_PATH, setup)
    for representation in REPRESENTATIONS:
        audio_dir = os.path.join(base_audio_dir, representation)
        os.makedirs(audio_dir, exist_ok=True)
        for i, audio in enumerate(REPRESENTATIONS[representation]['waveforms']):
            for j, wav in enumerate(audio):
                file_path = os.path.join(
                    audio_dir, '{}_{}db_{}.wav'.format(representation, SNRS[i], j))
                sf.write(file_path, wav, SAMPLE_RATE)
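# process_representation_at_snr is project-specific and not shown. The generic step it
# presumably builds on, corrupting a signal with white noise at a target SNR in dB, can
# be sketched as follows; the helper name is illustrative, not from the project.
import numpy as np

def add_noise_at_snr(signal, snr_db):
    """Return signal plus white Gaussian noise scaled to the requested SNR (in dB)."""
    signal = np.asarray(signal, dtype=np.float32)
    signal_power = np.mean(signal ** 2)
    # SNR_dB = 10 * log10(P_signal / P_noise)  =>  P_noise = P_signal / 10^(SNR_dB / 10)
    noise_power = signal_power / (10.0 ** (snr_db / 10.0))
    noise = np.random.normal(0.0, np.sqrt(noise_power), size=signal.shape).astype(np.float32)
    return signal + noise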
def main():
    global rpn_optimizer, classifier_optimizer

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    img_input = Input(shape=(None, None, 3))
    roi_input = Input(shape=(None, 4))

    share_layer = ResNet50(img_input)
    rpn = frcnn.rpn(share_layer, num_anchors=len(cfg.anchor_box_ratios) * len(cfg.anchor_box_scales))
    classifier = frcnn.classifier(share_layer, roi_input, cfg.num_rois, nb_classes=cfg.num_classes)

    model_rpn = models.Model(img_input, rpn)
    model_classifier = models.Model([img_input, roi_input], classifier)
    model_all = models.Model([img_input, roi_input], rpn + classifier)

    # Generate the 38x38x9 anchor (prior) boxes
    anchors = get_anchors(cfg.share_layer_shape, cfg.input_shape)
    # Match ground-truth boxes against the anchors
    box_parse = BoundingBox(anchors, max_threshold=cfg.rpn_max_overlap, min_threshold=cfg.rpn_min_overlap)

    reader = DataReader(cfg.annotation_path, box_parse, cfg.batch_size)
    train_data = reader.generate()
    train_step = len(reader.train_lines) // cfg.batch_size

    # loss bookkeeping
    losses = np.zeros((train_step, 4))
    best_loss = np.Inf

    rpn_lr = CosineAnnealSchedule(cfg.epoch, train_step, cfg.rpn_lr, cfg.rpn_lr * 1e-4)
    cls_lr = CosineAnnealSchedule(cfg.epoch, train_step, cfg.cls_lr, cfg.cls_lr * 1e-4)
    rpn_optimizer = optimizers.Adam(rpn_lr)
    classifier_optimizer = optimizers.Adam(cls_lr)

    for e in range(cfg.epoch):
        invalid_data = 0  # count samples that produce no valid RoIs
        print("Learning rate adjustment, rpn_lr: {}, cls_lr: {}".format(
            rpn_optimizer._decayed_lr("float32").numpy(),
            classifier_optimizer._decayed_lr("float32").numpy()))

        # Keras progress bar for the training loop
        progbar = utils.Progbar(train_step)
        print('Epoch {}/{}'.format(e + 1, cfg.epoch))

        for i in range(train_step):
            # Read a batch of data
            image, rpn_y, bbox = next(train_data)

            loss_rpn = rpn_train(model_rpn, image, rpn_y)
            predict_rpn = model_rpn(image)

            img_h, img_w = np.shape(image[0])[:2]
            # Compute the RPN output shape for this input image
            share_layer = predict_rpn[2]
            rpn_height, rpn_width = share_layer.shape[1:-1]

            # Decode the RPN predictions into proposal boxes
            anchors = get_anchors(share_layer_shape=(rpn_width, rpn_height),
                                  image_shape=(img_w, img_h))
            predict_boxes = box_parse.detection_out(predict_rpn, anchors, confidence_threshold=0)

            x_roi, y_class_label, y_classifier, valid_roi = get_classifier_train_data(
                predict_boxes, bbox, img_w, img_h, cfg.batch_size, cfg.num_classes)

            invalid_data += (cfg.batch_size - len(valid_roi))
            if len(x_roi) == 0:
                progbar.update(i + 1, [('rpn_cls', np.mean(losses[:i + 1, 0])),
                                       ('rpn_regr', np.mean(losses[:i + 1, 1])),
                                       ('detector_cls', np.mean(losses[:i + 1, 2])),
                                       ('detector_regr', np.mean(losses[:i + 1, 3]))])
                continue

            loss_class = classifier_train(model_classifier,
                                          [image[valid_roi], x_roi],
                                          [y_class_label, y_classifier])

            losses[i, 0] = loss_rpn[0].numpy()
            losses[i, 1] = loss_rpn[1].numpy()
            losses[i, 2] = loss_class[0].numpy()
            losses[i, 3] = loss_class[1].numpy()

            # Report training progress
            progbar.update(i + 1, [('rpn_cls', np.mean(losses[:i + 1, 0])),
                                   ('rpn_regr', np.mean(losses[:i + 1, 1])),
                                   ('detector_cls', np.mean(losses[:i + 1, 2])),
                                   ('detector_regr', np.mean(losses[:i + 1, 3]))])
        # Once the epoch has finished, report the epoch-level metrics
        else:
            loss_rpn_cls = np.mean(losses[:, 0])
            loss_rpn_regr = np.mean(losses[:, 1])
            loss_class_cls = np.mean(losses[:, 2])
            loss_class_regr = np.mean(losses[:, 3])
            curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr

            print('\nLoss RPN classifier: {:.4f}'.format(loss_rpn_cls))
            print('Loss RPN regression: {:.4f}'.format(loss_rpn_regr))
            print('Loss Detector classifier: {:.4f}'.format(loss_class_cls))
            print('Loss Detector regression: {:.4f}'.format(loss_class_regr))
            print("{} images produced no valid RoIs.".format(invalid_data))
            print('The best loss is {:.4f}. The current loss is {:.4f}.'.format(best_loss, curr_loss))

            if curr_loss < best_loss:
                best_loss = curr_loss
                print('Saving weights.\n')
                model_all.save_weights("./model/frcnn_{:.4f}.h5".format(curr_loss))