class Agent(object):
    def __init__(self, param, logger):
        self.logger = logger
        logger.info("Start initializing Agent, mode is {}".format(param["mode"]))
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.param = param
        self.session = tf.Session(config=config)
        # Build the model graph and attach it to the session
        self.model = Model(self.session, self.param, self.logger)
        # Data generators for the training and validation sets
        self.data_manager_train = DataManager(self.param, shuffle=True)
        self.data_manager_valid = DataManager(self.param, shuffle=False, valid=True)
        # TensorBoard visualization
        self.tensorboard_manager = TensorboardManager(self.session, self.param, self.model, self.logger)
        # Loss function, optimizer, and training strategy
        self.trainer = Trainer(self.session, self.model, self.param, self.logger, self.tensorboard_manager)
        # Saves the session to a checkpoint or pb file
        self.saver = Saver(self.session, self.param, self.model.checkPoint_dir, self.logger)
        # Evaluates the performance of the trained model
        self.validator = Validator(self.session, self.model, self.logger)
        logger.info("Successfully initialized")

    def run(self):
        if not self.param["anew"] and self.param["mode"] != "testPb":
            self.saver.load_checkpoint()
        if self.param["mode"] == "train_segmentation":
            # Train the segmentation part of the model
            self.trainer.train_segmentation(self.data_manager_train, self.data_manager_valid, self.saver)
        elif self.param["mode"] == "train_decision":
            # Train the decision (classification) part of the model
            self.trainer.train_decision(self.data_manager_train, self.data_manager_valid, self.saver)
        elif self.param["mode"] == "visualization":
            # Evaluate the segmentation output
            self.validator.valid_segmentation(self.data_manager_train)
        elif self.param["mode"] == "testing":
            # Evaluate the classification output
            self.validator.valid_decision(self.data_manager_train)
            self.validator.valid_decision(self.data_manager_valid)
        elif self.param["mode"] == "savePb":
            # Export the model to a pb file
            self.saver.save_pb()
        elif self.param["mode"] == "testPb":
            # Test the exported pb model
            self.pb_tester = PbTester(self.param, self.logger)
            self.pb_tester.test_segmentation()
            self.pb_tester.test_decision()
            # self.pb_tester.view_timeline()
        elif self.param["mode"] == "view_dataset":
            # tensorboard --logdir=E:/CODES/TensorFlow_PZT/tensorboard --samples_per_plugin=images=1000
            self.tensorboard_manager.vis_dataset(self.data_manager_train)
            self.tensorboard_manager.vis_mask_out(self.data_manager_train)
        self.session.close()
        self.tensorboard_manager.close()
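# --- Hypothetical driver for the Agent class above (not part of the original code). ---
# The "mode" and "anew" keys are the ones read by Agent.__init__/Agent.run; the logger
# setup and any further keys in `param` are assumptions that depend on the concrete
# Model, DataManager, and Trainer implementations. A minimal sketch:
import logging


def build_logger(name="agent"):
    # Plain stdlib logger; the original project may use its own logging helper.
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    return logger


if __name__ == "__main__":
    param = {
        "mode": "train_segmentation",  # any of the modes handled in Agent.run()
        "anew": False,                 # False -> resume from an existing checkpoint
        # remaining keys (data paths, batch size, ...) are consumed by Model/DataManager
    }
    Agent(param, build_logger()).run()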
def main():
    args = parse_args()
    saver = Saver(args.model_dir)
    model = SELUNet()
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        if args.use_cuda:
            model = model.cuda()
        model, _, params_dict = saver.load_checkpoint(
            model, file_name=args.model_name)
    model.eval()
    filespan = args.filespan

    # Search for the optimal post-processing hyperparameters on the validation set
    idr_params, _, _ = get_optimal_params(
        model,
        args.valspeechfolder,
        args.valpeaksfolder,
        args.window,
        args.stride,
        filespan,
        numfiles=60,
        use_cuda=False,
        thlist=[0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75],
        spblist=[25],
        hctlist=[10, 15, 20, 25, 30])
    thr = idr_params['thr']
    spb = idr_params['spb']
    hct = idr_params['hct']

    with open('test_idr.txt', 'w') as f:
        print(
            'Optimal Hyperparameters\nThreshold: {} Samples Per Bin: {} Histogram Count Threshold: {}'
            .format(thr, spb, hct),
            file=f,
            flush=True)

    numfiles = len(glob(os.path.join(args.speechfolder, '*.npy')))
    print('Models and Files Loaded')

    metrics_list = []
    for i in range(0, numfiles, filespan):
        if (i + filespan) > numfiles:
            break

        speech_windowed_data, peak_distance, peak_indicator, indices, actual_gci_locations = create_dataset(
            args.speechfolder, args.peaksfolder, args.window, args.stride,
            slice(i, i + filespan))

        input = to_variable(
            th.from_numpy(
                np.expand_dims(speech_windowed_data, 1).astype(np.float32)),
            args.use_cuda, True)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            prediction = model(input)

        predicted_peak_indicator = F.sigmoid(prediction[:, 1]).data.numpy()
        predicted_peak_distance = (prediction[:, 0]).data.numpy().astype(np.int32)

        # Keep only windows whose peak-presence probability exceeds the threshold
        predicted_peak_indicator_indices = predicted_peak_indicator > args.threshold
        predicted_peak_indicator = predicted_peak_indicator[
            predicted_peak_indicator_indices].ravel()
        predicted_peak_distance = predicted_peak_distance[
            predicted_peak_indicator_indices].ravel()
        indices = indices[predicted_peak_indicator_indices]

        assert len(indices) == len(predicted_peak_distance)
        assert len(predicted_peak_distance) == len(predicted_peak_indicator)

        # Discard predicted distances that fall outside the analysis window
        positive_distance_indices = predicted_peak_distance < args.window
        positive_peak_distances = predicted_peak_distance[positive_distance_indices]
        positive_predicted_peak_indicator = predicted_peak_indicator[positive_distance_indices]

        gci_locations = [
            indices[i, d] for i, d in enumerate(positive_peak_distances)
        ]

        locations_true = np.nonzero(actual_gci_locations)[0]
        xaxes = np.zeros(len(actual_gci_locations))
        xaxes[locations_true] = 1

        ground_truth = np.row_stack(
            (np.arange(len(actual_gci_locations)), xaxes))
        predicted_truth = np.row_stack(
            (gci_locations, positive_predicted_peak_indicator))

        gx = ground_truth[0, :]
        gy = ground_truth[1, :]
        px = predicted_truth[0, :]
        py = predicted_truth[1, :]

        fs = 16000
        # Cluster the per-window predictions into GCI estimates
        gci = np.array(
            cluster(
                px,
                py,
                threshold=thr,
                samples_per_bin=spb,
                histogram_count_threshold=hct))
        predicted_gci_time = gci / fs
        target_gci_time = np.nonzero(gy)[0] / fs

        gci = np.round(gci).astype(np.int64)
        gcilocs = np.zeros_like(gx)
        gcilocs[gci] = 1

        metric = corrected_naylor_metrics(target_gci_time, predicted_gci_time)
        print(metric)
        metrics_list.append(metric)

    idr = np.mean([
        v for m in metrics_list for k, v in m.items()
        if k == 'identification_rate'
    ])
    mr = np.mean(
        [v for m in metrics_list for k, v in m.items() if k == 'miss_rate'])
    far = np.mean([
        v for m in metrics_list for k, v in m.items()
        if k == 'false_alarm_rate'
    ])
    se = np.mean([
        v for m in metrics_list for k, v in m.items()
        if k == 'identification_accuracy'
    ])

    print('IDR: {:.5f} MR: {:.5f} FAR: {:.5f} IDA: {:.5f}'.format(
        idr, mr, far, se))
    with open('test_idr.txt', 'a') as f:
        f.write('IDR: {:.5f} MR: {:.5f} FAR: {:.5f} IDA: {:.5f}\n'.format(
            idr, mr, far, se))
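# --- Hypothetical parse_args() for the evaluation script above (not shown in the original). ---
# The flag names mirror the attributes accessed in main() (model_dir, model_name,
# speechfolder, peaksfolder, valspeechfolder, valpeaksfolder, window, stride, filespan,
# threshold, use_cuda); types and required-ness are illustrative assumptions.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Evaluate the GCI detector (hypothetical CLI)')
    parser.add_argument('--model_dir', required=True, help='checkpoint directory')
    parser.add_argument('--model_name', required=True, help='checkpoint file name')
    parser.add_argument('--speechfolder', required=True, help='test speech .npy folder')
    parser.add_argument('--peaksfolder', required=True, help='test GCI/peaks folder')
    parser.add_argument('--valspeechfolder', required=True, help='validation speech folder')
    parser.add_argument('--valpeaksfolder', required=True, help='validation peaks folder')
    parser.add_argument('--window', type=int, required=True, help='analysis window length (samples)')
    parser.add_argument('--stride', type=int, required=True, help='window hop (samples)')
    parser.add_argument('--filespan', type=int, required=True, help='files evaluated per batch')
    parser.add_argument('--threshold', type=float, required=True, help='peak-indicator probability threshold')
    parser.add_argument('--use_cuda', action='store_true', help='run the model on GPU')
    return parser.parse_args()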
def main():
    args = parse_args()

    speech_windowed_data, peak_distance, peak_indicator, indices, actual_gci_locations = create_dataset(
        args.speechfolder, args.peaksfolder, args.window, args.stride, 10)

    saver = Saver(args.model_dir)
    model = SELUWeightNet()  # instantiate the network before loading weights
    model, _, params_dict = saver.load_checkpoint(
        model, file_name=args.model_name)
    model.eval()

    input = to_variable(
        th.from_numpy(
            np.expand_dims(speech_windowed_data, 1).astype(np.float32)),
        args.use_cuda, True)

    with warnings.catch_warnings():
        if args.use_cuda:
            model = model.cuda()
        warnings.simplefilter('ignore')
        prediction = model(input)

    predicted_peak_indicator = F.sigmoid(prediction[:, 1]).data.numpy()
    predicted_peak_distance = (prediction[:, 0]).data.numpy().astype(np.int32)

    predicted_peak_indicator_indices = predicted_peak_indicator > args.threshold
    predicted_peak_indicator = predicted_peak_indicator[
        predicted_peak_indicator_indices].ravel()
    predicted_peak_distance = predicted_peak_distance[
        predicted_peak_indicator_indices].ravel()
    indices = indices[predicted_peak_indicator_indices]

    assert len(indices) == len(predicted_peak_distance)
    assert len(predicted_peak_distance) == len(predicted_peak_indicator)

    positive_distance_indices = predicted_peak_distance < args.window
    positive_peak_distances = predicted_peak_distance[positive_distance_indices]
    positive_predicted_peak_indicator = predicted_peak_indicator[positive_distance_indices]

    print('Neg Peaks: {} Pos Peaks: {}'.format(
        len(predicted_peak_distance) - len(positive_peak_distances),
        len(positive_peak_distances)))

    gci_locations = [
        indices[i, d] for i, d in enumerate(positive_peak_distances)
    ]

    locations_true = np.nonzero(actual_gci_locations)[0]
    xaxes = np.zeros(len(actual_gci_locations))
    xaxes[locations_true] = 1

    if __debug__:
        # Dump ground truth and predictions for offline inspection
        ground_truth = np.row_stack(
            (np.arange(len(actual_gci_locations)), xaxes))
        predicted_truth = np.row_stack(
            (gci_locations, positive_predicted_peak_indicator))
        os.makedirs(args.prediction_dir, exist_ok=True)
        np.save(os.path.join(args.prediction_dir, 'ground_truth'), ground_truth)
        np.save(os.path.join(args.prediction_dir, 'predicted'), predicted_truth)

    plt.scatter(
        gci_locations,
        positive_predicted_peak_indicator,
        color='b',
        label='Predicted GCI')
    plt.plot(
        np.arange(len(actual_gci_locations)),
        xaxes,
        color='r',
        label='Actual GCI')
    plt.legend()
    plt.show()
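# --- Hypothetical sketch of the to_variable() helper used by both scripts above. ---
# The real implementation lives elsewhere in the repo. This sketch assumes the pre-0.4
# PyTorch Variable API that the surrounding code relies on (F.sigmoid, .data.numpy()),
# and assumes the third positional argument marks inference-only usage.
import torch as th
from torch.autograd import Variable


def to_variable(tensor, use_cuda=False, inference_only=False):
    # Move the tensor to the GPU when requested, then wrap it in a Variable.
    if use_cuda and th.cuda.is_available():
        tensor = tensor.cuda()
    # 'volatile' disabled gradient tracking in pre-0.4 PyTorch; assumed meaning of the
    # third argument, which the evaluation scripts pass as True.
    return Variable(tensor, volatile=inference_only)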
class Trainer:
    def __init__(self):
        self._setup_logger()
        torch.backends.cudnn.deterministic = True
        self.device = torch.device(
            "cuda:%d" % (args.gpu) if torch.cuda.is_available() else "cpu")
        torch.cuda.set_device(args.gpu)
        self.dataset = Cifar10()
        self.saver = Saver(self.save_dir)
        if args.arch == 'auto':
            self.Net = lambda: AutoNetwork(args.init_channels,
                                           self.dataset.num_classes,
                                           args.layers, nn.CrossEntropyLoss())
        elif args.arch == 'VGG11':
            self.Net = lambda: VGG('VGG11', self.dataset.num_classes,
                                   nn.CrossEntropyLoss())
        elif args.arch == 'mobilenet':
            self.Net = lambda: MobileNetV2(self.dataset.num_classes,
                                           nn.CrossEntropyLoss())
        elif args.arch == 'simple':
            self.Net = lambda: SimpleNetwork(self.dataset.num_classes,
                                             nn.CrossEntropyLoss())
        else:
            raise Exception('Net not defined!')

    def _setup_logger(self):
        self.save_dir = os.path.join('./checkpoints', args.logdir)
        if not os.path.isdir(self.save_dir):
            os.makedirs(self.save_dir)
        log_path = os.path.join(
            self.save_dir,
            datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
        self.logger = L.create_logger(args.logdir, log_path)
        for arg in vars(args):
            self.logger.info("%-25s %-20s" % (arg, getattr(args, arg)))

    def test_beta(self, optimizee):
        self.logger.info('Testing beta')
        torch.manual_seed(2 * args.seed)
        optimizee.reset_model_parameters()
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizee.optimizer, float(args.max_epoch),
            eta_min=args.learning_rate_min)
        optimizee.optimizer.print_beta_greedy()
        for epoch in range(0, args.max_test_epoch):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            self.train_epoch(epoch, lr, optimizee, False)
            if (epoch + 1) % args.test_freq == 0:
                acc = self.eval(optimizee.model)

    def train(self):
        optimizee = Optimizee(self.Net)
        start_episode, start_epoch, best_test_acc = 0, 0, 0.
        if args.checkpoint != '':
            start_episode, start_epoch = self.saver.load_checkpoint(
                optimizee, args.checkpoint)

        # Hao: in my implementation, the optimizers for alpha and beta will
        # preserve their states across episodes
        for episode in range(start_episode, args.max_episodes):
            torch.manual_seed(args.seed + episode)
            optimizee.reset_model_parameters()
            if args.arch == 'auto':
                optimizee.reset_arch_parameters()
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizee.optimizer, float(args.max_epoch),
                eta_min=args.learning_rate_min, last_epoch=start_epoch - 1)

            for epoch in range(start_epoch, args.max_epoch):  # loop over the dataset multiple times
                scheduler.step()
                lr = scheduler.get_lr()[0]
                training_status = self.train_epoch(epoch, lr, optimizee)
                if (epoch + 1) % args.test_freq == 0:
                    acc = self.eval(optimizee.model)
                    # if accuracy is not above random, discard this episode and start a new one
                    if acc <= 0.11 or training_status == False:
                        self.logger.warning(
                            'training_status false or acc too low, break')
                        break
                    checkpoint_path = self.save_dir + '/episode_{}_epoch_{}_acc_{}'.format(
                        str(episode), str(epoch), str(acc))
                    self.saver.save_checkpoint(optimizee, epoch, episode,
                                               checkpoint_path)
        return optimizee

    def train_epoch(self, epoch, lr, optimizee, train_beta=True):
        if train_beta:
            # Anneal the beta temperature over the course of training
            optimizee.optimizer.beta_temp = np.interp(
                epoch + 1, [0, args.max_epoch],
                [args.min_beta_temp, args.max_beta_temp])
            status = self._train_epoch(epoch, lr, optimizee)
            self.saver.write_beta_embedding()
            optimizee.optimizer.print_beta_greedy()
        else:
            optimizee.optimizer.beta_temp = args.max_beta_temp
            status = self._train_epoch_fix_beta(epoch, lr, optimizee)
        return status

    def eval(self, model):
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in self.dataset.test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        self.logger.info(
            'Accuracy of the network on the 10000 test images: %d %%' %
            (100 * correct / total))
        return float(correct) / total

    def _train_epoch(self, epoch, lr, optimizee):
        losses, next_step_losses, meta_update_losses = 0.0, 0.0, 0.0
        model_grad_norms, beta_grad_norms = 0., 0.
        for i, data in enumerate(self.dataset.train_loader, 0):
            x_train, y_train = data[0].to(self.device), data[1].to(self.device)
            val_data = next(iter(self.dataset.val_loader))
            x_val, y_val = val_data[0].to(self.device), val_data[1].to(self.device)

            # Derive \frac{\partial L}{\partial \theta}
            optimizee.model.zero_grad()
            loss = optimizee.model.forward_pass(x_train, y_train)
            loss.backward()
            model_grad_norms += nn.utils.clip_grad_norm_(
                optimizee.model.parameters(), args.model_grad_norm)
            param_updates = optimizee.optimizer.step()

            # do a differentiable step of update over optimizee.symbolic_model
            optimizee.differentiable_update(param_updates)

            # check the next-step loss after the update using the symbolic model
            # so that it is on the computational graph
            # TODO: use validation data? Not sure.
            next_step_loss = optimizee.symbolic_model.forward_pass(
                x_train, y_train)
            if math.isnan(next_step_loss.item()):
                self.logger.error('next step loss becomes NaN, break')
                raise Exception
            next_step_losses += next_step_loss.item()
            losses += loss.item()
            meta_update_losses += next_step_loss

            # do a non-differentiable update over optimizee.model if next_step_loss is smaller
            if next_step_loss.item() < loss.item():
                # This is still important for performance
                optimizee.update(param_updates)

            # foresee bptt_steps, then update beta
            if (i + 1) % args.bptt_step == 0:
                beta_grad_norms += optimizee.beta_step(meta_update_losses).item()
                # let the saver record the gradients for beta
                with torch.no_grad():
                    self.saver.add_beta_grads([
                        b.grad.abs().mean() for b in optimizee.optimizer.beta
                    ])
                meta_update_losses = 0
                optimizee.sync_symbolic_model()

            if args.arch_training and (i + 1) % args.update_alpha_step == 0:
                # TODO: make sure alpha step won't change weights
                optimizee.alpha_step(x_train, y_train, x_val, y_val, lr)
                optimizee.sync_symbolic_model(skip_weights=True)

            if (i + 1) % args.log_freq == 0:  # print every log_freq mini-batches
                beta_out = optimizee.optimizer.beta[-1].data.cpu().numpy()
                alpha_out = optimizee.model.alphas_normal[-1].data.cpu().numpy(
                ) if args.arch_training else ''
                self.logger.info(
                    '[%d, %5d] loss: %.3f/%.3f, next step loss: %.3f, beta[-1]/L2(g): %s/%s, alpha[-1]: %s'
                    % (epoch + 1, i + 1, losses / args.log_freq,
                       model_grad_norms / args.log_freq,
                       next_step_losses / args.log_freq, beta_out,
                       beta_grad_norms / args.log_freq, alpha_out))
                losses, next_step_losses, model_grad_norms, beta_grad_norms = 0., 0., 0., 0.

        self.saver.write_beta(optimizee.optimizer.beta)
        return True

    def _train_epoch_fix_beta(self, epoch, lr, optimizee):
        losses, model_grad_norms = 0.0, 0.0
        for i, data in enumerate(self.dataset.train_loader, 0):
            x_train, y_train = data[0].to(self.device), data[1].to(self.device)
            val_data = next(iter(self.dataset.val_loader))
            x_val, y_val = val_data[0].to(self.device), val_data[1].to(self.device)

            optimizee.model.zero_grad()
            loss = optimizee.model.forward_pass(x_train, y_train)
            loss.backward()
            model_grad_norms += nn.utils.clip_grad_norm_(
                optimizee.model.parameters(), args.model_grad_norm)
            param_updates = optimizee.optimizer.step(do_update=True)
            losses += loss.item()

            if args.use_darts_arch and args.train_alpha:
                optimizee.alpha_step(x_train, y_train, x_val, y_val, lr)

            if (i + 1) % args.log_freq == 0:  # print every log_freq mini-batches
                beta_out = optimizee.optimizer.beta[-1].data.cpu().numpy()
                alpha_out = optimizee.model.alphas_normal[-1].data.cpu().numpy(
                ) if args.arch_training else 'None'
                self.logger.info(
                    '[%d, %5d] loss: %.3f/%.3f, beta[-1]: %s, alpha[-1]: %s' %
                    (epoch + 1, i + 1, losses / args.log_freq,
                     model_grad_norms / args.log_freq, beta_out, alpha_out))
                losses, model_grad_norms = 0., 0.
        return True
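# --- Hypothetical entry point for the Trainer above (the original __main__ block is not shown). ---
# Trainer reads the module-level `args` namespace directly, so it only needs to be
# constructed and run; `args` is assumed to come from the project's own argument parsing.
if __name__ == '__main__':
    trainer = Trainer()
    # Meta-train the learned optimizer (beta) and, if enabled, the architecture (alpha).
    optimizee = trainer.train()
    # Optionally re-evaluate the learned update rule with beta held fixed.
    trainer.test_beta(optimizee)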