class Config(object):
    """Hyper-parameter and path configuration for ProtoNet few-shot training.

    All values are class attributes evaluated at import time, including the
    GPU-selection side effect and the ProtoNet instantiation below.
    """
    gpu_id = 2
    # must run before any CUDA context is created
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # train_epoch = 300
    train_epoch = 180
    learning_rate = 0.001
    num_workers = 8
    val_freq = 10  # validate every val_freq epochs

    # few-shot episode layout: num_way-way, num_shot-shot
    num_way = 5
    num_shot = 1
    batch_size = 64

    episode_size = 15
    test_episode = 600

    # ProtoNet embedding widths (hidden / output channels)
    hid_dim = 64
    z_dim = 64
    proto_net = ProtoNet(hid_dim=hid_dim, z_dim=z_dim)

    model_name = "{}_{}_{}_{}_{}".format(gpu_id, train_epoch, batch_size, hid_dim, z_dim)

    # dataset root: primary Linux mount, fallback mount, then Windows path
    if "Linux" in platform.platform():
        data_root = '/mnt/4T/Data/data/miniImagenet'
        if not os.path.isdir(data_root):
            data_root = '/media/ubuntu/4T/ALISURE/Data/miniImagenet'
    else:
        data_root = "F:\\data\\miniImagenet"

    # checkpoint path for the best ProtoNet weights (directory created on demand)
    pn_dir = Tools.new_dir(
        "../models_pn/mixup_fsl/{}_pn_{}way_{}shot.pkl".format(model_name, num_way, num_shot))

    Tools.print(model_name)
    pass
def train(self, epochs):
    """Run the full schedule: per epoch, adjust LR, train, checkpoint, test, log."""
    for epoch in range(0, epochs):
        Tools.print()
        Tools.print("Start Epoch {}".format(epoch))

        # set this epoch's learning rate before touching the data
        self._lr(epoch)
        train_loss, train_acc = self._train_epoch()
        self._save_checkpoint(self.model, self.root_ckpt_dir, epoch)
        test_loss, test_acc = self.test()

        current_lr = self.optimizer.param_groups[0]['lr']
        Tools.print(
            'Epoch: {:02d}, lr={:.4f}, Train: {:.4f}/{:.4f} Test: {:.4f}/{:.4f}'.format(
                epoch, current_lr, train_acc, train_loss, test_acc, test_loss))
def val(self, episode=0, is_print=True):
    """Evaluate on the train, val and test splits in turn.

    Each split's accuracy is optionally printed; the value returned is the
    accuracy on the validation split.
    """
    train_accuracy = self.val_train()
    if is_print:
        Tools.print("Train {} Accuracy: {}".format(episode, train_accuracy))

    val_accuracy = self.val_val()
    if is_print:
        Tools.print("Val {} Accuracy: {}".format(episode, val_accuracy))

    test_accuracy = self.val_test()
    if is_print:
        Tools.print("Test1 {} Accuracy: {}".format(episode, test_accuracy))

    # A second test split (val_test2) exists but is intentionally disabled here.
    return val_accuracy
def train(self):
    """TensorFlow training loop: resume from the stored epoch counter, run all
    remaining epochs, log/summarize every 50 steps, checkpoint and test per epoch."""
    self.load_model()
    self.test()
    video_generator = self.data.video_generator(is_train=True)
    # epoch counter lives in the graph so training resumes where it stopped
    start_epoch = self.sess.run(self.now_epoch)
    for epoch in range(start_epoch, self.max_epochs):
        total_acc = 0
        total_loss = 0
        for step in range(self.data.train_batch_num):
            train_images, train_labels = next(video_generator)
            # one fused step: loss/summary/metrics and the weight update (train_op)
            _loss, summary, acc, _, _pred, _learning_rate = self.sess.run(
                [self.loss, self.summary_op, self.accuracy, self.train_op,
                 self.pred, self.learning_rate],
                feed_dict={self.images_placeholder: train_images,
                           self.labels_placeholder: train_labels})
            total_acc += acc
            total_loss += _loss
            self.summary_writer.add_summary(
                summary, epoch * self.data.train_batch_num + step)
            if step % 50 == 0:
                # running averages are over steps seen so far this epoch
                Tools.print(
                    "{}/{} {}/{} acc={:.5f} avg_loss={:.5f} loss={:.5f} lr={:.5f}".format(
                        epoch, self.max_epochs, step, self.data.train_batch_num,
                        total_acc / (step + 1), total_loss / (step + 1),
                        _loss, _learning_rate))
                Tools.print("Train preds {}".format(_pred))
                Tools.print("Train label {}".format(train_labels))
                pass
            pass
        # bump the in-graph epoch counter (drives the learning-rate schedule)
        self.sess.run(self.add_epoch_op)  # for learning rate
        self.saver.save(self.sess, self.checkpoint_path, global_step=epoch)
        self.test()
        pass
    pass
def tran_image_net(
        size=16, split="val_new", algorithm="box", out_dir=None,
        root_dir="/media/ubuntu/ALISURE/data/DATASET/ILSVRC2015/Data/CLS-LOC"):
    """Downsample one ImageNet split with the chosen PIL resampling filter.

    Args:
        size: target image side length in pixels.
        split: sub-directory of ``root_dir`` to convert (e.g. "train", "val_new").
        algorithm: key into the resampling-filter table below.
        out_dir: output root; defaults to ``<root_dir>/<split>_<size>``.
        root_dir: ImageNet CLS-LOC data root.
    """
    # name -> PIL resampling filter
    _alg_dict = {'lanczos': Image.LANCZOS, 'nearest': Image.NEAREST,
                 'bilinear': Image.BILINEAR, 'bicubic': Image.BICUBIC,
                 'hamming': Image.HAMMING, 'box': Image.BOX}
    in_dir = os.path.join(root_dir, split)
    out_dir = os.path.join(root_dir, "{}_{}".format(split, size)) if out_dir is None else out_dir
    current_out_dir = Tools.new_dir(os.path.join(out_dir, algorithm))
    # train splits use the per-class walker, everything else the flat val walker
    if "train" in split:
        y_test = tran_train_img(in_dir=in_dir, out_dir=current_out_dir,
                                alg=_alg_dict[algorithm], size=size)
    else:
        y_test = tran_val_img(in_dir=in_dir, out_dir=current_out_dir,
                              alg=_alg_dict[algorithm], size=size)
        pass
    # label histogram; labels appear 1-based (count[i - 1] accumulates label i)
    # NOTE(review): the printout indexes count[i] while labelling it i, so the
    # printed ids look shifted by one relative to the stored labels — confirm.
    count = np.zeros([1001])
    for i in y_test:
        count[i - 1] += 1
    for i in range(1001):
        Tools.print('%d : %d' % (i, count[i]))
    Tools.print('SUM: %d' % len(y_test))
    Tools.print("Finished.")
    pass
def demo_mlc_cam(self, image_filename_list, model_file_name=None):
    """Visualize CAMs for the top-10 predicted classes of each input image.

    Loads the given checkpoint, runs each image through the network, and logs
    the ten highest-scoring classes together with their label metadata.
    """
    Tools.print("Load model form {}".format(model_file_name))
    self.load_model(model_file_name)

    self.net.eval()
    with torch.no_grad():
        for image_filename in image_filename_list:
            image = Image.open(image_filename).convert("RGB")
            image_inputs = self.transform_test(image)
            inputs = torch.unsqueeze(image_inputs, dim=0).float().cuda()

            # is_vis=True also returns the feature maps needed for CAMs
            logits, out_features = self.net.forward(inputs, is_vis=True)
            logits = logits.detach().cpu().numpy()
            # indexes of the 10 largest logits, ascending
            arg_sort = np.argsort(logits)[0][-10:]

            image = self.transform_un_normalize(image_inputs)
            cam_list = self.generate_cam(weights=self.net.head_linear.weight,
                                         features=out_features, indexes=arg_sort,
                                         image_size=inputs.shape[-2:])
            # image.save("1.png")
            # Image.fromarray(np.asarray(cam_list[0][1].detach().cpu().numpy() * 255, dtype=np.uint8)).save("1.bmp")

            for index, arg_one in enumerate(arg_sort):
                # label table is 1-based; the 199-class variant skips one id >= 124
                label_info = self.label_info_dict[arg_one + 1]
                if self.num_classes == 199 and arg_one >= 124:
                    label_info = self.label_info_dict[arg_one + 2]
                    pass
                Tools.print("{:3d} {:3d} {:.4f} {} {}".format(
                    index, arg_one, logits[0][arg_one],
                    label_info["name"], label_info["cat_name"]))
                pass
            Tools.print(image_filename)
            pass
        pass
    pass
def train(self):
    """Train for ``config.max_epoch`` epochs, checkpointing the best model.

    Tracks the best validation accuracy seen so far; whenever the current
    epoch ties or beats it, the optimizer state and the model are both saved.
    """
    max_acc = 0.0
    max_acc_epoch = 0
    for epoch in range(1, config.max_epoch + 1):
        Tools.print(config.save_path)
        tl, ta = self.train_epoch()
        vl, va = self.test()
        # get_last_lr() returns a list (one entry per param group); format the
        # first — formatting the list itself with {:.4f} raises TypeError.
        Tools.print(
            'epoch:{} lr:{:.4f} train:{:.4f} {:.4f}, val:{:.4f} {:.4f}, max:{:.4f}({:.4f})'.format(
                epoch, self.lr_scheduler.get_last_lr()[0], tl, ta, vl, va,
                max_acc, max_acc_epoch))
        if va >= max_acc:
            # both placeholders now receive arguments; the original passed one
            # argument to a two-slot format string (IndexError at runtime)
            Tools.print('A better model is found: {} {}'.format(epoch, va))
            max_acc = va
            max_acc_epoch = epoch
            # `optimizer` was an undefined global — the solver's optimizer
            # is self.optimizer
            torch.save(self.optimizer.state_dict(),
                       os.path.join(config.save_path, 'optimizer_best.pth'))
            self.save_model(max_acc)
            pass
        pass
    pass
def train(self, max_epoch=100):
    """Train the auto-encoder on reconstruction loss and save the final weights.

    Args:
        max_epoch: number of passes over ``self.train_loader``.
    """
    for epoch in range(max_epoch):
        running_loss = 0.0
        for i, (inputs, _) in enumerate(self.train_loader):
            # Variable() is a deprecated no-op wrapper in modern PyTorch
            inputs = inputs.cuda()
            encoded, linear, softmax, decoded = self.auto_encoder(inputs)
            loss = self.criterion(decoded, inputs)
            # .item() detaches a Python float; accumulating loss.data kept
            # tensors alive (other train loops in this file also use .item())
            running_loss += loss.item()

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            pass
        Tools.print('[%d] loss: %.3f' % (epoch + 1, running_loss / len(self.train_loader)))
        pass

    Tools.print('Finished Training')
    Tools.print('Saving Model...')
    torch.save(self.auto_encoder.state_dict(), self.checkpoint_path)
    pass
def train_mlc(self, start_epoch=0, model_file_name=None):
    """Train the multi-label classifier with periodic checkpointing and eval.

    Args:
        start_epoch: epoch index to resume from.
        model_file_name: optional checkpoint to load before training.
    """
    if model_file_name is not None:
        Tools.print("Load model form {}".format(model_file_name),
                    txt_path=self.config.mlc_save_result_txt)
        self.load_model(model_file_name)
        pass

    # baseline evaluation before any training
    self.eval_mlc(epoch=0)
    for epoch in range(start_epoch, self.config.mlc_epoch_num):
        Tools.print()
        self._adjust_learning_rate(self.optimizer, epoch, lr=self.config.mlc_lr,
                                   change_epoch=self.config.mlc_change_epoch)
        Tools.print('Epoch:{:03d}, lr={:.5f}'.format(
            epoch, self.optimizer.param_groups[0]['lr']),
            txt_path=self.config.mlc_save_result_txt)

        ###########################################################################
        # 1 train the model for one epoch
        all_loss = 0.0
        self.net.train()
        for i, (inputs, labels) in tqdm(enumerate(self.data_loader_mlc_train),
                                        total=len(self.data_loader_mlc_train)):
            inputs, labels = inputs.type(torch.FloatTensor).cuda(), labels.cuda()
            self.optimizer.zero_grad()

            result = self.net(inputs)
            # multi-label objective: BCE over all class outputs
            loss = self.bce_loss(result, labels)
            ######################################################################################################
            loss.backward()
            self.optimizer.step()

            all_loss += loss.item()
            pass
        Tools.print("[E:{:3d}/{:3d}] mlc loss:{:.3f}".format(
            epoch, self.config.mlc_epoch_num, all_loss / len(self.data_loader_mlc_train)),
            txt_path=self.config.mlc_save_result_txt)
        ###########################################################################

        ###########################################################################
        # 2 save the model periodically
        if epoch % self.config.mlc_save_epoch_freq == 0:
            Tools.print()
            save_file_name = Tools.new_dir(os.path.join(
                self.config.mlc_model_dir, "mlc_{}.pth".format(epoch)))
            torch.save(self.net.state_dict(), save_file_name)
            Tools.print("Save Model to {}".format(save_file_name),
                        txt_path=self.config.mlc_save_result_txt)
            Tools.print()
            pass
        ###########################################################################

        ###########################################################################
        # 3 evaluate the model periodically
        if epoch % self.config.mlc_eval_epoch_freq == 0:
            self.eval_mlc(epoch=epoch)
            pass
        ###########################################################################
        pass

    # Final Save
    Tools.print()
    save_file_name = Tools.new_dir(os.path.join(
        self.config.mlc_model_dir, "mlc_final_{}.pth".format(self.config.mlc_epoch_num)))
    torch.save(self.net.state_dict(), save_file_name)
    Tools.print("Save Model to {}".format(save_file_name),
                txt_path=self.config.mlc_save_result_txt)
    Tools.print()

    self.eval_mlc(epoch=self.config.mlc_epoch_num)
    pass
_data_root_path = '/mnt/4T/Data/cifar/cifar-10' # _data_root_path = '/home/ubuntu/ALISURE/data/cifar' _root_ckpt_dir = "./ckpt2/dgl/4_DGL_CONV/{}-100".format("GCN") _batch_size = 64 _image_size = 32 _sp_size = 4 _epochs = 100 _train_print_freq = 100 _test_print_freq = 50 _num_workers = 8 _use_gpu = True # _gpu_id = "0" _gpu_id = "1" Tools.print( "ckpt:{} batch size:{} image size:{} sp size:{} workers:{} gpu:{}". format(_root_ckpt_dir, _batch_size, _image_size, _sp_size, _num_workers, _gpu_id)) runner = RunnerSPE(data_root_path=_data_root_path, root_ckpt_dir=_root_ckpt_dir, batch_size=_batch_size, image_size=_image_size, sp_size=_sp_size, train_print_freq=_train_print_freq, test_print_freq=_test_print_freq, num_workers=_num_workers, use_gpu=_use_gpu, gpu_id=_gpu_id) runner.train(_epochs) pass
def train(self):
    """Saliency-detection training loop with gradient accumulation.

    Gradients are accumulated over ``self.iter_size`` mini-batches before each
    optimizer step (DSS-style); checkpoints are written every epoch and the
    learning rate is decayed x0.1 at the epochs in ``self.lr_decay_epoch``.
    """
    self.net.train()
    iter_num = len(self.train_loader.dataset) // self.batch_size
    ave_grad = 0
    for epoch in range(self.epoch):
        r_sal_loss = 0
        self.net.zero_grad()
        for i, data_batch in enumerate(self.train_loader):
            sal_image, sal_label = data_batch['sal_image'], data_batch['sal_label']
            # skip samples whose image and mask spatial sizes disagree
            if (sal_image.size(2) != sal_label.size(2)) or (
                    sal_image.size(3) != sal_label.size(3)):
                Tools.print('IMAGE ERROR, PASSING```')
                continue
            sal_image, sal_label = torch.Tensor(sal_image), torch.Tensor(sal_label)
            if torch.cuda.is_available():
                sal_image, sal_label = sal_image.cuda(), sal_label.cuda()

            sal_pred = self.net(sal_image)
            sal_loss_fuse = F.binary_cross_entropy_with_logits(
                sal_pred, sal_label, reduction='sum')
            # normalize by the effective (accumulated) batch size
            sal_loss = sal_loss_fuse / (self.iter_size * self.batch_size)
            r_sal_loss += sal_loss.data

            sal_loss.backward()

            ave_grad += 1
            # accumulate gradients as done in DSS
            if ave_grad % self.iter_size == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
                ave_grad = 0
                pass

            if i % (self.show_every // self.batch_size) == 0:
                Tools.print(
                    'epoch: [{:2d}/{:2d}], lr={:.6f} iter:[{:5d}/{:5d}] || Sal:{:10.4f}'.format(
                        epoch, self.epoch, self.lr, i, iter_num,
                        r_sal_loss / self.show_every))
                r_sal_loss = 0
                pass
            pass

        torch.save(self.net.state_dict(),
                   '{}/epoch_{}.pth'.format(self.save_folder, epoch + 1))

        if epoch in self.lr_decay_epoch:
            self.lr = self.lr * 0.1
            # rebuild the optimizer so the new lr applies to trainable params only
            self.optimizer = Adam(filter(lambda p: p.requires_grad,
                                         self.net.parameters()),
                                  lr=self.lr, weight_decay=self.wd)
            # self.optimizer = Adam(self.net.parameters(), lr=self.lr, weight_decay=self.wd)
            pass
        pass

    torch.save(self.net.state_dict(), '{}/final.pth'.format(self.save_folder))
    pass
""" # _run_name = "run-6" # _model_path = './results/{}/epoch_22.pth'.format(_run_name) _run_name = "run-Res50-2" _model_path = './results/{}/epoch_1.pth'.format(_run_name) _result_fold = Tools.new_dir("./results/test/{}/{}".format( _run_name, _sal_mode)) _dataset = ImageDataTest(_sal_mode) _test_loader = data.DataLoader(dataset=_dataset, batch_size=1, shuffle=False, num_workers=1) Solver.test(_arch, _model_path, _test_loader, _result_fold) label_list = [ os.path.join(_dataset.data_source["mask_root"], "{}.png".format(os.path.splitext(_)[0])) for _ in _dataset.image_list ] eval_list = [ os.path.join(_result_fold, "{}.png".format(os.path.splitext(_)[0])) for _ in _dataset.image_list ] mae, score_max, score_mean = Solver.eval(label_list, eval_list) Tools.print("{} {} {}".format(mae, score_max, score_mean)) pass pass
def train(self, start_epoch):
    """Train from ``start_epoch`` up to ``Config.max_epoch``, one epoch at a time."""
    epoch = start_epoch
    while epoch < Config.max_epoch:
        Tools.print()
        self._train_one_epoch(epoch)
        epoch += 1
def _train_epoch(self):
    """One training epoch of the joint SOD + GCN model.

    Accumulates gradients over ``iter_size`` batches per optimizer step and
    tracks MAE / precision-recall for both the pixel-level (sod) and the
    graph-level (gcn) heads.

    Returns:
        (avg_loss, avg_loss1, avg_loss2, avg_mae, best_f_score,
         avg_mae2, best_f_score2)
    """
    self.model.train()

    # Stats: th_num thresholds for the PR curves; +1e-6 avoids divide-by-zero
    th_num = 25
    epoch_loss, epoch_loss1, epoch_loss2, nb_data = 0, 0, 0, 0
    epoch_mae, epoch_prec, epoch_recall = 0.0, np.zeros(
        shape=(th_num, )) + 1e-6, np.zeros(shape=(th_num, )) + 1e-6
    epoch_mae2, epoch_prec2, epoch_recall2 = 0.0, np.zeros(
        shape=(th_num, )) + 1e-6, np.zeros(shape=(th_num, )) + 1e-6

    # Run
    iter_size = 10  # gradient-accumulation window
    self.model.zero_grad()
    tr_num = len(self.train_loader)
    for i, (images, _, labels_sod, batched_graph, batched_pixel_graph,
            segments, _, _) in enumerate(self.train_loader):
        # Data: move image, graph labels and graph index tensors to the device
        images = images.float().to(self.device)
        labels = batched_graph.y.to(self.device)
        labels_sod = torch.unsqueeze(torch.Tensor(labels_sod), dim=1).to(self.device)
        batched_graph.batch = batched_graph.batch.to(self.device)
        batched_graph.edge_index = batched_graph.edge_index.to(self.device)
        batched_pixel_graph.batch = batched_pixel_graph.batch.to(self.device)
        batched_pixel_graph.edge_index = batched_pixel_graph.edge_index.to(self.device)
        batched_pixel_graph.data_where = batched_pixel_graph.data_where.to(self.device)

        gcn_logits, gcn_logits_sigmoid, sod_logits, sod_logits_sigmoid = self.model.forward(
            images, batched_graph, batched_pixel_graph)

        # joint loss: pixel-level (1) + graph-level (2), scaled for accumulation
        loss_fuse1 = F.binary_cross_entropy_with_logits(sod_logits, labels_sod,
                                                        reduction='sum')
        loss_fuse2 = F.binary_cross_entropy_with_logits(gcn_logits, labels,
                                                        reduction='sum')
        loss = (loss_fuse1 + loss_fuse2) / iter_size
        loss.backward()

        if (i + 1) % iter_size == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
            pass

        labels_val = labels.cpu().detach().numpy()
        labels_sod_val = labels_sod.cpu().detach().numpy()
        gcn_logits_sigmoid_val = gcn_logits_sigmoid.cpu().detach().numpy()
        sod_logits_sigmoid_val = sod_logits_sigmoid.cpu().detach().numpy()

        # Stat
        nb_data += images.size(0)
        epoch_loss += loss.detach().item()
        epoch_loss1 += loss_fuse1.detach().item()
        epoch_loss2 += loss_fuse2.detach().item()

        # cal 1: pixel-level (sod) metrics
        mae = self._eval_mae(sod_logits_sigmoid_val, labels_sod_val)
        prec, recall = self._eval_pr(sod_logits_sigmoid_val, labels_sod_val, th_num)
        epoch_mae += mae
        epoch_prec += prec
        epoch_recall += recall

        # cal 2: graph-level (gcn) metrics
        mae2 = self._eval_mae(gcn_logits_sigmoid_val, labels_val)
        prec2, recall2 = self._eval_pr(gcn_logits_sigmoid_val, labels_val, th_num)
        epoch_mae2 += mae2
        epoch_prec2 += prec2
        epoch_recall2 += recall2

        # Print
        if i % self.train_print_freq == 0:
            Tools.print(
                "{:4d}-{:4d} loss={:.4f}({:.4f}+{:.4f})-{:.4f}({:.4f}+{:.4f}) "
                "sod-mse={:.4f}({:.4f}) gcn-mse={:.4f}({:.4f})".format(
                    i, tr_num, loss.detach().item(), loss_fuse1.detach().item(),
                    loss_fuse2.detach().item(), epoch_loss / (i + 1),
                    epoch_loss1 / (i + 1), epoch_loss2 / (i + 1),
                    mae, epoch_mae / (i + 1), mae2, epoch_mae2 / nb_data))
            pass
        pass

    # Results: F-measure with beta^2 = 0.3, reported at the best threshold
    avg_loss, avg_loss1, avg_loss2 = epoch_loss / tr_num, epoch_loss1 / tr_num, epoch_loss2 / tr_num

    avg_mae, avg_prec, avg_recall = epoch_mae / tr_num, epoch_prec / tr_num, epoch_recall / tr_num
    score = (1 + 0.3) * avg_prec * avg_recall / (0.3 * avg_prec + avg_recall)
    avg_mae2, avg_prec2, avg_recall2 = epoch_mae2 / nb_data, epoch_prec2 / nb_data, epoch_recall2 / nb_data
    score2 = (1 + 0.3) * avg_prec2 * avg_recall2 / (0.3 * avg_prec2 + avg_recall2)

    return avg_loss, avg_loss1, avg_loss2, avg_mae, score.max(), avg_mae2, score2.max()
def __init__(self, data_root_path, down_ratio=4, sp_size=4, train_print_freq=100,
             test_print_freq=50, root_ckpt_dir="./ckpt2/norm3", lr=None,
             num_workers=8, use_gpu=True, gpu_id="1", has_bn=True, normalize=True,
             residual=False, concat=True, weight_decay=0.0, is_sgd=False):
    """Build datasets, loaders, model and optimizer for superpixel-GCN SOD training.

    Args:
        data_root_path: dataset root directory.
        down_ratio: image down-sampling ratio for the dataset.
        sp_size: superpixel size.
        lr: optional schedule as [[epoch, lr], ...]; defaults depend on is_sgd.
        has_bn / normalize / residual / concat: MyGCNNet architecture switches.
        is_sgd: SGD with momentum when True, otherwise Adam.
    """
    self.train_print_freq = train_print_freq
    self.test_print_freq = test_print_freq

    self.device = gpu_setup(use_gpu=use_gpu, gpu_id=gpu_id)
    self.root_ckpt_dir = Tools.new_dir(root_ckpt_dir)

    self.train_dataset = MyDataset(data_root_path=data_root_path, is_train=True,
                                   down_ratio=down_ratio, sp_size=sp_size)
    self.test_dataset = MyDataset(data_root_path=data_root_path, is_train=False,
                                  down_ratio=down_ratio, sp_size=sp_size)

    # batch_size=1: each sample carries its own (variable-size) graph batch
    self.train_loader = DataLoader(self.train_dataset, batch_size=1, shuffle=True,
                                   num_workers=num_workers,
                                   collate_fn=self.train_dataset.collate_fn)
    self.test_loader = DataLoader(self.test_dataset, batch_size=1, shuffle=False,
                                  num_workers=num_workers,
                                  collate_fn=self.test_dataset.collate_fn)

    self.model = MyGCNNet(has_bn=has_bn, normalize=normalize, residual=residual,
                          concat=concat).to(self.device)
    # backbone initialized from ImageNet-pretrained VGG16 weights
    self.model.vgg16.load_pretrained_model(
        pretrained_model="/mnt/4T/ALISURE/SOD/PoolNet/pretrained/vgg16-397923af.pth")

    # lr_s is a step schedule: [[start_epoch, lr], ...]
    if is_sgd:
        # self.lr_s = [[0, 0.001], [50, 0.0001], [90, 0.00001]]
        self.lr_s = [[0, 0.01], [50, 0.001], [90, 0.0001]] if lr is None else lr
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr_s[0][1],
                                         momentum=0.9, weight_decay=weight_decay)
    else:
        self.lr_s = [[0, 0.001], [50, 0.0001], [90, 0.00001]] if lr is None else lr
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr_s[0][1],
                                          weight_decay=weight_decay)

    Tools.print("Total param: {} lr_s={} Optimizer={}".format(
        self._view_model_param(self.model), self.lr_s, self.optimizer))
    self._print_network(self.model)

    self.loss_class = nn.BCELoss().to(self.device)
    pass
def train(self):
    """ProtoNet episodic training with gradient clipping and periodic validation.

    Validates every ``Config.val_freq`` epochs and checkpoints whenever the
    validation accuracy improves on ``self.best_accuracy``.
    """
    Tools.print()
    Tools.print("Training...")

    for epoch in range(1, 1 + Config.train_epoch):
        self.proto_net.train()

        Tools.print()
        all_loss = 0.0
        # piecewise schedule controlled by first_epoch / t_epoch
        pn_lr = self.adjust_learning_rate(self.proto_net_optim, epoch,
                                          Config.first_epoch, Config.t_epoch,
                                          Config.learning_rate)
        Tools.print('Epoch: [{}] pn_lr={}'.format(epoch, pn_lr))

        for task_data, task_labels, task_index in tqdm(self.task_train_loader):
            task_data, task_labels = RunnerTool.to_cuda(
                task_data), RunnerTool.to_cuda(task_labels)

            # 1 calculate features
            out = self.proto(task_data)

            # 2 loss
            loss = self.loss(out, task_labels)
            all_loss += loss.item()

            # 3 backward (clip gradients to stabilize episodic training)
            self.proto_net.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.proto_net.parameters(), 0.5)
            self.proto_net_optim.step()
            ###########################################################################
            pass

        ###########################################################################
        # print
        Tools.print("{:6} loss:{:.3f}".format(
            epoch, all_loss / len(self.task_train_loader)))
        ###########################################################################

        ###########################################################################
        # Val
        if epoch % Config.val_freq == 0:
            Tools.print()
            Tools.print("Test {} {} .......".format(epoch, Config.model_name))
            self.proto_net.eval()
            val_accuracy = self.test_tool.val(episode=epoch, is_print=True)
            if val_accuracy > self.best_accuracy:
                self.best_accuracy = val_accuracy
                torch.save(self.proto_net.state_dict(), Config.pn_dir)
                Tools.print("Save networks for epoch: {}".format(epoch))
                pass
            pass
        ###########################################################################
        pass
    pass
def load_model(self, model_file_name):
    """Restore model weights from a checkpoint file.

    strict=False tolerates missing/unexpected keys in the state dict.
    """
    state_dict = torch.load(model_file_name)
    self.model.load_state_dict(state_dict, strict=False)
    Tools.print('Load Model: {}'.format(model_file_name))
def load_model(self, model_file_name):
    """Strictly load a checkpoint into self.net, logging to the MLC result file."""
    Tools.print("Load model form {}".format(model_file_name),
                txt_path=self.config.mlc_save_result_txt)
    state = torch.load(model_file_name)
    # strict=True: the checkpoint must match the network's keys exactly
    self.net.load_state_dict(state, strict=True)
    Tools.print("Restore from {}".format(model_file_name),
                txt_path=self.config.mlc_save_result_txt)
def print_info(self):
    """Log a summary of the dataset splits, class count, and runner parameters."""
    Tools.print()
    Tools.print("Dataset: {}, Model: {}".format(self.dataset_name, self.model_name))
    Tools.print("Training Graphs: {}".format(len(self.dataset.train)))
    Tools.print("Validation Graphs: {}".format(len(self.dataset.val)))
    Tools.print("Test Graphs: {}".format(len(self.dataset.test)))
    Tools.print("Number of Classes: {}".format(self.n_classes))
    Tools.print()
    # str(self) is expected to render the runner's hyper-parameters
    Tools.print("Params: \n{}".format(str(self)))
    Tools.print()
    pass
def load_model(self, model_file_name):
    """Load weights onto the runner's device; strict=False tolerates key mismatches."""
    self.model.load_state_dict(
        torch.load(model_file_name, map_location=self.device), strict=False)
    Tools.print('Load Model: {}'.format(model_file_name))
_gpu_id = "0" # _gpu_id = "1" # _epochs = 100 # _is_sgd = False _weight_decay = 0.0 _epochs = 150 _is_sgd = True # _weight_decay = 5e-4 _improved = False _has_bn = True _has_residual = True _is_normalize = True Tools.print("epochs:{} ckpt:{} batch size:{} image size:{} sp size:{} workers:{} gpu:{} " "has_residual:{} is_normalize:{} has_bn:{} improved:{} is_sgd:{} weight_decay:{}".format( _epochs, _root_ckpt_dir, _batch_size, _image_size, _sp_size, _num_workers, _gpu_id, _has_residual, _is_normalize, _has_bn, _improved, _is_sgd, _weight_decay)) runner = RunnerSPE(data_root_path=_data_root_path, root_ckpt_dir=_root_ckpt_dir, batch_size=_batch_size, image_size=_image_size, sp_size=_sp_size, is_sgd=_is_sgd, residual=_has_residual, normalize=_is_normalize, has_bn=_has_bn, improved=_improved, weight_decay=_weight_decay, train_print_freq=_train_print_freq, test_print_freq=_test_print_freq, num_workers=_num_workers, use_gpu=_use_gpu, gpu_id=_gpu_id) runner.train(_epochs) pass
def train(self):
    """Matching-network episodic training with an LR scheduler and periodic validation.

    Validates every ``Config.val_freq`` epochs and checkpoints whenever the
    validation accuracy improves on ``self.best_accuracy``.
    """
    Tools.print()
    Tools.print("Training...")

    for epoch in range(Config.train_epoch):
        self.matching_net.train()

        Tools.print()
        all_loss = 0.0
        for task_data, task_labels, task_index in tqdm(self.task_train_loader):
            task_data, task_labels = RunnerTool.to_cuda(
                task_data), RunnerTool.to_cuda(task_labels)

            # 1 calculate features
            predicts = self.matching(task_data)

            # 2 loss
            loss = self.loss(predicts, task_labels)
            all_loss += loss.item()

            # 3 backward
            self.matching_net.zero_grad()
            loss.backward()
            self.matching_net_optim.step()
            ###########################################################################
            pass

        ###########################################################################
        # print (scheduler steps once per epoch, after logging the current lr)
        Tools.print("{:6} loss:{:.3f} lr:{}".format(
            epoch + 1, all_loss / len(self.task_train_loader),
            self.matching_net_scheduler.get_last_lr()))
        self.matching_net_scheduler.step()
        ###########################################################################

        ###########################################################################
        # Val
        if epoch % Config.val_freq == 0:
            Tools.print()
            Tools.print("Test {} {} .......".format(epoch, Config.model_name))
            self.matching_net.eval()
            val_accuracy = self.test_tool.val(episode=epoch, is_print=True)
            if val_accuracy > self.best_accuracy:
                self.best_accuracy = val_accuracy
                torch.save(self.matching_net.state_dict(), Config.mn_dir)
                Tools.print("Save networks for epoch: {}".format(epoch))
                pass
            pass
        ###########################################################################
        pass
    pass
def load_model(self):
    """Load ProtoNet weights from ``Config.pn_dir`` when the checkpoint exists.

    Logs either a success message or (new) an explicit note that no checkpoint
    was found, instead of silently doing nothing.
    """
    if os.path.exists(Config.pn_dir):
        self.proto_net.load_state_dict(torch.load(Config.pn_dir))
        Tools.print("load proto net success from {}".format(Config.pn_dir))
    else:
        # previously a silent no-op; make the "training from scratch" case visible
        Tools.print("no checkpoint found at {}, train from scratch".format(Config.pn_dir))
    pass
def train(self):
    """Joint matching-network + instance-clustering (IC) training.

    First pass ("Init Update") populates the pseudo-label clusters without
    training; then each epoch trains both networks with separate optimizers
    and losses, and checkpoints both whenever FSL val accuracy improves.
    """
    Tools.print()
    best_accuracy = 0.0
    Tools.print("Training...", txt_path=self.config.log_file)

    # Init Update: seed produce_class with cluster assignments before training
    try:
        self.matching_net.eval()
        self.ic_model.eval()
        # NOTE(review): the "{}" below is printed literally — no .format() call;
        # presumably a forgotten argument. Left as-is (runtime string).
        Tools.print("Init label {} .......", txt_path=self.config.log_file)
        self.produce_class.reset()
        for task_data, task_labels, task_index, task_ok in tqdm(self.task_train_loader):
            ic_labels = RunnerTool.to_cuda(task_index[:, -1])
            task_data, task_labels = RunnerTool.to_cuda(
                task_data), RunnerTool.to_cuda(task_labels)
            ic_out_logits, ic_out_l2norm = self.ic_model(task_data[:, -1])
            self.produce_class.cal_label(ic_out_l2norm, ic_labels)
            pass
        Tools.print("Epoch: {}/{}".format(self.produce_class.count,
                                          self.produce_class.count_2),
                    txt_path=self.config.log_file)
    finally:
        pass

    for epoch in range(1, 1 + self.config.train_epoch):
        self.matching_net.train()
        self.ic_model.train()

        Tools.print()
        # both optimizers follow the same piecewise schedule
        mn_lr = self.config.adjust_learning_rate(
            self.matching_net_optim, epoch, self.config.first_epoch,
            self.config.t_epoch, self.config.learning_rate)
        ic_lr = self.config.adjust_learning_rate(
            self.ic_model_optim, epoch, self.config.first_epoch,
            self.config.t_epoch, self.config.learning_rate)
        Tools.print('Epoch: [{}] mn_lr={} ic_lr={}'.format(epoch, mn_lr, ic_lr),
                    txt_path=self.config.log_file)

        self.produce_class.reset()
        Tools.print(self.task_train.classes)
        is_ok_total, is_ok_acc = 0, 0
        all_loss, all_loss_fsl, all_loss_ic = 0.0, 0.0, 0.0
        for task_data, task_labels, task_index, task_ok in tqdm(self.task_train_loader):
            ic_labels = RunnerTool.to_cuda(task_index[:, -1])
            task_data, task_labels = RunnerTool.to_cuda(
                task_data), RunnerTool.to_cuda(task_labels)

            ###########################################################################
            # 1 calculate features
            relations = self.matching(task_data)
            ic_out_logits, ic_out_l2norm = self.ic_model(task_data[:, -1])

            # 2 get current pseudo-labels, then refresh clusters for next epoch
            ic_targets = self.produce_class.get_label(ic_labels)
            self.produce_class.cal_label(ic_out_l2norm, ic_labels)

            # 3 loss (combined value is logged only; backward is per-loss below)
            loss_fsl = self.fsl_loss(relations, task_labels)
            loss_ic = self.ic_loss(ic_out_logits, ic_targets)
            loss = loss_fsl * self.config.loss_fsl_ratio + loss_ic * self.config.loss_ic_ratio
            all_loss += loss.item()
            all_loss_fsl += loss_fsl.item()
            all_loss_ic += loss_ic.item()

            # 4 backward: each network is updated by its own loss/optimizer
            self.ic_model.zero_grad()
            loss_ic.backward()
            self.ic_model_optim.step()

            self.matching_net.zero_grad()
            loss_fsl.backward()
            self.matching_net_optim.step()

            # is ok: fraction of correctly-formed episodes in this batch
            is_ok_acc += torch.sum(torch.cat(task_ok))
            is_ok_total += torch.prod(torch.tensor(torch.cat(task_ok).shape))
            ###########################################################################
            pass

        ###########################################################################
        # print
        # NOTE(review): logs "epoch + 1" although the loop already starts at 1,
        # while the "Train: [...]" line below logs "epoch" — likely inconsistent.
        Tools.print(
            "{:6} loss:{:.3f} fsl:{:.3f} ic:{:.3f} ok:{:.3f}({}/{})".format(
                epoch + 1, all_loss / len(self.task_train_loader),
                all_loss_fsl / len(self.task_train_loader),
                all_loss_ic / len(self.task_train_loader),
                int(is_ok_acc) / int(is_ok_total), is_ok_acc, is_ok_total),
            txt_path=self.config.log_file)
        Tools.print("Train: [{}] {}/{}".format(epoch, self.produce_class.count,
                                               self.produce_class.count_2),
                    txt_path=self.config.log_file)
        ###########################################################################

        ###########################################################################
        # Val
        if epoch % self.config.val_freq == 0:
            self.matching_net.eval()
            self.ic_model.eval()

            self.test_tool_ic.val(epoch=epoch)
            val_accuracy = self.test_tool_fsl.val(episode=epoch, is_print=True)
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                torch.save(self.matching_net.state_dict(),
                           Tools.new_dir(self.config.mn_dir))
                torch.save(self.ic_model.state_dict(),
                           Tools.new_dir(self.config.ic_dir))
                Tools.print("Save networks for epoch: {}".format(epoch),
                            txt_path=self.config.log_file)
                pass
            pass
        ###########################################################################
        pass
    pass
_improved = True _has_bn = True _has_residual = True _is_normalize = True _concat = True _sp_size, _down_ratio, _model_name = 4, 4, "C2PC2PC3C3C3" _name = "E2E2-Pretrain_temp_BS1-MoreConv-{}_{}_lr0001".format( _model_name, _is_sgd) _root_ckpt_dir = "./ckpt2/dgl/1_PYG_CONV_Fast-SOD_BAS_Temp/{}".format( _name) Tools.print( "name:{} epochs:{} ckpt:{} sp size:{} down_ratio:{} workers:{} gpu:{} " "has_residual:{} is_normalize:{} has_bn:{} improved:{} concat:{} is_sgd:{} weight_decay:{}" .format(_name, _epochs, _root_ckpt_dir, _sp_size, _down_ratio, _num_workers, _gpu_id, _has_residual, _is_normalize, _has_bn, _improved, _concat, _is_sgd, _weight_decay)) runner = RunnerSPE(data_root_path=_data_root_path, root_ckpt_dir=_root_ckpt_dir, sp_size=_sp_size, is_sgd=_is_sgd, lr=_lr, residual=_has_residual, normalize=_is_normalize, down_ratio=_down_ratio, has_bn=_has_bn, concat=_concat, weight_decay=_weight_decay, train_print_freq=_train_print_freq,
def __init__(self, gpu_id=1, dataset_name=MyDataset.dataset_name_miniimagenet,
             is_conv_4=True, is_res34=True, is_modify_head=True):
    """Configuration object for joint matching-net + IC few-shot training.

    Args:
        gpu_id: CUDA device index (exported via CUDA_VISIBLE_DEVICES).
        dataset_name: dataset identifier understood by MyDataset.
        is_conv_4: Conv-4 embedding when True, otherwise ResNet12-small.
        is_res34: ResNet-34 IC backbone when True, otherwise ResNet-18.
        is_modify_head: use the modified-head variant of the IC backbone.
    """
    self.gpu_id = gpu_id
    # must be set before any CUDA context is created
    os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu_id)

    self.dataset_name = dataset_name
    self.is_conv_4 = is_conv_4
    self.is_res34 = is_res34
    self.modify_head = is_modify_head

    self.num_workers = 8
    # few-shot episode layout
    self.num_way = 5
    self.num_shot = 1
    self.val_freq = 10
    self.episode_size = 15
    self.test_episode = 600
    # instance-clustering head size and loss weights
    self.ic_out_dim = 512
    self.ic_ratio = 1
    self.learning_rate = 0.01
    self.loss_fsl_ratio = 1.0
    self.loss_ic_ratio = 1.0

    ###############################################################################################
    self.train_epoch = 1500
    # lr schedule knee points
    self.first_epoch, self.t_epoch = 300, 200
    self.adjust_learning_rate = RunnerTool.adjust_learning_rate1
    ###############################################################################################

    ###############################################################################################
    self.is_png = True
    self.data_root = MyDataset.get_data_root(
        dataset_name=self.dataset_name, is_png=self.is_png)
    self.transform_train_ic, self.transform_train_fsl, self.transform_test = MyTransforms.get_transform(
        dataset_name=self.dataset_name, has_ic=True, is_fsl_simple=True, is_css=False)

    # IC backbone choice
    if self.is_res34:
        self.resnet = resnet34
        self.ic_net_name = "res34{}".format("_head" if self.modify_head else "")
    else:
        self.resnet = resnet18
        self.ic_net_name = "res18{}".format("_head" if self.modify_head else "")
        pass

    # FSL embedding choice (batch size differs per backbone)
    if self.is_conv_4:
        self.matching_net, self.batch_size, self.e_net_name = C4Net(
            hid_dim=64, z_dim=64), 64, "C4"
    else:
        self.matching_net, self.batch_size, self.e_net_name = ResNet12Small(
            avg_pool=True, drop_rate=0.1), 32, "R12S"
    ###############################################################################################

    # run identifier encodes all important hyper-parameters
    self.model_name = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}{}".format(
        self.gpu_id, self.ic_net_name, self.e_net_name, self.train_epoch,
        self.batch_size, self.num_way, self.num_shot, self.first_epoch,
        self.t_epoch, self.ic_out_dim, self.ic_ratio, self.loss_fsl_ratio,
        self.loss_ic_ratio, "_png" if self.is_png else "")
    self.time = Tools.get_format_time()

    # checkpoint paths for matching net (mn) and IC model (ic) + shared log file
    _root_path = "../models_abl/{}/mn".format(self.dataset_name)
    self.mn_dir = "{}/{}_{}_mn.pkl".format(_root_path, self.time, self.model_name)
    self.ic_dir = "{}/{}_{}_ic.pkl".format(_root_path, self.time, self.model_name)
    self.log_file = self.ic_dir.replace(".pkl", ".txt")

    Tools.print(self.data_root, txt_path=self.log_file)
    Tools.print(self.model_name, txt_path=self.log_file)
    Tools.print(self.mn_dir, txt_path=self.log_file)
    Tools.print(self.ic_dir, txt_path=self.log_file)
    pass
def train(self, epochs, start_epoch=0):
    """Train the joint SOD+GCN model, testing before training and after each epoch.

    Args:
        epochs: final epoch bound (exclusive).
        start_epoch: epoch index to resume from.
    """
    # baseline evaluation before any training (logged as epoch 0)
    test_loss, test_loss1, test_loss2, test_mae, test_score, test_mae2, test_score2 = self.test()
    Tools.print(
        'E:{:2d}, Test sod-mae-score={:.4f}-{:.4f} '
        'gcn-mae-score={:.4f}-{:.4f} loss={:.4f}({:.4f}+{:.4f})'.format(
            0, test_mae, test_score, test_mae2, test_score2,
            test_loss, test_loss1, test_loss2))

    for epoch in range(start_epoch, epochs):
        Tools.print()
        Tools.print("Start Epoch {}".format(epoch))

        self._lr(epoch)
        Tools.print('Epoch:{:02d},lr={:.4f}'.format(
            epoch, self.optimizer.param_groups[0]['lr']))

        (train_loss, train_loss1, train_loss2, train_mae, train_score,
         train_mae2, train_score2) = self._train_epoch()
        self._save_checkpoint(self.model, self.root_ckpt_dir, epoch)
        test_loss, test_loss1, test_loss2, test_mae, test_score, test_mae2, test_score2 = self.test()

        Tools.print(
            'E:{:2d}, Train sod-mae-score={:.4f}-{:.4f} '
            'gcn-mae-score={:.4f}-{:.4f} loss={:.4f}({:.4f}+{:.4f})'.format(
                epoch, train_mae, train_score, train_mae2, train_score2,
                train_loss, train_loss1, train_loss2))
        Tools.print(
            'E:{:2d}, Test sod-mae-score={:.4f}-{:.4f} '
            'gcn-mae-score={:.4f}-{:.4f} loss={:.4f}({:.4f}+{:.4f})'.format(
                epoch, test_mae, test_score, test_mae2, test_score2,
                test_loss, test_loss1, test_loss2))
        pass
    pass
def train(self):
    """Train the instance-classification (IC) model with online cluster labels.

    Protocol per epoch: labels for each sample come from `produce_class` as
    computed during the PREVIOUS pass (`get_label`), while the current pass's
    embeddings update the labels for the NEXT epoch (`cal_label` after
    `get_label`). A label-initialization pass runs once before training.
    Saves encoder + IC weights to Config.fe_dir / Config.ic_dir whenever
    validation accuracy improves.

    Fixes vs. previous revision: removed a no-op `try/finally: pass` wrapper
    around the init pass, and removed a stray unfilled `{}` placeholder that
    was printed literally in the "Init label" log line.
    """
    Tools.print()
    Tools.print("Training...")

    # Init: one forward pass over the whole loader to seed the cluster labels.
    self.feature_encoder.eval()
    self.ic_model.eval()
    Tools.print("Init label .......")
    self.produce_class.reset()
    for image, label, idx in tqdm(self.ic_train_loader):
        image, idx = self.to_cuda(image), self.to_cuda(idx)
        features = self.feature_encoder(image)  # 5x64*19*19
        ic_out_logits, ic_out_l2norm = self.ic_model(features)
        self.produce_class.cal_label(ic_out_l2norm, idx)
        pass
    Tools.print("Epoch: {}/{}".format(self.produce_class.count,
                                      self.produce_class.count_2))

    for epoch in range(Config.train_epoch):
        self.feature_encoder.train()
        self.ic_model.train()

        Tools.print()
        fe_lr = self.adjust_learning_rate(self.feature_encoder_optim, epoch)
        ic_lr = self.adjust_learning_rate(self.ic_model_optim, epoch)
        Tools.print('Epoch: [{}] fe_lr={} ic_lr={}'.format(
            epoch, fe_lr, ic_lr))

        all_loss = 0.0
        self.produce_class.reset()
        for image, label, idx in tqdm(self.ic_train_loader):
            image, label, idx = self.to_cuda(image), self.to_cuda(
                label), self.to_cuda(idx)

            ###########################################################################
            # 1 features; targets are read BEFORE cal_label so they reflect the
            # previous epoch's assignment (cal_label prepares the next epoch's).
            features = self.feature_encoder(image)  # 5x64*19*19
            ic_out_logits, ic_out_l2norm = self.ic_model(features)
            ic_targets = self.produce_class.get_label(idx)
            self.produce_class.cal_label(ic_out_l2norm, idx)

            # 2 loss
            loss = self.ic_loss(ic_out_logits, ic_targets)
            all_loss += loss.item()

            # 3 backward (zero grads on both modules, single backward, two steps)
            self.feature_encoder.zero_grad()
            self.ic_model.zero_grad()
            loss.backward()
            self.feature_encoder_optim.step()
            self.ic_model_optim.step()
            ###########################################################################
            pass

        ###########################################################################
        # Per-epoch summary: mean loss and the cluster re-assignment counters.
        Tools.print("{:6} loss:{:.3f}".format(
            epoch + 1, all_loss / len(self.ic_train_loader)))
        Tools.print("Train: [{}] {}/{}".format(epoch,
                                               self.produce_class.count,
                                               self.produce_class.count_2))
        ###########################################################################

        ###########################################################################
        # Val: checkpoint only when validation accuracy improves.
        if epoch % Config.val_freq == 0:
            self.feature_encoder.eval()
            self.ic_model.eval()
            val_accuracy = self.test_tool_ic.val(epoch=epoch)
            if val_accuracy > self.best_accuracy:
                self.best_accuracy = val_accuracy
                torch.save(self.feature_encoder.state_dict(), Config.fe_dir)
                torch.save(self.ic_model.state_dict(), Config.ic_dir)
                Tools.print("Save networks for epoch: {}".format(epoch))
                pass
            pass
        ###########################################################################
        pass
    pass
def test(self, model_file=None, is_train_loader=False):
    """Evaluate the SOD+GCN model on the test (or train) loader.

    :param model_file: optional checkpoint name; if given it is loaded first.
    :param is_train_loader: evaluate on the training loader instead of test.
    :return: (avg_loss, avg_loss1, avg_loss2, avg_mae, best_F_sod,
              avg_mae2, best_F_gcn) where the F-scores are the max over
              the 25 thresholds of the F-beta (beta^2 = 0.3) curve.
    """
    if model_file:
        self.load_model(model_file_name=model_file)

    # NOTE(review): .train() in an evaluation routine — possibly intentional
    # (e.g. BatchNorm kept in train mode), but confirm eval() was not meant.
    self.model.train()

    Tools.print()
    th_num = 25  # number of thresholds for the precision/recall curves

    # Accumulators (prec/recall start at 1e-6 to avoid division by zero).
    epoch_test_loss, epoch_test_loss1, epoch_test_loss2, nb_data = 0, 0, 0, 0
    epoch_test_mae, epoch_test_mae2 = 0.0, 0.0
    epoch_test_prec, epoch_test_recall = np.zeros(
        shape=(th_num, )) + 1e-6, np.zeros(shape=(th_num, )) + 1e-6
    epoch_test_prec2, epoch_test_recall2 = np.zeros(
        shape=(th_num, )) + 1e-6, np.zeros(shape=(th_num, )) + 1e-6

    loader = self.train_loader if is_train_loader else self.test_loader
    tr_num = len(loader)  # batch count, used to average the "1" metrics
    with torch.no_grad():
        for i, (images, _, labels_sod, batched_graph, batched_pixel_graph,
                segments, _, _) in enumerate(loader):
            # Data: move images, per-node labels, SOD labels and both graph
            # structures onto the model's device.
            images = images.float().to(self.device)
            labels = batched_graph.y.to(self.device)
            labels_sod = torch.unsqueeze(torch.Tensor(labels_sod),
                                         dim=1).to(self.device)
            batched_graph.batch = batched_graph.batch.to(self.device)
            batched_graph.edge_index = batched_graph.edge_index.to(
                self.device)
            batched_pixel_graph.batch = batched_pixel_graph.batch.to(
                self.device)
            batched_pixel_graph.edge_index = batched_pixel_graph.edge_index.to(
                self.device)
            batched_pixel_graph.data_where = batched_pixel_graph.data_where.to(
                self.device)

            # Forward: model returns (…, gcn_sigmoid, …, sod_sigmoid);
            # total loss = BCE on graph nodes + BCE on SOD maps.
            _, gcn_logits_sigmoid, _, sod_logits_sigmoid = self.model.forward(
                images, batched_graph, batched_pixel_graph)

            loss1 = self.loss_bce(gcn_logits_sigmoid, labels)
            loss2 = self.loss_bce(sod_logits_sigmoid, labels_sod)
            loss = loss1 + loss2

            labels_val = labels.cpu().detach().numpy()
            labels_sod_val = labels_sod.cpu().detach().numpy()
            gcn_logits_sigmoid_val = gcn_logits_sigmoid.cpu().detach(
            ).numpy()
            sod_logits_sigmoid_val = sod_logits_sigmoid.cpu().detach(
            ).numpy()

            # Stat
            nb_data += images.size(0)
            epoch_test_loss += loss.detach().item()
            epoch_test_loss1 += loss1.detach().item()
            epoch_test_loss2 += loss2.detach().item()

            # cal 1: MAE and PR curve of the SOD (pixel) branch
            mae = self._eval_mae(sod_logits_sigmoid_val, labels_sod_val)
            prec, recall = self._eval_pr(sod_logits_sigmoid_val,
                                         labels_sod_val, th_num)
            epoch_test_mae += mae
            epoch_test_prec += prec
            epoch_test_recall += recall

            # cal 2: MAE and PR curve of the GCN (superpixel/node) branch
            mae2 = self._eval_mae(gcn_logits_sigmoid_val, labels_val)
            prec2, recall2 = self._eval_pr(gcn_logits_sigmoid_val,
                                          labels_val, th_num)
            epoch_test_mae2 += mae2
            epoch_test_prec2 += prec2
            epoch_test_recall2 += recall2

            # Print running averages every test_print_freq batches.
            # NOTE(review): mae2 is averaged over nb_data (image count) while
            # mae is averaged over batch count — verify this asymmetry is intended.
            if i % self.test_print_freq == 0:
                Tools.print(
                    "{:4d}-{:4d} loss={:.4f}({:.4f}+{:.4f})-{:.4f}({:.4f}+{:.4f}) "
                    "sod-mse={:.4f}({:.4f}) gcn-mse={:.4f}({:.4f})".format(
                        i, len(loader),
                        loss.detach().item(),
                        loss1.detach().item(),
                        loss2.detach().item(), epoch_test_loss / (i + 1),
                        epoch_test_loss1 / (i + 1), epoch_test_loss2 /
                        (i + 1), mae, epoch_test_mae / (i + 1), mae2,
                        epoch_test_mae2 / nb_data))
                pass
            pass
        pass

    # Final averages: branch 1 (SOD) divides by batch count, branch 2 (GCN)
    # by image count; F-beta with beta^2 = 0.3, best threshold via .max().
    avg_loss, avg_loss1, avg_loss2 = epoch_test_loss / tr_num, epoch_test_loss1 / tr_num, epoch_test_loss2 / tr_num

    avg_mae, avg_prec, avg_recall = epoch_test_mae / tr_num, epoch_test_prec / tr_num, epoch_test_recall / tr_num
    score = (1 + 0.3) * avg_prec * avg_recall / (0.3 * avg_prec +
                                                 avg_recall)

    avg_mae2, avg_prec2, avg_recall2 = epoch_test_mae2 / nb_data, epoch_test_prec2 / nb_data, epoch_test_recall2 / nb_data
    score2 = (1 + 0.3) * avg_prec2 * avg_recall2 / (0.3 * avg_prec2 +
                                                    avg_recall2)

    return avg_loss, avg_loss1, avg_loss2, avg_mae, score.max(
    ), avg_mae2, score2.max()
def load_model(self):
    """Restore IC model weights from Config.ic_dir if the checkpoint exists.

    Previously a missing checkpoint file was skipped silently, so a wrong
    path meant training silently restarted from scratch; now it is logged.
    """
    if os.path.exists(Config.ic_dir):
        self.ic_model.load_state_dict(torch.load(Config.ic_dir))
        Tools.print("load ic model success from {}".format(Config.ic_dir))
    else:
        Tools.print("ic model checkpoint not found: {}".format(Config.ic_dir))
        pass
    pass