def classify_single_image(image_path):
    """Classify one image file and print its softmax scores and predicted label.

    Builds a ResNet-50 (Bottleneck [3, 4, 6, 3]) with NUM_CLASSES outputs,
    loads weights from MODEL_PATH, and looks the top-1 prediction up in
    `default_class`.

    Args:
        image_path: path to an image file readable by PIL.
    """
    # model loading
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          NUM_CLASSES)
    model.eval()  # inference mode: freeze BN statistics, disable dropout
    if USE_GPU:
        cudnn.enabled = True
        # fix: explicit dim — nn.Softmax() without dim is deprecated/ambiguous;
        # dim=1 matches the (batch, classes) prediction layout used below.
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
        saved_state_dict = torch.load(MODEL_PATH)
    else:
        softmax = nn.Softmax(dim=1)
        saved_state_dict = torch.load(MODEL_PATH, map_location='cpu')
    load_filtered_state_dict(model, saved_state_dict, ignore_layer=[],
                             reverse=False, gpu=cudnn.enabled)
    transformations = transforms.Compose(
        [transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
         transforms.ToTensor()])
    imgs = torch.FloatTensor(1, 3, IMAGE_SIZE, IMAGE_SIZE)
    if USE_GPU:
        imgs = imgs.cuda()
    # image loading
    imgs[0] = transformations(Image.open(image_path).convert("RGB"))
    # fix: no autograd graph needed at inference time
    with torch.no_grad():
        pred = model(imgs)
        pred = softmax(pred)
    print(pred.cpu().detach().numpy())
    _, pred_1 = pred.topk(1, 1, True, True)
    c = default_class[pred_1.cpu().numpy()[0][0]]
    print("{} -- {}".format(image_path, c))
def main(args):
    """Run a trained ResNet-50 over a labeled test set and plot the results.

    Loads weights from args.saved_model, predicts a class for every sample in
    args.test_data_dir, collects ground truth and predictions into flat
    arrays, and hands them to `plot` together with the class names.

    Args:
        args: parsed CLI namespace (num_classes, saved_model, image_size,
            gpu, test_data_dir, batch_size).
    """
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          args.num_classes)
    transformations = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor()
    ])
    if args.gpu[0] >= 0:
        cudnn.enabled = True
        # fix: explicit dim=1 (deprecated implicit-dim Softmax); predictions
        # are (batch, classes).
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
        saved_state_dict = torch.load(args.saved_model)
    else:
        softmax = nn.Softmax(dim=1)
        # fix: map GPU-saved checkpoints onto CPU, consistent with the other
        # inference entry points in this file.
        saved_state_dict = torch.load(args.saved_model, map_location='cpu')
    load_filtered_state_dict(model, saved_state_dict, ignore_layer=[],
                             reverse=True)
    model.eval()  # fix: BatchNorm/Dropout must be in eval mode at test time
    test_x, test_y, classes_names = get_dataset(args.test_data_dir)
    test_dataset = DataWrapper(test_x, test_y, transformations,
                               augumentation=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=1)
    n = len(test_dataset)
    y_pred = np.zeros(n)
    y = np.zeros(n)
    count = 0
    # fix: no gradients needed while predicting
    with torch.no_grad():
        for i, (images, labels, names) in enumerate(test_loader):
            if args.gpu[0] >= 0:
                images = images.cuda()
                labels = labels.cuda()
            label_pred = model(images)
            label_pred = softmax(label_pred)
            # fix: use a distinct name — the original reused `n`, shadowing
            # the dataset size above.
            batch_n = images.size(0)
            _, label_pred = label_pred.topk(1, 1, True, True)
            y_pred[count:count + batch_n] = label_pred.view(-1).cpu().numpy()
            y[count:count + batch_n] = labels.data.cpu().numpy()
            count += batch_n
    plot(y, y_pred, classes_names)
def __init__(self, cfg):
    """Assemble the PAA detector: ResNet+FPN backbone, head, loss, anchors."""
    super().__init__()
    trunk = resnet.ResNet(cfg)
    pyramid = fpn_module.FPN(in_channels_list=[0, 512, 1024, 2048],
                             out_channels=256)
    stages = OrderedDict([("body", trunk), ("fpn", pyramid)])
    self.backbone = nn.Sequential(stages)
    self.head = PAAHead(cfg)
    self.paa_loss = PAALoss(cfg)
    self.anchor_generator = AnchorGenerator(cfg)
def main(args):
    """Train a ResNet-50 classifier on an image-folder dataset.

    Builds train/eval loaders with standard ImageNet normalization,
    optionally warm-starts from args.saved_model, and alternates
    `evaluate` / `train` for args.num_epochs epochs, checkpointing the
    best model via SaveBestModel and logging to TensorBoard.

    Args:
        args: parsed CLI namespace (trainning_data_dir, validation_data_dir,
            batch_size, saved_model, lr, save_path, num_epochs).
    """
    cudnn.enabled = True
    print('Loading data.')
    transformations = transforms.Compose([
        transforms.Resize(240),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_x, train_y, classes_names = get_dataset(args.trainning_data_dir)
    test_x, test_y, _ = get_dataset(args.validation_data_dir)
    num_classes = len(classes_names)
    trainning_dataset = DataWrapper(train_x, train_y, transformations)
    eval_dataset = DataWrapper(test_x, test_y, transformations)
    train_loader = torch.utils.data.DataLoader(dataset=trainning_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=16)
    eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=16)
    n = len(trainning_dataset)
    print(n)
    # ResNet50 structure
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          num_classes)
    if args.saved_model:
        print('Loading model.')
        saved_state_dict = torch.load(args.saved_model)
        # 'origin model from pytorch': torchvision checkpoints need key
        # remapping in the opposite direction.
        if 'resnet' in args.saved_model:
            load_filtered_state_dict(model, saved_state_dict,
                                     ignore_layer=[], reverse=False)
        else:
            load_filtered_state_dict(model, saved_state_dict, ignore_layer=[])
    crossEntropyLoss = nn.CrossEntropyLoss().cuda()
    # fix: removed unused `softmax` local — training/eval here consume raw
    # logits through CrossEntropyLoss; the softmax was never referenced.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # multi-gpu — NOTE(review): device ids are hard-coded to 2 GPUs; confirm
    # the deployment machines before changing.
    model = nn.DataParallel(model, device_ids=[0, 1])
    model.cuda()
    Save_model = SaveBestModel(save_dir=args.save_path)
    Writer = SummaryWriter()
    step = 0
    for epoch in range(args.num_epochs):
        # evaluation first: scores the loaded/previous weights before each
        # training epoch.
        evaluate(eval_loader, model, Writer, step, Save_model, epoch)
        step = train(train_loader, model, crossEntropyLoss, optimizer, Writer,
                     args.batch_size, epoch, step, n)
def eval():  # NOTE(review): shadows the builtin `eval`; name kept for callers.
    """Evaluate a ResNet classifier on the Places365 validation split.

    Builds the input pipeline and model under FLAGS.resnet's variable scope,
    restores weights from one or two checkpoints, then streams batches from
    the queue runners, accumulating loss and top-1 precision.

    Returns:
        Tuple (mean loss, mean precision) averaged over max_iter iterations.
    """
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.test_batch_size, 'val', dataset='places365',
            color_switch=FLAGS.color_switch, blur=0,
            multicrops_for_eval=FLAGS.test_with_multicrops)
        model = resnet.ResNet(num_classes, None, None, None,
                              resnet=FLAGS.resnet, mode='test',
                              float_type=tf.float32)
        logits = model.inference(images)
        model.compute_loss(labels + FLAGS.labels_offset, logits)
    # Top-1 precision per batch; labels are shifted by labels_offset to match
    # the label convention of the pretrained checkpoint.
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                labels + FLAGS.labels_offset, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    if FLAGS.test_with_multicrops == 1:
        # Multi-crop protocol: each batch holds all crops of ONE image;
        # average the predictions over crops and score that single average.
        precisions = tf.nn.in_top_k(
            [tf.reduce_mean(model.predictions, axis=[0])], [labels[0]], 1)
        precision_op = tf.cast(precisions, tf.float32)
    # ========================= end of building model ================================
    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    if FLAGS.pre_trained_filename is not None and FLAGS.finetuned_filename is not None:
        # Two-checkpoint restore: the final 'logits' layer comes from the
        # pre-trained file, everything else from the finetuned file.
        # Momentum slot variables are skipped entirely.
        last_layer_variables = []
        finetuned_variables = []
        for v in tf.global_variables():
            if 'Momentum' in v.name:
                continue
            if v.name.find('logits') > 0:
                last_layer_variables.append(v)
                print('last layer\'s variables: %s' % v.name)
                continue
            print('finetuned variables:', v.name)
            finetuned_variables.append(v)
        loader1 = tf.train.Saver(var_list=finetuned_variables)
        loader1.restore(sess, FLAGS.finetuned_filename)
        loader2 = tf.train.Saver(var_list=last_layer_variables)
        loader2.restore(sess, FLAGS.pre_trained_filename)
        print('Succesfully loaded model from %s and %s.' %
              (FLAGS.finetuned_filename, FLAGS.pre_trained_filename))
    elif FLAGS.pre_trained_filename is not None:
        loader = tf.train.Saver()
        loader.restore(sess, FLAGS.pre_trained_filename)
        print('Succesfully loaded model from %s.'
              % FLAGS.pre_trained_filename)
    else:
        print('No models loaded...')
    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', 'places365') // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter
    if FLAGS.test_with_multicrops == 1:
        # One image (all of its crops) per iteration in multi-crop mode.
        max_iter = dataset_reader.num_per_epoche('eval', 'places365')
    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])
        average_loss += loss
        average_precision += precision
        # Progress report every 10 steps; both branches print the same
        # running averages.
        if step % 100 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)
        elif step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)
    # Historical run logs (condensed from the original comment dump):
    #   running averages drifted from loss 1.8283 / prec 0.5420 (step 10)
    #   to loss 1.8327 / prec 0.5368 (step 360) over 365 iterations.
    #   2018-04-05 single-crop:  loss = 1.8364, precision = 0.5362
    #   2018-04-05 multi-crops:  loss = 1.8639, precision = 0.5472
    coord.request_stop()
    coord.join(threads)
    return average_loss / max_iter, average_precision / max_iter
def main(args):
    """Batch-classify every jpg/jpeg/png under args.test_data_dir.

    Loads a ResNet-50 from args.saved_model, prints the predicted class for
    each image, and (optionally) writes an annotated copy of each image to
    args.save_path with the class name drawn on it.

    Args:
        args: parsed CLI namespace (num_classes, image_size, gpu,
            saved_model, test_data_dir, batch_size, save_path).
    """
    model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                          args.num_classes)
    transformations = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor()
    ])
    if args.gpu[0] >= 0:
        cudnn.enabled = True
        # fix: explicit dim=1 (deprecated implicit-dim Softmax).
        softmax = nn.Softmax(dim=1).cuda()
        model.cuda()
        saved_state_dict = torch.load(args.saved_model)
    else:
        softmax = nn.Softmax(dim=1)
        saved_state_dict = torch.load(args.saved_model, map_location='cpu')
    load_filtered_state_dict(model, saved_state_dict, ignore_layer=[],
                             reverse=True, gpu=cudnn.enabled)
    model.eval()  # fix: BatchNorm/Dropout must run in eval mode at inference
    imgs_path = glob.glob(os.path.join(args.test_data_dir, '*.jpg'))
    imgs_path += glob.glob(os.path.join(args.test_data_dir, '*.jpeg'))
    imgs_path += glob.glob(os.path.join(args.test_data_dir, '*.png'))
    # fix: integer ceil-division (`/` yields a float under Python 3) and
    # range() instead of the Python-2-only xrange().
    num_batches = (len(imgs_path) + args.batch_size - 1) // args.batch_size
    for i in range(num_batches):
        # fix: only the images remaining in THIS batch — the original used
        # min(batch_size, len(imgs_path)), which over-runs on the last batch.
        batch_count = min(args.batch_size,
                          len(imgs_path) - i * args.batch_size)
        imgs = torch.FloatTensor(args.batch_size, 3, args.image_size,
                                 args.image_size)
        if args.gpu[0] >= 0:
            imgs = imgs.cuda()
        for j in range(batch_count):
            img = Image.open(imgs_path[i * args.batch_size + j])
            img = img.convert("RGB")
            imgs[j] = transformations(img)
        with torch.no_grad():  # fix: inference needs no autograd graph
            pred = model(imgs)
            pred = softmax(pred)
        _, pred_1 = pred.topk(1, 1, True, True)
        for j in range(batch_count):
            # fix: look up row j of the batch — the original hard-coded
            # [0][0] and labeled every image with batch item 0's class.
            c = default_class[pred_1.cpu().numpy()[j][0]]
            print("{} -- {} {}".format(imgs_path[i * args.batch_size + j],
                                       pred_1, c))
            if args.save_path:
                img_numpy = imgs[j].cpu().numpy()
                img_numpy = img_numpy * 255
                # change to channel last (CHW -> HWC)
                img_numpy = np.transpose(img_numpy, (1, 2, 0)).astype(np.uint8)
                # rgb to bgr for OpenCV
                img_numpy = img_numpy[..., ::-1].copy()
                cv2.putText(img_numpy, c, (20, 20), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 1, cv2.LINE_AA)
                save_path = os.path.join(
                    args.save_path,
                    os.path.basename(imgs_path[i * args.batch_size + j]))
                cv2.imwrite(save_path, img_numpy)
def main(): word_index_dict = json.load(open(args.word_index_json)) # 加载label信息 num_classes = len(word_index_dict) # 分为多少类,划分为多分类的问题 num_class 1823 image_label_dict = json.load(open(args.image_label_json)) cudnn.benchmark = True if args.model == 'densenet': # 两千多种字符,multi-label分类 model = DenseNet121(num_classes).cuda() elif args.model == 'resnet': # resnet主要用于文字区域的segmentation以及object detection操作 model = resnet.ResNet(num_classes=num_classes, args=args).cuda() else: return optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) # 学习率初始为0.001 # model = torch.nn.DataParallel(model).cuda() loss = Loss().cuda() if args.resume: state_dict = torch.load(args.resume) model.load_state_dict(state_dict['state_dict']) best_f1score = state_dict['f1score'] start_epoch = state_dict['epoch'] + 1 else: best_f1score = 0 if args.model == 'resnet': start_epoch = 100 else: start_epoch = 1 args.epoch = start_epoch print('best_f1score', best_f1score) # 划分数据集 test_filelist = sorted(glob(os.path.join(args.data_dir, 'test', '*'))) # 5664+ trainval_filelist = sorted(glob(os.path.join(args.data_dir, 'train', '*'))) # 13318 # 两种输入size训练 # train_filelist1: 长宽比小于8:1的图片,经过padding后变成 64*512 的输入 # train_filelist2: 长宽比大于8:1的图片,经过padding, crop后变成 64*1024的输入 train_filelist1, train_filelist2 = [], [] # 黑名单,这些图片的label是有问题的 black_list = set(json.load(open(args.black_json))['black_list']) image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json)) for f in trainval_filelist: image = f.split('/')[-1] if image in black_list: continue r = image_hw_ratio_dict[image] if r == 0: train_filelist1.append(f) else: train_filelist2.append(f) train_val_filelist = train_filelist1 + train_filelist2 val_filelist = train_filelist1[-2048:] train_filelist1 = train_filelist1[:-2048] train_filelist2 = train_filelist2 image_size = [512, 64] if args.phase in ['test', 'val', 'train_val']: # 测试输出文字检测结果 test_dataset = dataloader.DataSet( test_filelist, image_label_dict, num_classes, # transform=train_transform, args=args, 
image_size=image_size, phase='test') test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) train_filelist = train_filelist1[-2048:] train_dataset = dataloader.DataSet(train_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) val_dataset = dataloader.DataSet(val_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) train_val_dataset = dataloader.DataSet(train_val_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') train_val_loader = DataLoader(dataset=train_val_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) if args.phase == 'test': # test(start_epoch - 1, model, val_loader, 'val') test(start_epoch - 1, model, test_loader, 'test') # test(start_epoch - 1, model, train_val_loader, 'train_val') elif args.phase == 'val': test(start_epoch - 1, model, train_loader, 'train') test(start_epoch - 1, model, val_loader, 'val') elif args.phase == 'train_val': test(start_epoch - 1, model, train_val_loader, 'train_val') return elif args.phase == 'train': train_dataset1 = dataloader.DataSet(train_filelist1, image_label_dict, num_classes, image_size=image_size, args=args, phase='train') # 长宽比小于8:1的图片,经过padding后变成 64*512 的输入 train_loader1 = DataLoader(dataset=train_dataset1, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) train_dataset2 = dataloader.DataSet(train_filelist2, image_label_dict, num_classes, image_size=(1024, 64), args=args, phase='train') train_loader2 = DataLoader(dataset=train_dataset2, batch_size=args.batch_size // 2, shuffle=True, num_workers=8, pin_memory=True) val_dataset = dataloader.DataSet(val_filelist, image_label_dict, num_classes, 
image_size=image_size, args=args, phase='val') val_loader = DataLoader(dataset=val_dataset, batch_size=min(8, args.batch_size), shuffle=False, num_workers=8, pin_memory=True) filelist = glob(os.path.join(args.bg_dir, '*')) pretrain_dataset1 = dataloader.DataSet(filelist, image_label_dict, num_classes, image_size=args.image_size, word_index_dict=word_index_dict, args=args, font_range=[8, 32], margin=10, rotate_range=[-10., 10.], phase='pretrain') pretrain_loader1 = DataLoader(dataset=pretrain_dataset1, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) pretrain_dataset2 = dataloader.DataSet(filelist, image_label_dict, num_classes, image_size=(256, 128), word_index_dict=word_index_dict, args=args, font_range=[24, 64], margin=20, rotate_range=[-20., 20.], phase='pretrain') pretrain_loader2 = DataLoader(dataset=pretrain_dataset2, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) best_f1score = 0 # eval_mode = 'pretrain-2' eval_mode = 'eval' for epoch in range(start_epoch, args.epochs): args.epoch = epoch if eval_mode == 'eval': if best_f1score > 0.9: args.lr = 0.0001 if best_f1score > 0.9: args.hard_mining = 1 for param_group in optimizer.param_groups: param_group['lr'] = args.lr train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1') if best_f1score > 0.9: train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2') best_f1score = train_eval( epoch, model, val_loader, loss, optimizer, best_f1score, 'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining)) continue '''
def eval():  # NOTE(review): shadows the builtin `eval`; name kept for callers.
    """Evaluate a ResNet classifier on the ImageNet validation split.

    Builds the input pipeline and model under FLAGS.resnet's variable scope,
    restores weights from one or two checkpoints, then streams batches from
    the queue runners, accumulating loss and top-k precision.

    Returns:
        Tuple (mean loss, mean precision) averaged over max_iter iterations.
    """
    with tf.variable_scope(FLAGS.resnet):
        images, labels, _ = dataset_reader.build_input(
            FLAGS.test_batch_size, 'val', dataset='imagenet', blur=0,
            resize_image=FLAGS.resize_image, color_switch=FLAGS.color_switch)
        model = resnet.ResNet(FLAGS.num_classes, None, None, None,
                              resnet=FLAGS.resnet, mode=FLAGS.mode,
                              float_type=tf.float32)
        logits = model.inference(images)
        model.compute_loss(labels + FLAGS.labels_offset, logits)
    # Top-k precision per batch; labels are shifted by labels_offset to match
    # the label convention of the pretrained checkpoint.
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                labels + FLAGS.labels_offset, FLAGS.top_k)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    # ========================= end of building model ================================
    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    if FLAGS.pre_trained_filename is not None and FLAGS.finetuned_filename is not None:
        # Two-checkpoint restore: the final 'logits' layer comes from the
        # pre-trained file, everything else from the finetuned file.
        # Momentum slot variables are skipped entirely.
        last_layer_variables = []
        finetuned_variables = []
        for v in tf.global_variables():
            if 'Momentum' in v.name:
                continue
            if v.name.find('logits') > 0:
                last_layer_variables.append(v)
                print('last layer\'s variables: %s' % v.name)
                continue
            print('finetuned variables:', v.name)
            finetuned_variables.append(v)
        loader1 = tf.train.Saver(var_list=finetuned_variables)
        loader1.restore(sess, FLAGS.finetuned_filename)
        loader2 = tf.train.Saver(var_list=last_layer_variables)
        loader2.restore(sess, FLAGS.pre_trained_filename)
        print('Succesfully loaded model from %s and %s.' %
              (FLAGS.finetuned_filename, FLAGS.pre_trained_filename))
    elif FLAGS.pre_trained_filename is not None:
        loader = tf.train.Saver()
        loader.restore(sess, FLAGS.pre_trained_filename)
        print('Succesfully loaded model from %s.'
              % FLAGS.pre_trained_filename)
    else:
        print('No models loaded...')
    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', 'imagenet') // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter
    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])
        average_loss += loss
        average_precision += precision
        # Progress report every 10 steps; both branches print the same
        # running averages.
        if step % 100 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)
        elif step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)
    # Historical run logs (condensed from the original comment dump):
    #   resnet_v1_101, batch 100: 500/500 loss 0.9925, prec 0.7636
    #   resnet_v1_101, batch 10:  5000/5000 loss 0.9925, prec 0.7636
    #   resnet_v1_152, batch 100: 500/500 loss 0.9743, prec 0.7681
    #   resnet_v1_50,  batch 100: 500/500 loss 1.0409, prec 0.7521
    # Reference numbers:
    # https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models
    coord.request_stop()
    coord.join(threads)
    return average_loss / max_iter, average_precision / max_iter
import glob
import os
import cv2
import numpy as np

# ---------------- hyperparameters ----------------
# Class labels; list order must match the model's output-layer index order.
default_class = ['drawings', 'hentai', 'neutral', 'p**n', 'sexy']
USE_GPU = True  # toggle CUDA inference
NUM_CLASSES = len(default_class)
IMAGE_SIZE = 299  # square input resolution fed to the network
MODEL_PATH = "./models/resnet50-19c8e357.pth"  # checkpoint to restore

# ---------------- model loading ----------------
# NOTE(review): resnet, torchvision, torch, nn, cudnn and
# load_filtered_state_dict are presumably imported elsewhere in this file —
# they are not in view here; confirm before moving this chunk.
print("model loading start")
model = resnet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                      NUM_CLASSES)
model.eval()  # inference mode: freeze BN statistics, disable dropout
if USE_GPU:
    cudnn.enabled = True
    softmax = nn.Softmax().cuda()
    model.cuda()
    saved_state_dict = torch.load(MODEL_PATH)
else:
    softmax = nn.Softmax()
    # map GPU-saved weights onto CPU when running without CUDA
    saved_state_dict = torch.load(MODEL_PATH, map_location='cpu')
load_filtered_state_dict(model, saved_state_dict, ignore_layer=[],
                         reverse=False, gpu=cudnn.enabled)
def eval(i_ckpt):  # NOTE(review): shadows the builtin `eval`; name kept for callers.
    """Evaluate a ResNet checkpoint on the FLAGS.database validation split.

    Resets the default graph, builds the input pipeline and model (in the
    float type selected by FLAGS.data_type), restores weights from `i_ckpt`
    if given, then accumulates loss and top-1 precision over the queue.

    Args:
        i_ckpt: checkpoint path (e.g. '.../model.ckpt-3000') or None to
            evaluate with freshly initialized variables.

    Returns:
        Tuple (mean loss, mean precision) averaged over max_iter iterations.
    """
    tf.reset_default_graph()
    print('================', end='')
    if FLAGS.data_type == 16:
        print('using tf.float16 =====================')
        data_type = tf.float16
    else:
        print('using tf.float32 =====================')
        data_type = tf.float32
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.test_batch_size, 'val', crop_size=FLAGS.test_crop_size,
            dataset=FLAGS.database, color_switch=FLAGS.color_switch, blur=0,
            resize_image=FLAGS.resize_image,
            multicrops_for_eval=FLAGS.test_with_multicrops)
        model = resnet.ResNet(num_classes, None, None, None, mode='eval',
                              bn_epsilon=FLAGS.epsilon,
                              norm_only=FLAGS.norm_only,
                              resnet=FLAGS.resnet, float_type=data_type)
        logits = model.inference(images)
        model.compute_loss(labels, logits)
    # Top-1 precision per batch.
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                model.labels, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    if FLAGS.test_with_multicrops == 1:
        # Multi-crop protocol: each batch holds all crops of ONE image;
        # average the predictions over crops and score that single average.
        precisions = tf.nn.in_top_k(
            [tf.reduce_mean(model.predictions, axis=[0])], [labels[0]], 1)
        precision_op = tf.cast(precisions, tf.float32)
    # ========================= end of building model ================================
    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    if i_ckpt is not None:
        loader = tf.train.Saver(max_to_keep=0)
        loader.restore(sess, i_ckpt)
        # step number is the suffix of the checkpoint filename
        eval_step = i_ckpt.split('-')[-1]
        print('Succesfully loaded model from %s at step=%s.'
              % (i_ckpt, eval_step))
    print(
        '======================= eval process begins ========================='
    )
    average_loss = 0.0
    average_precision = 0.0
    if FLAGS.test_max_iter is None:
        max_iter = dataset_reader.num_per_epoche(
            'eval', FLAGS.database) // FLAGS.test_batch_size
    else:
        max_iter = FLAGS.test_max_iter
    if FLAGS.test_with_multicrops == 1:
        # One image (all of its crops) per iteration in multi-crop mode.
        max_iter = dataset_reader.num_per_epoche('eval', FLAGS.database)
    step = 0
    while step < max_iter:
        step += 1
        loss, precision = sess.run([model.loss, precision_op])
        average_loss += loss
        average_precision += precision
        if step % 10 == 0:
            print(step, '/', max_iter, ':', average_loss / step,
                  average_precision / step)
    coord.request_stop()
    coord.join(threads)
    return average_loss / max_iter, average_precision / max_iter
def train(resume_step=None):
    """Train a ResNet model, checkpointing and logging as it goes.

    Builds the training graph (input pipeline, model, train op) under
    FLAGS.resnet's variable scope, optionally restores either a fine-tune
    checkpoint or a previous snapshot, then runs the training loop with the
    learning-rate policy selected by FLAGS.lr_policy.

    Args:
        resume_step: if not None, resume from snapshot
            '<snapshot_dir>/model.ckpt-<resume_step>' and continue counting
            steps from there.

    Returns:
        Tuple (f_log, logdir): the still-open CSV log file handle and the
        LogDir object — f_log returned for eval.
    """
    # NOTE(review): `global_step` is not referenced again in this function;
    # presumably other graph code reads it from the variable collection —
    # confirm before removing.
    global_step = tf.get_variable('global_step', [],
                                  dtype=tf.int64,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    print('================', end='')
    if FLAGS.data_type == 16:
        print('using tf.float16 =====================')
        data_type = tf.float16
    else:
        print('using tf.float32 =====================')
        data_type = tf.float32
    # Hyperparameters fed at each step so they can be scheduled at runtime.
    wd_rate_ph = tf.placeholder(data_type, shape=())
    wd_rate2_ph = tf.placeholder(data_type, shape=())
    lrn_rate_ph = tf.placeholder(data_type, shape=())
    with tf.variable_scope(FLAGS.resnet):
        images, labels, num_classes = dataset_reader.build_input(
            FLAGS.batch_size, 'train',
            examples_per_class=FLAGS.examples_per_class,
            dataset=FLAGS.database,
            resize_image=FLAGS.resize_image,
            color_switch=FLAGS.color_switch,
            blur=FLAGS.blur)
        model = resnet.ResNet(num_classes, lrn_rate_ph, wd_rate_ph,
                              wd_rate2_ph,
                              optimizer=FLAGS.optimizer,
                              mode='train',
                              bn_epsilon=FLAGS.epsilon,
                              resnet=FLAGS.resnet,
                              norm_only=FLAGS.norm_only,
                              initializer=FLAGS.initializer,
                              fix_blocks=FLAGS.fix_blocks,
                              fine_tune_filename=FLAGS.fine_tune_filename,
                              bn_ema=FLAGS.ema_decay,
                              wd_mode=FLAGS.weight_decay_mode,
                              fisher_filename=FLAGS.fisher_filename,
                              gpu_num=FLAGS.gpu_num,
                              fisher_epsilon=FLAGS.fisher_epsilon,
                              float_type=data_type,
                              separate_regularization=FLAGS.separate_reg)
        model.inference(images)
        model.build_train_op(labels)
    # Count trainable parameters (product of each variable's shape dims).
    names = []
    num_params = 0
    for v in tf.trainable_variables():
        # print v.name
        names.append(v.name)
        num = 1
        for i in v.get_shape().as_list():
            num *= i
        num_params += num
    print("Trainable parameters' num: %d" % num_params)
    # Top-1 precision on the current training batch.
    precisions = tf.nn.in_top_k(tf.cast(model.predictions, tf.float32),
                                model.labels, 1)
    precision_op = tf.reduce_mean(tf.cast(precisions, tf.float32))
    # ========================= end of building model ================================
    step = 0
    saver = tf.train.Saver(max_to_keep=0)
    # Create the nested log/snapshot directory tree if it does not exist yet.
    logdir = LogDir(FLAGS.database, FLAGS.log_dir, FLAGS.weight_decay_mode)
    logdir.print_all_info()
    if not os.path.exists(logdir.log_dir):
        print('creating ', logdir.log_dir, '...')
        os.mkdir(logdir.log_dir)
    if not os.path.exists(logdir.database_dir):
        print('creating ', logdir.database_dir, '...')
        os.mkdir(logdir.database_dir)
    if not os.path.exists(logdir.exp_dir):
        print('creating ', logdir.exp_dir, '...')
        os.mkdir(logdir.exp_dir)
    if not os.path.exists(logdir.snapshot_dir):
        print('creating ', logdir.snapshot_dir, '...')
        os.mkdir(logdir.snapshot_dir)
    init = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ]
    gpu_options = tf.GPUOptions(allow_growth=False)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    sess = tf.Session(config=config)
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # When blocks are frozen (fix_blocks > 0) the frozen variables are not
    # trainable, so restore from the full global variable list instead.
    import_variables = tf.trainable_variables()
    if FLAGS.fix_blocks > 0:
        import_variables = tf.global_variables()
    if FLAGS.fine_tune_filename is not None and resume_step is None:
        # Fresh fine-tune: load everything except the new layers (and
        # Momentum optimizer slots), which keep their initializers.
        fine_tune_variables = []
        new_layers_names = model.new_layers_names
        new_layers_names.append('Momentum')
        for v in import_variables:
            if any(elem in v.name for elem in new_layers_names):
                print('not loading %s' % v.name)
                continue
            fine_tune_variables.append(v)
        loader = tf.train.Saver(var_list=fine_tune_variables)
        loader.restore(sess, FLAGS.fine_tune_filename)
        print('Succesfully loaded fine-tune model from %s.' %
              FLAGS.fine_tune_filename)
    elif resume_step is not None:
        # ./snapshot/model.ckpt-3000
        i_ckpt = logdir.snapshot_dir + '/model.ckpt-%d' % resume_step
        saver.restore(sess, i_ckpt)
        step = resume_step
        print('Succesfully loaded model from %s at step=%s.'
              % (i_ckpt, resume_step))
    else:
        print('Not import any model.')
    print(
        '=========================== training process begins ================================='
    )
    # CSV-style training log, one file per run, named by start timestamp.
    f_log = open(logdir.exp_dir + '/' + str(datetime.datetime.now()) + '.txt',
                 'w')
    f_log.write('step,loss,precision,wd\n')
    f_log.write(sorted_str_dict(FLAGS.__dict__) + '\n')
    average_loss = 0.0
    average_precision = 0.0
    show_period = 20  # report/flush cadence in steps
    snapshot = FLAGS.snapshot
    max_iter = FLAGS.train_max_iter
    lrn_rate = FLAGS.lrn_rate
    # Parse comma-separated step milestones for the 'step' lr policy.
    lr_step = []
    if FLAGS.lr_step is not None:
        temps = FLAGS.lr_step.split(',')
        for t in temps:
            lr_step.append(int(t))
    t0 = None  # wall-clock anchor for the time-left estimate
    wd_rate = FLAGS.weight_decay_rate
    wd_rate2 = FLAGS.weight_decay_rate2
    while step < max_iter + 1:
        step += 1
        # Learning-rate schedule selected by FLAGS.lr_policy.
        if FLAGS.lr_policy == 'step':
            if len(lr_step) > 0 and step == lr_step[0]:
                lrn_rate *= FLAGS.step_size
                lr_step.remove(step)
        elif FLAGS.lr_policy == 'poly':
            lrn_rate = ((1 - 1.0 *
                         (step - 1) / max_iter)**0.9) * FLAGS.lrn_rate
        elif FLAGS.lr_policy == 'linear':
            lrn_rate = FLAGS.lrn_rate / step
        else:
            lrn_rate = FLAGS.lrn_rate
        _, loss, wd, precision = sess.run(
            [model.train_op, model.loss, model.wd, precision_op],
            feed_dict={
                lrn_rate_ph: lrn_rate,
                wd_rate_ph: wd_rate,
                wd_rate2_ph: wd_rate2
            })
        average_loss += loss
        average_precision += precision
        if FLAGS.save_first_iteration == 1 or step % snapshot == 0:
            saver.save(sess,
                       logdir.snapshot_dir + '/model.ckpt',
                       global_step=step)
        if step % show_period == 0:
            # Estimate remaining wall-clock time from the last period.
            left_hours = 0
            if t0 is not None:
                delta_t = (datetime.datetime.now() - t0).seconds
                left_time = (max_iter - step) / show_period * delta_t
                left_hours = left_time / 3600.0
            t0 = datetime.datetime.now()
            average_loss /= show_period
            average_precision /= show_period
            if step == 0:
                average_loss *= show_period
                average_precision *= show_period
            f_log.write('%d,%f,%f,%f\n' %
                        (step, average_loss, average_precision, wd))
            f_log.flush()
            print('%s %s] Step %s, lr = %f, wd_rate = %f, wd_rate_2 = %f ' \
                  % (str(datetime.datetime.now()), str(os.getpid()), step,
                     lrn_rate, wd_rate, wd_rate2))
            print('\t loss = %.4f, precision = %.4f, wd = %.4f' %
                  (average_loss, average_precision, wd))
            print('\t estimated time left: %.1f hours. %d/%d' %
                  (left_hours, step, max_iter))
            average_loss = 0.0
            average_precision = 0.0
    coord.request_stop()
    coord.join(threads)
    return f_log, logdir  # f_log returned for eval.
import torch as t
import torchvision as tv
from data import get_train_dataset, get_validation_dataset
from stopping import EarlyStoppingCallback
from trainer import Trainer
from matplotlib import pyplot as plt
import numpy as np
from model import resnet

# Data loading for the training and validation sets via t.utils.data.DataLoader
# and the dataset constructors implemented in data.py.
train_dl = t.utils.data.DataLoader(get_train_dataset(), batch_size=50)
test_dl = t.utils.data.DataLoader(get_validation_dataset(), batch_size=20)

# Model under training.
model = resnet.ResNet()

# Multi-label loss. NOTE(review): the original comment suggests setting
# pos_weight to ease convergence, but no weight is passed here — confirm
# whether that is intentional.
loss = t.nn.MultiLabelSoftMarginLoss()

# Optimizer (see t.optim).
optimizer = t.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Early stopping (patience of 5, per stopping.py) wired into the Trainer.
early_stopping = EarlyStoppingCallback(5)
trainer = Trainer(model,
                  loss,
                  optim=optimizer,
                  train_dl=train_dl,
                  val_test_dl=test_dl,
                  cuda=True,
                  early_stopping_cb=early_stopping)

# Run the training loop; `res` holds whatever Trainer.fit returns
# (presumably loss curves — confirm against trainer.py).
res = trainer.fit()