def dump_model():
    """Serialize ResNet-18 weights to a raw binary file.

    Loads a checkpoint into a BasicBlock ResNet-18, walks the network in
    forward order collecting parameter tensors (conv weights plus BatchNorm
    running stats and affine params), and appends each tensor's raw numpy
    bytes to 'checkpoint_blu35103.data' via ndarray.tofile().
    """
    def get_bn_params(bn):
        # Dump order for a BatchNorm layer: running stats first, then affine.
        return [bn.running_mean, bn.running_var, bn.weight.data, bn.bias.data]

    def get_block_params(block):
        # BasicBlock: conv1/bn1/conv2/bn2 (+ optional downsample conv/bn).
        params = [block.conv1.weight.data] + get_bn_params(block.bn1) \
            + [block.conv2.weight.data] + get_bn_params(block.bn2)
        if block.downsample:
            params += [block.downsample._modules['0'].weight.data] \
                + get_bn_params(block.downsample._modules['1'])
        return params

    def get_BN_block_params(block):
        # Bottleneck variant (extra conv3/bn3); unused for BasicBlock nets but
        # kept so the same dumper works for Bottleneck checkpoints.
        params = [block.conv1.weight.data] + get_bn_params(block.bn1) \
            + [block.conv2.weight.data] + get_bn_params(block.bn2) \
            + [block.conv3.weight.data] + get_bn_params(block.bn3)
        if block.downsample:
            params += [block.downsample._modules['0'].weight.data] \
                + get_bn_params(block.downsample._modules['1'])
        return params

    def get_layer_params(layer):
        # FIX: iterate the child modules directly instead of iterating the
        # _modules keys and re-indexing by name.
        params = []
        for block in layer._modules.values():
            params += get_block_params(block)
        return params

    model = resnet.ResNet(resnet.BasicBlock, [2, 2, 2, 2])
    checkpoint = torch.load('resnet18\\checkpoint_blu35103.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])

    params = [model.conv1.weight.data] + get_bn_params(model.bn1)  # (64,3,7,7)
    params += get_layer_params(model.layer1)
    params += get_layer_params(model.layer2)
    params += get_layer_params(model.layer3)
    params += get_layer_params(model.layer4)
    params += [model.fc.weight.data, model.fc.bias.data]

    with open('checkpoint_blu35103.data', 'wb') as f:
        for para_tensor in params:
            para_tensor.numpy().tofile(f)
def main():
    """Load a trained ResNet-152, trace it with TorchScript and save the module.

    Writes the traced module to 'script_module.pt'.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])
    model.load_state_dict(torch.load('resnet152_best-148.ckpt', map_location=device))
    # BUG FIX: the original called model.cuda() / example.cuda() even though
    # `device` above may be CPU, crashing on CUDA-less hosts. Use the selected
    # device consistently instead.
    model.to(device)
    model.eval()
    with torch.no_grad():
        # Trace with a dummy input of the deployment shape (1x3x32x32).
        example = torch.rand(1, 3, 32, 32).to(device)
        traced_script_module = torch.jit.trace(model, example)
        traced_script_module.save('script_module.pt')
def conv_validation():
    """Evaluate the quantized ResNet-18 (with BLU activations) on ImageNet val.

    Prints per-batch top-1/top-5 accuracy and the running averages at the end.
    """
    model = resnet.ResNet(resnet.BasicBlock, [2, 2, 2, 2])
    checkpoint = torch.load('resnet18\\checkpoint_renorm89.pth.tar')
    model.load_state_dict(moduledict_to_dict(checkpoint['state_dict']))
    model.quantize('quant_param89.data')
    model.load_blu('3sigma.blu')
    # BUG FIX: validation must run in eval mode (freeze BN running stats /
    # disable dropout) — the original left the model in train mode.
    model.eval()

    # Renormalized preprocessing — deliberately NOT the standard ImageNet
    # mean/std (matches the renorm checkpoint loaded above).
    normalize = transforms.Normalize(mean=[128 / 255, 128 / 255, 128 / 255],
                                     std=[0.226, 0.226, 0.226])
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('..\\data\\imagenet\\val', transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=10, shuffle=False,
        num_workers=4, pin_memory=True)

    acc1_t = torch.tensor([0.0])
    acc5_t = torch.tensor([0.0])
    # BUG FIX: inference without autograd bookkeeping (saves memory/time).
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            output = model(input)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            print('{}:acc1:{},acc5:{}'.format(i, acc1, acc5))
            acc1_t += acc1
            acc5_t += acc5
    print('acc1:{}\nacc5:{}'.format(acc1_t / (i + 1), acc5_t / (i + 1)))
def unittest():
    """Smoke test: build the ResNet graph and initialize all TF variables."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    network = resnet.ResNet(args)
    session.run(tf.global_variables_initializer())
def __init__(self, T, C):
    """Set up placeholders and hyper-parameters for the pose/action model.

    Args:
        T: number of frames per clip (temporal length).
        C: number of action classes.
    """
    self.T = T
    self.C = C
    # Inputs: RGB frames and one label per frame.
    self.X = tf.placeholder(tf.float32, [None, self.T, 224, 224, 3])
    self.Y = tf.placeholder(tf.int32, [None, self.T])
    # Joint heat-maps: 18 keypoints (CMU OpenPose layout) on a 7x7 grid.
    self.J = 18
    self.P = tf.placeholder(tf.float32, [None, self.T, 7, 7, self.J])
    self.LR = tf.placeholder(tf.float32)         # learning rate
    self.lambda_l2 = tf.placeholder(tf.float32)  # L2 regularization factor
    self.phase = tf.placeholder(tf.bool)         # True while training
    # Batch geometry, resolved at run time from the fed tensor.
    self.BATCH = tf.shape(self.X)[0]
    self.BT = self.BATCH * self.T
    self.scope = "Model"  # only variables under this scope are trained
    # Relative weights of the action and pose loss terms.
    self.l_action = 1.0
    self.l_pose = 1.0
    self.DIM_LSTM = 512  # LSTM state dimensionality
    self.DIM_ATT = 32    # attention dim: 32 (Sub-JHMDB) or 128 (PennAction)
    # Base DCN is ResNet; swap the constructor here to change backbones.
    self.net = resnet.ResNet()
    self.net.phase = self.phase
def train():
    """Train for args.beg_epoch..args.epochs, checkpointing, testing and
    logging after every epoch."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model = resnet.ResNet(args)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=100)
    model.load_model(sess, saver)
    logger.info('model loaded')

    dtrain = dataset.DataSet({'data_dir': args.train_dir})
    logger.info('dataset loaded')

    for epoch in range(args.beg_epoch, args.epochs):
        train_acc, train_loss = run_epoch(epoch, model, sess, dtrain)
        saver.save(sess, os.path.join(args.model_dir, 'model.ckpt'),
                   global_step=epoch)
        logger.info('model of epoch {} saved'.format(epoch))

        msg = '[epoch {}]train-acc:{:.3} train-loss:{:.3}'.format(
            epoch, train_acc, train_loss)
        logger.warning(msg)
        print(msg)

        # Evaluate after every epoch (the every-10-epochs gate was disabled).
        valid_acc, valid_time = run_test(epoch, model, sess, 10, 20)
        msg = '[epoch {}]test-acc:{:.3} test-time:{}'.format(
            epoch, valid_acc, valid_time)
        logger.warning(msg)
        print(msg)

    logger.info('train end')
def netMaker(args):
    '''Build a network

    Args:
        args (Namespace): the namespace containing all the arguments required
            for training and building the network
    Returns:
        the built network
    Raises:
        ValueError: if args.dataset or args.model is unknown.
    '''
    # Dataset -> (input size, input channels, number of classes).
    dataset_specs = {
        "MNIST": (28, 1, 10),
        "FAKENIST": (28, 1, 10),
        "CIFAR10": (32, 3, 10),
        "IMAGENET": (224, 3, 1000),
    }
    if args.dataset not in dataset_specs:
        raise ValueError("Unknown dataset : {}".format(args.dataset))
    inSize, inChan, numClasses = dataset_specs[args.dataset]

    def make_resnet(**kw):
        # All ResNet variants below share everything except geom/strides/
        # multiChan/conv, so centralize the common arguments here.
        return resnet.ResNet(resnet.BasicBlock, [1, 1, 1, 1], inChan=inChan,
                             width_per_group=args.dechan,
                             firstConvKer=args.deker, inPlanes=args.dechan,
                             num_classes=numClasses, **kw)

    if args.model == "cnn":
        net = make_resnet(geom=False, strides=[2, 2, 2, 2], conv=True)
    elif args.model == "gnn":
        net = GNN(inChan, args.chan_gnn, args.nb_lay_gnn, numClasses,
                  args.res_con_gnn, args.batch_norm_gnn, args.max_pool_pos,
                  args.max_pool_ker, False)
    elif args.model == "gnn_resnet_stri":
        net = make_resnet(geom=True, strides=[2, 2, 2, 2],
                          multiChan=False, conv=False)
    elif args.model == "gnn_resnet":
        net = make_resnet(geom=True, strides=[1, 1, 1, 1],
                          multiChan=False, conv=False)
    elif args.model == "gnn_resnet_mc":
        net = make_resnet(geom=True, strides=[1, 1, 1, 1],
                          multiChan=True, conv=False)
    elif args.model == "gcnn_resnet":
        net = make_resnet(geom=True, strides=[2, 2, 2, 2],
                          multiChan=False, conv=True)
    else:
        raise ValueError("Unknown model type : {}".format(args.model))

    return net
def load_model(self):
    """Build the architecture named by self.model_type, load its trained
    weights from self.model_path, and return it in eval mode on self.device.

    The weights file is the one produced by train_model.py
    (`self.trained_model_prefix + "latest_model.pth"`).

    Raises:
        ValueError: if self.model_type is not a known architecture.
    """
    nb_outputs = ut.nbOutputs(self.label_style, self.environment)
    if self.model_type == 'dk_resnet18_CP':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = RewardFunctionHeadCartPole()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model_type == 'dk_resnet18_CP_weird':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = WeirdRewardFunctionHeadCartPole()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model_type == 'dk_resnet18_DT':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = RewardFunctionHeadDuckieTown()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model_type == 'resnet18':
        net = models.resnet18(pretrained=False, num_classes=nb_outputs)
        # To use the pretrained model instead (it ships with 1000 classes):
        # dim_feats = net.fc.in_features  # =1000
        # net.fc = nn.Linear(dim_feats, nb_outputs)
    elif self.model_type == 'resnet34':
        # NOTE(review): this uses resnet.resnet34 while initialize_net uses
        # models.resnet34 — confirm which module is intended.
        net = resnet.resnet34(pretrained=False, num_classes=nb_outputs)
    elif self.model_type == 'resnet50':
        net = resnet.ResNet(resnet.Bottleneck, [3, 4, 6, 3],
                            num_classes=nb_outputs)
    elif self.model_type == 'resnet101':
        net = resnet.ResNet(resnet.Bottleneck, [3, 4, 23, 3],
                            num_classes=nb_outputs)
    elif self.model_type == 'resnet152':
        net = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3],
                            num_classes=nb_outputs)
    else:
        # BUG FIX: an unknown model_type previously fell through and crashed
        # with an opaque NameError on `net`; fail loudly instead.
        raise ValueError('unknown model_type: {}'.format(self.model_type))

    net.load_state_dict(torch.load(self.model_path))
    print('Loaded model')
    net.eval()
    net = net.to(self.device)
    return net
def __init__(self, input_channel, dropout=None,
             inp="log-Filterbank-DCT-26-13", nodes=None, num_layers=2):
    """Wrap a ResNet feature extractor.

    Args:
        input_channel: number of input channels fed to the ResNet.
        dropout: optional dropout setting passed through to the ResNet.
        inp: input-feature description string.
        nodes: channel widths per stage; defaults to [16, 32, 64, 128].
        num_layers: number of residual blocks per stage.
    """
    super(resnet_extractor, self).__init__()
    import resnet
    # BUG FIX: the default was a mutable list in the signature, which is
    # shared across all calls; use a None sentinel instead.
    if nodes is None:
        nodes = [16, 32, 64, 128]
    self.model = resnet.ResNet(input_channel, resnet.ResidualBlock, nodes,
                               num_layers, dropout, inp)
    self.out_features = self.model.out_features
def initialize_net(self):
    """Construct the (untrained) architecture named by self.model, cast it to
    float, and move it to self.device.

    Raises:
        ValueError: if self.model is not a known architecture.
    """
    nb_outputs = ut.nbOutputs(self.label_style, self.environment)
    if self.model == 'dk_resnet18_CP':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = RewardFunctionHeadCartPole()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model == 'dk_resnet18_CP_weird':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = WeirdRewardFunctionHeadCartPole()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model == 'dk_resnet18_DT':
        nb_outputs = 2  # FIXME: hard coded
        reward_fn_head = RewardFunctionHeadDuckieTown()
        net = RewardFunctionHeadModel(
            models.resnet18(pretrained=False, num_classes=nb_outputs),
            reward_fn_head)
    elif self.model == 'resnet18':
        net = models.resnet18(pretrained=False, num_classes=nb_outputs)
        # To use the pretrained model instead (it ships with 1000 classes):
        # dim_feats = net.fc.in_features  # =1000
        # net.fc = nn.Linear(dim_feats, nb_outputs)
    elif self.model == 'resnet34':
        net = models.resnet34(pretrained=False, num_classes=nb_outputs)
    elif self.model == 'resnet50':
        net = resnet.ResNet(resnet.Bottleneck, [3, 4, 6, 3],
                            num_classes=nb_outputs)
    elif self.model == 'resnet101':
        net = resnet.ResNet(resnet.Bottleneck, [3, 4, 23, 3],
                            num_classes=nb_outputs)
    elif self.model == 'resnet152':
        net = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3],
                            num_classes=nb_outputs)
    else:
        # BUG FIX: an unknown model name previously fell through and crashed
        # with an opaque NameError on `net`; fail loudly instead.
        raise ValueError('unknown model: {}'.format(self.model))

    net = net.float()
    net = net.to(self.device)
    return net
def temperature_scaling():
    """Calibrate a trained ResNet-110 on CIFAR-100 with temperature scaling.

    Saves per-sample softmax scores and correctness flags both before and
    after calibration, using the project's utlis.save_data helper.
    """
    print("Calibration")
    state_dict = torch.load(args.save_path + 'resnet110_{0}.pth'.format(args.epoch))
    valid_indices = torch.load(args.save_path + 'valid_indices.pth')

    # CIFAR-100 channel statistics.
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])

    valid_set = tv.datasets.CIFAR100(root='./cifar100_data/', train=True,
                                     transform=test_transforms, download=False)
    test_set = tv.datasets.CIFAR100(root='./cifar100_data/', train=False,
                                    transform=test_transforms, download=False)
    # The calibration split is carved out of the train set via saved indices.
    valid_loader = torch.utils.data.DataLoader(
        valid_set, batch_size=args.batch_size,
        sampler=SubsetRandomSampler(valid_indices))
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=100,
                                              num_workers=4)

    criterion = nn.CrossEntropyLoss()
    network = resnet.ResNet(args.layer, 100).cuda()
    network.load_state_dict(state_dict)

    # Evaluate and record results before calibration.
    test_loss, test_acc, test_softmax, test_correct, test_y, test_loss_idv = test(
        test_loader, network, criterion, args.epoch + 1, 'test')
    test_softmax = [score.item() for score in test_softmax]
    utlis.save_data('before_cali_correct', test_correct, args.save_path)
    utlis.save_data('before_cali_softmax', test_softmax, args.save_path)

    # Fit the temperature on the held-out split, then re-evaluate.
    model = ModelWithTemperature(network)
    model.set_temperature(valid_loader)
    test_loss, test_acc, test_softmax, test_correct, test_y, test_loss_idv = test(
        test_loader, model, criterion, args.epoch + 1, 'test')
    test_softmax = [score.item() for score in test_softmax]
    utlis.save_data('after_cali_correct', test_correct, args.save_path)
    utlis.save_data('after_cali_softmax', test_softmax, args.save_path)
def __init__(self, is_train=True):
    """Create the ResNet on GPU; in training mode also build the optimizer
    and the policy/value loss functions."""
    self.resnet = resnet.ResNet()
    self.resnet.cuda()
    if is_train:
        # SGD with momentum and weight decay; LR comes from the config C.
        self.optimizer = torch.optim.SGD(self.resnet.parameters(),
                                         lr=C.LEARNING_RATE,
                                         momentum=0.9,
                                         weight_decay=1E-4)
        self.policy_loss_fn = MultiLableCrossEntropy()
        self.value_loss_fn = torch.nn.MSELoss()
def test():
    """Run a random 32x32 image through ResNet-152 and print the output shape."""
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3]).to(device)
    model.eval()
    with torch.no_grad():
        dummy = torch.randn(1, 3, 32, 32).to(device)
        out = model(dummy)
        print(out.size())
def main(root_image, root_model):
    """Classify one image with a trained ResNet-152 and print the predicted
    class name with its softmax probability (percent, 2 decimals)."""
    print("\nInput image ==> ", root_image)
    print("Current model ===> ", root_model)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Run ", device)

    # ResNet-50: [3, 4, 6, 3] / ResNet-101: [3, 4, 23, 3] / ResNet-152: [3, 8, 36, 3]
    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])
    model.load_state_dict(torch.load(root_model))
    model.to(device)

    # Input preprocessing.
    loader = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    model.eval()
    with torch.no_grad():
        # Class names: the 67 directory names under the dataset root, sorted.
        label = sorted(os.listdir("./dataset/img/"))

        image = Image.open(root_image).convert("RGB")
        image_tensor = loader(image).unsqueeze(0).to(device)
        output = model(image_tensor)

        # Predicted class index and its softmax probability.
        _, predicted = torch.max(output.data, 1)
        s_max = torch.nn.Softmax(dim=0)
        prob_np = s_max(output.squeeze(0)).cpu().numpy()

        index = int(predicted.item())
        predicted_name = label[index]
        probability = round(prob_np[index] * 100, 2)
        print("Predicted ===> name : {}, probability : {}".format(
            predicted_name, probability))
def __init__(self, trained_dataset='imagenet', depth=101, num_of_class=1000,
             path=None):
    """Build a DataParallel ResNet feature extractor.

    Optionally restores a checkpoint from `path`, then replaces the avgpool
    and fc heads with Identity so forward() yields backbone features.
    """
    model = RN.ResNet(trained_dataset, depth, num_of_class)
    model = torch.nn.DataParallel(model).cuda()
    if path is not None:
        print("=> loading checkpoint '{}'".format(path))
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(path))
    # Strip the classification head.
    model.module.avgpool = Identity()
    model.module.fc = Identity()
    self.model = model
def _build_model(self):
    """Build and compile the ResNet-based Deep-Q network for 16x16x3 inputs.

    Huber loss keeps Q-value updates robust to outlier targets.
    """
    model = resnet.ResNet(self.action_size)
    model.build(input_shape=(None, 16, 16, 3))
    model.compile(loss=self._huber_loss,
                  optimizer=Adam(lr=self.learning_rate))
    return model
def test():
    """Evaluate every saved checkpoint (epochs 0..args.beg_epoch) on the
    test set, logging accuracy and timing for each."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model = resnet.ResNet(args)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=100)

    for epoch in range(0, args.beg_epoch):
        args.model_path = os.path.join(args.model_dir,
                                       'model.ckpt-{}'.format(epoch))
        model.load_model(sess, saver)
        valid_acc, valid_time = run_test(epoch, model, sess, 10, 20)
        msg = '[epoch {}]test-acc:{:.3} test-time:{}'.format(
            epoch, valid_acc, valid_time)
        logger.warning(msg)
        print(msg)

    return
    # NOTE(review): unreachable — the return above skips this final log line.
    logger.info('test end')
def __init__(self, args):
    """Compose a ResNet-31 convolutional backbone with a recognition head."""
    super(Model, self).__init__()
    self.conv_layer = resnet.ResNet(31)
    self.recg = recog.Recogniton(args)
def main(): word_index_dict = json.load(open(args.word_index_json, encoding="utf-8")) #一个汉字对应一个编号 num_classes = len(word_index_dict) # xzy 加载label文件 image_label_dict = json.load(open(args.image_label_json)) #一个图片对应多个其标签的编号 # print(image_label_dict) cudnn.benchmark = True # xzy 加载模型 if args.model == 'densenet': # 两千多种字符,multi-label分类 if use_gpu: model = DenseNet121(num_classes).cuda() else: model = DenseNet121(num_classes) elif args.model == 'resnet': # resnet主要用于文字区域的segmentation以及object detection操作 if use_gpu: model = resnet.ResNet(num_classes=num_classes, args=args).cuda() else: model = resnet.ResNet(num_classes=num_classes, args=args) else: return optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) # model = torch.nn.DataParallel(model).cuda() if use_gpu: loss = Loss().cuda() else: loss = Loss() # if args.resume: if need_resume: print("let's begin resume") # state_dict = torch.load(args.resume) state_dict = torch.load(resume_ckpt_path) #xzy model.load_state_dict(state_dict['state_dict']) #xzy 断点训练、测试,都走这里。 best_f1score = state_dict['f1score'] start_epoch = state_dict['epoch'] + 1 print("already resume " + str(resume_ckpt_path.split("/")[-1])) else: print("no resume") best_f1score = 0 if args.model == 'resnet': start_epoch = 100 else: start_epoch = 1 args.epoch = start_epoch print('best_f1score', best_f1score) #xzy 加载、划分数据集 # test_filelist = sorted(glob(os.path.join(args.data_dir,'test','*'))) # trainval_filelist = sorted(glob(os.path.join(args.data_dir,'train','*'))) test_filelist = sorted(glob(os.path.join(args.data_test_dir, '*'))) trainval_filelist = sorted(glob(os.path.join(args.data_dir, '*'))) print("训练数据集长度是" + str(len(trainval_filelist))) print("测试数据集长度是" + str(len(test_filelist))) # 两种输入size训练 # train_filelist1: 长宽比小于8:1的图片,经过padding后变成 64*512 的输入 # train_filelist2: 长宽比大于8:1的图片,经过padding,crop后变成 64*1024的输入 train_filelist1, train_filelist2 = [], [] # 黑名单,这些图片的label是有问题的 black_list = set(json.load(open(args.black_json))['black_list']) # 
image_hw_ratio_dict = json.load(open(args.image_hw_ratio_json)) #xzy for f in trainval_filelist: image = f.split('/')[-1] if image in black_list: continue # r = image_hw_ratio_dict[image] # if r == 0: if True: #xzy 不用考虑判断是否1:8从而分散数据集,进行不同resize train_filelist1.append(f) else: train_filelist2.append(f) # train_val_filelist = train_filelist1 + train_filelist2 train_val_filelist = train_filelist1 val_filelist = train_filelist1[-2048:] train_filelist1 = train_filelist1[:-2048] # train_filelist2 = train_filelist2` ` #取消train_dataset2数据集 # image_size = [512, 64] 32的16倍和2倍 image_size = [288, 64] #xzy 32的9倍和2倍 if args.phase in ['test', 'val', 'train_val']: # 测试输出文字检测结果 test_dataset = dataloader.DataSet( test_filelist, image_label_dict, num_classes, # transform=train_transform, args=args, image_size=image_size, phase='test') test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) train_filelist = train_filelist1[-2048:] train_dataset = dataloader.DataSet(train_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) val_dataset = dataloader.DataSet(val_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) train_val_dataset = dataloader.DataSet(train_val_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='test') train_val_loader = DataLoader(dataset=train_val_dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) if args.phase == 'test': print("now , let's do phase of test") test(start_epoch - 1, model, val_loader, 'val') test(start_epoch - 1, model, test_loader, 'test') # test(start_epoch - 1, model, train_val_loader, 'train_val') elif args.phase == 'val': test(start_epoch - 1, model, train_loader, 
'train') test(start_epoch - 1, model, val_loader, 'val') elif args.phase == 'train_val': test(start_epoch - 1, model, train_val_loader, 'train_val') return elif args.phase == 'train': # print(train_filelist1[:10]) train_dataset1 = dataloader.DataSet( train_filelist1, image_label_dict, num_classes, image_size=image_size, # image_size = [512, 64] args=args, phase='train') train_loader1 = DataLoader(dataset=train_dataset1, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) train_dataset2 = dataloader.DataSet(train_filelist2, image_label_dict, num_classes, image_size=(1024, 64), args=args, phase='train') train_loader2 = DataLoader(dataset=train_dataset2, batch_size=int(args.batch_size / 2), shuffle=True, num_workers=8, pin_memory=True) val_dataset = dataloader.DataSet(val_filelist, image_label_dict, num_classes, image_size=image_size, args=args, phase='val') val_loader = DataLoader(dataset=val_dataset, batch_size=min(8, args.batch_size), shuffle=False, num_workers=8, pin_memory=True) filelist = glob(os.path.join(args.bg_dir, '*')) pretrain_dataset1 = dataloader.DataSet(filelist, image_label_dict, num_classes, image_size=args.image_size, word_index_dict=word_index_dict, args=args, font_range=[8, 32], margin=10, rotate_range=[-10., 10.], phase='pretrain') pretrain_loader1 = DataLoader(dataset=pretrain_dataset1, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) pretrain_dataset2 = dataloader.DataSet(filelist, image_label_dict, num_classes, image_size=(256, 128), word_index_dict=word_index_dict, args=args, font_range=[24, 64], margin=20, rotate_range=[-20., 20.], phase='pretrain') pretrain_loader2 = DataLoader(dataset=pretrain_dataset2, batch_size=args.batch_size, shuffle=True, num_workers=8, pin_memory=True) best_f1score = 0 # eval_mode = 'pretrain-2' eval_mode = 'eval' for epoch in range(start_epoch, args.epochs): args.epoch = epoch if eval_mode == 'eval': if best_f1score > 0.9: args.lr = 0.0001 if best_f1score > 0.9: 
args.hard_mining = 1 for param_group in optimizer.param_groups: param_group['lr'] = args.lr train_eval(epoch, model, train_loader1, loss, optimizer, 2., 'train-1') # if best_f1score > 0.9: # train_eval(epoch, model, train_loader2, loss, optimizer, 2., 'train-2') #取消train_dataset2数据集 best_f1score = train_eval( epoch, model, val_loader, loss, optimizer, best_f1score, 'eval-{:d}-{:d}'.format(args.batch_size, args.hard_mining)) continue '''
def main():
    """Train (or only inspect, with --checkmodel) a CIFAR classifier while
    tracking the confusion between one targeted pair of classes
    (args.first, args.second).

    For CIFAR-10, the targeted pair is oversampled with a
    WeightedRandomSampler. The best checkpoint (tracked over the last quarter
    of training) is reloaded at the end and its pair confusion reported.
    """
    global args, best_err1, best_err5, global_epoch_confusion, best_loss
    args = parser.parse_args()

    if args.dataset.startswith('cifar'):
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        if args.dataset == 'cifar100':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data', train=True, download=True,
                                  transform=transform_train),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data', train=False,
                                  transform=transform_test),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            train_data = datasets.CIFAR10('../data', train=True, download=True,
                                          transform=transform_train)
            print(train_data.targets[:30])
            print(train_data.targets[:30])
            print(len(train_data))
            # BUG FIX: removed dead placeholder code copied from a tutorial
            # (`class_counts`, `num_samples`, and a `labels` list containing a
            # literal Ellipsis) — none of it was used.
            # Oversample the two targeted classes; every other class keeps
            # weight 1.0.
            class_weights = [1.0] * 10
            class_weights[args.first] = args.weight
            class_weights[args.second] = args.weight
            print(class_weights)
            weights = [class_weights[train_data.targets[i]]
                       for i in range(len(train_data))]
            sampler = WeightedRandomSampler(torch.DoubleTensor(weights),
                                            len(train_data))
            train_loader = torch.utils.data.DataLoader(
                train_data, batch_size=args.batch_size, shuffle=False,
                num_workers=args.workers, pin_memory=True, sampler=sampler)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=False,
                                 transform=transform_test),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))
    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    model = torch.nn.DataParallel(model).cuda()

    if os.path.isfile(args.pretrained):
        print("=> loading checkpoint '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.pretrained))

    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # Per-sample losses are consumed downstream, hence reduction='none'.
    criterion = nn.CrossEntropyLoss(reduction='none').cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=True)
    cudnn.benchmark = True

    if args.checkmodel:
        # Only report the targeted-pair confusion of the loaded model, then quit.
        global_epoch_confusion.append({})
        get_confusion(val_loader, model, criterion)
        log_print(str(args.first) + " -> " + str(args.second))
        log_print(global_epoch_confusion[-1]["confusion"][(args.first,
                                                           args.second)])
        log_print(str(args.second) + " -> " + str(args.first))
        log_print(global_epoch_confusion[-1]["confusion"][(args.second,
                                                           args.first)])
        exit()

    for epoch in range(0, args.epochs):
        global_epoch_confusion.append({})
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

        # Track/checkpoint the best model only over the last quarter of
        # training. FIX: the original tested `epoch // (args.epochs * 0.75)`
        # for truthiness, which is equivalent but obscure.
        if epoch >= args.epochs * 0.75:
            is_best = err1 <= best_err1
            best_err1 = min(err1, best_err1)
            if is_best:
                best_err5 = err5
                best_err1 = err1
            print('Current best accuracy (top-1 and 5 error):',
                  best_err1, best_err5)
            save_checkpoint({
                'epoch': epoch,
                'arch': args.net_type,
                'state_dict': model.state_dict(),
                'best_err1': best_err1,
                'best_err5': best_err5,
                'optimizer': optimizer.state_dict(),
            }, is_best)

        # Per-epoch targeted-pair confusion report.
        get_confusion(val_loader, model, criterion, epoch)
        log_print(str(args.first) + " -> " + str(args.second))
        log_print(global_epoch_confusion[-1]["confusion"][(args.first,
                                                           args.second)])
        log_print(str(args.second) + " -> " + str(args.first))
        log_print(global_epoch_confusion[-1]["confusion"][(args.second,
                                                           args.first)])

    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)

    directory = "runs/%s/" % (args.expname)
    if not os.path.exists(directory):
        os.makedirs(directory)
    epoch_confusions = 'runs/%s/' % (args.expname) + \
        'epoch_confusion_' + args.expid
    np.save(epoch_confusions, global_epoch_confusion)
    log_print("")

    # Reload the best checkpoint and report its pair confusion.
    repaired_model = 'runs/%s/' % (args.expname) + 'model_best.pth.tar'
    if os.path.isfile(repaired_model):
        print("=> loading checkpoint '{}'".format(repaired_model))
        checkpoint = torch.load(repaired_model)
        model.load_state_dict(checkpoint['state_dict'])
        get_confusion(val_loader, model, criterion)
        log_print(str(args.first) + " -> " + str(args.second))
        log_print(global_epoch_confusion[-1]["confusion"][(args.first,
                                                           args.second)])
        log_print(str(args.second) + " -> " + str(args.first))
        log_print(global_epoch_confusion[-1]["confusion"][(args.second,
                                                           args.first)])
def main():
    """Train ResNet on CIFAR-10 with Chainer.

    Parses command-line options, builds the 45k/5k train/val split, wires up
    the trainer with evaluation, snapshot, LR-decay and logging extensions,
    then runs the training loop.
    """
    parser = argparse.ArgumentParser(
        description='Learning CIFAR-10 using ResNet')
    parser.add_argument('--batchsize', type=int, default=256,
                        help='Leaning minibatch size')
    parser.add_argument('--epoch', type=int, default=365,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU id (-1 indicates CPU)')
    parser.add_argument('--loaders', type=int, default=1,
                        help='Number of data loading processes')
    parser.add_argument('--out', default='./result',
                        help='Path of output directory')
    parser.add_argument('--path', default='./cifar-10-batches-py',
                        help='Path of dataset files')
    parser.add_argument('--test', action='store_true')
    args = parser.parse_args()

    print('==========================================')
    print('Num Minibatch-size: {}'.format(args.batchsize))
    print('Num Epoch: {}'.format(args.epoch))
    print('==========================================')

    # Model, optionally placed on the requested GPU.
    model = resnet.ResNet()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Dataset: mean-normalized, split 45k train / 5k validation.
    data, labels = load_cifar10_dataset(args.path)
    mean = np.mean(data, axis=0)
    train = Cifar10Dataset(data[0:45000], labels[0:45000],
                           train=True, mean=mean)
    val = Cifar10Dataset(data[45000:50000], labels[45000:50000],
                         train=False, mean=mean)

    # Multiprocess iterators feed batches off the main process.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaders)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batchsize, repeat=False, shuffle=False,
        n_processes=args.loaders)

    # Momentum SGD with weight decay.
    optimizer = chainer.optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Validation and model snapshots (more frequent in --test mode).
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)

    # Step learning-rate decay at fixed iteration counts.
    lr_interval = training.triggers.ManualScheduleTrigger([32000, 48000],
                                                          'iteration')
    trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=lr_interval)

    # Logging and progress reporting.
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train():
    """Train the multi-GPU ImageNet ResNet defined in ``resnet``.

    Builds the train/val input pipelines and graphs (the val graph reuses
    the training weights), optionally restores a full checkpoint or a
    basemodel, then runs the training loop with periodic validation,
    TensorBoard summaries and checkpointing.  Typing ``b`` on stdin drops
    into an IPython shell (``embed``).

    All configuration comes from the module-level ``FLAGS``.
    """
    print('[Dataset Configuration]')
    print('\tImageNet training root: %s' % FLAGS.train_image_root)
    print('\tImageNet training list: %s' % FLAGS.train_dataset)
    print('\tImageNet val root: %s' % FLAGS.val_image_root)
    print('\tImageNet val list: %s' % FLAGS.val_dataset)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of training images: %d' % FLAGS.num_train_instance)
    print('\tNumber of val images: %d' % FLAGS.num_val_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tNumber of GPUs: %d' % FLAGS.num_gpus)
    print('\tBasemodel file: %s' % FLAGS.basemodel)

    print('[Optimization Configuration]')
    print('\tL2 loss weight: %f' % FLAGS.l2_weight)
    print('\tThe momentum optimizer: %f' % FLAGS.momentum)
    print('\tInitial learning rate: %f' % FLAGS.initial_lr)
    print('\tEpochs per lr step: %s' % FLAGS.lr_step_epoch)
    print('\tLearning rate decay: %f' % FLAGS.lr_decay)

    print('[Training Configuration]')
    print('\tTrain dir: %s' % FLAGS.train_dir)
    print('\tTraining max steps: %d' % FLAGS.max_steps)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tSteps per validation: %d' % FLAGS.val_interval)
    print('\tSteps during validation: %d' % FLAGS.val_iter)
    print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels of ImageNet.
        import multiprocessing
        # BUGFIX: use integer division -- plain '/' yields a float under
        # Python 3 and the loader needs an integer thread count.
        num_threads = multiprocessing.cpu_count() // FLAGS.num_gpus
        print('Load ImageNet dataset(%d threads)' % num_threads)
        with tf.device('/cpu:0'):
            print('\tLoading training data from %s' % FLAGS.train_dataset)
            with tf.variable_scope('train_image'):
                train_images, train_labels = data_input.distorted_inputs(
                    FLAGS.train_image_root, FLAGS.train_dataset,
                    FLAGS.batch_size, True, num_threads=num_threads,
                    num_sets=FLAGS.num_gpus)
            print('\tLoading validation data from %s' % FLAGS.val_dataset)
            with tf.variable_scope('test_image'):
                val_images, val_labels = data_input.inputs(
                    FLAGS.val_image_root, FLAGS.val_dataset,
                    FLAGS.batch_size, False, num_threads=num_threads,
                    num_sets=FLAGS.num_gpus)
            tf.summary.image('images', train_images[0][:2])

        # Learning-rate decay boundaries: epochs -> global steps.
        lr_decay_steps = map(float, FLAGS.lr_step_epoch.split(','))
        lr_decay_steps = list(
            map(int, [
                s * FLAGS.num_train_instance / FLAGS.batch_size / FLAGS.num_gpus
                for s in lr_decay_steps
            ]))

        # Build train and val graphs; the val graph reuses the train weights.
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_gpus=FLAGS.num_gpus,
                            num_classes=FLAGS.num_classes,
                            weight_decay=FLAGS.l2_weight,
                            momentum=FLAGS.momentum,
                            finetune=FLAGS.finetune)
        network_train = resnet.ResNet(hp, train_images, train_labels,
                                      global_step, name="train")
        network_train.build_model()
        network_train.build_train_op()
        train_summary_op = tf.summary.merge_all()  # Summaries(training)
        network_val = resnet.ResNet(hp, val_images, val_labels, global_step,
                                    name="val", reuse_weights=True)
        network_val.build_model()
        print('Number of Weights: %d' % network_train._weights)
        print('FLOPs: %d' % network_train._flops)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            allow_soft_placement=False,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver; prefer restoring a full checkpoint over a basemodel.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10000)
        if FLAGS.checkpoint is not None:
            print('Load checkpoint %s' % FLAGS.checkpoint)
            saver.restore(sess, FLAGS.checkpoint)
            init_step = global_step.eval(session=sess)
        elif FLAGS.basemodel:
            # Restore everything except optimizer slots and the step counter.
            print('Load parameters from basemodel %s' % FLAGS.basemodel)
            variables = tf.global_variables()
            vars_restore = [
                var for var in variables
                if not "Momentum" in var.name and not "global_step" in var.name
            ]
            saver_restore = tf.train.Saver(vars_restore, max_to_keep=10000)
            saver_restore.restore(sess, FLAGS.basemodel)
        else:
            print(
                'No checkpoint file of basemodel found. Start from the scratch.'
            )

        # Start queue runners & summary_writer
        tf.train.start_queue_runners(sess=sess)
        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS.train_dir, str(global_step.eval(session=sess))),
            sess.graph)

        # Training!
        val_best_acc = 0.0
        for step in range(init_step, FLAGS.max_steps):
            # Validate every FLAGS.val_interval steps.
            if step % FLAGS.val_interval == 0:
                val_loss, val_acc = 0.0, 0.0
                for i in range(FLAGS.val_iter):
                    loss_value, acc_value = sess.run(
                        [network_val.loss, network_val.acc],
                        feed_dict={network_val.is_train: False})
                    val_loss += loss_value
                    val_acc += acc_value
                val_loss /= FLAGS.val_iter
                val_acc /= FLAGS.val_iter
                val_best_acc = max(val_best_acc, val_acc)

                format_str = ('%s: (val) step %d, loss=%.4f, acc=%.4f')
                print(format_str % (datetime.now(), step, val_loss, val_acc))

                val_summary = tf.Summary()
                val_summary.value.add(tag='val/loss', simple_value=val_loss)
                val_summary.value.add(tag='val/acc', simple_value=val_acc)
                val_summary.value.add(tag='val/best_acc',
                                      simple_value=val_best_acc)
                summary_writer.add_summary(val_summary, step)
                summary_writer.flush()

            # Train one step.
            lr_value = get_lr(FLAGS.initial_lr, FLAGS.lr_decay, lr_decay_steps,
                              step)
            start_time = time.time()
            # (A chrome-trace timeline-profiling variant of this sess.run was
            # removed here; recover it from version control if needed.)
            _, loss_value, acc_value, train_summary_str = \
                sess.run([network_train.train_op, network_train.loss,
                          network_train.acc, train_summary_op],
                         feed_dict={network_train.is_train: True,
                                    network_train.lr: lr_value})
            duration = time.time() - start_time

            assert not np.isnan(loss_value)

            # Display & Summary(training)
            if step % FLAGS.display == 0 or step < 10:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: (Training) step %d, loss=%.4f, acc=%.4f, lr=%f '
                    '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str %
                      (datetime.now(), step, loss_value, acc_value, lr_value,
                       examples_per_sec, sec_per_batch))
                summary_writer.add_summary(train_summary_str, step)

            # Save the model checkpoint periodically.
            if (step > init_step and
                    step % FLAGS.checkpoint_interval == 0) or (
                        step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # Drop into an IPython shell when 'b' is typed on stdin.
            if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                char = sys.stdin.read(1)
                if char == 'b':
                    embed()
def main():
    """Train a ResNet/PyramidNet classifier on CIFAR-10/100 or ImageNet.

    Configuration comes from the module-level argparse ``parser``; the
    best top-1/top-5 errors are tracked in the module-level globals.
    """
    global args, best_err1, best_err5
    args = parser.parse_args()

    if args.dataset.startswith('cifar'):
        # CIFAR normalization: per-channel mean/std rescaled to [0, 1] units.
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize
        ])

        if args.dataset == 'cifar100':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data', train=True, download=True,
                                  transform=transform_train),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            # NOTE(review): shuffle=True on the validation loader is unusual
            # (harmless for aggregate error metrics, but per-batch logs are
            # not reproducible) -- confirm whether this is intentional.
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR100('../data', train=False,
                                  transform=transform_test),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            numberofclass = 100
        elif args.dataset == 'cifar10':
            train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=True, download=True,
                                 transform=transform_train),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=False,
                                 transform=transform_test),
                batch_size=args.batch_size, shuffle=True,
                num_workers=args.workers, pin_memory=True)
            numberofclass = 10
        else:
            raise Exception('unknown dataset: {}'.format(args.dataset))
    elif args.dataset == 'imagenet':
        traindir = os.path.join('/home/data/ILSVRC/train')
        valdir = os.path.join('/home/data/ILSVRC/val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # Color jitter plus PCA lighting noise augmentation.
        jittering = utils.ColorJitter(brightness=0.4, contrast=0.4,
                                      saturation=0.4)
        lighting = utils.Lighting(alphastd=0.1,
                                  eigval=[0.2175, 0.0188, 0.0045],
                                  eigvec=[[-0.5675, 0.7192, 0.4009],
                                          [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                jittering,
                lighting,
                normalize,
            ]))

        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
        numberofclass = 1000
    else:
        raise Exception('unknown dataset: {}'.format(args.dataset))

    print("=> creating model '{}'".format(args.net_type))
    if args.net_type == 'resnet':
        model = RN.ResNet(args.dataset, args.depth, numberofclass,
                          args.bottleneck)  # for ResNet
    elif args.net_type == 'pyramidnet':
        model = PYRM.PyramidNet(args.dataset, args.depth, args.alpha,
                                numberofclass, args.bottleneck)
    else:
        raise Exception('unknown network architecture: {}'.format(
            args.net_type))

    model = torch.nn.DataParallel(model).cuda()

    print(model)
    print('the number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    cudnn.benchmark = True

    for epoch in range(0, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = err1 <= best_err1
        best_err1 = min(err1, best_err1)
        if is_best:
            # top-5 error is only recorded at new best top-1 epochs
            best_err5 = err5

        print('Current best accuracy (top-1 and 5 error):', best_err1,
              best_err5)
        save_checkpoint({
            'epoch': epoch,
            'arch': args.net_type,
            'state_dict': model.state_dict(),
            'best_err1': best_err1,
            'best_err5': best_err5,
            'optimizer': optimizer.state_dict(),
        }, is_best)

    print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
def train():
    """Cluster a trained wide-ResNet's channels and evaluate the result.

    Loads a trained WRN checkpoint, clusters each residual unit's first
    conv kernels down to the group's new width (merging the matching
    batch-norm statistics and combining the second conv's input
    channels), rebuilds a narrower network initialized from the
    clustered parameters, and reports per-class test accuracy.

    All configuration comes from the module-level ``FLAGS``.  Exits with
    status 1 when no checkpoint can be found.
    """
    print('[Dataset Configuration]')
    print('\tCIFAR-100 dir: %s' % FLAGS.data_dir)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tResidual blocks per group: %d' % FLAGS.num_residual_units)
    print('\tNetwork width multiplier: %d' % FLAGS.k)

    print('[Testing Configuration]')
    print('\tCheckpoint path: %s' % FLAGS.ckpt_path)
    print('\tDataset: %s' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tNumber of testing iterations: %d' % FLAGS.test_iter)
    print('\tOutput path: %s' % FLAGS.output)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        # Build a Graph that computes the predictions from the inference model.
        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_classes=FLAGS.num_classes,
                            num_residual_units=FLAGS.num_residual_units,
                            k=FLAGS.k,
                            weight_decay=FLAGS.l2_weight,
                            initial_lr=FLAGS.initial_lr,
                            decay_step=decay_step,
                            lr_decay=FLAGS.lr_decay,
                            momentum=FLAGS.momentum)
        network = resnet.ResNet(hp, images, labels, None)
        network.build_model()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Restore from a checkpoint dir or an explicit checkpoint file.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if os.path.isdir(FLAGS.ckpt_path):
            ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path)
            if ckpt and ckpt.model_checkpoint_path:
                print('\tRestore from %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found in the dir [%s]' %
                      FLAGS.ckpt_path)
                sys.exit(1)
        elif os.path.isfile(FLAGS.ckpt_path):
            print('\tRestore from %s' % FLAGS.ckpt_path)
            saver.restore(sess, FLAGS.ckpt_path)
        else:
            print('No checkpoint file found in the path [%s]' %
                  FLAGS.ckpt_path)
            sys.exit(1)

        # Collect, per residual unit: the conv-1 kernels to cluster, the
        # conv-2 kernels whose input channels get combined, and conv-2's
        # batch-norm parameters.  (The unused 'conv_num' local was removed.)
        graph = tf.get_default_graph()
        block_num = 3
        old_kernels_to_cluster = []
        old_kernels_to_add = []
        old_batch_norm = []
        for i in range(1, block_num + 1):
            for j in range(FLAGS.num_residual_units):
                old_kernels_to_cluster.append(get_kernel(i, j, 1, graph, sess))
                old_kernels_to_add.append(get_kernel(i, j, 2, graph, sess))
                old_batch_norm.append(get_batch_norm(i, j, 2, graph, sess))

        # Cluster each unit down to the new width chosen for its group.
        new_params = []
        new_width = [
            16,
            int(16 * FLAGS.new_k),
            int(32 * FLAGS.new_k),
            int(64 * FLAGS.new_k)
        ]
        for i in range(len(old_batch_norm)):
            cluster_num = new_width[int(i / 4) + 1]
            cluster_kernels, cluster_indices = cluster_kernel(
                old_kernels_to_cluster[i], cluster_num)
            add_kernels = add_kernel(old_kernels_to_add[i], cluster_indices,
                                     cluster_num)
            cluster_batchs_norm = cluster_batch_norm(old_batch_norm[i],
                                                     cluster_indices,
                                                     cluster_num)
            new_params.append(cluster_kernels)
            for p in range(BATCH_NORM_PARAM_NUM):
                new_params.append(cluster_batchs_norm[p])
            new_params.append(add_kernels)

        # Pair every variable name with its (possibly clustered) initial value.
        init_params = []
        new_param_index = 0
        for var in tf.global_variables():
            update_match = UPDATE_PARAM_REGEX.match(var.name)
            skip_match = SKIP_PARAM_REGEX.match(var.name)
            if update_match and not skip_match:
                print("update {}".format(var.name))
                init_params.append((new_params[new_param_index], var.name))
                new_param_index += 1
            else:
                print("not update {}".format(var.name))
                var_vector = sess.run(var)
                init_params.append((var_vector, var.name))

        # close old graph
        sess.close()
    # Reset outside the 'with' block: TF1 forbids resetting a graph that is
    # still installed as the default via a context manager.
    tf.reset_default_graph()

    # build the new (narrower) graph and evaluate it
    with tf.Graph().as_default():
        # The CIFAR-100 dataset
        with tf.variable_scope('test_image'):
            test_images, test_labels = data_input.input_fn(
                FLAGS.data_dir,
                FLAGS.batch_size,
                train_mode=FLAGS.train_data,
                num_threads=1)

        # The class labels
        with open(os.path.join(FLAGS.data_dir, 'fine_label_names.txt')) as fd:
            classes = [temp.strip() for temp in fd.readlines()]

        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        new_network = resnet.ResNet(hp, images, labels, None, init_params,
                                    FLAGS.new_k)
        new_network.build_model()

        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Testing!
        result_ll = [[0, 0] for _ in range(FLAGS.num_classes)
                     ]  # Correct/wrong counts for each class
        # BUGFIX: was 'test_loss = 0.0, 0.0', which bound a tuple and made
        # the '+=' below raise TypeError.
        test_loss = 0.0
        for i in range(FLAGS.test_iter):
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            preds_val, loss_value, acc_value = sess.run(
                [new_network.preds, new_network.loss, new_network.acc],
                feed_dict={
                    new_network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            test_loss += loss_value
            for j in range(FLAGS.batch_size):
                correct = 0 if test_labels_val[j] == preds_val[j] else 1
                result_ll[test_labels_val[j] % FLAGS.num_classes][correct] += 1
        test_loss /= FLAGS.test_iter

        # Summary display & output
        acc_list = [float(r[0]) / float(r[0] + r[1]) for r in result_ll]
        result_total = np.sum(np.array(result_ll), axis=0)
        acc_total = float(result_total[0]) / np.sum(result_total)

        print('Class \t\t\tT\tF\tAcc.')
        format_str = '%-31s %7d %7d %.5f'
        for i in range(FLAGS.num_classes):
            print(format_str %
                  (classes[i], result_ll[i][0], result_ll[i][1], acc_list[i]))
        print(format_str %
              ('(Total)', result_total[0], result_total[1], acc_total))

        # Output to file(if specified)
        if FLAGS.output.strip():
            with open(FLAGS.output, 'w') as fd:
                fd.write('Class \t\t\tT\tF\tAcc.\n')
                # One newline-terminated format for every row (the original
                # redundantly reassigned format_str inside the loop).
                format_str = '%-31s %7d %7d %.5f\n'
                for i in range(FLAGS.num_classes):
                    t, f = result_ll[i]
                    fd.write(format_str %
                             (classes[i].replace(' ', '-'), t, f, acc_list[i]))
                fd.write(format_str %
                         ('(Total)', result_total[0], result_total[1],
                          acc_total))
def train():
    """Train or evaluate the ResNet-based 3DMM face-parameter regressor.

    Depending on FLAGS, either runs the training loop, dumps predictions
    for a fixed set of test batches, or runs a custom-input test.
    Configuration comes from the module-level ``FLAGS``.
    """
    def Load():
        """Load the .npy datasets from Input/.

        In simple/eval mode the "training" arrays are just the test set.
        Returns (train_images, train_labels, test_data, test_label,
        mean_data, mean_label, std_label).
        """
        if (FLAGS.is_Simple or FLAGS.is_Train == False):
            train_images = np.load("Input/test_data.npy")
            train_labels = np.load("Input/test_label.npy")
        else:
            train_data = np.load("Input/train_data.npy")
            train_label = np.load("Input/train_label.npy")
            # Shuffle once at load time; batches below are taken sequentially.
            permutation = np.random.permutation(train_data.shape[0])
            train_images = train_data[permutation, :, :, :]
            train_labels = train_label[permutation, :]
        test_data = np.load("Input/test_data.npy")
        test_label = np.load("Input/test_label.npy")
        mean_data = np.array(np.load("Input/mean_data.npy"), dtype=np.float16)
        mean_label = np.load("Input/mean_label.npy")
        std_label = np.load("Input/std_label.npy")
        return train_images, train_labels, test_data, test_label, mean_data, mean_label, std_label

    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Placeholders: image batch plus 3DMM-style targets packed as a
        # 185-wide label row (100 shape + 79 expression + 3 Euler angles +
        # 2 translation + 1 scale).
        X = tf.placeholder(tf.float32, [None, 224, 224, 3],
                           name='Input_Images')
        SHAPE = tf.placeholder(tf.float32, [None, 100], name='SHAPE')
        EXP = tf.placeholder(tf.float32, [None, 79], name='EXP')
        EULAR = tf.placeholder(tf.float32, [None, 3], name='EULAR')
        T = tf.placeholder(tf.float32, [None, 2], name='T')
        S = tf.placeholder(tf.float32, [None], name='S')

        # Build model
        hp = resnet.HParams(batch_size=FLAGS.batch_size,
                            num_gpus=FLAGS.num_gpus,
                            num_output=FLAGS.dim_output,
                            weight_decay=FLAGS.l2_weight,
                            momentum=FLAGS.momentum,
                            finetune=FLAGS.finetune)
        network_train = resnet.ResNet(hp, X, SHAPE, EXP, EULAR, T, S,
                                      global_step, name="train")
        network_train.build_model()
        network_train.build_train_op()
        train_summary_op = tf.summary.merge_all()  # Summaries(training)

        print('Number of Weights: %d' % network_train._weights)
        print('FLOPs: %d' % network_train._flops)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        print("sess 0")

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            allow_soft_placement=False,
            # allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        print("sess 1")
        sess.run(init)
        print("sess done")

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        # NOTE(review): both branches below are identical -- the train/eval
        # distinction here is vestigial; confirm before simplifying.
        if (FLAGS.is_Train == False):
            checkpoint_dir = FLAGS.train_dir  # os.path.join(export_dir, 'checkpoint')
            checkpoints = tf.train.get_checkpoint_state(checkpoint_dir)
            if checkpoints and checkpoints.model_checkpoint_path:
                checkpoints_name = os.path.basename(
                    checkpoints.model_checkpoint_path)
                saver.restore(sess,
                              os.path.join(checkpoint_dir, checkpoints_name))
                print('Load checkpoint %s' % checkpoints_name)
                init_step = global_step.eval(session=sess)
        else:
            checkpoint_dir = FLAGS.train_dir  # os.path.join(export_dir, 'checkpoint')
            checkpoints = tf.train.get_checkpoint_state(checkpoint_dir)
            if checkpoints and checkpoints.model_checkpoint_path:
                checkpoints_name = os.path.basename(
                    checkpoints.model_checkpoint_path)
                saver.restore(sess,
                              os.path.join(checkpoint_dir, checkpoints_name))
                print('Load checkpoint %s' % checkpoints_name)
                init_step = global_step.eval(session=sess)
                # print('Start from the scratch.')
        # if not os.path.exists(FLAGS.train_dir):
        #     os.mkdir(FLAGS.train_dir)
        # summary_writer = tf.summary.FileWriter(os.path.join(FLAGS.train_dir, str(global_step.eval(session=sess))),
        #                                        sess.graph)

        # Training!
        train_images, train_labels, test_data, test_label, mean_data, mean_label, std_label = Load(
        )
        one_epoch_step = int(len(train_labels) / FLAGS.batch_size)
        # Center images on the dataset mean and scale to roughly [-1, 1].
        train_images = (train_images - mean_data) / 255.0
        # train_labels = (train_labels-mean_label)/std_label
        test_data = (test_data - mean_data) / 255.0
        # test_label= (test_label - mean_label)/std_label
        print("data done")

        if (FLAGS.is_CustomTest == True):
            # Run the network on a custom image batch with dummy zero labels
            # (labels are only fed to satisfy the placeholders).
            batch_data = (np.load('Input/gx.npy') - mean_data) / 255.0
            batch_labels = np.zeros((len(batch_data), 185))
            tmp = np.zeros((len(batch_data), 185))
            shape_logits, exp_logits, eular_logits, t_logits, s_logits = sess.run(
                [network_train.shape_logits, network_train.exp_logits,
                 network_train.eular_logits, network_train.t_logits,
                 network_train.s_logits, ],
                feed_dict={network_train.is_train: False, X: batch_data,
                           SHAPE: batch_labels[:, :100],
                           EXP: batch_labels[:, 100:179],
                           EULAR: batch_labels[:, 179:182],
                           T: batch_labels[:, 182:184],
                           S: batch_labels[:, 184]})
            # NOTE(review): exp_logits (79-dim per EXP placeholder) is written
            # into columns 0:100 and shape_logits (100-dim per SHAPE) into
            # 100:179 -- this looks swapped relative to the placeholder
            # layout above; confirm against the network's logit shapes.
            tmp[:, 0:100] = np.array(exp_logits)
            tmp[:, 100:179] = np.array(shape_logits)
            tmp[:, 179:182] = np.array(eular_logits)
            tmp[:, 182:184] = np.array(t_logits)
            tmp[:, 184][:, None] = np.array(s_logits)
            np.savetxt("tmp/gx.txt", tmp)
        elif (FLAGS.is_Train == False):
            # Evaluation/dump mode: only 10 fixed batches are processed and
            # only the first sample of each batch is written out.
            max_iteration = int(len(test_label) / FLAGS.batch_size)
            print("max iteration is " + str(max_iteration))
            loss_ = 0
            tmp = np.zeros([185])
            for i in range(10):
                print(i)
                offset = (i * FLAGS.batch_size) % (test_data.shape[0] -
                                                   FLAGS.batch_size)
                batch_data = test_data[offset:(offset +
                                               FLAGS.batch_size), :, :, :]
                batch_labels = test_label[offset:(offset +
                                                  FLAGS.batch_size), :]
                shape_logits, exp_logits, eular_logits, t_logits, s_logits = sess.run(
                    [network_train.shape_logits, network_train.exp_logits,
                     network_train.eular_logits, network_train.t_logits,
                     network_train.s_logits, ],
                    feed_dict={network_train.is_train: False, X: batch_data,
                               SHAPE: batch_labels[:, :100],
                               EXP: batch_labels[:, 100:179],
                               EULAR: batch_labels[:, 179:182],
                               T: batch_labels[:, 182:184],
                               S: batch_labels[:, 184]})
                # NOTE(review): same suspected shape/exp swap as above.
                tmp[0:100] = np.array(exp_logits[0, :])
                tmp[100:179] = np.array(shape_logits[0, :])
                tmp[179:182] = np.array(eular_logits[0, :])
                tmp[182:184] = np.array(t_logits[0, :])
                tmp[184] = np.array(s_logits[0, :])
                # loss_+=loss_value[0]
                # print("test loss = " +str(loss_/ max_iteration))
                np.savetxt("tmp/" + str(i) + ".txt", tmp)
                # Undo the normalization so the dumped image is viewable.
                fig = np.array((batch_data[0, :, :, :] * 255 + mean_data),
                               dtype=np.uint8)
                cv2.imwrite("tmp/" + str(i) + ".jpg", fig)
        else:
            # Training loop: sequential batches over the pre-shuffled data.
            for step in range(init_step, FLAGS.max_steps):
                offset = (step * FLAGS.batch_size) % (train_labels.shape[0] -
                                                      FLAGS.batch_size)
                batch_data = train_images[offset:(offset +
                                                  FLAGS.batch_size), :, :, :]
                batch_labels = train_labels[offset:(offset +
                                                    FLAGS.batch_size), :]

                # Train
                lr_value = get_lr(FLAGS.initial_lr, FLAGS.lr_decay,
                                  one_epoch_step, step)
                start_time = time.time()
                _, loss_value, shape_loss, exp_loss, eular_loss, t_loss, s_loss, points_loss, geo_loss, pose_loss = sess.run(
                    [
                        network_train.train_op, network_train.loss,
                        network_train.shape_loss, network_train.exp_loss,
                        network_train.eular_loss, network_train.t_loss,
                        network_train.s_loss, network_train.points_loss,
                        network_train.geo_loss, network_train.pose_loss
                    ],
                    feed_dict={
                        network_train.is_train: True,
                        network_train.lr: lr_value,
                        X: batch_data,
                        SHAPE: batch_labels[:, :100],
                        EXP: batch_labels[:, 100:179],
                        EULAR: batch_labels[:, 179:182],
                        T: batch_labels[:, 182:184],
                        S: batch_labels[:, 184]
                    })
                duration = time.time() - start_time

                assert not np.isnan(loss_value)

                # Display & Summary(training)
                if step % FLAGS.display == 0 or step < 10:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = (
                        '%s: (Training) step %d, loss=%.4f, lr=%f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str %
                          (datetime.now(), step, loss_value, lr_value,
                           examples_per_sec, sec_per_batch))
                    format_str = (
                        'shape_loss=%.4f, '
                        'exp_loss=%.4f,eular_loss=%.4f,t_loss=%.4f,s_loss=%.4f,points_loss=%.4f,geo_loss=%.4f,pose_loss=%.4f'
                    )
                    print(format_str %
                          (shape_loss, exp_loss, eular_loss, t_loss, s_loss,
                           points_loss, geo_loss, pose_loss))
                    # Naive ETA from the last step's duration.
                    elapse = time.time() - start_time
                    time_left = (FLAGS.max_steps - step) * elapse
                    print("\tTime left: %02d:%02d:%02d" %
                          (int(time_left / 3600), int(time_left % 3600 / 60),
                           time_left % 60))
                    # summary_writer.add_summary(train_summary_str, step)

                # Save the model checkpoint periodically.
                if (step > init_step and
                        step % FLAGS.checkpoint_interval == 0) or (
                            step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False, name='global_step') images = [tf.placeholder(tf.float32, [2, 224, 224, 3])] labels = [tf.placeholder(tf.int32, [2])] # Build model print("Build ResNet-18 model") hp = resnet.HParams(batch_size=2, num_gpus=1, num_classes=1000, weight_decay=0.001, momentum=0.9, finetune=False) network_train = resnet.ResNet(hp, images, labels, global_step, name="train") network_train.build_model() print('Number of Weights: %d' % network_train._weights) print('FLOPs: %d' % network_train._flops) # Build an initialization operation to run below. init = tf.global_variables_initializer() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions( per_process_gpu_memory_fraction=0.96), allow_soft_placement=True, log_device_placement=False)) sess.run(init)
n_classes = len(train_yy) if len(args.bst) == n_classes: bs_list = args.bst elif len(args.bst) == 1: bs_list = [args.bst[0]] * n_classes else: raise RuntimeError() train_loaders = [utils.cycle(D.DataLoader(ds, bs, shuffle=True)) \ for ds, bs in zip(train_datasets, bs_list)] if args.model == 'linear': model = th.nn.Linear(train_x.size(1), n_classes) elif args.model == 'mlp': model = mlp.MLP([train_x.size(1), 64, 64, 64, n_classes], th.relu, bn=True) elif args.model == 'resnet': model = resnet.ResNet(18, n_classes)[args.model] else: raise RuntimeError() dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu) model = model.to(dev) params = list(model.parameters()) kwargs = {'params' : params, 'lr' : args.lr, 'weight_decay' : args.wd} opt = {'sgd' : optim.SGD(**kwargs), 'adam' : optim.Adam(amsgrad=True, **kwargs)}[args.opt] metric = getattr(utils, args.metric) if args.tb: path = 'tb/%s' % args.id writer = tb.SummaryWriter(path) train_writer = tb.SummaryWriter(path + '/a') val_writer = tb.SummaryWriter(path + '/b')
def train(): print('[Dataset Configuration]') print('\tCIFAR-100 dir: %s' % FLAGS.data_dir) print('\tNumber of classes: %d' % FLAGS.num_classes) print('\tNumber of test images: %d' % FLAGS.num_test_instance) print('[Network Configuration]') print('\tBatch size: %d' % FLAGS.batch_size) print('\tResidual blocks per group: %d' % FLAGS.num_residual_units) print('\tNetwork width multiplier: %d' % FLAGS.k) print('[Optimization Configuration]') print('\tL2 loss weight: %f' % FLAGS.l2_weight) print('\tThe momentum optimizer: %f' % FLAGS.momentum) print('\tInitial learning rate: %f' % FLAGS.initial_lr) print('\tEpochs per lr step: %f' % FLAGS.lr_step_epoch) print('\tLearning rate decay: %f' % FLAGS.lr_decay) print('[Training Configuration]') print('\tTrain dir: %s' % FLAGS.train_dir) print('\tTraining max steps: %d' % FLAGS.max_steps) print('\tSteps per displaying info: %d' % FLAGS.display) print('\tSteps per testing: %d' % FLAGS.test_interval) print('\tSteps during testing: %d' % FLAGS.test_iter) print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval) print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction) print('\tLog device placement: %d' % FLAGS.log_device_placement) with tf.Graph().as_default(): # Build a Graph that computes the predictions from the inference model. images = tf.placeholder( tf.float32, [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3]) labels = tf.placeholder(tf.int32, [FLAGS.batch_size]) # Build model decay_step = FLAGS.lr_step_epoch * FLAGS.num_train_instance / FLAGS.batch_size hp = resnet.HParams(batch_size=FLAGS.batch_size, num_classes=FLAGS.num_classes, num_residual_units=FLAGS.num_residual_units, k=FLAGS.k, weight_decay=FLAGS.l2_weight, initial_lr=FLAGS.initial_lr, decay_step=decay_step, lr_decay=FLAGS.lr_decay, momentum=FLAGS.momentum) network = resnet.ResNet(hp, images, labels, None) network.build_model() # Build an initialization operation to run below. 
init = tf.initialize_all_variables() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto( gpu_options=tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_fraction), log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Create a saver. saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000) if os.path.isdir(FLAGS.ckpt_path): ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_path) # Restores from checkpoint if ckpt and ckpt.model_checkpoint_path: print('\tRestore from %s' % ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found in the dir [%s]' % FLAGS.ckpt_path) sys.exit(1) elif os.path.isfile(FLAGS.ckpt_path): print('\tRestore from %s' % FLAGS.ckpt_path) saver.restore(sess, FLAGS.ckpt_path) else: print('No checkpoint file found in the path [%s]' % FLAGS.ckpt_path) sys.exit(1) graph = tf.get_default_graph() block_num = 3 conv_num = 2 old_kernels_to_cluster = [] old_kernels_to_add = [] old_batch_norm = [] for i in range(1, block_num + 1): for j in range(FLAGS.num_residual_units): old_kernels_to_cluster.append(get_kernel(i, j, 1, graph, sess)) old_kernels_to_add.append(get_kernel(i, j, 2, graph, sess)) old_batch_norm.append(get_batch_norm(i, j, 2, graph, sess)) #old_batch_norm = old_batch_norm[1:] #old_batch_norm.append(get_last_batch_norm(graph, sess)) new_params = [] new_width = [ 16, int(16 * FLAGS.new_k), int(32 * FLAGS.new_k), int(64 * FLAGS.new_k) ] for i in range(len(old_batch_norm)): cluster_num = new_width[int(i / 4) + 1] cluster_kernels, cluster_indices = cluster_kernel( old_kernels_to_cluster[i], cluster_num) add_kernels = add_kernel(old_kernels_to_add[i], cluster_indices, cluster_num) cluster_batchs_norm = cluster_batch_norm(old_batch_norm[i], cluster_indices, cluster_num) new_params.append(cluster_kernels) for p in range(BATCH_NORM_PARAM_NUM): new_params.append(cluster_batchs_norm[p]) new_params.append(add_kernels) # save variables init_params = 
[] new_param_index = 0 for var in tf.global_variables(): update_match = UPDATE_PARAM_REGEX.match(var.name) skip_match = SKIP_PARAM_REGEX.match(var.name) if update_match and not skip_match: print("update {}".format(var.name)) init_params.append((new_params[new_param_index], var.name)) new_param_index += 1 else: print("not update {}".format(var.name)) var_vector = sess.run(var) init_params.append((var_vector, var.name)) #close old graph sess.close() tf.reset_default_graph() # build new graph and eval with tf.Graph().as_default(): init_step = 0 global_step = tf.Variable(0, trainable=False, name='global_step') # Get images and labels of CIFAR-100 with tf.variable_scope('train_image'): train_images, train_labels = data_input.input_fn(FLAGS.data_dir, FLAGS.batch_size, train_mode=True) with tf.variable_scope('test_image'): test_images, test_labels = data_input.input_fn(FLAGS.data_dir, FLAGS.batch_size, train_mode=False) # The class labels with open(os.path.join(FLAGS.data_dir, 'fine_label_names.txt')) as fd: classes = [temp.strip() for temp in fd.readlines()] images = tf.placeholder( tf.float32, [FLAGS.batch_size, data_input.HEIGHT, data_input.WIDTH, 3]) labels = tf.placeholder(tf.int32, [FLAGS.batch_size]) new_network = resnet.ResNet(hp, images, labels, global_step, init_params, FLAGS.new_k) new_network.build_model() new_network.build_train_op() train_summary_op = tf.summary.merge_all() init = tf.initialize_all_variables() sess = tf.Session(config=tf.ConfigProto( gpu_options=tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_fraction), log_device_placement=FLAGS.log_device_placement)) sess.run(init) # Create a saver. 
saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000) ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir) if ckpt and ckpt.model_checkpoint_path: print('\tRestore from %s' % ckpt.model_checkpoint_path) # Restores from checkpoint saver.restore(sess, ckpt.model_checkpoint_path) init_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found. Start from the scratch.') sys.stdout.flush() # Start queue runners & summary_writer tf.train.start_queue_runners(sess=sess) if not os.path.exists(FLAGS.train_dir): os.mkdir(FLAGS.train_dir) summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) # Training! test_best_acc = 0.0 for step in range(init_step, FLAGS.max_steps): # Test if step % FLAGS.test_interval == 0: test_loss, test_acc = 0.0, 0.0 for i in range(FLAGS.test_iter): test_images_val, test_labels_val = sess.run( [test_images, test_labels]) loss_value, acc_value = sess.run( [new_network.loss, new_network.acc], feed_dict={ new_network.is_train: False, images: test_images_val, labels: test_labels_val }) test_loss += loss_value test_acc += acc_value test_loss /= FLAGS.test_iter test_acc /= FLAGS.test_iter test_best_acc = max(test_best_acc, test_acc) format_str = ('%s: (Test) step %d, loss=%.4f, acc=%.4f') print(format_str % (datetime.now(), step, test_loss, test_acc)) sys.stdout.flush() test_summary = tf.Summary() test_summary.value.add(tag='test/loss', simple_value=test_loss) test_summary.value.add(tag='test/acc', simple_value=test_acc) test_summary.value.add(tag='test/best_acc', simple_value=test_best_acc) summary_writer.add_summary(test_summary, step) summary_writer.flush() # Train start_time = time.time() train_images_val, train_labels_val = sess.run( [train_images, train_labels]) _, lr_value, loss_value, acc_value, train_summary_str = \ sess.run([new_network.train_op, new_network.lr, new_network.loss, new_network.acc, train_summary_op], feed_dict={new_network.is_train:True, images:train_images_val, 
labels:train_labels_val}) duration = time.time() - start_time assert not np.isnan(loss_value) # Display & Summary(training) if step % FLAGS.display == 0: num_examples_per_step = FLAGS.batch_size examples_per_sec = num_examples_per_step / duration sec_per_batch = float(duration) format_str = ( '%s: (Training) step %d, loss=%.4f, acc=%.4f, lr=%f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, acc_value, lr_value, examples_per_sec, sec_per_batch)) sys.stdout.flush() summary_writer.add_summary(train_summary_str, step) # Save the model checkpoint periodically. if (step > init_step and step % FLAGS.checkpoint_interval == 0) or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
def eval(root_image, root_model):
    """Evaluate a ResNet-152 classifier on an ImageFolder-style test set.

    Args:
        root_image: directory of test images, one sub-folder per class
            (torchvision ``ImageFolder`` layout).
        root_model: path to a saved ``state_dict`` checkpoint.

    Prints per-class top-1 accuracy and overall accuracy to stdout.
    """
    print("\nCurrent test image path ==> ", root_image)
    print("Current model ==> ", root_model)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # resnet50 : [3, 4, 6, 3]
    # resnet101 : [3, 4, 23, 3]
    # resnet152 : [3, 8, 36, 3]
    model = resnet.ResNet(resnet.Bottleneck, [3, 8, 36, 3])
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    model.load_state_dict(torch.load(root_model, map_location=device))
    model.to(device)

    transforms_test = transforms.Compose([
        # transforms.Pad(4),
        # transforms.RandomHorizontalFlip(),
        # transforms.RandomCrop(10),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Test loader; shuffle=False keeps evaluation order deterministic.
    testset = torchvision.datasets.ImageFolder(root=root_image,
                                               transform=transforms_test)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=16,
                                              shuffle=False,
                                              num_workers=4)

    # Class names come from the (sorted) training folder layout; size the
    # per-class counters from it instead of a hard-coded 67 so the two
    # cannot drift out of sync.
    classes = os.listdir("./data/rename_headline_piap/")
    classes.sort()
    class_count = len(classes)
    class_correct = [0.0] * class_count
    class_total = [0.0] * class_count

    model.eval()
    with torch.no_grad():
        total_corr = 0.
        total = 0.
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            # Top-1 prediction per sample.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            total_corr += (predicted == labels).sum().item()

            # NOTE: no .squeeze() here — squeezing a length-1 comparison
            # yields a 0-dim tensor and correct[i] would raise IndexError
            # on a final batch of size 1. The comparison is already 1-D.
            correct = (predicted == labels)
            for i in range(len(labels)):
                label = labels[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1

    for i in range(class_count):
        ### For euc-kr decoding
        # unhexlify(classes[i]).decode('euc-kr')
        # print('Accuracy of %s : %2d %%' %(unhexlify(classes[i]).decode('euc-kr')[0], 100*class_correct[i]/class_total[i]))
        if class_total[i] > 0:
            print('Accuracy of %s ==> %2d %%' %
                  (classes[i], 100 * class_correct[i] / class_total[i]))
        else:
            # Guard: a class folder with no test samples must not crash
            # the report with ZeroDivisionError.
            print('Accuracy of %s ==> n/a (no samples)' % classes[i])

    print("Accuracy of the network ======> %2d %%" %
          (100 * total_corr / total))
def train():
    """Evaluate a ResNet checkpoint on the ImageNet test split.

    Despite its name this routine only runs inference: it builds the
    TF1 graph, restores ``FLAGS.checkpoint``, iterates ``FLAGS.test_iter``
    batches, accumulates loss/accuracy and a confusion matrix, and pickles
    the results to ``FLAGS.output_file``.
    """
    # Echo the full flag configuration so logs are self-describing.
    print('[Dataset Configuration]')
    print('\tImageNet test root: %s' % FLAGS.test_image_root)
    print('\tImageNet test list: %s' % FLAGS.test_dataset)
    print('\tNumber of classes: %d' % FLAGS.num_classes)
    print('\tNumber of test images: %d' % FLAGS.num_test_instance)

    print('[Network Configuration]')
    print('\tBatch size: %d' % FLAGS.batch_size)
    print('\tCheckpoint file: %s' % FLAGS.checkpoint)

    print('[Optimization Configuration]')
    print('\tL2 loss weight: %f' % FLAGS.l2_weight)
    print('\tThe momentum optimizer: %f' % FLAGS.momentum)
    print('\tInitial learning rate: %f' % FLAGS.initial_lr)
    print('\tEpochs per lr step: %s' % FLAGS.lr_step_epoch)
    print('\tLearning rate decay: %f' % FLAGS.lr_decay)

    print('[Evaluation Configuration]')
    print('\tOutput file path: %s' % FLAGS.output_file)
    print('\tTest iterations: %d' % FLAGS.test_iter)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)
    print('\tLog device placement: %d' % FLAGS.log_device_placement)

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels of ImageNet. Input pipeline is pinned to
        # the CPU so the GPU is reserved for the model itself.
        print('Load ImageNet dataset')
        with tf.device('/cpu:0'):
            print('\tLoading test data from %s' % FLAGS.test_dataset)
            with tf.variable_scope('test_image'):
                test_images, test_labels = data_input.inputs(
                    FLAGS.test_image_root,
                    FLAGS.test_dataset,
                    FLAGS.batch_size,
                    False,
                    num_threads=1,
                    center_crop=True)

        # Build a Graph that computes the predictions from the inference
        # model. Placeholders decouple graph input from the queue pipeline:
        # batches are fetched with sess.run and fed back in via feed_dict.
        images = tf.placeholder(tf.float32, [
            FLAGS.batch_size, data_input.IMAGE_HEIGHT, data_input.IMAGE_WIDTH,
            3
        ])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size])

        # Build model
        with tf.device('/GPU:0'):
            hp = resnet.HParams(batch_size=FLAGS.batch_size,
                                num_classes=FLAGS.num_classes,
                                weight_decay=FLAGS.l2_weight,
                                momentum=FLAGS.momentum,
                                finetune=FLAGS.finetune)
            network = resnet.ResNet(hp, images, labels, global_step)
            network.build_model()
        print('\tNumber of Weights: %d' % network._weights)
        print('\tFLOPs: %d' % network._flops)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        '''debugging attempt
        from tensorflow.python import debug as tf_debug
        sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        def _get_data(datum, tensor):
            return tensor == train_images
        sess.add_tensor_filter("get_data", _get_data)
        '''
        sess.run(init)

        # Create a saver; restore every variable from the base checkpoint.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        if FLAGS.checkpoint is not None:
            saver.restore(sess, FLAGS.checkpoint)
            print('Load checkpoint %s' % FLAGS.checkpoint)
        else:
            print(
                'No checkpoint file of basemodel found. Start from the scratch.'
            )

        # Start queue runners & summary_writer
        tf.train.start_queue_runners(sess=sess)

        # Test!
        test_loss = 0.0
        test_acc = 0.0
        test_time = 0.0
        # confusion_matrix[l, p]: count of true label l predicted as p.
        confusion_matrix = np.zeros((FLAGS.num_classes, FLAGS.num_classes),
                                    dtype=np.int32)
        for i in range(FLAGS.test_iter):
            # Pull one batch out of the queue, then feed it through the
            # placeholders (is_train=False selects inference behavior).
            test_images_val, test_labels_val = sess.run(
                [test_images, test_labels])
            start_time = time.time()
            loss_value, acc_value, pred_value = sess.run(
                [network.loss, network.acc, network.preds],
                feed_dict={
                    network.is_train: False,
                    images: test_images_val,
                    labels: test_labels_val
                })
            duration = time.time() - start_time
            test_loss += loss_value
            test_acc += acc_value
            test_time += duration
            for l, p in zip(test_labels_val, pred_value):
                confusion_matrix[l, p] += 1

            # Periodic progress report with throughput figures.
            if i % FLAGS.display == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: iter %d, loss=%.4f, acc=%.4f (%.1f examples/sec; %.3f sec/batch)'
                )
                print(format_str % (datetime.now(), i, loss_value, acc_value,
                                    examples_per_sec, sec_per_batch))
        # Average the accumulated per-batch metrics.
        test_loss /= FLAGS.test_iter
        test_acc /= FLAGS.test_iter

        # Print and save results
        sec_per_image = test_time / FLAGS.test_iter / FLAGS.batch_size
        print('Done! Acc: %.6f, Test time: %.3f sec, %.7f sec/example' %
              (test_acc, test_time, sec_per_image))
        print('Saving result... ')
        result = {
            'accuracy': test_acc,
            'confusion_matrix': confusion_matrix,
            'test_time': test_time,
            'sec_per_image': sec_per_image
        }
        with open(FLAGS.output_file, 'wb') as fd:
            pickle.dump(result, fd)
        print('done!')