def GenerateSolverPrototxt(self):
    """Write the Caffe solver prototxt used for training.

    A ``SolverParameter`` is filled from the instance's train/test model
    file names plus a fixed set of SGD hyper-parameters, and its text form
    is written to ``self.caffeSolverFileName``.
    """
    params = caffe_pb2.SolverParameter()

    # Network definition files.
    params.train_net = self.caffeTrainModelFileName
    params.test_net.append(self.caffeTestModelFileName)

    # Optimiser and device.
    params.type = "SGD"
    params.solver_mode = caffe_pb2.SolverParameter.GPU

    # Test and snapshot cadence.
    params.test_iter.append(100)
    params.test_interval = 100000
    params.snapshot = 3000
    # The last six characters of the weights prefix are stripped.
    params.snapshot_prefix = self.caffeWeightsFilePrefix[:-6]

    # Step learning-rate schedule and remaining hyper-parameters.
    params.lr_policy = "step"
    params.base_lr = 0.00025
    params.gamma = 0.5
    params.stepsize = 1000
    params.display = 100
    params.max_iter = 450000
    params.momentum = 0.9
    params.weight_decay = 0.0005

    # Serialise in protobuf text format.
    with open(self.caffeSolverFileName, 'w') as out:
        out.write(str(params))
def make_solver(niter=20000, lr=0.1):
    """Write a Nesterov multistep solver prototxt for the residual nets.

    Args:
        niter: Value stored as ``max_iter``; the learning rate steps at
            50% and 75% of it.
        lr: Base learning rate.
    """
    param = caffe_pb2.SolverParameter()
    param.random_seed = 0xCAFFE

    param.train_net = 'examples/python_stoch_dep/residual_train.prototxt'
    param.test_net.append('examples/python_stoch_dep/residual_test.prototxt')
    param.test_interval = 10000
    param.test_iter.append(100)

    param.max_iter = niter

    param.type = 'Nesterov'
    param.base_lr = lr
    param.momentum = 0.9
    param.weight_decay = 1e-4

    param.lr_policy = 'multistep'
    param.gamma = 0.1
    for fraction in (0.5, 0.75):
        param.stepvalue.append(int(fraction * param.max_iter))
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    solver_path = 'examples/resnet_cifar/solver.prototxt'
    with open(solver_path, 'w') as out:
        out.write(str(param))
def write_solver_prototxt(template, train_prototxt, maxiter,
                          path='../prototxt/solver/redeye/'):
    """Generate one solver prototxt per network from a template solver.

    Args:
        template: Path to a solver prototxt (text format) used as the base.
        train_prototxt: Iterable of network prototxt paths.
        maxiter: Stored as both ``max_iter`` and ``snapshot``, so the
            snapshot interval equals the training length.
        path: Directory the generated solver files are written to.

    Returns:
        ``(proto_name, model_name)``: the generated solver file paths and
        the matching ``<stem>_iter_<maxiter>.caffemodel`` paths, in input
        order.
    """
    snapshot_dir = '../models/googlenet/snapshots/'
    model_name, proto_name = [], []
    for prototxt in train_prototxt:
        # File name up to the first dot; shared by every derived path below.
        stem = os.path.basename(prototxt).split('.')[0]

        solver = caffe_pb2.SolverParameter()
        # Use a context manager so the template handle is closed each time.
        with open(template) as template_file:
            proto.Merge(template_file.read(), solver)

        solver.snapshot_prefix = os.path.join(snapshot_dir, stem)
        solver.max_iter = maxiter
        solver.snapshot = maxiter
        model_name.append(os.path.join(
            snapshot_dir, stem + "_iter_" + str(maxiter) + ".caffemodel"))
        solver.net = prototxt
        solver.test_interval = 50000

        new_solver = proto.MessageToString(solver)
        file_name = os.path.join(path, 'goog_solver_' + stem + '.prototxt')
        proto_name.append(file_name)
        with open(file_name, 'w') as new_proto:
            new_proto.write(new_solver)
    return proto_name, model_name
def __init__(self, net_name, cfg, net_param):
    """Build a solver definition from ``cfg`` and write it to a temp file.

    Args:
        net_name: Stored as the solver's ``snapshot_prefix``.
        cfg: Configuration object; the fields read are visible below.
        net_param: Net definition merged into the solver's ``net_param``.

    Raises:
        NotImplementedError: If ``cfg.LR_POLICY.TYPE`` is not one of
            "step", "multistep" or "fixed".
    """
    self._cfg = cfg
    self._solver = caffe_pb2.SolverParameter()
    self._solver.net_param.MergeFrom(net_param)

    self._solver.iter_size = cfg.IMS_PER_BATCH
    self._solver.base_lr = cfg.BASE_LR

    policy = cfg.LR_POLICY.TYPE
    self._solver.lr_policy = policy
    if policy == "step":
        self._solver.gamma = cfg.LR_POLICY.GAMMA
    elif policy == "multistep":
        self._solver.stepvalue.extend(cfg.LR_POLICY.STEPS)
        self._solver.gamma = cfg.LR_POLICY.GAMMA
    elif policy == 'fixed':
        # Compare the TYPE string, not the LR_POLICY node itself; the old
        # comparison never matched, so 'fixed' fell through to the error.
        pass
    else:
        raise NotImplementedError(policy)

    self._solver.momentum = cfg.MOMENTUM
    self._solver.weight_decay = cfg.WEIGHT_DECAY

    self._solver.display = cfg.DISPLAY.PERIOD
    self._solver.average_loss = cfg.DISPLAY.AVERAGE_LOSS

    self._solver.snapshot = 0
    self._solver.snapshot_prefix = net_name

    # mkstemp() returns (fd, path); both are kept on the instance.
    # NOTE(review): the descriptor is not closed here - confirm that it is
    # released elsewhere.
    self._solver_file = mkstemp()
    print('Created solver path:', self._solver_file[1])
    with open(self._solver_file[1], 'w') as f:
        f.write(str(self._solver))
def create_caffe_solver(self, snapshot_prefix):
    """Build a ``SolverParameter`` from ``self.pars`` and the net prototxts.

    Args:
        snapshot_prefix: Stored as the solver's ``snapshot_prefix``.

    Returns:
        The populated ``caffe_pb2.SolverParameter``; nothing is written to
        disk here.
    """
    hyper = self.pars
    solver_param = caffe_pb2.SolverParameter()

    solver_param.train_net = self.train_proto
    if self.test_proto is not None:
        solver_param.test_net.append(self.test_proto)
        # Run a test pass every 'test_interval' iterations.
        solver_param.test_interval = hyper['test_interval']
        # Number of test batches per pass. IMPORTANT: test_iter times the
        # test net's batch size should equal the size of the test data.
        solver_param.test_iter.append(hyper['test_iter'])

    # Step learning-rate schedule.
    solver_param.base_lr = hyper['base_learning_rate']
    solver_param.lr_policy = 'step'
    # CJB: what's this? Should we create a hyper-parameter for it?
    solver_param.gamma = 0.1
    solver_param.stepsize = hyper['step_size']

    solver_param.max_iter = hyper['max_iter']
    solver_param.display = hyper['train_interval']
    solver_param.momentum = hyper['learning_momentum']
    solver_param.weight_decay = hyper['weight_decay']

    solver_param.snapshot = hyper['snapshot']
    solver_param.snapshot_prefix = snapshot_prefix

    solver_param.solver_mode = caffe_pb2.SolverParameter.GPU
    solver_param.random_seed = 333
    return solver_param
def make_solver():
    """Build the SGD solver definition for the current data/ratio run.

    Reads the module-level ``maxIter``, ``test_iter``, ``tmp_dir`` and
    ``args``. Creates the ``model`` directory next to this file when it is
    missing.

    Returns:
        The populated ``caffe_pb2.SolverParameter``.
    """
    param = caffe_pb2.SolverParameter()
    param.random_seed = 0xCAFFE
    param.type = 'SGD'
    param.display = 5
    param.base_lr = 1e-1
    param.lr_policy = "step"
    param.gamma = 0.5
    param.momentum = 0.9
    param.stepsize = 10000
    param.max_iter = maxIter
    param.snapshot = 5000

    # Snapshots live in a 'model' directory beside this source file.
    model_dir = join(dirname(__file__), 'model')
    if not isdir(model_dir):
        os.makedirs(model_dir)
    ratio_text = str(args.ratio)
    param.snapshot_prefix = join(model_dir, args.data + '-Ratio' + ratio_text)

    train_file = args.data + '-train-ratio' + ratio_text + '.prototxt'
    test_file = args.data + '-test-ratio' + ratio_text + '.prototxt'
    param.train_net = join(tmp_dir, train_file)
    param.test_net.append(join(tmp_dir, test_file))

    # Interval beyond max_iter: testing is done manually by the caller.
    param.test_interval = maxIter + 1
    param.test_iter.append(test_iter)
    param.test_initialization = False
    return param
def get_solver_pt(train_data_file):
    """Write ``solver.prototxt`` with iteration counts derived from epochs.

    Args:
        train_data_file: Text file whose line count is taken as the number
            of training samples.
    """
    solver_config = cfg.SOLVER
    max_epoch = solver_config.max_epoch

    # One line per training sample.
    with open(train_data_file, 'r') as listing:
        train_data_num = sum(1 for _ in listing)

    batch_size = cfg.TRAIN.IMS_PER_BATCH
    # Convert epoch counts into iteration counts.
    max_iter = int(max_epoch * train_data_num / batch_size)
    stepsize = int(solver_config.step_epoch * train_data_num / batch_size)

    solver = caffe_pb2.SolverParameter(
        train_net="train.prototxt",
        base_lr=solver_config.base_lr,
        weight_decay=0.0005,
        lr_policy="step",
        stepsize=stepsize,
        gamma=0.1,
        momentum=0.9,
        iter_size=1,
        max_iter=max_iter,
        snapshot=solver_config.snapshot,
        display=20,
        average_loss=10,
        type="SGD",
        snapshot_prefix="models_solverstates/",
        snapshot_after_train=True,
    )

    with open("solver.prototxt", 'w') as out:
        out.write(str(solver))
def __init__(self, solver_prototxt, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            SGD solver and parsed again into ``self.solver_param``.
        roidb: Passed to the bbox-target computation and to the net's
            first layer via ``set_roidb``.
        output_dir: Stored on the instance.
        pretrained_model: Optional weights file copied into the net.
    """
    self.output_dir = output_dir

    if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
        # RPN can only use precomputed normalization because there are no
        # fixed statistics to compute a priori
        assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

    if cfg.TRAIN.BBOX_REG:
        print('Computing bounding-box regression targets...')
        self.bbox_means, self.bbox_stds = \
            rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        # Format first, then print: the same output on Python 2 and 3.
        print('Loading pretrained model '
              'weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)

    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)

    self.solver.net.layers[0].set_roidb(roidb)
def __init__(self, solver_prototxt, roidb, output_dir,
             pretrained_model=None, snapshot=None):
    """Initialize the SolverWrapper.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            SGD solver and parsed again into ``self.solver_param``.
        roidb: Handed to the net's first layer via ``set_roidb``.
        output_dir: Stored on the instance.
        pretrained_model: Optional weights file; only used when
            ``snapshot`` is None.
        snapshot: Optional solver state to restore; takes precedence over
            ``pretrained_model``.
    """
    self.output_dir = output_dir

    # All four options are required by this wrapper.
    assert (cfg.TRAIN.HAS_RPN
            and cfg.TRAIN.BBOX_REG
            and cfg.TRAIN.BBOX_NORMALIZE_TARGETS
            and cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED)

    self.solver = caffe.SGDSolver(solver_prototxt)
    if snapshot is not None:
        # Restore from snapshot. The message is formatted before printing;
        # calling .format() on print()'s result fails on Python 3.
        print('Restoring from {:s}'.format(snapshot))
        self.solver.restore(snapshot)
    elif pretrained_model is not None:
        # Copy pretrained weights.
        print('Loading pretrained model '
              'weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)

    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        text_format.Merge(f.read(), self.solver_param)

    self.solver.net.layers[0].set_roidb(roidb)
def Solver(model_name, trainModel, testModel, total_samples,
           train_batch_size, epochs, outputDir=None):
    """Write an Adam solver prototxt and return its path.

    Args:
        model_name: Stored as the solver's ``snapshot_prefix``.
        trainModel: Path of the training net prototxt.
        testModel: Path of the test net prototxt.
        total_samples: Number of training samples.
        train_batch_size: Training batch size.
        epochs: Number of epochs; together with the two values above it
            determines the snapshot interval.
        outputDir: Directory for ``solver.prototxt``; the current working
            directory is used when this is falsy.

    Returns:
        Path of the written ``solver.prototxt``.
    """
    s = caffe_pb2.SolverParameter()
    # This used to be the bare annotation ``s.solver_mode: GPU``, which
    # assigns nothing; set the field explicitly.
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    s.random_seed = 0xCAFFE

    # Train and test networks. max_iter is left unset here (its assignment
    # was commented out in the original).
    s.train_net = trainModel
    s.test_net.append(testModel)
    s.test_interval = 500    # Test after every 500 training iterations.
    s.test_iter.append(300)  # Test on 300 batches each time we test.

    # Solver types include "SGD", "Adam", and "Nesterov" among others.
    s.type = "Adam"

    s.base_lr = 0.00001
    s.momentum = 0.90
    s.weight_decay = 5e-4

    # 'inv' learning-rate policy.
    s.lr_policy = 'inv'
    s.gamma = 0.0001
    s.power = 0.75

    # Display the training loss every 1000 iterations.
    s.display = 1000

    # Snapshot interval: one iteration short of
    # epochs * (total_samples // train_batch_size).
    s.snapshot = epochs * (total_samples // train_batch_size) - 1
    s.snapshot_prefix = model_name

    output_dir = outputDir if outputDir else os.getcwd()
    solver_path = os.path.join(output_dir, 'solver.prototxt')
    with open(solver_path, 'w') as f:
        f.write(str(s))
    return solver_path
def make_solver(batch_size, epoch_mult, train_sam, test_sam):
    """Print and write a Nesterov multistep solver sized by sample counts.

    Uses the module-level ``train_dir``, ``test_dir`` and ``solver_dir``.

    Args:
        batch_size: Batch size used to convert samples to iterations.
        epoch_mult: Number of epochs; ``max_iter`` is epoch * epoch_mult.
        train_sam: Number of training samples.
        test_sam: Number of test samples.
    """
    # Iterations per pass over the training / test data.
    epoch = int(train_sam / batch_size) + 1
    test_iter = int(test_sam / batch_size) + 1
    max_iter = epoch * epoch_mult
    test_interval = epoch

    param = caffe_pb2.SolverParameter()
    param.random_seed = 0xCAFFE

    param.train_net = train_dir
    param.test_net.append(test_dir)
    param.test_interval = test_interval
    param.test_iter.append(test_iter)

    param.max_iter = max_iter

    param.type = 'Nesterov'
    param.display = int(epoch / 5)

    param.base_lr = 0.1
    param.momentum = 0.9
    param.weight_decay = 0.0005

    # Drop the learning rate at 50%, 75% and 90% of training.
    param.lr_policy = 'multistep'
    param.gamma = 0.1
    for fraction in (0.5, 0.75, 0.9):
        param.stepvalue.append(int(fraction * param.max_iter))
    param.solver_mode = caffe_pb2.SolverParameter.GPU
    param.snapshot = 5000
    param.snapshot_prefix = './snap/cifar100_dense40_v2'

    print(param)

    with open(solver_dir, 'w') as out:
        out.write(str(param))
def make_solver(batch_size, train_sam, test_sam, epoch_mult):
    """Print and write a Nesterov multistep solver sized by sample counts.

    Uses the module-level ``train_dir``, ``test_dir``, ``solver_dir`` and
    ``snapshot_prefix``.

    Args:
        batch_size: Batch size used to convert samples to iterations.
        train_sam: Number of training samples.
        test_sam: Number of test samples.
        epoch_mult: Number of epochs; ``max_iter`` is epoch * epoch_mult.
    """
    # Iterations per pass over the training / test data.
    epoch = int(train_sam / batch_size) + 1
    test_iter = int(test_sam / batch_size) + 1
    max_iter = epoch * epoch_mult
    test_interval = epoch

    param = caffe_pb2.SolverParameter()

    param.train_net = train_dir
    param.test_net.append(test_dir)
    param.test_interval = test_interval
    param.test_iter.append(test_iter)

    param.max_iter = max_iter

    param.type = 'Nesterov'
    param.display = int(epoch / 5)
    # Oscillation if lr is excessive, overfitting if lr is too small.
    param.base_lr = 0.1
    param.momentum = 0.9
    param.weight_decay = 0.0001

    # Drop the learning rate at 50%, 75% and 90% of training.
    param.lr_policy = 'multistep'
    param.gamma = 0.1
    for fraction in (0.5, 0.75, 0.9):
        param.stepvalue.append(int(fraction * param.max_iter))
    param.solver_mode = caffe_pb2.SolverParameter.GPU
    param.snapshot = 5000
    param.snapshot_prefix = snapshot_prefix

    print(param)

    with open(solver_dir, 'w') as out:
        out.write(str(param))
def solver(self, params):
    """Write a single-iteration SGD solver prototxt.

    Args:
        params: Mapping providing ``'path2train_net'``, ``'useGPU'``,
            ``'path2solver'`` and, when ``'useGPU'`` is truthy,
            ``'DEVICE_ID'``.
    """
    s = caffe_pb2.SolverParameter()

    # Location of the network definition.
    s.net = params['path2train_net']

    s.max_iter = 1

    # SGD with a step learning-rate policy.
    s.type = 'SGD'
    s.lr_policy = 'step'
    s.gamma = 0.5
    s.stepsize = 50
    s.base_lr = 0.0001

    # SGD hyper-parameters.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Train on the CPU or GPU.
    if params['useGPU']:
        s.solver_mode = caffe_pb2.SolverParameter.GPU
        s.device_id = params['DEVICE_ID']
    else:
        s.solver_mode = caffe_pb2.SolverParameter.CPU

    # Context manager so the file is closed even if the write raises.
    with open(params['path2solver'], 'w') as f:
        f.write(str(s))
def make_solver(train_net_path, solver_path, snapshot_path, opt, dataset_size):
    """Write an Adam solver prototxt for a train-only network.

    Args:
        train_net_path: Path of the training net prototxt.
        solver_path: Output path for the solver prototxt.
        snapshot_path: Stored as the solver's ``snapshot_prefix``.
        opt: Options object; reads ``num_epoch``, ``train_batch_size``,
            ``base_lr``, ``lr_policy`` and, for the 'step' policy,
            ``gamma`` and ``stepsize``.
        dataset_size: Number of training samples, used to turn epochs
            into iterations.
    """
    s = caffe_pb2.SolverParameter()

    # Location of the train network and total training length.
    s.train_net = train_net_path
    s.max_iter = int(opt.num_epoch * dataset_size / opt.train_batch_size)

    # Learning policy.
    s.base_lr = opt.base_lr
    s.lr_policy = opt.lr_policy
    if s.lr_policy == 'step':
        s.gamma = opt.gamma
        s.stepsize = opt.stepsize
    s.type = "Adam"
    s.momentum = 0.9
    s.weight_decay = 5e-4
    s.iter_size = 1  # no gradient accumulation

    # Other helper parameters.
    s.display = 20
    s.snapshot = 2500
    s.snapshot_prefix = snapshot_path
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # Parenthesised single-argument form prints identically on Python 2
    # and is valid on Python 3.
    print("Writing prototxt file for solver...")
    with open(solver_path, 'w') as f:
        f.write(str(s))
def make_solver():
    """Write ``solver.prototxt`` for the DenseNet train/test prototxts.

    Nesterov solver with a multistep schedule that drops the learning
    rate at 50% and 75% of ``max_iter``.
    """
    param = caffe_pb2.SolverParameter()
    param.random_seed = 0xCAFFE

    param.train_net = 'train_densenet.prototxt'
    param.test_net.append('test_densenet.prototxt')
    param.test_interval = 800
    param.test_iter.append(200)

    param.max_iter = 230000

    param.type = 'Nesterov'
    param.display = 1

    param.base_lr = 0.1
    param.momentum = 0.9
    param.weight_decay = 1e-4

    param.lr_policy = 'multistep'
    param.gamma = 0.1
    for fraction in (0.5, 0.75):
        param.stepvalue.append(int(fraction * param.max_iter))
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as out:
        out.write(str(param))
def create_solver_proto(train_net, test_net, lr, prefix, test_iter=300,
                        test_interval=10000, max_iter=2e6, snapshot=100000,
                        gpu=0, debug_info=False):
    """Build an ADAM solver definition with a fixed learning rate.

    Args:
        train_net: Path of the training net prototxt.
        test_net: Path of the test net prototxt.
        lr: Base learning rate.
        prefix: Snapshot prefix.
        test_iter: Number of test batches per test pass.
        test_interval: Iterations between test passes.
        max_iter: Number of training iterations; converted to ``int``
            because the default is written as a float.
        snapshot: Iterations between snapshots.
        gpu: Accepted but not used by this function.
        debug_info: Stored as the solver's ``debug_info`` flag.

    Returns:
        The populated ``SolverParameter``.
    """
    solver = PB.SolverParameter()

    solver.train_net = train_net
    solver.test_net.extend([test_net])
    solver.test_iter.extend([test_iter])
    solver.test_interval = test_interval

    solver.display = 1000
    # max_iter is an integer field; the float default (2e6) would be
    # rejected by the protobuf setter without this conversion.
    solver.max_iter = int(max_iter)

    solver.snapshot = snapshot
    solver.snapshot_prefix = prefix
    solver.snapshot_format = PB.SolverParameter.HDF5

    solver.solver_mode = PB.SolverParameter.GPU
    solver.solver_type = PB.SolverParameter.ADAM
    solver.base_lr = lr
    solver.lr_policy = "fixed"
    solver.average_loss = 10000
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.delta = 1e-08
    solver.debug_info = debug_info
    return solver
def create_solver_step(net_name):
    """Write ``<net_name>_solver.prototxt`` with a step SGD schedule.

    Uses the module-level ``snapshot_dir`` for the snapshot prefix.

    Args:
        net_name: Base name used for the net, snapshot and solver files.
    """
    param = caffe_pb2.SolverParameter()
    param.net = "{0}_train_test.prototxt".format(net_name)

    # Testing cadence.
    param.test_interval = 500
    param.test_iter.append(100)

    # SGD hyper-parameters with a step decay.
    param.type = "SGD"
    param.base_lr = 0.01
    param.momentum = 0.9
    param.weight_decay = 0.0005
    param.lr_policy = "step"
    param.gamma = 0.96
    param.stepsize = 5000

    # Run length, logging and snapshots.
    param.display = 100
    param.max_iter = 20000
    param.snapshot = 1000
    param.snapshot_prefix = "{0}{1}".format(snapshot_dir, net_name)
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    filename = "{0}_solver.prototxt".format(net_name)
    with open(filename, "w") as out:
        out.write(str(param))
def make_solver(snapshot_dir, train_net_path, test_net_path,
                base_lr=0.0001, boost=1):
    """Write an SGD solver to a temporary file and return its filename.

    Args:
        snapshot_dir: Accepted but not used by this function.
        train_net_path: Path of the training net prototxt.
        test_net_path: Path of the test net prototxt; an empty string
            means no test net is configured.
        base_lr: Base learning rate.
        boost: Stored as ``iter_size`` (gradient accumulation factor).

    Returns:
        Name of the temporary file holding the solver definition. The file
        is not deleted automatically.
    """
    s = caffe_pb2.SolverParameter()

    # Locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path != '':
        s.test_initialization = False
        s.test_net.append(test_net_path)
        # Don't use caffe testing; the training script runs its own tests.
        s.test_interval = 99999999
        s.test_iter.append(100000000)

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor,
    # without affecting memory utilization.
    s.iter_size = boost

    s.solver_type = caffe_pb2.SolverParameter.SGD
    s.base_lr = base_lr
    s.momentum = 0.9
    s.weight_decay = 5e-4
    s.display = 100  # display training loss every 100 iters
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    s.debug_info = False

    # Text mode is required: the default mode is binary, and writing a
    # str to a binary file raises TypeError on Python 3.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(str(s))
        return f.name
def __init__(self, solver_prototxt, roidb, output_dir, pretrained_model=None):
    """Initialize the SolverWrapper.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            SGD solver and parsed again into ``self.solver_param``.
        roidb: Passed to the bbox-target computation and to the net's
            first layer via ``set_roidb``.
        output_dir: Stored on the instance.
        pretrained_model: Optional weights file copied into the net.
    """
    self.output_dir = output_dir

    print('Computing bounding-box regression targets...')
    if cfg.TRAIN.BBOX_REG:
        # The roidb module that computes the targets depends on cfg.IS_RPN.
        if cfg.IS_RPN:
            add_targets = gdl_roidb.add_bbox_regression_targets
        else:
            add_targets = rdl_roidb.add_bbox_regression_targets
        self.bbox_means, self.bbox_stds = add_targets(roidb)
    print('done')

    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        # Format first, then print: the same output on Python 2 and 3.
        print('Loading pretrained model '
              'weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)

    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)

    self.solver.net.layers[0].set_roidb(roidb)
def __init__(self, config):
    """Create the Caffe net (and solver, if requested) described by config.

    Reads ``config.model`` (topology, type, prototxt_type, optional
    weight), ``config.backend`` (optional engine), ``config.out_dir`` and
    an optional ``config.batch_size``.

    When ``config.model.prototxt_type`` is 'solver', a modified copy of
    the solver is written to ``<out_dir>/modified_solver.prototxt`` and
    loaded; ``self.solver`` and ``self.net`` are set. Otherwise only
    ``self.net`` is created directly from the topology.
    """
    # caffe constructor: network_file, phase, level, stages, weight, engine
    topology_path = os.path.expanduser(str(config.model.topology))

    if hasattr(config.backend, 'engine') and config.backend.engine != "default":
        engine = str(config.backend.engine)
    else:
        engine = 'CAFFE'

    if hasattr(config.model, 'weight'):
        logger.debug("loading weights from: {}".format(config.model.weight))
        weight_path = os.path.expanduser(str(config.model.weight))
    else:
        weight_path = None

    if config.model.type == "test":
        phase = caffe.TEST
    else:
        phase = caffe.TRAIN

    caffe.set_mode_cpu()
    caffe.set_random_seed(0)

    if hasattr(config, 'batch_size') and config.model.prototxt_type == 'train_val':
        topology_path = self.reshape_in_train_val(
            topology_path, config.batch_size, config.out_dir)

    if config.model.prototxt_type == 'solver':
        logger.debug("using engine: {}".format(engine))
        modified_solver_path = os.path.join(str(config.out_dir),
                                            'modified_solver.prototxt')
        solver_dir = os.path.dirname(modified_solver_path)
        if not os.path.exists(solver_dir):
            os.makedirs(solver_dir)

        solver_params = caffe_pb2.SolverParameter()
        # Open the expanded path so a '~' in the topology also resolves
        # here.
        with open(topology_path) as f:
            txtf.Merge(f.read(), solver_params)
        solver_params.engine = engine
        if hasattr(config, 'batch_size'):
            solver_params.net = self.reshape_in_train_val(
                str(solver_params.net), config.batch_size, config.out_dir)
        with open(modified_solver_path, 'w') as fp:
            fp.write(str(solver_params))

        self.solver = caffe.get_solver(modified_solver_path)
        self.net = self.solver.net
        if weight_path is not None:
            self.net.copy_from(weight_path)
    else:
        try:
            logger.debug("using engine: {}".format(engine))
            self.net = caffe.Net(topology_path, phase,
                                 weights=weight_path, engine=engine)
        except Exception:
            # Retry without the engine argument. Catching Exception rather
            # than everything lets KeyboardInterrupt/SystemExit propagate.
            logger.debug("retrying caffe.Net without the engine argument")
            self.net = caffe.Net(topology_path, phase, weights=weight_path)
def solver_deploy(train_net_path, test_net_path=None, solver_path=None,
                  base_lr=0.001):
    """Write a step-policy SGD solver and return the file it was written to.

    Args:
        train_net_path: Path of the training net prototxt.
        test_net_path: Optional path of the test net prototxt.
        solver_path: Output path; when None a temporary file is created
            (and not deleted automatically).
        base_lr: Base learning rate.

    Returns:
        ``solver_path`` when given, otherwise the temporary file's name.
    """
    s = caffe_pb2.SolverParameter()

    # Locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path is not None:
        s.test_net.append(test_net_path)
        s.test_interval = 10000  # Test after every 10000 training iterations
        s.test_iter.append(16)   # Test on 16 batches each time we test

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor,
    # without affecting memory utilization.
    s.iter_size = 8

    s.max_iter = 10000  # number of training iterations

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Initial learning rate for SGD.
    s.base_lr = base_lr

    # 'step' policy: multiply the learning rate by 'gamma' every
    # 'stepsize' iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 2000

    # Momentum takes a weighted average of the current and previous
    # gradients; L2 weight decay regularizes learning.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshot the network every 1000 iterations.
    s.snapshot = 1000
    s.snapshot_prefix = '/home/xingyunyang/Documents/workspace/Multi-Attention-CNN/experiment/train_bird_fix_cls'

    # Train on the GPU.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    if solver_path is None:
        # Text mode is required: the default mode is binary, and writing a
        # str to a binary file raises TypeError on Python 3.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            f.write(str(s))
            return f.name
    else:
        with open(solver_path, 'w') as f:
            f.write(str(s))
        return solver_path
def solver(train_net_path, test_net_path=None, base_lr=0.001):
    """Write a step-policy SGD solver to a temp file and return its name.

    Args:
        train_net_path: Path of the training net prototxt.
        test_net_path: Optional path of the test net prototxt.
        base_lr: Base learning rate.

    Returns:
        Name of the temporary file holding the solver definition. The file
        is not deleted automatically.
    """
    param = caffe_pb2.SolverParameter()

    # Train network, plus the test network and its schedule when given.
    param.train_net = train_net_path
    if test_net_path is not None:
        param.test_net.append(test_net_path)
        param.test_interval = 1000   # test after every 1000 iterations
        param.test_iter.append(100)  # 100 batches per test pass

    # Gradient accumulation factor: boosts the effective batch size
    # without affecting memory utilization.
    param.iter_size = 1

    param.max_iter = 100000  # number of training iterations

    # Stochastic gradient descent; 'Adam' and 'RMSProp' are alternatives.
    param.type = 'SGD'
    param.base_lr = base_lr

    # Multiply the learning rate by gamma every stepsize iterations.
    param.lr_policy = 'step'
    param.gamma = 0.1
    param.stepsize = 20000

    # Momentum averages current and previous gradients; L2 weight decay
    # regularizes learning.
    param.momentum = 0.9
    param.weight_decay = 5e-4

    # Log the training loss and accuracy every 1000 iterations.
    param.display = 1000

    # Snapshot every 10K iterations, ten times over the whole run.
    param.snapshot = 10000
    param.snapshot_prefix = caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'

    # Train on the GPU; CPU training of large networks is very slow.
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    # Reserve a temp file name, close it, then reopen it in text mode.
    with tempfile.NamedTemporaryFile(delete=False) as reserved:
        filename = reserved.name
    print('filename: ' + filename)
    with open(filename, 'w') as out:
        out.write(str(param))
    return filename
def __init__(self, solver_prototxt, output_dir, imdb, pretrained_model=None):
    """Create the SGD solver and hand the imdb to the net's first layer.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            solver and parsed again into ``self.solver_param``.
        output_dir: Stored on the instance.
        imdb: Stored on the instance and passed to ``prepare_imdb`` of the
            net's first layer.
        pretrained_model: Accepted but not used in this constructor.
    """
    self.output_dir = output_dir
    self.solver = caffe.SGDSolver(solver_prototxt)

    parsed = caffe_pb2.SolverParameter()
    self.solver_param = parsed
    self.imdb = imdb

    # TODO: is this necessary?
    with open(solver_prototxt, 'rt') as handle:
        pb2.text_format.Merge(handle.read(), parsed)

    # TODO: still to be implemented. Sets the input blob of the network's
    # first layer.
    input_layer = self.solver.net.layers[0]
    input_layer.prepare_imdb(imdb)
def finetune(self, train_set_path, validation_set_path):
    """Fine-tune the pretrained net and record loss/accuracy curves.

    Runs ``max_iter`` single solver steps (value read from the solver
    file), evaluates test net 0 every ``test_interval`` iterations, and
    saves the collected statistics to ``train_state.npz``. The file is
    written after each test pass and once more on exit, including when
    training is interrupted by an exception.

    Args:
        train_set_path: Training data path, forwarded to the data-path
            setter.
        validation_set_path: Validation data path, forwarded likewise.
    """
    # Setting the right paths for training (finetuning)
    self.__set_data_paths(train_set_path, validation_set_path)

    # Get useful values from solver file
    solver_config = caffe_pb2.SolverParameter()
    with open(self.SOLVER) as f:
        text_format.Merge(str(f.read()), solver_config)
    max_iter = solver_config.max_iter
    test_interval = solver_config.test_interval
    # test_iter is a repeated field (one entry per test net); only test
    # net 0 is evaluated below, so take its entry.
    # NOTE(review): the original comment says each batch holds 128 images -
    # confirm against the net definition.
    test_iter = solver_config.test_iter[0] if solver_config.test_iter else 0

    # Testing needs a positive interval and at least one test batch.
    run_tests = test_interval > 0 and test_iter > 0
    # Tests run at it = 0, test_interval, 2 * test_interval, ... which is
    # ceil(max_iter / test_interval) passes.
    if run_tests:
        num_tests = (max_iter + test_interval - 1) // test_interval
    else:
        num_tests = 0

    # Create the actual solver
    solver = caffe.SGDSolver(self.SOLVER)
    solver.net.copy_from(self.PRETRAINED)

    train_loss = np.zeros(max_iter)
    test_loss = np.zeros(num_tests)
    accuracies = np.zeros(num_tests)

    def save_stats():
        # Persist everything collected so far.
        with open('train_state.npz', 'wb') as stats_file:
            np.savez(stats_file, train_loss=train_loss,
                     test_loss=test_loss, accuracy=accuracies)

    test_i = 0
    try:
        for it in range(max_iter):
            solver.step(1)
            train_loss[it] = solver.net.blobs['loss'].data
            if it % 50 == 0:
                print('Iteration %d, Finetune loss=%f' % (it, train_loss[it]))

            if run_tests and it % test_interval == 0:
                # Average loss and accuracy over test_iter batches.
                test_loss_it = 0
                test_accuracy = 0
                for _ in range(test_iter):
                    solver.test_nets[0].forward()
                    test_loss_it += solver.test_nets[0].blobs['loss'].data
                    test_accuracy += solver.test_nets[0].blobs['accuracy'].data
                test_loss[test_i] = test_loss_it / test_iter
                accuracies[test_i] = test_accuracy / test_iter
                print('Iteration %d, Test loss=%f, Accuracy=%f' % (
                    it, test_loss[test_i], accuracies[test_i]))
                test_i += 1
                # save training stats
                save_stats()
    finally:
        save_stats()
def __init__(self, solver_prototxt, output_dir, pretrained_model=None):
    """Create the SGD solver and parse its definition.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            solver and parsed again into ``self.solver_param``.
        output_dir: Stored on the instance.
        pretrained_model: Optional weights file copied into the net.
    """
    self.output_dir = output_dir
    self.solver = caffe.SGDSolver(solver_prototxt)

    if pretrained_model is not None:
        message = ('loading model from {:s}').format(pretrained_model)
        print(message)
        self.solver.net.copy_from(pretrained_model)

    # Keep a parsed copy of the solver definition on the instance.
    parsed = caffe_pb2.SolverParameter()
    self.solver_param = parsed
    with open(solver_prototxt, 'rt') as handle:
        text_format.Merge(handle.read(), parsed)
def set_solver(self):
    '''
    Define the solver required by the model and write it to
    self.solver_prototxt_filename.
    Input Parameters : None
    Output Parameters : the populated SolverParameter object
    '''
    param = caffe_pb2.SolverParameter()

    # Fixed seed for reproducible experiments: this controls the
    # randomization in training.
    param.random_seed = 0xCAFFE

    # Train and validation networks.
    param.train_net = self.train_prototxt_filename
    param.test_net.append(self.val_prototxt_filename)
    param.test_interval = 500   # test after every 500 training iterations
    param.test_iter.append(100)  # 100 batches per test pass

    param.max_iter = 10000  # number of training iterations

    # Initial learning rate, momentum and weight decay.
    param.base_lr = 0.01
    param.momentum = 0.9
    param.weight_decay = 5e-4

    # 'inv' learning-rate policy (switch to 'fixed' to keep the rate
    # constant).
    param.lr_policy = 'inv'
    param.gamma = 0.0001
    param.power = 0.75

    # Display the current training loss and accuracy every 100 iterations.
    param.display = 100

    # Snapshot the network every 1000 iterations.
    param.snapshot = 1000
    param.snapshot_prefix = './caffemodel/lenet'

    # Train on the GPU.
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    # Write the solver definition in text format.
    with open(self.solver_prototxt_filename, 'w') as out:
        out.write(str(param))

    return param
def create_solver(train_net_path, test_net_path=None, base_lr=0.001):
    """Write ``solver.prototxt`` (step-policy SGD) and return its name.

    Args:
        train_net_path: Path of the training net prototxt.
        test_net_path: Optional path of the test net prototxt; when None,
            no test net or test schedule is written.
        base_lr: Base learning rate.

    Returns:
        The name of the written file, ``'solver.prototxt'``.
    """
    from caffe.proto import caffe_pb2

    s = caffe_pb2.SolverParameter()

    # Locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path is not None:
        # Appending the default None to the repeated string field raises,
        # so the test settings are only written when a path is supplied.
        s.test_net.append(test_net_path)
        s.test_interval = 100000  # Test after every 100000 iterations.
        s.test_iter.append(10)    # Test on 10 batches each time we test.

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor,
    # without affecting memory utilization.
    s.iter_size = 1

    s.max_iter = 10000000  # number of training iterations

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Initial learning rate for SGD.
    s.base_lr = base_lr

    # 'step' policy: multiply the learning rate by `gamma` every
    # `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 50000

    # Momentum takes a weighted average of the current and previous
    # gradients; L2 weight decay regularizes learning.
    s.momentum = 0.9           # SGD 0.9
    s.weight_decay = 0.0005    # VGG 0.0001 AlexNet 0.004

    # Display the current training loss and accuracy every 1000000
    # iterations.
    s.display = 1000000

    # Snapshot the network every 1000 iterations.
    s.snapshot = 1000
    s.snapshot_prefix = '../../../datasets/SocialMedia/models/CNNRegression/intagram_cities_CaffeNet_40'

    # Train on the GPU. Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    with open('solver.prototxt', 'w') as f:
        f.write(str(s))
        return f.name
def __init__(self, solver_prototxt, roidb, output_dir, image_index,
             image_cls, pretrained_model=None):
    """Initialize the SolverWrapper.

    Args:
        solver_prototxt: Path to the solver definition; used to create the
            SGD solver and parsed again into ``self.solver_param``.
        roidb: Region-of-interest database, or None to take the bbox
            normalization statistics from ``cfg`` instead.
        output_dir: Stored on the instance.
        image_index: Passed to the net's first layer.
        image_cls: Stored on the instance and passed to the first layer.
        pretrained_model: Optional weights file copied into the net.
    """
    self.image_cls = image_cls
    self.output_dir = output_dir

    if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
        # RPN can only use precomputed normalization because there are no
        # fixed statistics to compute a priori
        assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

    if cfg.TRAIN.BBOX_REG:
        print('computing bounding-box regression targets...')
        print('in lib/fast_rcnn/train.py -- __init__ func...')
        if roidb is not None:
            self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
        else:
            # bbox reg from cache files. A stray trailing comma used to
            # wrap bbox_means in a 1-tuple here, unlike bbox_stds.
            self.bbox_means = cfg.TRAIN.BBOX_REG_NORMALIZE_MEANS
            self.bbox_stds = cfg.TRAIN.BBOX_REG_NORMALIZE_STDS

        assert (self.bbox_means is not None), 'invalid bbox_means in SolverWrapper'
        assert (self.bbox_stds is not None), 'invalid bbox_stds in SolverWrapper'
        print('computing bounding-box regression targets done...')
        print('in lib/fast_rcnn/train.py -- __init__ func of SolverWrapper class.')
    sleep(3)

    print("instance solver")
    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        # Format first, then print: the same output on Python 2 and 3.
        print('Loading pretrained model weights from {:s}'.format(
            pretrained_model))
        self.solver.net.copy_from(pretrained_model)

    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)

    # print('') emits a blank line on both Python 2 and Python 3.
    print('')
    print("set image index, image cls and roidb")
    print("in lib/fast_rcnn/train.py ...")
    print('')
    self.solver.net.layers[0].set_image_cls(image_cls)
    self.solver.net.layers[0].set_image_index(image_index)
    self.solver.net.layers[0].set_roidb(roidb)
    sleep(3)
def solver(caffenet, prefix):
    """Return the text-format solver definition for ``caffenet``.

    Args:
        caffenet: Path of the combined train/test net prototxt.
        prefix: File path prefix for snapshots. Note: this is relative to
            the invocation of the `caffe` utility, not to the solver
            definition file.

    Returns:
        The solver definition as a prototxt string.
    """
    param = caffe_pb2.SolverParameter()

    # Network definition (no fixed random seed and no explicit solver
    # type are set).
    param.net = caffenet

    # Test after every 1000 training iterations, on 1000 batches each time.
    param.test_interval = 1000
    param.test_iter.append(1000)

    # Initial learning rate, momentum and weight decay.
    param.base_lr = 0.01
    param.momentum = 0.9
    param.weight_decay = 5e-4

    # 'step' policy: multiply the learning rate by gamma = 0.1 every 100K
    # iterations ('fixed' would keep it constant).
    param.lr_policy = 'step'
    param.gamma = 0.1
    param.stepsize = 100000

    # Number of training iterations.
    param.max_iter = 450000

    # Display the current training loss and accuracy every 20 iterations.
    param.display = 20

    # Snapshot the network every 10K iterations.
    param.snapshot = 10000
    param.snapshot_prefix = prefix

    # Train on the GPU.
    param.solver_mode = caffe_pb2.SolverParameter.GPU

    prototxt_text = str(param)
    return prototxt_text
def standard_solver(train_net, test_net, prefix, solver_type='SGD',
                    weight_decay=0.001, base_lr=0.01, gamma=0.1,
                    stepsize=100, test_iter=100, test_interval=1000,
                    max_iter=1e5, iter_size=1, snapshot=1000, display=1,
                    random_seed=0, debug_info=False, create_prototxt=True,
                    save_path=None):
    """Build a step-policy solver definition, optionally as a prototxt.

    Args:
        train_net: Path of the training net prototxt.
        test_net: Path of the test net prototxt.
        prefix: Snapshot prefix.
        solver_type: 'SGD' or 'ADAM'; any other value leaves the solver
            type unset.
        weight_decay, base_lr, gamma, stepsize, test_iter, test_interval,
            iter_size, snapshot, display, random_seed, debug_info: Stored
            in the solver fields of the same name.
        max_iter: Number of training iterations; converted to ``int``
            because the default is written as a float.
        create_prototxt: When true, the result is passed through
            ``get_prototxt`` together with ``save_path``.
        save_path: Forwarded to ``get_prototxt``.

    Returns:
        The ``SolverParameter``, or whatever ``get_prototxt`` returns when
        ``create_prototxt`` is true.
    """
    solver = caffe_pb2.SolverParameter()

    solver.train_net = train_net
    solver.test_net.extend([test_net])
    solver.test_iter.extend([test_iter])
    solver.test_interval = test_interval

    solver.base_lr = base_lr
    solver.lr_policy = 'step'  # "fixed"
    solver.gamma = gamma
    solver.stepsize = stepsize

    solver.display = display
    # max_iter is an integer field; the float default (1e5) would be
    # rejected by the protobuf setter without this conversion.
    solver.max_iter = int(max_iter)
    solver.iter_size = iter_size
    solver.snapshot = snapshot
    solver.snapshot_prefix = prefix
    solver.random_seed = random_seed

    solver.solver_mode = caffe_pb2.SolverParameter.GPU
    # Compare by value: 'is' tests object identity and only matched
    # string arguments by accident of interning.
    if solver_type == 'SGD':
        solver.solver_type = caffe_pb2.SolverParameter.SGD
    elif solver_type == 'ADAM':
        solver.solver_type = caffe_pb2.SolverParameter.ADAM
    solver.momentum = 0.9
    solver.momentum2 = 0.999

    solver.weight_decay = weight_decay
    solver.debug_info = debug_info

    if create_prototxt:
        solver = get_prototxt(solver, save_path)
    return solver