def GenerateSolverPrototxt(self):
    '''Generates the solver prototxt file for training in Caffe.'''

    solver = caffe_pb2.SolverParameter()

    # net files
    solver.train_net = self.caffeTrainModelFileName
    solver.test_net.append(self.caffeTestModelFileName)

    # solver
    solver.type = "SGD"
    solver.solver_mode = caffe_pb2.SolverParameter.GPU

    # intervals
    solver.test_iter.append(100)
    solver.test_interval = 100000
    solver.snapshot = 3000
    solver.snapshot_prefix = self.caffeWeightsFilePrefix[:-6]

    # learning rate
    solver.lr_policy = "step"
    solver.base_lr = 0.00025
    solver.gamma = 0.5
    solver.stepsize = 1000
    solver.display = 100
    solver.max_iter = 450000
    solver.momentum = 0.9
    solver.weight_decay = 0.0005

    # write to file
    with open(self.caffeSolverFileName, 'w') as solverFile:
        solverFile.write(str(solver))
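The file written above is plain protobuf text format, so it can be parsed straight back into a SolverParameter. A minimal round-trip sketch, assuming protobuf's text_format module and a hypothetical solver.prototxt path:

from caffe.proto import caffe_pb2
from google.protobuf import text_format

solver = caffe_pb2.SolverParameter()
with open('solver.prototxt') as f:  # hypothetical path; substitute caffeSolverFileName
    text_format.Merge(f.read(), solver)
print(solver.base_lr, solver.lr_policy)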
Example #2
def make_solver(niter=20000, lr=0.1):
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'examples/python_stoch_dep/residual_train.prototxt'
    s.test_net.append('examples/python_stoch_dep/residual_test.prototxt')
    s.test_interval = 10000
    s.test_iter.append(100)

    s.max_iter = niter
    s.type = 'Nesterov'

    s.base_lr = lr
    s.momentum = 0.9
    s.weight_decay = 1e-4

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    solver_path = 'examples/resnet_cifar/solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
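For reference, a small sketch (my own illustration, not Caffe code) of what the 'multistep' policy above computes: the learning rate is multiplied by gamma each time training passes one of the stepvalues.

def multistep_lr(base_lr, gamma, stepvalues, it):
    # Decay once per stepvalue already passed.
    return base_lr * gamma ** sum(1 for sv in stepvalues if it >= sv)

# With niter=20000: lr is 0.1 before iteration 10000, then 0.01, then 0.001.
print(multistep_lr(0.1, 0.1, [10000, 15000], 12000))  # ~0.01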
Example #3
def write_solver_prototxt(template,
                          train_prototxt,
                          maxiter,
                          path='../prototxt/solver/redeye/'):
    model_name, proto_name = [], []
    for prototxt in train_prototxt:
        base = os.path.basename(prototxt).split('.')[0]
        solver = caffe_pb2.SolverParameter()
        with open(template) as template_file:
            proto.Merge(template_file.read(), solver)
        solver.snapshot_prefix = os.path.join(
            '../models/googlenet/snapshots/', base)
        solver.max_iter = maxiter
        solver.snapshot = maxiter
        model_name.append(os.path.join(
            '../models/googlenet/snapshots/',
            base + "_iter_" + str(maxiter) + ".caffemodel"))
        solver.net = prototxt
        solver.test_interval = 50000
        new_solver = proto.MessageToString(solver)
        file_name = os.path.join(path, 'goog_solver_' + base + '.prototxt')
        proto_name.append(file_name)
        with open(file_name, 'w') as new_proto:
            new_proto.write(new_solver)
    return proto_name, model_name
Example #4
    def __init__(self, net_name, cfg, net_param):
        self._cfg = cfg

        self._solver = caffe_pb2.SolverParameter()
        self._solver.net_param.MergeFrom(net_param)

        self._solver.iter_size = cfg.IMS_PER_BATCH

        self._solver.base_lr = cfg.BASE_LR
        self._solver.lr_policy = cfg.LR_POLICY.TYPE

        if cfg.LR_POLICY.TYPE == "step":
            self._solver.gamma = cfg.LR_POLICY.GAMMA
        elif cfg.LR_POLICY.TYPE == "multistep":
            self._solver.stepvalue.extend(cfg.LR_POLICY.STEPS)
            self._solver.gamma = cfg.LR_POLICY.GAMMA
        elif cfg.LR_POLICY.TYPE == 'fixed':
            pass
        else:
            raise NotImplementedError(cfg.LR_POLICY.TYPE)

        self._solver.momentum = cfg.MOMENTUM
        self._solver.weight_decay = cfg.WEIGHT_DECAY
        self._solver.display = cfg.DISPLAY.PERIOD
        self._solver.average_loss = cfg.DISPLAY.AVERAGE_LOSS
        self._solver.snapshot = 0
        self._solver.snapshot_prefix = net_name

        self._solver_file = mkstemp()

        print('Created solver path:', self._solver_file[1])
        with open(self._solver_file[1], 'w') as f:
            f.write(str(self._solver))
Example #5
    def create_caffe_solver(self, snapshot_prefix):
        s = caffe_pb2.SolverParameter()

        s.train_net = self.train_proto
        if self.test_proto is not None:
            s.test_net.append(self.test_proto)
            # Test every 'test_interval' iterations.
            s.test_interval = self.pars['test_interval']
            # Batch size to test. IMPORTANT: test_iter * the test net's batch_size should equal the test set size.
            s.test_iter.append(self.pars['test_iter'])

        # Learning rate.
        s.base_lr = self.pars['base_learning_rate']
        s.lr_policy = 'step'
        s.gamma = 0.1  # CJB: what's this? Should we create a hyperparameter for it?
        s.stepsize = self.pars['step_size']
        s.max_iter = self.pars['max_iter']
        s.display = self.pars['train_interval']
        s.momentum = self.pars['learning_momentum']
        s.weight_decay = self.pars['weight_decay']
        s.snapshot = self.pars['snapshot']
        s.snapshot_prefix = snapshot_prefix
        s.solver_mode = caffe_pb2.SolverParameter.GPU
        s.random_seed = 333

        return s
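To make the test_iter constraint concrete (hypothetical numbers): covering a 10,000-image test set with a test-net batch size of 50 needs test_iter = 200, rounding up when the batch size does not divide the set evenly.

import math

num_test_samples = 10000  # hypothetical test-set size
test_batch_size = 50      # batch_size in the test prototxt
test_iter = int(math.ceil(float(num_test_samples) / test_batch_size))  # 200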
Example #6
def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE
    s.type = 'SGD'
    s.display = 5
    s.base_lr = 1e-1
    s.lr_policy = "step"
    s.gamma = 0.5
    s.momentum = 0.9
    s.stepsize = 10000
    s.max_iter = maxIter
    s.snapshot = 5000
    snapshot_prefix = join(dirname(__file__), 'model')
    if not isdir(snapshot_prefix):
        os.makedirs(snapshot_prefix)
    s.snapshot_prefix = join(snapshot_prefix,
                             args.data + '-Ratio' + str(args.ratio))
    s.train_net = join(
        tmp_dir, args.data + '-train-ratio' + str(args.ratio) + '.prototxt')
    s.test_net.append(
        join(tmp_dir,
             args.data + '-test-ratio' + str(args.ratio) + '.prototxt'))
    s.test_interval = maxIter + 1  # will test manually
    s.test_iter.append(test_iter)
    s.test_initialization = False
    # s.debug_info = True
    return s
Example #7
def get_solver_pt(train_data_file):
    solver_config = cfg.SOLVER
    max_epoch = solver_config.max_epoch
    with open(train_data_file, 'r') as f:
        train_data_num = len(f.readlines())
    batch_size = cfg.TRAIN.IMS_PER_BATCH
    max_iter = int(max_epoch * train_data_num / batch_size)
    stepsize = int(solver_config.step_epoch * train_data_num / batch_size)

    solver_param = {
        # Train parameters
        'train_net': "train.prototxt",
        'base_lr': solver_config.base_lr,
        'weight_decay': 0.0005,
        'lr_policy': "step",
        'stepsize': stepsize,
        'gamma': 0.1,
        'momentum': 0.9,
        'iter_size': 1,
        'max_iter': max_iter,
        'snapshot': solver_config.snapshot,
        'display': 20,
        'average_loss': 10,
        'type': "SGD",
        'snapshot_prefix': "models_solverstates/",
        'snapshot_after_train': True,
    }
    solver = caffe_pb2.SolverParameter(**solver_param)

    with open("solver.prototxt", 'w') as f:
        f.write(str(solver))
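To make the epoch-to-iteration arithmetic above concrete (all numbers hypothetical):

train_data_num = 8000  # lines in train_data_file
batch_size = 2         # cfg.TRAIN.IMS_PER_BATCH
max_epoch = 14
max_iter = int(max_epoch * train_data_num / batch_size)  # 56000 weight updates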
Example #8
    def __init__(self,
                 solver_prototxt,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper."""
        self.output_dir = output_dir

        # print "ROIDB: {}".format(str(roidb))

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG
                and cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                    rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)
Example #9
File: train.py  Project: nnop/mtnet
    def __init__(self,
                 solver_prototxt,
                 roidb,
                 output_dir,
                 pretrained_model=None,
                 snapshot=None):
        """Initialize the SolverWrapper."""
        self.output_dir = output_dir

        assert (cfg.TRAIN.HAS_RPN
                and cfg.TRAIN.BBOX_REG
                and cfg.TRAIN.BBOX_NORMALIZE_TARGETS
                and cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED)

        self.solver = caffe.SGDSolver(solver_prototxt)

        if snapshot is not None:
            # restore from snapshot
            print('Restoring from {:s}'.format(snapshot))
            self.solver.restore(snapshot)
        elif pretrained_model is not None:
            # copy pretrained weights
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)
Example #10
def Solver(model_name,
           trainModel,
           testModel,
           total_samples,
           train_batch_size,
           epochs,
           outputDir=None):
    s = caffe_pb2.SolverParameter()
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    s.random_seed = 0xCAFFE

    # Specify locations of the train and (maybe) test networks.
    s.train_net = trainModel
    #s.max_iter = 50     # no. of times to update the net (training iterations)

    s.test_net.append(testModel)
    s.test_interval = 500  # Test after every 500 training iterations.
    s.test_iter.append(300)  # Test on 300 batches each time we test.

    # EDIT HERE to try different solvers
    # solver types include "SGD", "Adam", and "Nesterov" among others.
    s.type = "Adam"

    # Set the initial learning rate for SGD.
    s.base_lr = 0.00001  # EDIT HERE to try different learning rates; current best so far: 0.001
    # Set momentum to accelerate learning by
    # taking weighted average of current and previous updates.
    s.momentum = 0.90
    # Set weight decay to regularize and prevent overfitting
    s.weight_decay = 5e-4

    # Set `lr_policy` to define how the learning rate changes during training.
    # This is the same policy as our default LeNet.
    s.lr_policy = 'inv'
    s.gamma = 0.0001  # current best so far: 0.00001
    s.power = 0.75
    # EDIT HERE to try the fixed rate (and compare with adaptive solvers)
    # `fixed` is the simplest policy that keeps the learning rate constant.
    #s.lr_policy = 'fixed'

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained.
    # With this setting we snapshot once, just before the final training iteration.
    s.snapshot = epochs * (total_samples // train_batch_size) - 1
    s.snapshot_prefix = model_name

    # Train on the GPU
    #s.solver_mode = caffe_pb2.SolverParameter.CPU

    # Write the solver to a temporary file and return its filename.
    output_dir = os.getcwd()
    if outputDir:
        output_dir = outputDir
    solver_path = os.path.join(output_dir, 'solver.prototxt')
    with open(solver_path, 'w') as f:
        f.write(str(s))

    return solver_path
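A brief usage sketch for the function above (all values hypothetical): with 60,000 training samples, a batch size of 64, and 10 epochs, the single snapshot lands at the final update.

solver_path = Solver(model_name='lenet',
                     trainModel='train.prototxt',
                     testModel='test.prototxt',
                     total_samples=60000,
                     train_batch_size=64,
                     epochs=10)
# snapshot = 10 * (60000 // 64) - 1 = 9369: one snapshot, at the last iteration.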
Example #11
def make_solver(batch_size, epoch_mult, train_sam, test_sam):
    epoch = int(train_sam / batch_size) + 1
    max_iter = epoch * epoch_mult
    test_iter = int(test_sam / batch_size) + 1
    test_interval = epoch

    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = train_dir
    s.test_net.append(test_dir)
    s.test_interval = test_interval
    s.test_iter.append(test_iter)

    s.max_iter = max_iter
    s.type = 'Nesterov'
    s.display = int(epoch / 5)

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 0.0005

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.stepvalue.append(int(0.9 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    s.snapshot = 5000
    s.snapshot_prefix = './snap/cifar100_dense40_v2'
    print(s)
    with open(solver_dir, 'w') as f:
        f.write(str(s))
Example #12
def make_solver(batch_size, train_sam, test_sam, epoch_mult):
    epoch = int(train_sam / batch_size) + 1
    max_iter = epoch * epoch_mult
    test_iter = int(test_sam / batch_size) + 1
    test_interval = epoch

    s = caffe_pb2.SolverParameter()

    s.train_net = train_dir
    s.test_net.append(test_dir)
    s.test_interval = test_interval
    s.test_iter.append(test_iter)

    s.max_iter = max_iter
    s.type = 'Nesterov'
    s.display = int(epoch / 5)
    # oscillation if the learning rate is too large, slow convergence if too small
    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 0.0001

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.stepvalue.append(int(0.9 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    s.snapshot = 5000
    s.snapshot_prefix = snapshot_prefix
    print(s)
    with open(solver_dir, 'w') as f:
        f.write(str(s))
Example #13
    def solver(self, params):
        # set parameters of the solver
        s = caffe_pb2.SolverParameter()

        # Specify locations of the network
        s.net = params['path2train_net']

        # The number of iterations over which to average the gradient.
        # s.iter_size = 1

        s.max_iter = 1

        # use SGD algorithm
        s.type = 'SGD'

        # Set learning rate policy
        s.lr_policy = 'step'
        s.gamma = 0.5
        s.stepsize = 50
        s.base_lr = 0.0001

        # Set SGD hyperparameters
        s.momentum = 0.9
        s.weight_decay = 5e-4

        # Train on the CPU or GPU
        if params['useGPU']:
            s.solver_mode = caffe_pb2.SolverParameter.GPU
            s.device_id = params['DEVICE_ID']
        else:
            s.solver_mode = caffe_pb2.SolverParameter.CPU

        with open(params['path2solver'], 'w') as f:
            f.write(str(s))
Example #14
def make_solver(train_net_path, solver_path, snapshot_path, opt, dataset_size):
    s = caffe_pb2.SolverParameter()

    # specify locations of the train and test networks.
    s.train_net = train_net_path

    s.max_iter = int(opt.num_epoch * dataset_size / opt.train_batch_size)

    # specify parameters for learning policy
    s.base_lr = opt.base_lr
    s.lr_policy = opt.lr_policy
    if s.lr_policy == 'step':
        s.gamma = opt.gamma
        s.stepsize = opt.stepsize

    s.type = "Adam"
    s.momentum = 0.9
    s.weight_decay = 5e-4
    s.iter_size = 1  # no gradient accumulation

    # specify other helper parameters
    s.display = 20
    s.snapshot = 2500
    s.snapshot_prefix = snapshot_path
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    print "Writing prototxt file for solver..."
    with open(solver_path, 'w') as f:
        f.write(str(s))
Example #15
def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train_densenet.prototxt'
    s.test_net.append('test_densenet.prototxt')
    s.test_interval = 800
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 1

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-4

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
Example #16
def create_solver_proto(train_net,
                        test_net,
                        lr,
                        prefix,
                        test_iter=300,
                        test_interval=10000,
                        max_iter=2000000,
                        snapshot=100000,
                        gpu=0,
                        debug_info=False):
    solver = PB.SolverParameter()
    solver.train_net = train_net
    solver.test_net.extend([test_net])
    solver.test_iter.extend([test_iter])
    solver.test_interval = test_interval
    solver.display = 1000
    solver.max_iter = int(max_iter)
    solver.snapshot = snapshot
    solver.snapshot_prefix = prefix
    solver.snapshot_format = PB.SolverParameter.HDF5
    solver.solver_mode = PB.SolverParameter.GPU
    solver.solver_type = PB.SolverParameter.ADAM
    solver.base_lr = lr
    solver.lr_policy = "fixed"
    solver.average_loss = 10000
    solver.momentum = 0.9
    solver.momentum2 = 0.999
    solver.delta = 1e-08
    solver.debug_info = debug_info
    return solver
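A brief usage sketch (paths hypothetical; PB is presumably an alias for caffe.proto.caffe_pb2): the returned message still has to be serialized, e.g. with protobuf's text_format.

from google.protobuf import text_format

solver = create_solver_proto('train.prototxt', 'test.prototxt',
                             lr=1e-4, prefix='snapshots/model')
with open('solver.prototxt', 'w') as f:
    f.write(text_format.MessageToString(solver))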
Example #17
def create_solver_step(net_name):
    s = caffe_pb2.SolverParameter()
    s.net = "{0}_train_test.prototxt".format(net_name)

    s.test_interval = 500
    s.test_iter.append(100)

    s.base_lr = 0.01
    s.momentum = 0.9
    s.weight_decay = 0.0005

    s.lr_policy = "step"
    s.gamma = 0.96
    s.stepsize = 5000

    s.display = 100

    s.max_iter = 20000

    s.snapshot = 1000

    s.snapshot_prefix = "{0}{1}".format(snapshot_dir, net_name)

    s.type = "SGD"

    s.solver_mode = caffe_pb2.SolverParameter.GPU

    filename = "{0}_solver.prototxt".format(net_name)
    with open(filename, "w") as f:
        f.write(str(s))
Example #18
def make_solver(snapshot_dir,
                train_net_path,
                test_net_path,
                base_lr=0.0001,
                boost=1):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path != '':
        s.test_initialization = False
        s.test_net.append(test_net_path)
        # Don't use caffe testing, we write our own tests in training script
        s.test_interval = 99999999
        s.test_iter.append(100000000)

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor, without
    # affecting memory utilization.
    s.iter_size = boost

    s.solver_type = caffe_pb2.SolverParameter.SGD
    s.base_lr = base_lr
    s.momentum = 0.9
    s.weight_decay = 5e-4
    s.display = 100  # display training loss every 100 iters
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    s.debug_info = False
    # Write the solver to a temporary file and return its filename.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(str(s))
        return f.name
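To make the boost comment concrete (hypothetical numbers): with a prototxt batch size of 16 and boost (iter_size) of 4, each weight update averages gradients over an effective batch of 64 samples.

prototxt_batch_size = 16  # batch_size in the train prototxt (hypothetical)
boost = 4                 # iter_size passed to make_solver
effective_batch = prototxt_batch_size * boost  # 64 samples per weight update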
Example #19
    def __init__(self,
                 solver_prototxt,
                 roidb,
                 output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper."""
        self.output_dir = output_dir

        print('Computing bounding-box regression targets...')
        if cfg.TRAIN.BBOX_REG:
            if cfg.IS_RPN:
                self.bbox_means, self.bbox_stds = gdl_roidb.add_bbox_regression_targets(
                    roidb)
            else:
                self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(
                    roidb)
        print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)
Example #20
    def __init__(self, config):
        # "Use Caffe as self."
        # caffe constructor: network_file, phase, level, stages, weight, engine
        topology_path = os.path.expanduser(str(config.model.topology))

        if (hasattr(config.backend,
                    'engine')) and (config.backend.engine != "default"):
            engine = str(config.backend.engine)
        else:
            engine = 'CAFFE'

        if hasattr(config.model, 'weight'):
            logger.debug("loading weights from: {}".format(
                config.model.weight))
            weight_path = os.path.expanduser(str(config.model.weight))
        else:
            weight_path = None

        if config.model.type == "test":
            phase = caffe.TEST
        else:
            phase = caffe.TRAIN

        caffe.set_mode_cpu()
        caffe.set_random_seed(0)

        if hasattr(config,
                   'batch_size') and config.model.prototxt_type == 'train_val':
            topology_path = self.reshape_in_train_val(
                topology_path, config.batch_size, config.out_dir)

        if config.model.prototxt_type == 'solver':
            logger.debug("using engine: {}".format(engine))
            modified_solver_path = os.path.join(str(config.out_dir),
                                                'modified_solver.prototxt')
            if not os.path.exists(os.path.dirname(modified_solver_path)):
                os.makedirs(os.path.dirname(modified_solver_path))
            solver_params = caffe_pb2.SolverParameter()
            with open(config.model.topology) as f:
                s = f.read()
                txtf.Merge(s, solver_params)
            solver_params.engine = engine
            if hasattr(config, 'batch_size'):
                solver_params.net = self.reshape_in_train_val(
                    str(solver_params.net), config.batch_size, config.out_dir)
            with open(modified_solver_path, 'w') as fp:
                fp.write(str(solver_params))
            self.solver = caffe.get_solver(modified_solver_path)
            self.net = self.solver.net
            if weight_path is not None:
                self.net.copy_from(weight_path)
        else:
            try:
                logger.debug("using engine: {}".format(engine))
                self.net = caffe.Net(topology_path,
                                     phase,
                                     weights=weight_path,
                                     engine=engine)
            except Exception:  # this Caffe build may not accept an engine argument
                self.net = caffe.Net(topology_path, phase, weights=weight_path)
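The read-modify-write pattern used for the solver branch above reduces to a minimal sketch (paths hypothetical; fields from caffe_pb2.SolverParameter):

from caffe.proto import caffe_pb2
from google.protobuf import text_format

params = caffe_pb2.SolverParameter()
with open('solver.prototxt') as f:
    text_format.Merge(f.read(), params)
params.base_lr *= 0.1  # e.g. lower the learning rate before resuming
with open('modified_solver.prototxt', 'w') as fp:
    fp.write(str(params))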
Example #21
def solver_deploy(train_net_path, test_net_path=None, solver_path=None, base_lr=0.001):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and (maybe) test networks.
    s.train_net = train_net_path

    if test_net_path is not None:
        s.test_net.append(test_net_path)
        s.test_interval = 10000  # Test after every 10,000 training iterations.
        s.test_iter.append(16)  # Test on 16 batches each time we test.

    # The number of iterations over which to average the gradient
    # Effectively boosts the training batch size by the given factor, without
    # affecting memory utilization
    s.iter_size = 8

    s.max_iter = 10000  # number of times to update the net (training iterations)

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Set the initial learning rate for SGD
    s.base_lr = base_lr

    # Set 'lr_policy' to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor 'gamma'
    # every 'stepsize' iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 2000

    # Set other SGD hyperparameters. Setting a nonzero 'momentum' takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help
    # prevent the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss and accuracy every 1000 iterations
    s.display = 1000

    # Snapshots are files used to store networks we've trained. Here, we'll
    # snapshot every 1K iterations -- ten times during training.
    s.snapshot = 1000
    s.snapshot_prefix = '/home/xingyunyang/Documents/workspace/Multi-Attention-CNN/experiment/train_bird_fix_cls'

    #  Train on the GPU. 
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # Write the solver to a temporary file and return its filename
    if solver_path is None:
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            f.write(str(s))
            return f.name
    else:
        with open(solver_path, 'w') as f:
            f.write(str(s))
            return solver_path
Example #22
def solver(train_net_path, test_net_path=None, base_lr=0.001):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path is not None:
        s.test_net.append(test_net_path)
        s.test_interval = 1000  # Test after every 1000 training iterations.
        s.test_iter.append(100)  # Test on 100 batches each time we test.

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor, without
    # affecting memory utilization.
    s.iter_size = 1

    s.max_iter = 100000  # # of times to update the net (training iterations)

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Set the initial learning rate for SGD.
    s.base_lr = base_lr

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 20000

    # Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained.  Here, we'll
    # snapshot every 10K iterations -- ten times during training.
    s.snapshot = 10000
    s.snapshot_prefix = caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'

    # Train on the GPU.  Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # Write the solver to a temporary file and return its filename.
    f = tempfile.NamedTemporaryFile(delete=False)
    filename = f.name
    f.close()

    print('filename: ' + filename)
    with open(filename, 'w') as f:
        f.write(str(s))
    return filename
Example #23
	def __init__(self, solver_prototxt, output_dir, imdb, pretrained_model=None):
		self.output_dir = output_dir
		self.solver = caffe.SGDSolver(solver_prototxt)
		self.solver_param = caffe_pb2.SolverParameter()
		self.imdb = imdb
		with open(solver_prototxt, 'rt') as f:
			pb2.text_format.Merge(f.read(), self.solver_param)  # TODO: is this necessary?
		self.solver.net.layers[0].prepare_imdb(imdb)   # TODO: to be implemented; sets the input blob of the network's first layer
Example #24
    def finetune(self, train_set_path, validation_set_path):
        # Setting the right paths for training (finetuning)
        self.__set_data_paths(train_set_path, validation_set_path)

        # Get useful values from solver file
        solver_config = caffe_pb2.SolverParameter()
        with open(self.SOLVER) as f:
            text_format.Merge(str(f.read()), solver_config)

        max_iter = solver_config.max_iter
        test_iter = solver_config.test_iter[0]  # 128 images on each batch/iteration
        test_interval = solver_config.test_interval

        # Create the actual solver
        solver = caffe.SGDSolver(self.SOLVER)
        solver.net.copy_from(self.PRETRAINED)

        train_loss = np.zeros(max_iter)
        test_loss = np.zeros(max_iter // test_interval)
        accuracies = np.zeros(max_iter // test_interval)
        test_i = 0

        try:
            for it in range(max_iter):
                solver.step(1)
                train_loss[it] = solver.net.blobs['loss'].data
                if it % 50 == 0:
                    print('Iteration %d, Finetune loss=%f' %
                          (it, train_loss[it]))

                if it % test_interval == 0:  # test net
                    test_loss_it = 0
                    test_accuracy = 0
                    for j in range(test_iter):
                        solver.test_nets[0].forward()

                        test_loss_it += solver.test_nets[0].blobs['loss'].data
                        test_accuracy += solver.test_nets[0].blobs[
                            'accuracy'].data

                    test_loss[test_i] = test_loss_it / test_iter
                    accuracies[test_i] = test_accuracy / test_iter
                    print('Iteration %d, Test loss=%f, Accuracy=%f' %
                          (it, test_loss[test_i], accuracies[test_i]))
                    test_i += 1

                    # save training stats
                    with open('train_state.npz', 'wb') as f:
                        np.savez(f,
                                 train_loss=train_loss,
                                 test_loss=test_loss,
                                 accuracy=accuracies)
        finally:
            with open('train_state.npz', 'wb') as f:
                np.savez(f,
                         train_loss=train_loss,
                         test_loss=test_loss,
                         accuracy=accuracies)
Example #25
    def __init__(self, solver_prototxt, output_dir, pretrained_model=None):
        self.output_dir = output_dir
        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('loading model from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)
        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            text_format.Merge(f.read(), self.solver_param)
Example #26
    def set_solver(self):
        '''
        Define the solver required by the model.

        Input parameters: none.
        Returns:
            s: the solver (SolverParameter) object, also written to disk.
        '''

        s = caffe_pb2.SolverParameter()

        # Set a seed for reproducible experiments:
        # this controls for randomization in training.
        s.random_seed = 0xCAFFE

        # Specify locations of the train and (maybe) test networks.
        s.train_net = self.train_prototxt_filename
        s.test_net.append(self.val_prototxt_filename)
        s.test_interval = 500  # Test after every 500 training iterations.
        s.test_iter.append(100)  # Test on 100 batches each time we test.

        s.max_iter = 10000  # no. of times to update the net (training iterations)

        # Set the initial learning rate for SGD.
        s.base_lr = 0.01  # EDIT HERE to try different learning rates
        # Set momentum to accelerate learning by
        # taking weighted average of current and previous updates.
        s.momentum = 0.9
        # Set weight decay to regularize and prevent overfitting
        s.weight_decay = 5e-4

        # Set `lr_policy` to define how the learning rate changes during training.
        # This is the same policy as our default LeNet.
        s.lr_policy = 'inv'
        s.gamma = 0.0001
        s.power = 0.75
        # EDIT HERE to try the fixed rate (and compare with adaptive solvers)
        # `fixed` is the simplest policy that keeps the learning rate constant.
        # s.lr_policy = 'fixed'

        # Display the current training loss and accuracy every 1000 iterations.
        s.display = 100

        # Snapshots are files used to store networks we've trained.
        # We'll snapshot every 1K iterations -- ten times during training.
        s.snapshot = 1000
        s.snapshot_prefix = './caffemodel/lenet'

        # Train on the GPU
        s.solver_mode = caffe_pb2.SolverParameter.GPU

        # Write the solver to the solver prototxt file and return the message.
        with open(self.solver_prototxt_filename, 'w') as f:
            f.write(str(s))

        return s
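As with the other policies, a small illustrative sketch (mine, not Caffe's code) of the 'inv' schedule used above:

def inv_lr(base_lr, gamma, power, it):
    # lr = base_lr * (1 + gamma * it) ^ (-power)
    return base_lr * (1.0 + gamma * it) ** (-power)

print(inv_lr(0.01, 0.0001, 0.75, 10000))  # ~0.0059 at iteration 10000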
Example #27
def create_solver(train_net_path, test_net_path=None, base_lr=0.001):

    from caffe.proto import caffe_pb2

    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    s.test_net.append(test_net_path)

    s.test_interval = 100000  # Test after every 100,000 training iterations.
    s.test_iter.append(10)  # Test on 10 batches each time we test.

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor, without
    # affecting memory utilization.
    s.iter_size = 1

    s.max_iter = 10000000  # # of times to update the net (training iterations)

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Set the initial learning rate for SGD.
    s.base_lr = base_lr

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 50000

    # Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9  # SGD: 0.9
    s.weight_decay = 0.0005  # VGG: 0.0001, AlexNet: 0.004

    # Display the current training loss and accuracy every 1,000,000 iterations
    # (effectively disabled here).
    s.display = 1000000

    # Snapshots are files used to store networks we've trained.  Here, we'll
    # snapshot every 1K iterations.
    s.snapshot = 1000
    s.snapshot_prefix = '../../../datasets/SocialMedia/models/CNNRegression/intagram_cities_CaffeNet_40'

    # Train on the GPU.  Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    with open('solver.prototxt', 'w') as f:
        f.write(str(s))
        return f.name
Example #28
    def __init__(self,
                 solver_prototxt,
                 roidb,
                 output_dir,
                 image_index,
                 image_cls,
                 pretrained_model=None):
        """Initialize the SolverWrapper."""
        self.image_cls = image_cls
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG
                and cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('computing bounding-box regression targets...')
            print('in lib/fast_rcnn/train.py -- __init__ func...')
            if roidb is not None:
                self.bbox_means, self.bbox_stds = \
                        rdl_roidb.add_bbox_regression_targets(roidb)
            else:
                # bbox reg from cache files
                self.bbox_means = cfg.TRAIN.BBOX_REG_NORMALIZE_MEANS
                self.bbox_stds = cfg.TRAIN.BBOX_REG_NORMALIZE_STDS
                assert (self.bbox_means
                        is not None), 'invalid bbox_means in SolverWrapper'
                assert (self.bbox_stds
                        is not None), 'invalid bbox_stds in SolverWrapper'

            print('computing bounding-box regression targets done...')
            print('in lib/fast_rcnn/train.py -- __init__ func of SolverWrapper class.')
            sleep(3)

        print "instance solver"
        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model weights from {:s}'
                  .format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        print()
        print("set image index, image cls and roidb")
        print("in lib/fast_rcnn/train.py ...")
        print()
        self.solver.net.layers[0].set_image_cls(image_cls)
        self.solver.net.layers[0].set_image_index(image_index)
        self.solver.net.layers[0].set_roidb(roidb)
        sleep(3)
Example #29
def solver(caffenet, prefix):
    s = caffe_pb2.SolverParameter()

    # Set a seed for reproducible experiments: this controls for randomization in training.
    #s.random_seed = 0xCAFFE

    # Specify locations of the train and (maybe) test networks.
    s.net = caffenet

    # Test after every 1000 training iterations.
    s.test_interval = 1000

    # Test on 1000 batches each time we test.
    s.test_iter.append(1000)

    # EDIT HERE to try different solvers: "SGD", "Adam", and "Nesterov" among others.
    #s.type = "SGD"

    # Set the initial learning rate for SGD. EDIT HERE to try different learning rates
    s.base_lr = 0.01

    # Set momentum to accelerate learning by taking weighted average of current and previous updates.
    s.momentum = 0.9

    # Set weight decay to regularize and prevent overfitting
    s.weight_decay = 5e-4

    # Set `lr_policy` to define how the learning rate changes during training.
    s.lr_policy = 'step'
    # EDIT HERE to try the fixed rate (and compare with adaptive solvers) `fixed` is the simplest policy that keeps the learning rate constant.
    # s.lr_policy = 'fixed'

    # drop the learning rate by a factor of 10 (i.e., multiply it by a factor of gamma = 0.1)
    s.gamma = 0.1

    # drop the learning rate every 100K iterations
    s.stepsize = 100000

    # no. of times to update the net (training iterations)
    s.max_iter = 450000

    # Display the current training loss and accuracy every 20 iterations.
    s.display = 20

    # Snapshots are files used to store networks we've trained. We'll snapshot every 10K iterations -- 45 times during training.
    s.snapshot = 10000

    # File path prefix for snapshotting model weights and solver state.
    # Note: this is relative to the invocation of the `caffe` utility, not the solver definition file.
    s.snapshot_prefix = prefix

    # Train on the GPU
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    return str(s)
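Since this function returns the solver as a string rather than writing a file, a short usage sketch (paths hypothetical; assumes pycaffe is importable and the net prototxt supplies the test phase the solver declares):

import caffe

solver_str = solver('train_val.prototxt', 'snapshots/caffenet')
with open('solver.prototxt', 'w') as f:
    f.write(solver_str)
sgd = caffe.get_solver('solver.prototxt')  # load it and train from here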
Example #30
def standard_solver(train_net,
                    test_net,
                    prefix,
                    solver_type='SGD',
                    weight_decay=0.001,
                    base_lr=0.01,
                    gamma=0.1,
                    stepsize=100,
                    test_iter=100,
                    test_interval=1000,
                    max_iter=1e5,
                    iter_size=1,
                    snapshot=1000,
                    display=1,
                    random_seed=0,
                    debug_info=False,
                    create_prototxt=True,
                    save_path=None):

    solver = caffe_pb2.SolverParameter()
    solver.train_net = train_net
    solver.test_net.extend([test_net])

    solver.test_iter.extend([test_iter])
    solver.test_interval = test_interval

    solver.base_lr = base_lr
    solver.lr_policy = 'step'  # "fixed"
    solver.gamma = gamma
    solver.stepsize = stepsize

    solver.display = display
    solver.max_iter = int(max_iter)
    solver.iter_size = iter_size
    solver.snapshot = snapshot
    solver.snapshot_prefix = prefix
    solver.random_seed = random_seed

    solver.solver_mode = caffe_pb2.SolverParameter.GPU
    if solver_type == 'SGD':
        solver.solver_type = caffe_pb2.SolverParameter.SGD
    elif solver_type == 'ADAM':
        solver.solver_type = caffe_pb2.SolverParameter.ADAM
    solver.momentum = 0.9
    solver.momentum2 = 0.999

    solver.weight_decay = weight_decay

    solver.debug_info = debug_info

    if create_prototxt:
        solver = get_prototxt(solver, save_path)

    return solver