def setUp(self):
    self.num_output = 13
    net_f = simple_net_file(self.num_output)
    f = tempfile.NamedTemporaryFile(mode="w+", delete=False)
    f.write(
        """net: '""" + net_f + """'
        test_iter: 10 test_interval: 10 base_lr: 0.01 momentum: 0.9
        weight_decay: 0.0005 lr_policy: 'inv' gamma: 0.0001 power: 0.75
        display: 100 max_iter: 100 snapshot_after_train: false
        snapshot_prefix: "models"
        """
    )
    f.close()
    self.solver = caffe.SGDSolver(f.name)
    # also make sure get_solver runs
    caffe.get_solver(f.name)
    caffe.set_mode_cpu()
    # fill in valid labels
    self.solver.net.blobs["label"].data[...] = np.random.randint(
        self.num_output, size=self.solver.net.blobs["label"].data.shape
    )
    self.solver.test_nets[0].blobs["label"].data[...] = np.random.randint(
        self.num_output, size=self.solver.test_nets[0].blobs["label"].data.shape
    )
    os.remove(f.name)
    os.remove(net_f)
def setUp(self):
    self.num_output = 13
    net_f = simple_net_file(self.num_output)
    f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    net_f_mod = net_f
    if os.name == 'nt':
        net_f_mod = net_f_mod.replace("\\", "/")
    f.write("""net: '""" + net_f_mod + """'
    test_iter: 10 test_interval: 10 base_lr: 0.01 momentum: 0.9
    weight_decay: 0.0005 lr_policy: 'inv' gamma: 0.0001 power: 0.75
    display: 100 max_iter: 100 snapshot_after_train: false
    snapshot_prefix: "model"
    """)
    f.close()
    self.solver = caffe.SGDSolver(f.name)
    # also make sure get_solver runs
    caffe.get_solver(f.name)
    caffe.set_mode_cpu()
    # fill in valid labels
    self.solver.net.blobs['label'].data[...] = \
        np.random.randint(self.num_output,
                          size=self.solver.net.blobs['label'].data.shape)
    self.solver.test_nets[0].blobs['label'].data[...] = \
        np.random.randint(self.num_output,
                          size=self.solver.test_nets[0].blobs['label'].data.shape)
    os.remove(f.name)
    os.remove(net_f)
def init_solver(self):
    """ Helper method to initialize the solver. """
    solver_param = SolverParameter()
    solver_param.snapshot_prefix = self._hyperparams['weights_file_prefix']
    solver_param.display = 0  # Don't display anything.
    solver_param.base_lr = self._hyperparams['lr']
    solver_param.lr_policy = self._hyperparams['lr_policy']
    solver_param.momentum = self._hyperparams['momentum']
    solver_param.weight_decay = self._hyperparams['weight_decay']
    solver_param.type = self._hyperparams['solver_type']
    solver_param.random_seed = self._hyperparams['random_seed']

    # Pass in net parameter either by filename or protostring.
    if isinstance(self._hyperparams['network_model'], basestring):
        self.solver = caffe.get_solver(self._hyperparams['network_model'])
    else:
        network_arch_params = self._hyperparams['network_arch_params']
        network_arch_params['dim_input'] = self._dO
        network_arch_params['dim_output'] = self._dU
        network_arch_params['batch_size'] = self.batch_size
        network_arch_params['phase'] = TRAIN
        solver_param.train_net_param.CopyFrom(
            self._hyperparams['network_model'](**network_arch_params)
        )

        # For running forward in python.
        network_arch_params['batch_size'] = 1
        network_arch_params['phase'] = TEST
        solver_param.test_net_param.add().CopyFrom(
            self._hyperparams['network_model'](**network_arch_params)
        )

        # For running forward on the robot.
        network_arch_params['batch_size'] = 1
        network_arch_params['phase'] = 'deploy'
        solver_param.test_net_param.add().CopyFrom(
            self._hyperparams['network_model'](**network_arch_params)
        )

        # These are required by Caffe to be set, but not used.
        solver_param.test_iter.append(1)
        solver_param.test_iter.append(1)
        solver_param.test_interval = 1000000

        f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        f.write(MessageToString(solver_param))
        f.close()
        self.solver = caffe.get_solver(f.name)
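# A minimal standalone sketch of the pattern used above: build a
# SolverParameter in Python, serialize it to a temporary file, and hand the
# file to caffe.get_solver(). The net path 'train.prototxt' and the
# hyperparameter values are placeholders, not taken from the snippet above.
import tempfile

import caffe
from caffe.proto.caffe_pb2 import SolverParameter
from google.protobuf.text_format import MessageToString


def solver_from_param():
    solver_param = SolverParameter()
    solver_param.train_net = 'train.prototxt'  # hypothetical net definition
    solver_param.base_lr = 0.01
    solver_param.momentum = 0.9
    solver_param.lr_policy = 'fixed'
    solver_param.display = 0
    f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    f.write(MessageToString(solver_param))
    f.close()
    return caffe.get_solver(f.name)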
def __get_solver(self, solver_proto_path):
    '''
    Returns a caffe.SGDSolver for the given protofile path,
    ignoring Caffe command line chatter if debug mode is not set to True.
    '''
    if not self.debug_mode:  # disable Caffe init chatter when not in debug
        with Suppressor():
            # return caffe.SGDSolver(solver_proto_path)
            return caffe.get_solver(solver_proto_path)
    else:
        # return caffe.SGDSolver(solver_proto_path)
        return caffe.get_solver(solver_proto_path)
def train_and_test(niter=200, learn_all_pre=False, learn_all_scra=False):
    global typeNet

    typeNet = 'pretrained_net'
    # Initialize chartnet with pretrained ImageNet weights.
    chart_solver = caffe.get_solver(
        solver(chart_net(train=True, learn_all=learn_all_pre)))
    chart_solver.net.copy_from(weights_pretrained)

    typeNet = 'scratch_net'
    # Create chartnet that is initialized randomly.
    scratch_chart_solver = caffe.get_solver(
        solver(chart_net(train=True, learn_all=learn_all_scra)))

    print 'Running solvers for %d iterations...' % niter
    solvers = [('pretrained', chart_solver),
               ('scratch', scratch_chart_solver)]
    loss, acc, weights = run_solvers(niter, solvers, disp_interval=50)
    print 'Done.'

    train_loss, scratch_train_loss = loss['pretrained'], loss['scratch']
    train_acc, scratch_train_acc = acc['pretrained'], acc['scratch']
    chart_weights, scratch_chart_weights = weights['pretrained'], weights['scratch']

    # Delete solvers to save memory.
    del chart_solver, scratch_chart_solver, solvers

    plt.figure()
    plt.plot(np.vstack([train_loss, scratch_train_loss]).T)
    plt.xlabel('Iteration #')
    plt.ylabel('Loss')

    plt.figure()
    plt.plot(np.vstack([train_acc, scratch_train_acc]).T)
    plt.xlabel('Iteration #')
    plt.ylabel('Accuracy')

    typeNet = 'pretrained_net'
    test_net, accuracy = eval_chart_net(chart_weights)
    print 'Accuracy, trained from ImageNet initialization: %3.1f%%' % (100 * accuracy, )
    typeNet = 'scratch_net'
    scratch_test_net, scratch_accuracy = eval_chart_net(scratch_chart_weights)
    print 'Accuracy, trained from random initialization: %3.1f%%' % (100 * scratch_accuracy, )

    classify_image(test_net, '00024.png')
    classify_image(scratch_test_net, '00024.png')
    return chart_weights, scratch_chart_weights
def train_network(solver_file, output_weights, batch_size, num_iterations,
                  stop_loss_thres=0.01, use_gpu=True):
    if use_gpu:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    solver = caffe.get_solver(solver_file)
    solver.net.blobs['data'].reshape(batch_size, 1, 64, 64)
    solver.net.blobs['target'].reshape(batch_size, 1, 64, 64)
    solver.net.reshape()

    for i in range(num_iterations):
        data, target = get_data(batch_size, numclasses=3)
        solver.net.blobs['data'].data[...] = data
        solver.net.blobs['target'].data[...] = target
        solver.step(1)

        output = solver.net.blobs['seg'].data[...]
        loss = solver.net.blobs['loss'].data
        if loss < stop_loss_thres:
            solver.net.save(output_weights)
            fig, sub = plt.subplots(ncols=3, figsize=(15, 5))
            sub[0].set_title('Input')
            sub[0].imshow(data[0, 0, :, :])
            sub[1].set_title('Ground Truth')
            sub[1].imshow(target[0, 0, :, :])
            sub[2].set_title('Segmentation')
            sub[2].imshow(np.argmax(output[0, :, :, :], axis=0))
            plt.show()
            break
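# Hedged usage sketch for train_network() above; the solver and output
# weight paths are placeholders, not files from the original project.
if __name__ == '__main__':
    train_network('unet_solver.prototxt', 'unet_weights.caffemodel',
                  batch_size=4, num_iterations=10000,
                  stop_loss_thres=0.01, use_gpu=True)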
def simple_train_example():
    if 'posix' == os.name:
        caffe_home_dir_path = '/home/sangwook/lib_repo/cpp/caffe_github'
    else:
        caffe_home_dir_path = 'D:/lib_repo/cpp/rnd/caffe_github'
        #caffe_home_dir_path = 'D:/lib_repo/cpp/caffe_github'
    solver_filepath = caffe_home_dir_path + '/models/bvlc_reference_caffenet/solver.prototxt'

    solver = caffe.get_solver(solver_filepath)
    #solver = caffe.SGDSolver(solver_filepath)
    #solver = caffe.AdaDelta(solver_filepath)
    #solver = caffe.AdaGrad(solver_filepath)
    #solver = caffe.Adam(solver_filepath)
    #solver = caffe.Nesterov(solver_filepath)
    #solver = caffe.RMSprop(solver_filepath)

    # Now it's time to check that everything works and to fill the layers with
    # a forward propagation through the net (computes net.blobs[k].data from
    # the input layer up to the loss layer).

    # Trains the net.
    solver.net.forward()
    # Tests the net (there can be more than one test net).
    solver.test_nets[0].forward()

    # Computes the gradients (net.blobs[k].diff and net.params[k][j].diff
    # from the loss layer back to the input layer).
    solver.net.backward()

    # Launches one step of gradient descent: a forward pass, a backward pass,
    # and an update of the net params given the gradients
    # (updates net.params[k][j].data).
    solver.step(1)

    # Runs the full gradient descent, i.e. max_iter steps.
    solver.solve()

    # Computes accuracy of the model on the test data.
def train_net(args, with_val_net=False):
    train_net_file = miniplaces_net(get_split('train'), args.crop, args.batch,
                                    args.image_root, train=True, cudnn=args.cudnn)
    # Set with_val_net=True to test during training.
    # Environment variable GLOG_minloglevel should be set to 0 to display
    # Caffe output in this case; otherwise, the test result will not be
    # displayed.
    if with_val_net:
        val_net_file = miniplaces_net(get_split('val'), args.crop, args.batch,
                                      args.image_root, train=False, cudnn=args.cudnn)
    else:
        val_net_file = None
    solver_file = miniplaces_solver(args, train_net_file, val_net_file)
    solver = caffe.get_solver(solver_file)
    solver.net.params['conv1'][0].data[...] = weights.make_weights()
    if args.weights_file:
        solver.net.copy_from(args.weights_file)
    outputs = sorted(solver.net.outputs)

    def str_output(output):
        value = solver.net.blobs[output].data
        if output.startswith('accuracy'):
            valstr = '%5.2f%%' % (100 * value, )
        else:
            valstr = '%6f' % value
        return '%s = %s' % (output, valstr)

    def disp_outputs(iteration, iter_pad_len=len(str(args.iters))):
        metrics = '; '.join(str_output(o) for o in outputs)
        return 'Iteration %*d: %s' % (iter_pad_len, iteration, metrics)

    # We could just call `solver.solve()` rather than `step()`ing in a loop.
    # (If we hadn't set GLOG_minloglevel = 3 at the top of this file, Caffe
    # would display loss/accuracy information during training.)
    previous_time = None
    for iteration in xrange(args.iters):
        solver.step(1)
        if (args.disp > 0) and (iteration % args.disp == 0):
            current_time = time.clock()
            if previous_time is None:
                benchmark = ''
            else:
                time_per_iter = (current_time - previous_time) / args.disp
                benchmark = ' (%5f s/it)' % time_per_iter
            previous_time = current_time
            print disp_outputs(iteration), benchmark
    # Print accuracy for last iteration.
    solver.net.forward()
    print disp_outputs(args.iters)
    solver.net.save(
        snapshot_at_iteration(args.iters, args.snapshot_dir, args.snapshot_prefix))
def run_solver(niter=18000, disp_interval=10):
    solver = caffe.get_solver(vgg_new_model + 'VGG_dfi_solver.prototxt')
    print 'Solver loaded'
    if os.path.isfile(vgg_face_model + 'VGG_FACE.caffemodel'):
        weights = vgg_face_model + 'VGG_FACE.caffemodel'
        print 'Caffe VGG weights found'
        solver.net.copy_from(weights)
        print 'weights copied'

    loss = np.zeros(niter)
    acc = np.zeros(niter)
    for it in range(niter):
        solver.step(1)  # Run a single SGD step
        # Simulate a batch size of
        loss[it] = solver.net.blobs['loss'].data.copy()
        acc[it] = solver.net.blobs['acc'].data.copy()
        if it % disp_interval == 0 or it + 1 == niter:
            loss_disp = 'Loss:', loss[it], ' Acc:', np.round(100 * acc[it])
            print '%3d) %s' % (it, loss_disp)

    # Save the learned weights.
    weights = os.path.join(vgg_new_model, 'weights.VGG_dfi.caffemodel')
    solver.net.save(weights)
    return
def train_equivariance_model(model_type, loss, input_train_features, input_test_features,
                             target_train_features, target_test_features, train_labels,
                             test_labels, target_train_output_probs, target_test_output_probs,
                             classifier_weights, classifier_bias, args):
    num_train_instances = input_train_features.shape[0]
    num_test_instances = input_test_features.shape[0]
    num_activations = input_train_features.shape[1]
    num_classes = target_train_output_probs.shape[1]
    mlp = model_type == 'mlp'

    write_hdf5s(args, input_train_features, target_train_features,
                target_train_output_probs, train_labels, input_test_features,
                target_test_features, target_test_output_probs, test_labels)

    # Write the prototxt for the train and test nets, as well as the solver
    net_param = equivariance_proto(args, num_activations, num_classes, loss, mlp)
    with open(args.train_file, 'w') as f:
        f.write(re.sub("VAL_", "", str(net_param)))

    solver_param = create_solver(args, num_train_instances, num_test_instances)
    with open(args.solver_file, 'w') as f:
        f.write(str(solver_param))

    # load the solver and the network files it references
    solver = caffe.get_solver(args.solver_file)

    # fix the classification weights/biases to be the passed in weights/biases
    classify_layer_params = solver.net.params['classify']
    classify_layer_params[0].data[:] = classifier_weights[:]  # data copy, not object reassignment
    classify_layer_params[1].data[:] = classifier_bias[:]

    solver.solve()

    return solver.net, solver.test_nets[0]
def solve_sphconv(layer, network='faster-rcnn', log=False,
                  random_init=False, state=None):
    log_dir = os.path.join(LOG_ROOT, "SphConv")
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    log_path = os.path.join(log_dir, "{0}{1}.log".format(network, layer))
    if os.path.isfile(log_path):
        sys.stderr.write("Log already exists, please delete log file first.\n")
        return

    if log:
        stderr = os.dup(2)
        log = os.open(log_path, os.O_WRONLY | os.O_CREAT)
        os.dup2(log, 2)

    solver_proto = os.path.join(
        SphConv_ROOT, "prototxt", "SphConv",
        "{0}{1}.solver.prototxt".format(network, layer))
    solver = caffe.get_solver(solver_proto)
    if state is not None:
        solver.restore(state)
    elif not random_init:
        init_sphconv(solver.net, layer, pretrained=False)
    solver.solve()
def __init__(self, config):
    # "Use Caffe as self."
    # caffe constructor: network_file, phase, level, stages, weight, engine
    topology_path = os.path.expanduser(str(config.model.topology))
    if hasattr(config.backend, 'engine') and (config.backend.engine != "default"):
        engine = str(config.backend.engine)
    else:
        engine = 'CAFFE'

    if hasattr(config.model, 'weight'):
        logger.debug("loading weights from: {}".format(config.model.weight))
        weight_path = os.path.expanduser(str(config.model.weight))
    else:
        weight_path = None

    if config.model.type == "test":
        phase = caffe.TEST
    else:
        phase = caffe.TRAIN

    caffe.set_mode_cpu()
    caffe.set_random_seed(0)

    if hasattr(config, 'batch_size') and config.model.prototxt_type == 'train_val':
        topology_path = self.reshape_in_train_val(
            topology_path, config.batch_size, config.out_dir)

    if config.model.prototxt_type == 'solver':
        logger.debug("using engine: {}".format(engine))
        modified_solver_path = os.path.join(str(config.out_dir),
                                            'modified_solver.prototxt')
        if not os.path.exists(os.path.dirname(modified_solver_path)):
            os.makedirs(os.path.dirname(modified_solver_path))
        solver_params = caffe_pb2.SolverParameter()
        with open(config.model.topology) as f:
            s = f.read()
            txtf.Merge(s, solver_params)
        solver_params.engine = engine
        if hasattr(config, 'batch_size'):
            solver_params.net = self.reshape_in_train_val(
                str(solver_params.net), config.batch_size, config.out_dir)
        with open(modified_solver_path, 'w') as fp:
            fp.write(str(solver_params))
        self.solver = caffe.get_solver(modified_solver_path)
        self.net = self.solver.net
        if weight_path is not None:
            self.net.copy_from(weight_path)
    else:
        try:
            logger.debug("using engine: {}".format(engine))
            self.net = caffe.Net(topology_path, phase,
                                 weights=weight_path, engine=engine)
        except Exception:
            # fall back for pycaffe builds whose Net constructor lacks the
            # `engine` keyword
            self.net = caffe.Net(topology_path, phase, weights=weight_path)
def train(solver_proto, datasets, initialization, gpu_id):
    """Train a network"""
    caffe.set_device(gpu_id)
    caffe.set_mode_gpu()
    solver = caffe.get_solver(solver_proto)
    if initialization is not None:
        assert osp.exists(initialization), \
            'Path to weights/solverstate does not exist: {}'.format(initialization)
        if initialization.endswith('.solverstate'):
            print 'Restoring solverstate from {}'.format(initialization)
            solver.restore(initialization)
        elif initialization.endswith('.caffemodel'):
            print 'Initializing weights from {}'.format(initialization)
            solver.net.copy_from(initialization)
        else:
            raise ValueError('ERROR: {} is not supported for initialization'.format(initialization))
    else:
        warnings.warn("Warning: No initialization provided. Training from scratch.")

    for dataset in datasets:
        solver.net.layers[0].add_dataset(dataset)
    solver.net.layers[0].print_params()
    solver.net.layers[0].generate_datum_ids()

    # train according to solver params
    solver.solve()
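# The branch above encodes the difference between the two initialization
# file types. A hedged sketch of the same rule as a reusable helper (the
# helper name is mine, not from the original code):
def initialize_solver(solver, path):
    if path.endswith('.solverstate'):
        # Restores the full optimizer state: iteration count, momentum
        # history, and learning-rate schedule position.
        solver.restore(path)
    elif path.endswith('.caffemodel'):
        # Copies weights only; training restarts from iteration 0.
        solver.net.copy_from(path)
    else:
        raise ValueError('unsupported initialization file: {}'.format(path))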
def train(solver_prototxt_filename):
    '''
    Train the ANN
    '''
    caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_prototxt_filename)
    solver.solve()
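# Hedged usage example for the minimal train() wrapper above;
# 'solver.prototxt' is a placeholder path, not a file from the original
# project.
if __name__ == '__main__':
    train('solver.prototxt')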
def train(config):
    with open('./track_model/proto_train.prototxt', 'w') as f:
        f.write(str(track_model.generate_model('train', config)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    solver = caffe.get_solver('./track_model/solver_sgd.prototxt')
    solver.net.copy_from(config.weights)
    #solver.net.save('./snapshots/track_model/_iter_0.caffemodel')
    #solver.restore('./snapshots/track_model/_iter_50000.solverstate')

    loss_avg = 0.0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99

    for it in range(config.max_iter):
        solver.step(1)

        loss_val = solver.net.blobs['loss'].data
        #scores_val = solver.net.blobs['fcn_scaled_scores'].data.copy()
        #label_val = solver.net.blobs['label'].data.copy()
        #import ipdb; ipdb.set_trace()
        loss_avg = decay * loss_avg + (1 - decay) * loss_val
        if it % config.iter_display == 0:
            print('\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f'
                  % (it, loss_val, loss_avg))
def __init__(self, CNN_NETWORK_PATH, CNN_SOLVER_PATH, CAFFEMODEL_PATH,
             SOLVERSTATE_PATH, USE_GPU=True, resume_training=False):
    self.CNN_NETWORK_PATH = CNN_NETWORK_PATH
    self.CNN_SOLVER_PATH = CNN_SOLVER_PATH
    self.CAFFEMODEL_PATH = CAFFEMODEL_PATH
    self.SOLVERSTATE_PATH = SOLVERSTATE_PATH
    self.resume_training = resume_training

    if USE_GPU:
        caffe.set_device(0)
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    if self.resume_training:
        self.net = caffe.Net(self.CNN_NETWORK_PATH, self.CAFFEMODEL_PATH, caffe.TRAIN)
    else:
        self.net = caffe.Net(self.CNN_NETWORK_PATH, caffe.TRAIN)

    self.solver = caffe.get_solver(self.CNN_SOLVER_PATH)
def train(solver_proto):
    '''
    Train the ANN
    '''
    caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_proto)
    solver.solve()
def train_model(model_type, loss, classifier_weights, classifier_bias,
                num_train_instances, num_test_instances, num_features,
                num_classes, args):
    net_param = equivalence_proto(args, num_features, num_classes, loss,
                                  model_type == 'mlp')
    with open(args.train_file, 'w') as f:
        f.write(re.sub("VAL_", "", str(net_param)))

    solver_param = create_solver(args, num_train_instances, num_test_instances)
    with open(args.solver_file, 'w') as f:
        f.write(str(solver_param))

    # load the solver and the network files it references
    solver = caffe.get_solver(args.solver_file)

    # fix the classification weights/biases to be the passed in weights/biases
    classify_layer_params = solver.net.params['classify']
    classify_layer_params[0].data[:] = classifier_weights[:]  # data copy, not object reassignment
    classify_layer_params[1].data[:] = classifier_bias[:]

    # apparently necessary, though I thought the weights were shared between the two networks
    classify_layer_params = solver.test_nets[0].params['classify']
    classify_layer_params[0].data[:] = classifier_weights[:]  # data copy, not object reassignment
    classify_layer_params[1].data[:] = classifier_bias[:]

    solver.solve()

    return solver.net, solver.test_nets[0]
def train():
    # Instantiate a solver object from the prototxt
    solver = caffe.get_solver(solverPATH)
    solver.max_iter = MAX_ITER
    solver.test_iter = 92
    solver.test_bs = 64
    solver.test_interval = TEST_INTERVAL

    # the main solver loop
    for it in range(1, solver.max_iter + 1):
        # train the next batch
        solver.step(1)

        # Test if at the right interval
        if it % solver.test_interval == 0:
            print 'Iteration', it, 'testing'
            acc = 0
            loss = 0
            label = []
            out1 = []
            out2 = []
            out3 = []
            for test_it in range(solver.test_iter):
                # Run the test data through the network
                solver.test_nets[0].forward()
                label = np.concatenate(
                    (label, solver.test_nets[0].blobs['label'].data), axis=0)
                out1 = np.concatenate(
                    (out1, solver.test_nets[0].blobs['loss1/classifier_transfer'].data.argmax(1)),
                    axis=0)
                out2 = np.concatenate(
                    (out2, solver.test_nets[0].blobs['loss2/classifier_transfer'].data.argmax(1)),
                    axis=0)
                out3 = np.concatenate(
                    (out3, solver.test_nets[0].blobs['loss3/classifier_transfer'].data.argmax(1)),
                    axis=0)
                # loss += solver.test_nets[0].blobs['loss'].data
                # acc += solver.test_nets[0].blobs['accuracy'].data
            # Record the test data
            # print float(loss) / solver.test_iter
            # print float(acc) / solver.test_iter
            labell.append(label)
            out1l.append(out1)
            out2l.append(out2)
            out3l.append(out3)
    return
def tune_VGG_for_falls():
    # simple python wrapper for running already written prototxt files
    solver_file = '/home/nestsense/code/ns-main/models/vgg_solver.prototxt'
    weights_file = '/home/nestsense/code/ns-main/snapshot/VGG_ILSVRC_16_layers.caffemodel'

    solver = caffe.get_solver(solver_file)
    solver.net.copy_from(weights_file)
    solver.solve()
def train(solver_proto_path, snapshot_solver_path=None, init_weights=None, GPU_ID=0):
    """
    Train the defined net. We did not use this function for our final net;
    for multi-gpu use we used the caffe executable instead. This was used
    for prototyping.
    """
    t0 = time.time()
    if GPU_ID >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(GPU_ID)
    else:
        caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_proto_path)
    if snapshot_solver_path is not None:
        solver.solve(snapshot_solver_path)  # train from previous solverstate
    else:
        if init_weights is not None:
            # for copying weights from a model without solverstate
            solver.net.copy_from(init_weights)
        solver.solve()  # train from scratch
    t1 = time.time()
    print 'Total training time: ', t1 - t0, ' sec'
    model_dir = "calc_" + time.strftime("%d-%m-%Y_%I%M%S")
    moveModel(model_dir=model_dir)  # move all the model files to a directory
    print "Moved model to model/" + model_dir
def train():
    with open('./seg_low_res_model/proto_train.prototxt', 'w') as f:
        f.write(str(segmodel.generate_model('train', config.N)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    solver = caffe.get_solver('./seg_low_res_model/solver.prototxt')
    if config.weights is not None:
        solver.net.copy_from(config.weights)

    cls_loss_avg = 0.0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0.0, 0.0, 0.0
    decay = 0.99

    for it in range(config.max_iter):
        solver.step(1)

        cls_loss_val = solver.net.blobs['loss'].data
        scores_val = solver.net.blobs['fcn_scores'].data.copy()
        label_val = solver.net.blobs['label'].data.copy()

        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        print('\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f'
              % (it, cls_loss_val, cls_loss_avg))

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        print('\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
              % (it, accuracy_all, accuracy_pos, accuracy_neg))
        print('\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
              % (it, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
def solve2(solver, args, uid, rank):
    if args.cpu:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpus[rank])
        caffe.set_solver_count(len(args.gpus))
        caffe.set_solver_rank(rank)
        caffe.set_multiprocess(True)

    solver = caffe.get_solver(solver)
    if args.init_model:
        if args.init_model.endswith('.caffemodel'):
            solver.net.copy_from(args.init_model)
        else:
            solver.net.copy_from(os.path.join(
                exp_dir, '{}_iter_{}.caffemodel'.format(category, args.init_model)))
    if args.init_state:
        if args.init_state.endswith('.solverstate'):
            solver.restore(args.init_state)
        else:
            solver.restore(os.path.join(
                exp_dir, '{}_iter_{}.solverstate'.format(category, args.init_state)))

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    print(rank)
    #pdb.set_trace()
    solver.step(solver.param.max_iter)
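# solve2() above follows caffe's NCCL multi-GPU protocol (set_solver_rank,
# set_multiprocess, caffe.NCCL). A hedged sketch of a driver that would spawn
# one such worker per GPU, modeled on caffe's python/train.py; it assumes a
# pycaffe built with USE_NCCL and an `args` object carrying the same fields
# solve2() reads.
from multiprocessing import Process


def train_multigpu(solver_proto, args):
    uid = caffe.NCCL.new_uid()  # shared id so the per-GPU solvers can rendezvous
    procs = []
    for rank in range(len(args.gpus)):
        p = Process(target=solve2, args=(solver_proto, args, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join()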
def train(config):
    with open('./seg_model/proto_train.prototxt', 'w') as f:
        f.write(str(segmodel.generate_model('train', config)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    solver = caffe.get_solver('./seg_model/solver.prototxt')
    if config.weights is not None:
        solver.net.copy_from(config.weights)

    cls_loss_avg = 0.0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0.0, 0.0, 0.0
    decay = 0.99

    for it in range(config.max_iter):
        solver.step(1)

        cls_loss_val = solver.net.blobs['loss'].data
        scores_val = solver.net.blobs['upscores'].data.copy()
        label_val = solver.net.blobs['label'].data.copy()

        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        print('\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f'
              % (it, cls_loss_val, cls_loss_avg))

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        print('\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
              % (it, accuracy_all, accuracy_pos, accuracy_neg))
        print('\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
              % (it, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
def main():
    if not os.path.exists('./result'):
        os.makedirs('./result')

    question_vocab, answer_vocab = {}, {}
    if os.path.exists('./result/vdict.json') and os.path.exists('./result/adict.json'):
        write_log('restoring vocab', 'log.txt')
        with open('./result/vdict.json', 'r') as f:
            question_vocab = json.load(f)
        with open('./result/adict.json', 'r') as f:
            answer_vocab = json.load(f)
    else:
        question_vocab, answer_vocab = make_vocab_files()
        with open('./result/vdict.json', 'w') as f:
            json.dump(question_vocab, f)
        with open('./result/adict.json', 'w') as f:
            json.dump(answer_vocab, f)

    write_log('question vocab size: ' + str(len(question_vocab)), 'log.txt')
    write_log('answer vocab size: ' + str(len(answer_vocab)), 'log.txt')

    with open('./result/proto_train.prototxt', 'w') as f:
        f.write(str(qlstm(config.TRAIN_DATA_SPLITS, config.BATCH_SIZE,
                          config.MAX_WORDS_IN_QUESTION, len(question_vocab))))

    with open('./result/proto_test.prototxt', 'w') as f:
        f.write(str(qlstm('val', config.VAL_BATCH_SIZE,
                          config.MAX_WORDS_IN_QUESTION, len(question_vocab))))

    caffe.set_device(config.GPU_ID)
    caffe.set_mode_gpu()
    solver = caffe.get_solver('./qlstm_solver.prototxt')

    train_loss = np.zeros(config.MAX_ITERATIONS)
    # results = []

    for it in range(config.MAX_ITERATIONS):
        solver.step(1)

        # store the train loss
        train_loss[it] = solver.net.blobs['loss'].data

        if it != 0 and it % config.PRINT_INTERVAL == 0:
            write_log('------------------------------------', 'log.txt')
            write_log('Iteration: ' + str(it), 'log.txt')
            c_mean_loss = train_loss[it - config.PRINT_INTERVAL:it].mean()
            write_log('Train loss: ' + str(c_mean_loss), 'log.txt')
        if it != 0 and it % config.VALIDATE_INTERVAL == 0:
            # actually test
            solver.test_nets[0].save('./result/tmp.caffemodel')
            write_log('Validating...', 'log.txt')
            test_loss, acc_overall, acc_per_ques, acc_per_ans = exec_validation(config.GPU_ID, 'val', it=it)
            write_log('Iteration: ' + str(it), 'log.txt')
            write_log('Test loss: ' + str(test_loss), 'log.txt')
            write_log('Overall Accuracy: ' + str(acc_overall), 'log.txt')
            write_log('Per Question Type Accuracy is the following:', 'log.txt')
            for quesType in acc_per_ques:
                write_log("%s : %.02f" % (quesType, acc_per_ques[quesType]), 'log.txt')
            write_log('Per Answer Type Accuracy is the following:', 'log.txt')
            for ansType in acc_per_ans:
                write_log("%s : %.02f" % (ansType, acc_per_ans[ansType]), 'log.txt')
def train(self):
    train_data, __ = utils.load_train_data()

    # raw = train_data['input']
    # r = raw[0]
    # g = raw[1]
    # b = raw[2]
    # print len(r.flatten())
    # r_std = np.std(r.flatten())
    # r_mean = np.mean(r.flatten())
    # adjusted_stddev = max(r_std, 1.0 / math.sqrt(3072))
    # r = (np.array(r, dtype=np.float32) - r_mean) / adjusted_stddev
    #
    # g_std = np.std(g.flatten())
    # g_mean = np.mean(g.flatten())
    # adjusted_stddev = max(g_std, 1.0 / math.sqrt(3072))
    # g = (np.array(g, dtype=np.float32) - g_mean) / adjusted_stddev
    #
    # b_std = np.std(b.flatten())
    # b_mean = np.mean(b.flatten())
    # adjusted_stddev = max(b_std, 1.0 / math.sqrt(3072))
    # b = (np.array(b, dtype=np.float32) - b_mean) / adjusted_stddev
    #
    # raw[0] = r
    # raw[1] = g
    # raw[2] = b
    # print raw
    # train_data['input'] = raw
    #
    # train_data['input'] = utils.additive_gaussian_noise(raw)
    # print train_data['input']
    # utils.save_data_as_lmdb('cifar6_train_data_lmdb', train_data)

    caffe.set_mode_gpu()
    solver = caffe.get_solver('alexnet_solver_6.prototxt')
    solver.solve()
def _train_net(solver_prototxt, data_provider, output_dir, maxiter,
               resume_file=None, weight_file=None, log_to_file=False):
    '''
    This is a private helper function to train a deep net.

    INPUT:
        solver_prototxt: the file name and full path of the solver prototxt
        data_provider: should be in the form of
            [train_data_provider, test_data_provider]

    NOTE: This function assumes the first layer is a data layer.
    '''
    # get the solver from the prototxt
    solver = caffe.get_solver(solver_prototxt)

    # assuming the data layer is located at the very bottom,
    # set the data for training
    solver.net.layers[0].set_data(data_provider[0])

    # set the data for testing if necessary
    if len(solver.test_nets) > 0:
        solver.test_nets[0].layers[0].set_data(data_provider[1])

    if resume_file is not None:
        # if a resume file exists, we want to resume the process
        solver.solve(resume_file)
    elif weight_file is not None:
        # reload the weights
        solver.net.copy_from(weight_file)
        solver.solve()
    else:
        # train from the beginning
        solver.solve()
def solve(self, max_iter):
    caffe.set_mode_gpu()
    if self.gpu_id is not None:
        caffe.set_device(self.gpu_id)
    solver = caffe.get_solver(self.solverfile)
    for i in xrange(max_iter + 1):
        solver.step(1)
def run(dataset, gpu, learning_rates, momentums, weight_decays, steps,
        num_hiddens, runs):
    DEVICE = 'GPU'
    EXP_PATH = '/home/szymon.zareba/dev/phd_pycaffe/experiments/rbm_pm1_geo/'

    if DEVICE == 'CPU':
        caffe.set_mode_cpu()
    elif DEVICE == 'GPU':
        caffe.set_device(gpu)
        caffe.set_mode_gpu()
    else:
        raise ValueError(DEVICE)

    EXP_PATH = os.path.join(EXP_PATH, dataset)

    all_params = [learning_rates, momentums, weight_decays, steps, num_hiddens]
    for setup in tqdm(list(product(*all_params))):
        for run in range(1, runs + 1):
            run = str(run)
            print 'setup: {}'.format(setup)
            learning_rate, momentum, weight_decay, step, num_hidden = setup
            name = '_'.join([str(s) for s in setup])
            print 'name: pm_geo_{}'.format(name)

            if os.path.exists(os.path.join(EXP_PATH, name, run)):
                print 'experiment exists'
            else:
                model_proto, solver_proto = create_protos(
                    EXP_PATH, name, DEVICE, learning_rate, momentum,
                    weight_decay, step, num_hidden, dataset, run)
                solver = caffe.get_solver(solver_proto)
                solver.solve()
def model_prepare(gpu_id=GPU_ID):
    logging.info('Prepare Model ...')
    caffe.set_mode_gpu()
    caffe.set_device(gpu_id)
    solver = caffe.get_solver(solver_file)
    solver.net.copy_from(pretrained_model)
    logging.info('Done.')
    return solver
def init_solver(solver_config, options):
    caffe.set_mode_gpu()
    caffe.select_device(options.train_device, False)

    solver_inst = caffe.get_solver(solver_config)

    if options.test_net is None:
        return solver_inst, None
    else:
        return solver_inst, init_testnet(options.test_net,
                                         test_device=options.test_device,
                                         level=options.test_level,
                                         stages=options.test_stages)
def Training(solver_file):
    """
    training
    """
    set_device()
    solver = caffe.get_solver(solver_file)
    #solver.solve()  # solve completely

    number_iteration = 10000

    # collect the information
    display = 100

    # test information
    test_iteration = 100
    test_interval = 100

    # loss and accuracy information
    train_loss = np.zeros(int(np.ceil(number_iteration * 1.0 / display)))
    test_loss = np.zeros(int(np.ceil(number_iteration * 1.0 / test_interval)))
    test_accuracy = np.zeros(int(np.ceil(number_iteration * 1.0 / test_interval)))

    # tmp variables
    _train_loss = 0
    _test_loss = 0
    _test_accuracy = 0

    # main loop
    for iter in range(number_iteration):
        solver.step(1)

        # save model during training
        #~ if iter == number_iteration - 1:  # in [10, 30, 60, 100, 300, 600, 1000, 3000, 6000, number_iteration - 1]:
        #~     string = 'lenet_iter_%(iter)d.caffemodel' % {'iter': iter}
        #~     solver.net.save(string)

        if 0 == iter % display:
            train_loss[iter // display] = solver.net.blobs['loss'].data
        '''
        # accumulate the train loss
        _train_loss += solver.net.blobs['SoftmaxWithLoss1'].data
        if 0 == iter % display:
            train_loss[iter // display] = _train_loss / display
            _train_loss = 0
        '''

        if 0 == iter % test_interval:
            for test_iter in range(test_iteration):
                solver.test_nets[0].forward()
                _test_loss += solver.test_nets[0].blobs['loss'].data
                _test_accuracy += solver.test_nets[0].blobs['accuracy'].data
            test_loss[iter // test_interval] = _test_loss / test_iteration
            test_accuracy[iter // test_interval] = _test_accuracy / test_iteration
            _test_loss = 0
            _test_accuracy = 0
def main():
    if not os.path.exists('./result'):
        os.makedirs('./result')

    question_vocab, answer_vocab = {}, {}
    if os.path.exists('./result/vdict.json') and os.path.exists('./result/adict.json'):
        print 'restoring vocab'
        with open('./result/vdict.json', 'r') as f:
            question_vocab = json.load(f)
        with open('./result/adict.json', 'r') as f:
            answer_vocab = json.load(f)
    else:
        question_vocab, answer_vocab = make_vocab_files()
        with open('./result/vdict.json', 'w') as f:
            json.dump(question_vocab, f)
        with open('./result/adict.json', 'w') as f:
            json.dump(answer_vocab, f)

    print 'question vocab size:', len(question_vocab)
    print 'answer vocab size:', len(answer_vocab)

    with open('./result/proto_train.prototxt', 'w') as f:
        f.write(str(qlstm(config.TRAIN_DATA_SPLITS, config.BATCH_SIZE,
                          config.MAX_WORDS_IN_QUESTION, len(question_vocab))))

    with open('./result/proto_test.prototxt', 'w') as f:
        f.write(str(qlstm('val', config.VAL_BATCH_SIZE,
                          config.MAX_WORDS_IN_QUESTION, len(question_vocab))))

    caffe.set_device(config.GPU_ID)
    caffe.set_mode_gpu()
    solver = caffe.get_solver('./qlstm_solver.prototxt')

    train_loss = np.zeros(config.MAX_ITERATIONS)
    results = []

    for it in range(config.MAX_ITERATIONS):
        solver.step(1)

        # store the train loss
        train_loss[it] = solver.net.blobs['loss'].data

        if it % config.PRINT_INTERVAL == 0:
            print 'Iteration:', it
            c_mean_loss = train_loss[it - config.PRINT_INTERVAL:it].mean()
            print 'Train loss:', c_mean_loss
        if it != 0 and it % config.VALIDATE_INTERVAL == 0:
            solver.test_nets[0].save('./result/tmp.caffemodel')
            print 'Validating...'
            test_loss, acc_overall, acc_per_ques, acc_per_ans = exec_validation(config.GPU_ID, 'val', it=it)
            print 'Test loss:', test_loss
            print 'Accuracy:', acc_overall
            results.append([it, c_mean_loss, test_loss, acc_overall, acc_per_ques, acc_per_ans])
            best_result_idx = np.array([x[3] for x in results]).argmax()
            print 'Best accuracy of', results[best_result_idx][3], 'was at iteration', results[best_result_idx][0]
            drawgraph(results)
def create_solver(solver_param, file_name=""):
    if file_name:
        f = open(file_name, 'w')
    else:
        f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    f.write(str(solver_param))
    f.close()
    solver = caffe.get_solver(f.name)
    return solver
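# Hedged usage sketch for create_solver() above; the net path and snapshot
# prefix are placeholders. str() on a protobuf message produces the text
# format that Caffe's solver loader expects.
from caffe.proto.caffe_pb2 import SolverParameter

param = SolverParameter()
param.net = 'train_val.prototxt'   # hypothetical net definition
param.base_lr = 0.001
param.lr_policy = 'fixed'
param.snapshot_prefix = 'snapshots/model'
solver = create_solver(param)      # writes the param to a temp file internally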
def Train(solver="Data/solver.prototxt"):
    # Tell caffe to use the cpu; switch to caffe.set_mode_gpu() to try the gpu
    caffe.set_mode_cpu()
    # Load solver from file
    solver = caffe.get_solver(solver)
    # Train a new model
    solver.solve()
def train_cnn(solver_config_path, s, output_plot_path):
    ### load the solver and create train and test nets
    solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
    solver = caffe.get_solver(solver_config_path)

    test_interval = s.test_interval
    global niter
def train(solver_prototxt_filename): """ Train the ANN """ caffe.set_mode_gpu() solver = caffe.get_solver(solver_prototxt_filename) solver.net.copy_from(caffemodel_path) solver.restore(caffe_solverstate_path) solver.solve()
def objective(parameters):
    #print("Parameters:")
    #pprint(parameters)
    #print()

    base_lr = parameters['base_lr']
    momentum = parameters['momentum']
    batch_size = int(parameters['batch_size'])
    weight_decay = parameters['weight_decay']
    proba = parameters['dropout']

    solver_options = {}
    solver_options["base_lr"] = base_lr
    solver_options["momentum"] = momentum
    solver_options["weight_decay"] = weight_decay
    mlab.update_solver(
        "/home/sanand2/hypercaffe/caffe/examples/cifar10/cifar10_full_solver.prototxt",
        solver_options)

    solver_options = {}
    solver_options["batch_size"] = batch_size
    mlab.update_solver(
        "/home/sanand2/hypercaffe/caffe/examples/cifar10/cifar10_full_solver.prototxt",
        solver_options)

    net = caffe.Net(
        '/home/sanand2/hypercaffe/caffe/examples/cifar10/cifar10_full_train_test.prototxt',
        caffe.TRAIN)
    solver = caffe.get_solver(
        '/home/sanand2/hypercaffe/caffe/examples/cifar10/cifar10_full_solver.prototxt')
    solver.solve()
    min = solver.net.blobs['loss'].data
    print min

    #loss_val = np.zeros(10)
    #min = 0
    #for it in range(10):
    #    solver.solve()
    #    loss_val[it] = solver.net.blobs['loss'].data
    #    if min < loss_val[it]:
    #        min = loss_val[it]
    #    #print "loss value is " + str(loss_val[it])
    #    print min
    #print net.blobs['mnist'].batch_size
    #min = 0
    #for i in range(10):
    #    #print str(loss_val[i]) + " "
    #    sum += int(loss_val[i])
    #sum = sum / 10
    #print "loss value is " + loss_val
    #os.system("cd /home/syazdani/caffe")
    #os.system("./examples/mnist/train_lenet.sh")
    #os.system("cd hyperopt")
    #sys.stdout.close()
    #sys.stdout = tmp

    return {'loss': min, 'status': STATUS_OK}
def train(self):
    train_data, __ = utils.load_train_data()
    print train_data['input'].shape
    print train_data['output'].shape
    utils.save_data_as_lmdb(const.LMDB_TRAIN_DATA_PATH, train_data)

    caffe.set_mode_gpu()
    solver = caffe.get_solver(const.ALEXNET_SOLVER)
    solver.solve()
def train(self):
    train_data, __ = utils.load_train_data()
    print train_data['input'].shape
    print train_data['output'].shape
    utils.save_data_as_lmdb(const.LMDB_TRAIN_WHITEN_DATA_PATH, train_data, False, True)

    caffe.set_mode_gpu()
    solver = caffe.get_solver('alexnet_solver_3.prototxt')
    solver.solve()
def __init__(self):
    train_net_file = "yelp_train.prototxt"
    test_net_file = "yelp_test.prototxt"
    weights = "weights/weights.pretrained.caffemodel"

    yelp_solver_filename = "yelp_multilabel_solver.prototxt"
    yelp_solver = caffe.get_solver(yelp_solver_filename)
    yelp_solver.net.copy_from(weights)
    self.net = yelp_solver.test_nets[0]
def init_solver(solver_config, options):
    caffe.set_mode_gpu()
    caffe.select_device(options.train_device, False)

    solver_inst = caffe.get_solver(solver_config)

    if options.test_net is None:
        return solver_inst, None
    else:
        return solver_inst, init_testnet(options.test_net,
                                         test_device=options.test_device)
def trainNewSolver(trainSolverPath, saveToSolverPath):
    """
    Here we want to train a new solver with certain features:
    1. All bias nodes are zero
    2. Construct a certain net and train
    :return: void. Saves the solverstate to a directory
    """
    global solver
    solver = caffe.get_solver(trainSolverPath)
    solver.step(10000)
def main():
    # read arguments
    parser = argparser()
    args = parser.parse_args()
    solver = args.solver
    train_loss_path = args.train_loss
    train_acc_path = args.train_accuracy
    val_loss_path = args.val_loss
    val_acc_path = args.val_accuracy
    snapshot = args.snapshot
    threshold = args.threshold
    max_iter = args.max_iter
    record_iter = args.record_iter
    test_iter = args.test_iter
    early_stop = args.early_stop

    caffe.set_mode_gpu()
    caffe.set_device(0)

    solver = caffe.get_solver(solver)
    if snapshot != "":
        solver.net.copy_from(snapshot)

    train_loss = np.zeros(max_iter / record_iter)
    train_acc = np.zeros(max_iter / record_iter)
    val_loss = np.zeros(max_iter / record_iter)
    val_acc = np.zeros(max_iter / record_iter)

    pre_val_loss = 0
    for i in range(max_iter):
        solver.step(1)
        cur_train_loss = solver.net.blobs["loss"].data
        cur_train_acc = solver.net.blobs["accuracy"].data
        cur_val_loss = solver.test_nets[0].blobs["loss"].data
        cur_val_acc = solver.test_nets[0].blobs["accuracy"].data
        if i % record_iter == 0:
            train_loss[i / record_iter] = cur_train_loss
            train_acc[i / record_iter] = cur_train_acc
            val_loss[i / record_iter] = cur_val_loss
            val_acc[i / record_iter] = cur_val_acc
        if early_stop and (i % test_iter == 0):
            if i == 0:
                pre_val_loss = cur_val_loss
            elif (pre_val_loss - cur_val_loss) / pre_val_loss < threshold:
                # validation loss stopped improving: snapshot, truncate the
                # records to the iterations actually run, and stop
                print "Converged, stopping...."
                solver.snapshot()
                train_loss = train_loss[0:i / record_iter + 1]
                train_acc = train_acc[0:i / record_iter + 1]
                val_loss = val_loss[0:i / record_iter + 1]
                val_acc = val_acc[0:i / record_iter + 1]
                break
            else:
                pre_val_loss = cur_val_loss

    np.savetxt(train_loss_path, train_loss)
    np.savetxt(train_acc_path, train_acc)
    np.savetxt(val_loss_path, val_loss)
    np.savetxt(val_acc_path, val_acc)
def __init__(self, action_space, model=pms.newModel):
    self.action_space = action_space

    actionSolver = caffe.get_solver(pms.actionSolverPath)
    actionSolver.net.copy_from(model)
    # test net shares weights with the train net
    actionSolver.test_nets[0].share_with(actionSolver.net)
    self.solver = actionSolver

    self.targetNet = caffe.Net(pms.actionTestNetPath, model, caffe.TEST)
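# share_with(), as used above, makes the test net reuse the train net's
# parameter blobs instead of keeping its own copy, so weight updates become
# visible to the test net immediately. A hedged two-line illustration
# ('solver.prototxt' is a placeholder):
solver = caffe.get_solver('solver.prototxt')
solver.test_nets[0].share_with(solver.net)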
def learn_and_test(solver_file, size_test):
    caffe.set_mode_cpu()
    solver = caffe.get_solver(solver_file)
    solver.solve()

    accuracy = 0
    test_iters = int(size_test / solver.test_nets[0].blobs['data'].num)
    for i in range(test_iters):
        solver.test_nets[0].forward()
        accuracy += solver.test_nets[0].blobs['accuracy'].data
    accuracy /= test_iters
    return accuracy
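# Hedged usage sketch for learn_and_test() above; the solver path and
# test-set size are placeholders.
if __name__ == '__main__':
    acc = learn_and_test('lenet_solver.prototxt', size_test=10000)
    print 'Accuracy: %3.1f%%' % (100 * acc, )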
def train_test_net_command(solver_config_path):
    """
    Train/test process launching the cpp solver from the shell.
    """
    # Load solver
    solver = None
    solver = caffe.get_solver(solver_config_path)
    # Launch training command
    command = "{caffe} train -solver {solver}".format(
        caffe=caffe_root + caffe_path, solver=solver_config_path)
    subprocess.call(command, shell=True)
def learn(solver_file, label_num):
    #net = caffe.Net(solver_file)
    #net.forward()  # this will load the next mini-batch as defined in the net
    #label1 = net.blobs['label1'].data  # or whatever you want

    print '========'
    print 'currently solving for label number: ' + str(label_num)
    caffe.set_mode_gpu()
    solver = caffe.get_solver(solver_file)
    solver.step(3000)
def train_net(with_val_net=False):
    train_net_file = miniplaces_net(get_split('train'), train=True)
    # Set with_val_net=True to test during training.
    # Environment variable GLOG_minloglevel should be set to 0 to display
    # Caffe output in this case; otherwise, the test result will not be
    # displayed.
    if with_val_net:
        val_net_file = miniplaces_net(get_split('val'), train=False)
    else:
        val_net_file = None
    solver_file = miniplaces_solver(train_net_file, val_net_file)
    solver = caffe.get_solver(solver_file)
    outputs = sorted(solver.net.outputs)

    def str_output(output):
        value = solver.net.blobs[output].data
        if output.startswith('accuracy'):
            valstr = '%5.2f%%' % (100 * value, )
        else:
            valstr = '%6f' % value
        return '%s = %s' % (output, valstr)

    def disp_outputs(iteration, iter_pad_len=len(str(args.iters))):
        metrics = '; '.join(str_output(o) for o in outputs)
        return 'Iteration %*d: %s' % (iter_pad_len, iteration, metrics)

    # We could just call `solver.solve()` rather than `step()`ing in a loop.
    # (If we hadn't set GLOG_minloglevel = 3 at the top of this file, Caffe
    # would display loss/accuracy information during training.)
    previous_time = None
    loss_line = []
    for iteration in xrange(args.iters):
        solver.step(1)
        if (args.disp > 0) and (iteration % args.disp == 0):
            loss_line.append([iteration, float(solver.net.blobs['loss'].data)])
            current_time = time.clock()
            if previous_time is None:
                benchmark = ''
            else:
                time_per_iter = (current_time - previous_time) / args.disp
                benchmark = ' (%5f s/it)' % time_per_iter
            previous_time = current_time
            print disp_outputs(iteration), benchmark
    # Print accuracy for last iteration.
    solver.net.forward()
    print disp_outputs(args.iters)
    solver.net.save(snapshot_at_iteration(args.iters))
    with open('./results/loss.json' + time_str, 'w') as f:
        json.dump(loss_line, f)
def train_net(fn, with_val_net=False):
    train_net_file = miniplaces_net(get_split('train'), fn, train=True)
    # Set with_val_net=True to test during training.
    # Environment variable GLOG_minloglevel should be set to 0 to display
    # Caffe output in this case; otherwise, the test result will not be
    # displayed.
    if with_val_net:
        val_net_file = miniplaces_net(get_split('val'), fn, train=False)
    else:
        val_net_file = None
    solver_file = miniplaces_solver(train_net_file, val_net_file)
    solver = caffe.get_solver(solver_file)
    solver.restore('snapshot/place_net_iter_24000.solverstate')
    filters = solver.net.params['Convolution1'][0].data
    vis_square(filters.transpose(0, 2, 3, 1))
    crash  # deliberate NameError: halt here after visualizing the filters
    # solver.net.copy_from('snapshot/place_net_iter_45000.caffemodel')
    outputs = sorted(solver.net.outputs)

    def str_output(output):
        value = solver.net.blobs[output].data
        if output.startswith('accuracy'):
            valstr = '%5.2f%%' % (100 * value, )
        else:
            valstr = '%6f' % value
        return '%s = %s' % (output, valstr)

    def disp_outputs(iteration, iter_pad_len=len(str(args.iters))):
        metrics = '; '.join(str_output(o) for o in outputs)
        return 'Iteration %*d: %s' % (iter_pad_len, iteration, metrics)

    # We could just call `solver.solve()` rather than `step()`ing in a loop.
    # (If we hadn't set GLOG_minloglevel = 3 at the top of this file, Caffe
    # would display loss/accuracy information during training.)
    previous_time = None
    for iteration in xrange(args.iters):
        solver.step(1)
        if (args.disp > 0) and (iteration % args.disp == 0):
            current_time = time.clock()
            if previous_time is None:
                benchmark = ''
            else:
                time_per_iter = (current_time - previous_time) / args.disp
                benchmark = ' (%5f s/it)' % time_per_iter
            previous_time = current_time
            print disp_outputs(iteration), benchmark
    # Print accuracy for last iteration.
    solver.net.forward()
    print disp_outputs(args.iters)
    solver.net.save(snapshot_at_iteration(args.iters))
def training(solver_file):
    """
    Training function
    :param solver_file: prototxt solver
    :return: void
    """
    solver = caffe.get_solver(solver_file)
    # solver.solve()  # solve completely

    number_iteration = 10000

    # collect the information
    display = 100

    # test information
    test_iteration = 100
    test_interval = 100

    # loss and accuracy information
    train_loss = np.zeros(int(np.ceil(number_iteration * 1.0 / display)))
    test_loss = np.zeros(int(np.ceil(number_iteration * 1.0 / test_interval)))
    test_accuracy = np.zeros(int(np.ceil(number_iteration * 1.0 / test_interval)))

    # tmp variables
    _test_loss = 0
    _test_accuracy = 0

    # main loop
    for iteration in range(number_iteration):
        solver.step(1)

        if 0 == iteration % display:
            train_loss[iteration // display] = solver.net.blobs['loss'].data

        if 0 == iteration % test_interval:
            for test_iter in range(test_iteration):
                solver.test_nets[0].forward()
                _test_loss += solver.test_nets[0].blobs['loss'].data
                _test_accuracy += solver.test_nets[0].blobs['accuracy'].data
            test_loss[iteration // test_interval] = _test_loss / test_iteration
            test_accuracy[iteration // test_interval] = _test_accuracy / test_iteration
            _test_loss = 0
            _test_accuracy = 0
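# A hedged sketch of how the curves collected by training() could be plotted;
# it assumes the function is extended to return (train_loss, test_loss,
# test_accuracy), which the version above does not yet do.
import matplotlib.pyplot as plt
import numpy as np


def plot_training_curves(train_loss, test_loss, test_accuracy,
                         display=100, test_interval=100):
    fig, ax1 = plt.subplots()
    ax1.plot(np.arange(len(train_loss)) * display, train_loss, label='train loss')
    ax1.plot(np.arange(len(test_loss)) * test_interval, test_loss, label='test loss')
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('loss')
    ax1.legend(loc='upper right')
    ax2 = ax1.twinx()  # accuracy on a second y-axis
    ax2.plot(np.arange(len(test_accuracy)) * test_interval, test_accuracy, 'r')
    ax2.set_ylabel('test accuracy')
    plt.show()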
def _init_solver(self):
    """ Helper method to initialize the solver. """
    solver_param = SolverParameter()
    solver_param.display = 0  # Don't display anything.
    solver_param.base_lr = self._hyperparams['lr']
    solver_param.lr_policy = self._hyperparams['lr_policy']
    solver_param.momentum = self._hyperparams['momentum']
    solver_param.weight_decay = self._hyperparams['weight_decay']
    solver_param.type = self._hyperparams['solver_type']
    solver_param.random_seed = self._hyperparams['random_seed']

    # Pass in net parameter by protostring (could add option to input prototxt file).
    network_arch_params = self._hyperparams['network_arch_params']
    network_arch_params['dim_input'] = self._dO
    network_arch_params['demo_batch_size'] = self._hyperparams['demo_batch_size']
    network_arch_params['sample_batch_size'] = self._hyperparams['sample_batch_size']
    network_arch_params['T'] = self._T
    network_arch_params['phase'] = TRAIN
    solver_param.train_net_param.CopyFrom(
        self._hyperparams['network_model'](**network_arch_params)
    )

    # For running forward in python.
    network_arch_params['phase'] = TEST
    solver_param.test_net_param.add().CopyFrom(
        self._hyperparams['network_model'](**network_arch_params)
    )

    network_arch_params['phase'] = 'forward_feat'
    solver_param.test_net_param.add().CopyFrom(
        self._hyperparams['network_model'](**network_arch_params)
    )

    # These are required by Caffe to be set, but not used.
    solver_param.test_iter.append(1)
    solver_param.test_iter.append(1)
    solver_param.test_interval = 1000000

    f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
    f.write(MessageToString(solver_param))
    f.close()
    self.solver = caffe.get_solver(f.name)
def main():
    options, args = getOptions()

    # In[6]:
    niter = options.niter  # number of iterations to train (200 epochs)
    quick_solver = caffe.get_solver(options.solver_path)
    quick_solver.net.copy_from(options.weights_path)

    print "Options: %s" % options
    print 'Running solvers for %d iterations...' % niter
    solvers = [('pretrained', quick_solver)]
    train_loss, train_acc, val_loss, val_acc, weights = run_solvers(
        niter, solvers, options.model_name)

    pickle_filename = "%d_iter.%s.pickle" % (niter, options.model_name)
    with open(os.path.join(pickle_dir, pickle_filename), 'w+') as f:
        pickle.dump([train_loss, train_acc, val_loss, val_acc], f)
# Make sure that caffe is on the python path:
import matplotlib
matplotlib.use('Agg')
import sys, caffe, os
from os.path import join

solver_file = sys.argv[1]
outdir = sys.argv[2]
gpunum = int(sys.argv[3])

sys.stdout = open(join(outdir, 'train.out'), 'w')
sys.stderr = open(join(outdir, 'train.err'), 'w')

caffe.set_device(gpunum)
caffe.set_mode_gpu()
solver = caffe.get_solver(solver_file)
solver.solve()
__author__ = 'luispeinado'

import sys
#caffe_root = '/Users/luispeinado/caffe/'  # this file should be run from {caffe_root}/examples (otherwise change this line)
#sys.path.insert(0, caffe_root + 'python')
#from sklearn.datasets import load_iris
#import sklearn.metrics
#import numpy as np
#from sklearn.cross_validation import StratifiedShuffleSplit
#import matplotlib.pyplot as plt
import h5py
import caffe
#import caffe.draw

caffe.set_mode_cpu()
#caffe.set_device(0)
#caffe.set_mode_gpu()

solver_prototxt_filename = sys.argv[1]
solver = caffe.get_solver(solver_prototxt_filename)
solver.solve()
    # (tail of run_solvers: save the learned weights of each solver and return)
    weight_dir = tempfile.mkdtemp()
    weights = {}
    for name, s in solvers:
        filename = 'weights.%s.caffemodel' % name
        weights[name] = os.path.join(weight_dir, filename)
        s.net.save(weights[name])
    return loss, acc, weights


# In[ ]:

niter = 500  # number of iterations to train

# Reset style_solver as before.
drink_solver_filename = solver(drink_net(train=True))
drink_solver = caffe.get_solver(drink_solver_filename)
drink_solver.net.copy_from(weights)

# For reference, we also create a solver that isn't initialized from
# the pretrained ImageNet weights.
scratch_drink_solver_filename = solver(drink_net(train=True))
scratch_drink_solver = caffe.get_solver(scratch_drink_solver_filename)

print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained', drink_solver), ('scratch', scratch_drink_solver)]
loss, acc, weights = run_solvers(niter, solvers)
print 'Done.'

train_loss, scratch_train_loss = loss['pretrained'], loss['scratch']
train_acc, scratch_train_acc = acc['pretrained'], acc['scratch']
    # (tail of run_solvers: save the learned weights of each solver and return)
    weight_dir = '/home/ldy/workspace/caffe/models/finetune_UCMerced_LandUse/'
    weights = {}
    for name, s in solvers:
        filename = 'weights_finally.%s.caffemodel' % name
        weights[name] = os.path.join(weight_dir, filename)
        print weights[name]
        s.net.save(weights[name])
    return loss, acc, weights


print 'training...'
niter = 95  # number of iterations to train

# Reset style_solver as before.
style_solver_filename = solver(style_net(train=True))
style_solver = caffe.get_solver(style_solver_filename)
style_solver.net.copy_from(weights)

print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained', style_solver)]
loss, acc, finetuned_weights = run_solvers(niter, solvers)
print 'Done.'

style_weights_ft = finetuned_weights['pretrained']

# Delete solvers to save memory.
del style_solver, solvers


def eval_style_net(weights, test_iters=10):