check_path(training_data_path)
check_path(testing_data_path)
check_path(mean_image_path)
check_path(out_dir_path)

print("# _/_/_/ load model _/_/_/")

# load an original Caffe model
original_model = cPickle.load(open(initial_model_path))

# load a new model to be fine-tuned
class_size = load_labels(label_path)
modified_model = ModifiedReferenceCaffeNet(class_size)

# copy W/b from the original model to the new one
copy_model(original_model, modified_model)

if args.gpu >= 0:
    chainer.cuda.get_device(args.gpu).use()  # make the GPU current
    modified_model.to_gpu()

print("# _/_/_/ load dataset _/_/_/")

in_size = ModifiedReferenceCaffeNet.IN_SIZE
mean = np.load(mean_image_path)
train = DataPreprocessor(training_data_path, root_dir_path, mean, in_size,
                         random=True, is_scaled=True)
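
# copy_model() is called above but is not part of this excerpt. Below is a minimal
# sketch of the widely used Chainer fine-tuning recipe it likely corresponds to:
# walk the child links of the source chain and copy W/b wherever a link with the
# same name, type and parameter shapes exists in the destination, so resized layers
# (e.g. the new final fully-connected layer) keep their fresh initialization.
# This is an assumption about the helper, not the repository's actual code.
import chainer


def copy_model(src, dst):
    assert isinstance(src, chainer.Chain) and isinstance(dst, chainer.Chain)
    for child in src.children():
        if child.name not in dst.__dict__:
            continue
        dst_child = dst[child.name]
        if type(child) != type(dst_child):
            continue
        if isinstance(child, chainer.Chain):
            copy_model(child, dst_child)  # recurse into nested chains
        elif isinstance(child, chainer.Link):
            match = all(
                s[0] == d[0] and s[1].data.shape == d[1].data.shape
                for s, d in zip(child.namedparams(), dst_child.namedparams()))
            if match:
                for s, d in zip(child.namedparams(), dst_child.namedparams()):
                    d[1].data = s[1].data  # copy W/b in place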
def main():
    parser = argparse.ArgumentParser(description='Chainer example: CIFAR-10')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))

    logger.info("load VGG model")
    vgg = VGGNet()
    serializers.load_hdf5(VGG_MODEL_PATH, vgg)

    logger.info("copy vgg model")
    copy_model(vgg, model)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Set up an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the CIFAR-10 dataset
    train, test = load_cifar10(os.path.join(os.path.dirname(__file__),
                                            '../data/cifar-10-batches-py'))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at the end of training (every args.epoch epochs)
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout.
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
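
# load_cifar10() used in main() is not shown in this excerpt. A minimal sketch,
# assuming the standard "cifar-10-batches-py" pickle layout (data_batch_1..5 and
# test_batch, each holding a 10000x3072 uint8 'data' array and a 'labels' list);
# the scaling to [0, 1] floats is an assumption, not taken from the repository.
import os

import numpy as np
from six.moves import cPickle as pickle
from chainer.datasets import TupleDataset


def load_cifar10(root):
    def load_batch(name):
        with open(os.path.join(root, name), 'rb') as f:
            d = pickle.load(f)  # on Python 3 use pickle.load(f, encoding='latin1')
        x = d['data'].reshape(-1, 3, 32, 32).astype(np.float32) / 255.0
        y = np.asarray(d['labels'], dtype=np.int32)
        return x, y

    xs, ys = zip(*[load_batch('data_batch_{}'.format(i)) for i in range(1, 6)])
    train = TupleDataset(np.concatenate(xs), np.concatenate(ys))
    test = TupleDataset(*load_batch('test_batch'))
    return train, test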
if not os.path.exists(CAFFEMODEL_PATH):
    import urllib
    print "Downloading {}".format(caffemodel_url.split("/")[-1])
    urllib.urlretrieve(caffemodel_url, CAFFEMODEL_PATH)

    print "Converting {} to {}".format(caffemodel_url.split("/")[-1],
                                       PICKLE_PATH.split("/")[-1])
    import chainer.links.caffe
    model = chainer.links.caffe.caffe_function.CaffeFunction(CAFFEMODEL_PATH)
    pickle.dump(model, open(PICKLE_PATH, "wb"))

print "Loading {}".format(PICKLE_PATH.split("/")[-1])
model = pickle.load(open(PICKLE_PATH, "rb"))

# CNN network
print "Creating CNN"
extractor = FeatureExtractor(MEAN_PATH).to_cpu()
copy_model(model, extractor)


def load_image(filename, color=True):
    img = skimage.img_as_float(
        skimage.io.imread(filename, as_grey=not color)).astype(np.float32)
    if img.ndim == 2:
        img = img[:, :, np.newaxis]
        if color:
            img = np.tile(img, (1, 1, 3))
    elif img.shape[2] == 4:
        img = img[:, :, :3]
    return img


img_list = [load_image(f) for f in args.input_image]

for feat in extractor(img_list).tolist():
    with open(args.output, mode='a') as fh:
        fh.write(args.c + " ")
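
# The write loop above appears truncated in this excerpt: it opens args.output in
# append mode and writes only the class label from args.c. One plausible way to
# finish it, writing each feature vector on its own line in a libsvm-style
# "index:value" format (the output format is an assumption, not confirmed by the
# source):
for feat in extractor(img_list).tolist():
    with open(args.output, mode='a') as fh:
        fh.write(args.c + " ")
        fh.write(" ".join("{}:{}".format(i + 1, v) for i, v in enumerate(feat)))
        fh.write("\n")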
# but it works on Ubuntu 32/64-bit, so please do the conversion on Ubuntu (e.g. from a live USB)
model = chainer.links.caffe.caffe_function.CaffeFunction(CAFFEMODEL_PATH)
# protocol=2 is very important:
# it enables pickle.load(file) even on 32-bit Windows.
pickle.dump(model, open(PICKLE_PATH, "wb"), protocol=2)

print "Loading {}".format(PICKLE_PATH.split(os.sep)[-1])
ref_model = pickle.load(open(PICKLE_PATH, "rb"))

# CNN network
print "Creating CNN"
data_dir = os.path.join(dir_name, 'data')
dir_list = [os.path.join(data_dir, d) for d in os.listdir(data_dir)]
dir_list = [d for d in dir_list if os.path.isdir(d)]
my_model = MyCNN(MEAN_PATH, len(dir_list)).to_cpu()
copy_model(ref_model, my_model)

# Training parameters
LEARNING_RATE = 0.01
# BATCH_SIZE should be bigger than len(dir_list); otherwise the CNN will diverge. This is very important.
BATCH_SIZE = 10
EPOCHS = 100
DECAY_FACTOR = 0.97

optimizer = chainer.optimizers.SGD(LEARNING_RATE)
optimizer.setup(my_model)

# search for train data and test data
img_list_train = []
cls_list_train = []
img_list_test = []
cls_list_test = []
for cls, d in enumerate(dir_list):
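
# The excerpt stops inside the loop that gathers training/test file lists, and the
# training loop that would use EPOCHS and DECAY_FACTOR is not shown. As a separate
# sketch (the loop structure is an assumption, not the repository's code), an
# exponential learning-rate decay with the plain SGD optimizer could look like this:
for epoch in range(EPOCHS):
    # ... iterate over mini-batches of size BATCH_SIZE and call
    # optimizer.update(...) here ...
    optimizer.lr *= DECAY_FACTOR  # decay the learning rate once per epoch
    print "epoch {}: lr = {}".format(epoch + 1, optimizer.lr)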