def wflw_data_iterator(data_dir=None, dataset_mode="encoder_ref", mode="train",
                       use_reference=False, batch_size=1, shuffle=True, rng=None,
                       with_memory_cache=False, with_file_cache=False, transform=None):
    if use_reference:
        logger.info(
            'WFLW Dataset for Encoder using reference .npz file is created.')
        return data_iterator(
            WFLWDataEncoderRefSource(data_dir, shuffle=shuffle, rng=rng,
                                     transform=transform, mode=mode),
            batch_size, rng, with_memory_cache, with_file_cache)
    else:
        logger.info('WFLW Dataset for Encoder is created.')
        return data_iterator(
            WFLWDataEncoderSource(data_dir, shuffle=shuffle, rng=rng,
                                  transform=transform, mode=mode),
            batch_size, rng, with_memory_cache, with_file_cache)
def main():
    # Get settings
    args = get_args()

    # Set context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load data
    if args.dataset != "omniglot":
        print("\nUse omniglot dataset\n")
        exit()
    dataset_path = args.dataset_root + "/omniglot/data/"
    if not os.path.exists(dataset_path):
        print("\nSet dataset path with --dataset_root option\n")
        exit()
    train_data, valid_data, test_data = load_omniglot(dataset_path)

    if args.train_or_test == "train":
        meta_train(args, train_data, valid_data, test_data)
    elif args.train_or_test == "test":
        meta_test(args, test_data)
def prepare_dataloader(dataset_path, datatype_list=['train', 'val', 'test'],
                       batch_size=8, img_size=(228, 304)):
    assert all([dtype in ['train', 'val', 'test'] for dtype in datatype_list])
    data_dic = {'train': None, 'val': None, 'test': None}
    if 'train' in datatype_list:
        data_dic['train'] = create_data_iterator(dataset_path, data_type='train',
                                                 image_shape=img_size,
                                                 batch_size=batch_size)
    if 'val' in datatype_list:
        data_dic['val'] = create_data_iterator(dataset_path, data_type='val',
                                               image_shape=img_size,
                                               batch_size=1, shuffle=False)
    if 'test' in datatype_list:
        data_dic['test'] = create_data_iterator(dataset_path, data_type='test',
                                                image_shape=img_size,
                                                batch_size=1, shuffle=False)

    # Dataset size information
    dataset_info = "> Dataset size: "
    for dtype in datatype_list:
        dataset_info += "[{}] {} ".format(dtype, data_dic[dtype]['size'])
    logger.info(dataset_info)

    return data_dic
def data_iterator_fewshot(img_path, batch_size, imsize=(256, 256),
                          num_samples=1000, shuffle=True, rng=None):
    imgs = glob.glob("{}/**/*.jpg".format(img_path), recursive=True)
    if num_samples == -1:
        num_samples = len(imgs)
    else:
        logger.info(
            "Num. of data ({}) is used for debugging".format(num_samples))

    def load_func(i):
        img = imread(imgs[i], num_channels=3)
        img = imresize(img, imsize).transpose(2, 0, 1)
        img = img / 255. * 2. - 1.
        return img, i

    return data_iterator_simple(load_func, num_samples, batch_size,
                                shuffle=shuffle, rng=rng,
                                with_file_cache=False, with_memory_cache=False)
def compute_frechet_inception_distance(z, y_fake, x_fake, x, y, args, di=None):
    h_fakes = []
    h_reals = []
    for i in range(args.max_iter):
        logger.info("Compute at {}-th batch".format(i))
        # Generate
        z.d = np.random.randn(args.batch_size, args.latent)
        y_fake.d = generate_random_class(args.n_classes, args.batch_size)
        x_fake.forward(clear_buffer=True)

        # Predict for fake
        x_fake_d = x_fake.d.copy()
        x_fake_d = preprocess(
            x_fake_d, (args.image_size, args.image_size), args.nnp_preprocess)
        x.d = x_fake_d
        y.forward(clear_buffer=True)
        h_fakes.append(y.d.copy().squeeze())

        # Predict for real
        x_d, _ = di.next()
        x_d = preprocess(
            x_d, (args.image_size, args.image_size), args.nnp_preprocess)
        x.d = x_d
        y.forward(clear_buffer=True)
        h_reals.append(y.d.copy().squeeze())
    h_fakes = np.concatenate(h_fakes)
    h_reals = np.concatenate(h_reals)

    # FID score
    ave_h_real = np.mean(h_reals, axis=0)
    ave_h_fake = np.mean(h_fakes, axis=0)
    cov_h_real = np.cov(h_reals, rowvar=False)
    cov_h_fake = np.cov(h_fakes, rowvar=False)
    score = np.sum((ave_h_real - ave_h_fake) ** 2) \
        + np.trace(cov_h_real + cov_h_fake
                   - 2.0 * sqrtm(np.dot(cov_h_real, cov_h_fake)))
    return score
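# Editor's sketch: a minimal, standalone illustration of the FID statistic computed
# at the end of compute_frechet_inception_distance above, i.e.
# ||mu_r - mu_f||^2 + Tr(C_r + C_f - 2 * sqrtm(C_r @ C_f)).
# The feature arrays below are dummy data with illustrative shapes, not outputs of
# the code above.
import numpy as np
from scipy.linalg import sqrtm

h_real = np.random.randn(256, 64)   # placeholder "real" features
h_fake = np.random.randn(256, 64)   # placeholder "fake" features

mu_r, mu_f = h_real.mean(axis=0), h_fake.mean(axis=0)
cov_r = np.cov(h_real, rowvar=False)
cov_f = np.cov(h_fake, rowvar=False)

covmean = sqrtm(cov_r.dot(cov_f))
if np.iscomplexobj(covmean):
    covmean = covmean.real  # discard tiny imaginary parts caused by numerical error
fid = np.sum((mu_r - mu_f) ** 2) + np.trace(cov_r + cov_f - 2.0 * covmean)
print("FID on dummy features:", fid)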
def __init__(self, celeb_name=None, data_dir=None, mode="all", shuffle=True,
             rng=None, resize_size=(64, 64), line_thickness=3,
             gaussian_kernel=(5, 5), gaussian_sigma=3):
    self.resize_size = resize_size
    self.line_thickness = line_thickness
    self.gaussian_kernel = gaussian_kernel
    self.gaussian_sigma = gaussian_sigma

    celeb_name_list = ['Donald_Trump', 'Emmanuel_Macron',
                       'Jack_Ma', 'Kathleen', 'Theresa_May']
    assert celeb_name in celeb_name_list

    self.data_dir = data_dir
    self._shuffle = shuffle
    self.mode = mode
    self.celeb_name = celeb_name
    self.imgs_root_path = os.path.join(self.data_dir, self.celeb_name)
    if not os.path.exists(self.imgs_root_path):
        logger.error('{} does not exist.'.format(self.imgs_root_path))

    # use an annotation file to know how many images are needed.
    self.ant, self._size = self.get_ant_and_size(
        self.imgs_root_path, self.mode)
    logger.info(f'the number of images for {self.mode}: {self._size}')

    self._variables = list()
    self._ref_files = dict()
    self.reset()
def _assign_variable_and_load_ref(self, data):
    assert data in ('image', 'heatmap', 'resized_heatmap')
    self._variables.append(data)
    _ref_path = os.path.join(self.ref_dir, f'{self.celeb_name}_{data}.npz')
    assert os.path.exists(_ref_path), f"{_ref_path} does not exist."
    self._ref_files[data] = np.load(_ref_path)
    logger.info(f'loaded {_ref_path}.')
def compute_metric(gen, batch_size, img_num, latent, hyper_sphere):
    num_batches = img_num // batch_size

    img1 = []
    for k in range(num_batches):
        logger.info("generating at iter={} / {}".format(k, num_batches))
        z_data = np.random.randn(batch_size, latent, 1, 1)
        z = nn.Variable.from_numpy_array(z_data)
        z = pixel_wise_feature_vector_normalization(z) if hyper_sphere else z
        y = gen(z, test=True)
        img = y.d.transpose(0, 2, 3, 1)
        img1.append(img)
    img1 = np.concatenate(img1, axis=0)

    img2 = []
    for k in range(num_batches):
        logger.info("generating at iter={} / {}".format(k, num_batches))
        z_data = np.random.randn(batch_size, latent, 1, 1)
        z = nn.Variable.from_numpy_array(z_data)
        z = pixel_wise_feature_vector_normalization(z) if hyper_sphere else z
        y = gen(z, test=True)
        img = y.d.transpose(0, 2, 3, 1)
        img2.append(img)
    img2 = np.concatenate(img2, axis=0)

    img1 = np.uint8((img1 + 1.) / 2. * 255)
    img2 = np.uint8((img2 + 1.) / 2. * 255)
    return msssim(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5,
                  k1=0.01, k2=0.03, weights=None)
def data_iterator(img_path, batch_size, num_samples, dataset_name,
                  shuffle=True, rng=None):
    imgs = glob.glob("{}/*.png".format(img_path))
    if num_samples == -1:
        num_samples = len(imgs)
    else:
        logger.info(
            "Num. of data ({}) is used for debugging".format(num_samples))

    def load_func(i):
        img = imread(imgs[i], num_channels=3)
        img = img.transpose(2, 0, 1) / 255.
        img = img * 2. - 1.
        return img, None

    return data_iterator_simple(load_func, num_samples, batch_size,
                                shuffle=shuffle, rng=rng,
                                with_memory_cache=False, with_file_cache=False)
def __init__(self, param_path=None):
    self.h5_file = param_path
    if not os.path.exists(self.h5_file):
        print("Pretrained VGG19 parameters not found. Downloading. Please wait...")
        url = "https://nnabla.org/pretrained-models/nnabla-examples/GANs/first-order-model/vgg19.h5"
        from nnabla.utils.data_source_loader import download
        download(url, url.split('/')[-1], False)

    with nn.parameter_scope("VGG19"):
        logger.info('loading vgg19 parameters...')
        nn.load_parameters(self.h5_file)
        # drop all the affine layers.
        drop_layers = ['classifier/0/affine',
                       'classifier/3/affine',
                       'classifier/6/affine']
        for layers in drop_layers:
            nn.parameter.pop_parameter((layers + '/W'))
            nn.parameter.pop_parameter((layers + '/b'))

    self.mean = nn.Variable.from_numpy_array(
        np.asarray([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1))
    self.std = nn.Variable.from_numpy_array(
        np.asarray([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1))
def unpool_block(x, n=0, k=(4, 4), s=(2, 2), p=(1, 1), leaky=False,
                 unpool=False, init_method=None):
    if not unpool:
        logger.info("Deconvolution was used.")
        x = deconvolution(x, n=n, kernel=k, stride=s, pad=p,
                          init_method=init_method)
    else:
        logger.info("Unpooling was used.")
        x = F.unpooling(x, kernel=(2, 2))
        x = convolution(x, n, kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                        init_method=init_method)
    x = instance_normalization(x, fix_parameters=True)
    x = F.leaky_relu(x, alpha=0.2) if leaky else F.relu(x)
    return x
def svd_affine(x, n_outputs, cr):
    W = get_parameter('affine/W')
    if W is None:
        UV = None
    else:
        UV = W.d
    b = get_parameter('affine/b')

    # compute rank (size of intermediate activations)
    # to obtain the desired reduction
    inshape = np.prod(x.shape[1:])
    outshape = np.prod(n_outputs)
    rank = int(
        np.floor((1 - cr) * inshape * outshape / (inshape + outshape)))

    # Initialize bias to existing b in affine if it exists
    if b is not None:
        b_new = get_parameter_or_create('svd_affine/b', b.d.shape,
                                        need_grad=b.need_grad)
        b_new.d = b.d.copy()

    logger.info(
        "SVD affine created: input_shape = {}; output_shape = {}; "
        "compression = {}; rank = {};".format(inshape, outshape, cr, rank))

    # create svd_affine initialized from W in the current context if it exists
    return PF.svd_affine(x, n_outputs, rank, uv_init=UV)
def svd_convolution(x, n_outputs, kernel, pad, with_bias, cr):
    W = get_parameter('conv/W')
    if W is None:
        UV = None
    else:
        UV = W.d
    b = get_parameter('conv/b')

    # compute rank (size of intermediate activations)
    # to obtain the desired reduction
    inmaps = x.shape[1]
    outmaps = n_outputs
    Ksize = np.prod(kernel)
    rank = int(
        np.floor((1 - cr) * inmaps * outmaps * Ksize /
                 (inmaps * Ksize + inmaps * outmaps)))

    # Initialize bias to existing b in convolution if it exists
    if b is not None:
        b_new = get_parameter_or_create('svd_conv/b', b.d.shape,
                                        need_grad=b.need_grad)
        b_new.d = b.d.copy()

    logger.info(
        "SVD convolution created: inmaps = {}; outmaps = {}; "
        "compression = {}; rank = {};".format(inmaps, outmaps, cr, rank))

    # create svd_convolution initialized from W in the current context if it exists
    return PF.svd_convolution(x, n_outputs, kernel=kernel, r=rank, pad=pad,
                              with_bias=with_bias, uv_init=UV)
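# Editor's sketch: a quick sanity check of how the rank in svd_affine above follows
# from the compression ratio `cr`. The factorized layer stores roughly
# rank * (inshape + outshape) parameters instead of inshape * outshape, so the chosen
# rank keeps about a (1 - cr) fraction of them. The sizes below are made up for
# illustration; they are not taken from the code above.
import numpy as np

inshape, outshape, cr = 4096, 1000, 0.75
rank = int(np.floor((1 - cr) * inshape * outshape / (inshape + outshape)))
original_params = inshape * outshape
factorized_params = rank * (inshape + outshape)
print(rank, factorized_params / original_params)  # ratio is close to 1 - cr = 0.25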
def SimpleDataIterator(batch_size, root_dir, image_size, comm=None,
                       shuffle=True, rng=None, on_memory=True,
                       fix_aspect_ratio=True):
    # get all files
    paths = [os.path.join(root_dir, x) for x in os.listdir(root_dir)
             if os.path.splitext(x)[-1] in SUPPORT_IMG_EXTS]
    if len(paths) == 0:
        raise ValueError(
            f"[SimpleDataIterator] No image files are found in '{root_dir}'. "
            "Please make sure that you specify the correct directory path.")

    ds = SimpleDatasource(img_paths=paths, img_size=image_size, rng=rng,
                          on_memory=on_memory, fix_aspect_ratio=fix_aspect_ratio)

    logger.info(f"Initialized data iterator. {ds.size} images are found.")

    ds = _get_sliced_data_source(ds, comm, shuffle)

    return data_iterator(ds, batch_size, with_memory_cache=False,
                         use_thread=True, with_file_cache=False)
def _load_dataset(self, dataset_path, batch_size=100, shuffle=False):
    if os.path.isfile(dataset_path):
        logger.info("Load a dataset from {}.".format(dataset_path))
        return data_iterator_csv_dataset(dataset_path, batch_size,
                                         shuffle=shuffle)
    return None
def main():
    args = get_args()
    nn.load_parameters(args.input)
    params = nn.get_parameters(grad_only=False)
    processed = False

    # Convert memory layout
    layout = get_memory_layout(params)
    if args.memory_layout is None:
        pass
    elif args.memory_layout != layout:
        logger.info(f'Converting memory layout to {args.memory_layout}.')
        convert_memory_layout(params, args.memory_layout)
        processed = True
    else:
        logger.info('No need to convert memory layout.')

    if args.force_3_channels:
        ret = force_3_channels(params, args.memory_layout)
        if ret:
            logger.info('Converted first conv to 3-channel input.')
        processed |= ret

    if not processed:
        logger.info(
            'No change has been made for the input. Not saving a new parameter file.')
        return

    logger.info(f'Save a new parameter file at {args.output}')
    for key, param in params.items():
        nn.parameter.set_parameter(key, param)
    nn.save_parameters(args.output)
def data_iterator_celeba(img_path, batch_size, imsize=(128, 128),
                         num_samples=100, shuffle=True, rng=None):
    imgs = glob.glob("{}/*.png".format(img_path))
    if num_samples == -1:
        num_samples = len(imgs)
    else:
        logger.info(
            "Num. of data ({}) is used for debugging".format(num_samples))

    def load_func(i):
        cx = 89
        cy = 121
        img = imread(imgs[i])
        img = img[cy - 64:cy + 64, cx - 64:cx + 64, :].transpose(2, 0, 1) / 255.
        img = img * 2. - 1.
        return img, None

    return data_iterator_simple(load_func, num_samples, batch_size,
                                shuffle=shuffle, rng=rng, with_file_cache=False)
def __init__(self, fpath, knn=50, test_rate=0.25, test=False, shuffle=True, rng=None):
    super(PointCloudDataSource, self).__init__(shuffle=shuffle)
    self.knn = knn
    self.test_rate = test_rate
    self.rng = np.random.RandomState(313) if rng is None else rng

    # Split info
    pcd = self._read_dataset(fpath)
    total_size = len(pcd.points)
    test_size = int(total_size * test_rate)
    indices = self.rng.permutation(total_size)
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]
    indices = test_indices if test else train_indices
    self._size = test_size if test else total_size - test_size

    # Points
    points = np.asarray(pcd.points)
    self._points = self._preprocess(points)[indices]

    # Normals
    normals = np.asarray(pcd.normals)
    self._normals = normals[indices] if self.has_normals(normals) else normals

    # Radius
    self._radius = self._compute_radius(self._points, self.knn)

    self._variables = ('points', 'normals', 'radius')
    self.reset()
    logger.info("Data size = {}".format(self._size))
def main():
    args = get_args()

    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
            and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. "
            "Use --overwrite_output_dir to overwrite it.".format(args.output_dir))

    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))

    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    args.num_labels = len(label_list)

    tokenizer_class = BertTokenizer
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None)

    logger.info("Training/evaluation parameters %s", args)
    train_dataset = BERTDataSource(args, tokenizer, shuffle=True)
    train(args, train_dataset, tokenizer)
def main(imagenet_base_dir, imagenet64_base_dir):
    from neu.misc import init_nnabla
    comm = init_nnabla(ext_name="cpu", device_id=0, type_config="float")

    if imagenet_base_dir is not None and os.path.exists(imagenet_base_dir):
        logger.info("Test imagenet data iterator.")
        di = ImagenetDataIterator(2, imagenet_base_dir, comm=comm)
        test_data_iterator(di, "./tmp/imagene", comm)
def load_parameters(monitor_path):
    '''Load the latest parameter file saved under `monitor_path`.'''
    import glob
    param_files = sorted(glob.glob(os.path.join(monitor_path, 'params_*.h5')))
    # use latest
    logger.info('Loading `%s`.' % param_files[-1])
    _ = nn.load_parameters(param_files[-1])
def data_iterator(img_path, batch_size, imsize=(128, 128), num_samples=100,
                  shuffle=True, rng=None, dataset_name="CelebA"):
    if dataset_name == "CelebA":
        di = data_iterator_celeba(img_path, batch_size, imsize=imsize,
                                  num_samples=num_samples,
                                  shuffle=shuffle, rng=rng)
    else:
        logger.info("Currently, only CelebA is supported.")
        sys.exit(0)
    return di
def get_model_url_base():
    '''
    Returns a root folder for models.
    '''
    url_base = get_model_url_base_from_env()
    if url_base is not None:
        logger.info('NNABLA_MODELS_URL_BASE is set as {}.'.format(url_base))
    else:
        url_base = 'https://nnabla.org/pretrained-models/nnp_models/'
    return url_base
def download_provided_file(url, filepath=None, verbose=True):
    if not filepath:
        filepath = os.path.basename(url)
    if not os.path.exists(filepath):
        if verbose:
            logger.info(f"{filepath} not found. Downloading...")
        download(url, filepath, False)
        if verbose:
            logger.info(f"Downloaded {filepath}.")
    return
def main():
    '''Reconstruct digits while tweaking each capsule dimension and save the results.'''
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Load parameters
    load_parameters(args.monitor_path)

    # Build model
    image, label, noise, recon = model_tweak_digitscaps(10)

    # Get one image per class for digits 0 to 9.
    images, labels = load_mnist(train=False)
    batch_images = []
    batch_labels = []
    ind = 123
    for i in range(10):
        class_images = images[labels.flat == i]
        img = class_images[min(class_images.shape[0] - 1, ind)]
        batch_images.append(img)
        batch_labels.append(i)
    batch_images = np.stack(batch_images, axis=0)
    batch_labels = np.array(batch_labels).reshape(-1, 1)

    # Generate reconstructed images with tweaking capsules
    image.d = batch_images
    label.d = batch_labels
    results = []
    for d in range(noise.shape[2]):  # 16
        for r in np.arange(-0.25, 0.30, 0.05):
            batch_noise = np.zeros(noise.shape)
            batch_noise[..., d] += r
            noise.d = batch_noise
            recon.forward(clear_buffer=True)
            results.append(recon.d.copy())
    # results shape: [16, 11, 10, 1, 28, 28]
    results = np.array(results).reshape((noise.shape[2], -1) + image.shape)

    # Draw tweaked images
    from skimage.io import imsave
    for i in range(10):
        adigit = (results[:, :, i] * 255).astype(np.uint8)
        drawn = draw_images(adigit)
        imsave(os.path.join(args.monitor_path, 'tweak_digit_%d.png' % i), drawn)
def _compute_radius(self, points, knn):
    if knn < 0:
        logger.info("Radius is not computed.")
        return

    # KDTree
    logger.info(
        "Constructing KDTree and querying {}-nearest neighbors".format(self.knn))
    tree = spatial.cKDTree(points, compact_nodes=True)
    # exclude self by adding 1
    dists, indices = tree.query(points, k=knn + 1)
    return dists[:, -1].reshape(dists.shape[0], 1)
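# Editor's illustration (toy data, not from the dataset above) of why k = knn + 1 is
# queried in _compute_radius: each point's nearest neighbor in its own tree is itself
# at distance 0, so the last column of `dists` is the distance to the knn-th true
# neighbor.
import numpy as np
from scipy import spatial

toy_points = np.random.rand(100, 3)
toy_tree = spatial.cKDTree(toy_points)
dists, _ = toy_tree.query(toy_points, k=4)    # self + 3 neighbors
assert np.allclose(dists[:, 0], 0.0)          # first column is the point itself
toy_radius = dists[:, -1].reshape(-1, 1)      # distance to the 3rd nearest neighbor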
def create_pcd_dataset_from_mesh(fpath):
    mesh = utils.read_mesh(fpath)
    pcd = mesh.sample_points_poisson_disk(len(mesh.vertices),
                                          use_triangle_normal=True, seed=412)
    dpath = "/".join(fpath.split("/")[:-1])
    fname = fpath.split("/")[-1]
    fname = "{}_pcd.ply".format(os.path.splitext(fname)[0])
    fpath = os.path.join(dpath, fname)
    logger.info("PointCloud data ({}) is being created.".format(fpath))
    utils.write_pcd(fpath, pcd)
def main():
    args = get_args()
    rng = np.random.RandomState(1223)

    # Get context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    miou = validate(args)
def _get_variable_or_create(self, v, callback, current_scope):
    if v.variable is not None:
        return v.variable

    v = callback._apply_generate_variable(v)

    if v.variable is not None:
        return v.variable

    pvar = v.proto
    name = pvar.name
    shape = list(pvar.shape.dim)
    if shape[0] < 0:
        shape[0] = self.batch_size
    shape = tuple(shape)
    assert np.all(np.array(shape) > 0), \
        "Shape must be positive. Given {}.".format(shape)

    if pvar.type != 'Parameter':
        # Create a new variable and return it.
        var = nn.Variable(shape)
        v.variable = var
        var.name = name
        return var

    # Try to load the parameter from the .nnp file.
    callback.verbose('Loading parameter `{}` from .nnp.'.format(name))
    try:
        param = get_parameter(name)
        if param is None:
            logger.info(
                'Parameter `{}` is not found. Initializing.'.format(name))
            tmp = _create_variable(pvar, name, shape, self.rng)
            param = tmp.variable_instance
            set_parameter(name, param)
        # Always copy param to the current scope even if it already exists.
        with nn.parameter_scope('', current_scope):
            set_parameter(name, param)
    except:
        import sys
        import traceback
        raise ValueError(
            'An error occurred during creation of a variable `{}` as a'
            ' parameter variable. The error was:\n----\n{}\n----\n'
            'The registered parameters were {}'.format(
                name, traceback.format_exc(),
                '\n'.join(list(nn.get_parameters(grad_only=False).keys()))))

    assert shape == param.shape
    param = param.get_unlinked_variable(need_grad=v.need_grad)
    v.variable = param
    param.name = name
    return param
def save_args(args):
    from nnabla import logger
    import os

    if not os.path.exists(args.monitor_path):
        os.makedirs(args.monitor_path)

    path = "{}/Arguments.txt".format(args.monitor_path)
    logger.info("Arguments are saved to {}.".format(path))
    with open(path, "w") as fp:
        for k, v in sorted(vars(args).items()):
            logger.info("{}={}".format(k, v))
            fp.write("{}={}\n".format(k, v))
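# Editor's sketch of how save_args above might be used. The argument names below are
# hypothetical and only illustrate the "key=value" lines written to
# <monitor_path>/Arguments.txt.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--monitor_path", type=str, default="tmp.monitor")  # hypothetical
parser.add_argument("--batch_size", type=int, default=64)               # hypothetical
example_args = parser.parse_args([])
save_args(example_args)  # logs each argument and writes them to tmp.monitor/Arguments.txt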
def main():
    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for word embedding function
    # - f_embed : word index x to get y, the n_dim vector
    # -- for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: (word, context) pairs; neg: negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
def train(): """ Main script. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Dataset # We use Tiny ImageNet from Stanford CS231N class. # https://tiny-imagenet.herokuapp.com/ # Tiny ImageNet consists of 200 categories, each category has 500 images # in training set. The image size is 64x64. To adapt ResNet into 64x64 # image inputs, the input image size of ResNet is set as 56x56, and # the stride in the first conv and the first max pooling are removed. data = data_iterator_tiny_imagenet(args.batch_size, 'train') vdata = data_iterator_tiny_imagenet(args.batch_size, 'val') num_classes = 200 tiny = True # TODO: Switch ILSVRC2012 dataset and TinyImageNet. t_model = get_model( args, num_classes, test=False, tiny=tiny) t_model.pred.persistent = True # Not clearing buffer of pred in backward v_model = get_model( args, num_classes, test=True, tiny=tiny) v_model.pred.persistent = True # Not clearing buffer of pred in forward # Create Solver. solver = S.Momentum(args.learning_rate, 0.9) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10) monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10) # Training loop. for i in range(args.max_iter): # Save parameters if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'param_%06d.h5' % i)) # Validation if i % args.val_interval == 0: # Clear all intermediate memory to save memory. # t_model.loss.clear_recursive() l = 0.0 e = 0.0 for j in range(args.val_iter): images, labels = vdata.next() v_model.image.d = images v_model.label.d = labels v_model.image.data.cast(np.uint8, ctx) v_model.label.data.cast(np.int32, ctx) v_model.loss.forward(clear_buffer=True) l += v_model.loss.d e += categorical_error(v_model.pred.d, v_model.label.d) monitor_vloss.add(i, l / args.val_iter) monitor_verr.add(i, e / args.val_iter) # Clear all intermediate memory to save memory. # v_model.loss.clear_recursive() # Training l = 0.0 e = 0.0 solver.zero_grad() # Gradient accumulation loop for j in range(args.accum_grad): images, labels = data.next() t_model.image.d = images t_model.label.d = labels t_model.image.data.cast(np.uint8, ctx) t_model.label.data.cast(np.int32, ctx) t_model.loss.forward(clear_no_need_grad=True) t_model.loss.backward(clear_buffer=True) # Accumulating gradients l += t_model.loss.d e += categorical_error(t_model.pred.d, t_model.label.d) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, l / args.accum_grad) monitor_err.add(i, e / args.accum_grad) monitor_time.add(i) # Learning rate decay at scheduled iter if i in args.learning_rate_decay_at: solver.set_learning_rate(solver.learning_rate() * 0.1) nn.save_parameters(os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean(F.sigmoid_cross_entropy( pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.unlinked() pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean(F.sigmoid_cross_entropy( pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries( "Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile( "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.) data = data_iterator_mnist(args.batch_size, True) # Training loop. for i in range(args.max_iter): if i % args.model_save_interval == 0: with nn.parameter_scope("gen"): nn.save_parameters(os.path.join( args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters(os.path.join( args.model_save_path, "discriminator_param_%06d.h5" % i)) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) nnp = os.path.join( args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [ {'name': 'Generator', 'batch_size': args.batch_size, 'outputs': {'G': fake}, 'names': {'z': z}}, {'name': 'Discriminator', 'batch_size': args.batch_size, 'outputs': {'D': pred_real}, 'names': {'x': x}}], 'executors': [ {'name': 'Generator', 'network': 'Generator', 'data': ['z'], 'output': ['G']}, {'name': 'Discriminator', 'network': 'Discriminator', 'data': ['x'], 'output': ['D']}]} save.save(nnp, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop on the training graph. * Compute training error * Set parameter gradients zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. mnist_cnn_prediction = mnist_lenet_prediction if args.net == 'resnet': mnist_cnn_prediction = mnist_resnet_prediction # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create prediction graph. pred = mnist_cnn_prediction(image, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create predition graph. vpred = mnist_cnn_prediction(vimage, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) # Initialize DataIterator for MNIST. data = data_iterator_mnist(args.batch_size, True) vdata = data_iterator_mnist(args.batch_size, False) # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) parameter_file = os.path.join( args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter)) nn.save_parameters(parameter_file)
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. margin = 1.0 # Margin for contrastive loss. # TRAIN # Create input variables. image0 = nn.Variable([args.batch_size, 1, 28, 28]) image1 = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size]) # Create predition graph. pred = mnist_lenet_siamese(image0, image1, test=False) # Create loss function. loss = F.mean(contrastive_loss(pred, label, margin)) # TEST # Create input variables. vimage0 = nn.Variable([args.batch_size, 1, 28, 28]) vimage1 = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size]) # Create predition graph. vpred = mnist_lenet_siamese(vimage0, vimage1, test=True) vloss = F.mean(contrastive_loss(vpred, vlabel, margin)) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100) monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10) # Initialize DataIterator for MNIST. rng = np.random.RandomState(313) data = siamese_data_iterator(args.batch_size, True, rng) vdata = siamese_data_iterator(args.batch_size, False, rng) # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage0.d, vimage1.d, vlabel.d = vdata.next() vloss.forward(clear_buffer=True) ve += vloss.d monitor_vloss.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) image0.d, image1.d, label.d = data.next() solver.zero_grad() # Training forward, backward and update loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, loss.d.copy()) monitor_time.add(i) parameter_file = os.path.join( args.model_save_path, 'params_%06d.h5' % args.max_iter) nn.save_parameters(parameter_file) nnp_file = os.path.join( args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batch_size, 'outputs': {'y': vpred}, 'names': {'x0': vimage0, 'x1': vimage1}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x0', 'x1'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [ vimage0, vimage1], vpred, nnp_file)