def get_model(argument):
    model_name = argument['model']
    if model_name == 'demucs':
        from separators.demucs_wrapper import Demucs_separator
        model_param = argument['model_param']
        if model_param is None:
            model_param = 'demucs'
        if isinstance(model_param, str) and model_param.lower() == "none":
            model_param = 'demucs'
        return Demucs_separator(model_param)
    elif model_name == 'spleeter':
        from separators.spleeter_wrapper import Spleeter_separator
        return Spleeter_separator(json_path='4stems-16kHz.json')
    elif model_name == 'x_umx':
        from separators.x_umx_wrapper import X_umx_wrapper
        import nnabla as nn
        from nnabla.ext_utils import get_extension_context
        ctx = get_extension_context('cudnn')
        nn.set_default_context(ctx)
        nn.set_auto_forward(True)
        return X_umx_wrapper()
    elif model_name == 'lasaftnet':
        model_param = argument['model_param']
        from separators.lasaftnet_wrapper import LaSAFT_separator
        if model_param is None:
            model_param = 'lasaft_large_2020'
        return LaSAFT_separator(model_param)
    else:
        raise ModuleNotFoundError("Unknown model name: {}".format(model_name))
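# Hedged usage sketch (not part of the original source): get_model expects a
# dict carrying 'model' and 'model_param' keys, so a call could look like:
#
#   separator = get_model({'model': 'demucs', 'model_param': None})
#   # model_param of None falls back to the default 'demucs' preset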
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save-image-path', type=str,
                        default='facemorph-dataset-1024jpeg',
                        help="Name of the directory to save output images")
    parser.add_argument('--attr-delta-path', type=str,
                        default='stylegan2directions/age.npy',
                        help="Path to .npy file of the attribute variation in the stylegan2 latent space")
    parser.add_argument('--weights-path', type=str, default='./',
                        help="Path to store pretrained stylegan2 parameters")
    parser.add_argument('--face-morph', '--style-mix', action='store_true',
                        default=False,
                        help="Set this flag to generate style-mixing data")
    parser.add_argument('--batch-size', type=int, default=16,
                        help="Batch size of one forward pass of the generator")
    parser.add_argument('--num-images', type=int, default=50000,
                        help="Number of images to generate")
    parser.add_argument('--coeff', type=float, default=0.5,
                        help="Coefficient of propagation in the stylegan2 latent space")
    parser.add_argument('--context', type=str, default="cudnn",
                        help="Extension context; cudnn is recommended")
    args = parser.parse_args()

    assert args.num_images > args.batch_size, \
        'Number of images must be more than the batch size'

    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)
    generate_data(args)
def main():
    args = get_args()
    config = read_yaml(args.config_path)
    nn.set_auto_forward(True)
    comm = init_nnabla(ext_name="cuda", device_id='0', type_config='float')

    if args.save_results_dir != '':
        config.log.save_results_dir = args.save_results_dir

    config.data.color_perturb = args.data_perturb in ('color', 'both')
    config.data.occ_perturb = args.data_perturb in ('occ', 'both')

    if comm is None or comm.rank == 0:
        if config.data.color_perturb:
            print('Applying color perturbation to the dataset')
        if config.data.occ_perturb:
            print('Applying occlusion perturbation to the dataset')
        if not config.data.color_perturb and not config.data.occ_perturb:
            print('No perturbation will be applied to the data')

    train_nerf(config, comm, args.model, args.dataset)
def test_grad_grad_resnet(seed, ctx, auto_forward, inplace, shared):
    nn.clear_parameters()

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Network
    x = nn.Variable.from_numpy_array(rng.randn(b, c, h, w)).apply(need_grad=True)
    y = SmallResNet(x, inplace=inplace, shared=shared)

    # Grad of grad
    dx = nn.grad([y], [x])
    ddx = nn.grad([dx[0]], [x])
    if not auto_forward:
        ddx[0].forward()

    # Backward of grad
    x.grad.zero()
    if not auto_forward:
        dx[0].forward()
    dx[0].backward()

    # Check between results of var.backward and nn.grad
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')
    assert_allclose(x.g, ddx[0].d, atol=1e-6)
def style_mixing(self, test_config, args):
    from nnabla.utils.image_utils import imsave, imresize
    print('Testing style mixing of generation...')
    z1 = F.randn(shape=(args.batch_size_A, test_config['latent_dim']),
                 seed=args.seed_1[0]).data
    z2 = F.randn(shape=(args.batch_size_B, test_config['latent_dim']),
                 seed=args.seed_2[0]).data
    nn.set_auto_forward(True)

    # Generate the grid of style-mixed images
    mix_image_stacks = []
    for i in range(args.batch_size_A):
        image_column = []
        for j in range(args.batch_size_B):
            style_noises = [F.reshape(z1[i], (1, 512)),
                            F.reshape(z2[j], (1, 512))]
            rgb_output = self.generator(
                1, style_noises, test_config['truncation_psi'],
                mixing_layer_index=test_config['mix_after'])
            image = save_generations(rgb_output, None, return_images=True)
            image_column.append(image[0])
        image_column = np.concatenate(image_column, axis=1)
        mix_image_stacks.append(image_column)
    mix_image_stacks = np.concatenate(mix_image_stacks, axis=2)

    # Row of images generated from z1 only
    style_noises = [z1, z1]
    rgb_output = self.generator(args.batch_size_A, style_noises,
                                test_config['truncation_psi'])
    image_A = save_generations(rgb_output, None, return_images=True)
    image_A = np.concatenate(image_A, axis=2)

    # Column of images generated from z2 only
    style_noises = [z2, z2]
    rgb_output = self.generator(args.batch_size_B, style_noises,
                                test_config['truncation_psi'])
    image_B = save_generations(rgb_output, None, return_images=True)
    image_B = np.concatenate(image_B, axis=1)

    # Assemble the full grid with a blank top-left corner
    top_image = 255 * np.ones(rgb_output[0].shape).astype(np.uint8)
    top_image = np.concatenate((top_image, image_A), axis=2)
    grid_image = np.concatenate((image_B, mix_image_stacks), axis=2)
    grid_image = np.concatenate((top_image, grid_image), axis=1)

    filename = os.path.join(self.results_dir, 'style_mix.png')
    imsave(filename,
           imresize(grid_image, (1024, 1024), channel_first=True),
           channel_first=True)
    print(f'Output saved as {filename}')
def main():
    # Args
    args = get_args()

    # Context
    ctx = extension_context(args.context,
                            device_id=args.device_id,
                            type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    # Config
    resolution_list = [4, 8, 16, 32, 64, 128]
    channel_list = [512, 512, 256, 128, 64, 32]
    side = 8

    # Monitor
    monitor = Monitor(args.monitor_path)
    monitor_image_tile = MonitorImageTileWithName("Image Tile", monitor,
                                                  num_images=side**2)

    # Generate tile images
    imgs = []
    for _ in range(side):
        img = generate_images(args.model_load_path, batch_size=side,
                              use_bn=args.use_bn, n_latent=args.latent,
                              hyper_sphere=args.hyper_sphere,
                              last_act=args.last_act,
                              use_wscale=args.not_use_wscale,
                              use_he_backward=args.use_he_backward,
                              resolution_list=resolution_list,
                              channel_list=channel_list)
        imgs.append(img)
    imgs = np.concatenate(imgs, axis=0)
    monitor_image_tile.add("GeneratedImage", imgs)

    # Generate interpolated tile images
    imgs = []
    for _ in range(side):
        img = generate_interpolated_images(
            args.model_load_path, batch_size=side, use_bn=args.use_bn,
            n_latent=args.latent, hyper_sphere=args.hyper_sphere,
            last_act=args.last_act, use_wscale=args.not_use_wscale,
            use_he_backward=args.use_he_backward,
            resolution_list=resolution_list, channel_list=channel_list)
        imgs.append(img)
    imgs = np.concatenate(imgs, axis=0)
    monitor_image_tile.add("GeneratedInterpolatedImage", imgs)
def test_shared_leaf_variable_basic_arithmetics(seed, ctx, auto_forward):
    def add(x, derivative=0):
        if derivative == 0:
            return x + x + x
        if derivative == 1:
            return 3 * np.ones_like(x)
        if derivative == 2:
            return np.zeros_like(x)

    def sub(x, derivative=0):
        if derivative == 0:
            return x - x - x
        if derivative == 1:
            return -1 * np.ones_like(x)
        if derivative == 2:
            return np.zeros_like(x)

    def mul(x, derivative=0):
        if derivative == 0:
            return x * x * x
        if derivative == 1:
            return 3 * x**2
        if derivative == 2:
            return 6 * x

    def div(x, derivative=0):
        if derivative == 0:
            return x / x / x
        if derivative == 1:
            return -x**-2
        if derivative == 2:
            return 2 * x**-3

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)

    for math_type in [add, sub, mul, div]:
        xd = np.random.randn(2, 3) + 0.5
        x = nn.Variable.from_numpy_array(xd).apply(need_grad=True)
        x.grad.zero()
        y = math_type(x)
        # First-order gradient
        dy_dx = nn.grad([y], [x])
        if not auto_forward:
            dy_dx[0].forward()
        assert_allclose(dy_dx[0].d, math_type(xd, 1))
        # Second-order gradient
        dy_dx[0].backward()
        assert_allclose(x.g, math_type(xd, 2))
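# A minimal, self-contained sketch of the pattern exercised above (added for
# illustration, not from the original source; assumes the default CPU context
# and the hypothetical name _sketch_second_order_grad). For y = x * x * x,
# dy/dx = 3x^2 and d2y/dx2 = 6x.
def _sketch_second_order_grad():
    import numpy as np
    import nnabla as nn

    nn.set_auto_forward(True)
    xd = np.random.randn(2, 3) + 0.5
    x = nn.Variable.from_numpy_array(xd).apply(need_grad=True)
    y = x * x * x
    dy_dx = nn.grad([y], [x])      # first-order gradient graph: 3x^2
    assert np.allclose(dy_dx[0].d, 3 * xd**2)
    x.grad.zero()
    dy_dx[0].backward()            # backward through the gradient graph: 6x
    assert np.allclose(x.g, 6 * xd)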
def scope_function():
    # Turn off auto-forward mode
    nn.set_auto_forward(False)

    # Clear all parameters
    nn.clear_parameters()

    # Keep the current context
    ctx = nn.get_current_context()

    yield

    # Restore the context
    nn.set_default_context(ctx)
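# Presumably this yield-style generator is registered as a pytest fixture in
# the original test suite (an assumption; the decorator is not shown in this
# excerpt), e.g.:
#
#   import pytest
#
#   @pytest.fixture(autouse=True)
#   def clean_nnabla_state():
#       yield from scope_function()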
def get_sample_and_feedback(args, data_dict):
    """
    Let the controller predict one architecture and test its performance
    to get feedback. Here the feedback is the validation accuracy, which is
    reused to train the controller.
    """
    skip_weight = args.skip_weight
    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    arc_seq, log_probs, entropys, skip_penaltys = sample_from_controller(args)

    sample_arch = list()
    for arc in arc_seq:
        sample_arch.extend(arc.tolist())

    show_arch(sample_arch)

    sample_entropy = entropys
    sample_log_prob = log_probs

    nn.set_auto_forward(False)
    val_acc = CNN_run(args, sample_arch, data_dict)  # execute evaluation only
    nn.set_auto_forward(True)

    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc  # use the validation accuracy as the reward

    if entropy_weight is not None:
        reward = F.add_scalar(F.mul_scalar(sample_entropy, entropy_weight),
                              reward).d

    sample_log_prob = F.mul_scalar(sample_log_prob, (1 / args.num_candidate))

    if args.use_variance_reduction:
        baseline = 0.0
        # Variance reduction with a moving baseline
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)

    if skip_weight is not None:
        adding_penalty = F.mul_scalar(skip_penaltys, skip_weight)
        loss = F.add2(loss, adding_penalty)

    return loss, val_acc, sample_arch
def get_ppl(self):
    nn.set_auto_forward(True)
    distances = []
    for bs in tqdm(self.batch_sizes):
        if bs == 0:
            continue
        w = self.get_w(bs)

        # Generate output from the generator
        constant_bc = nn.parameter.get_parameter_or_create(
            name="G_synthesis/4x4/Const/const", shape=(1, 512, 4, 4))
        constant_bc = F.broadcast(constant_bc,
                                  (2 * bs,) + constant_bc.shape[1:])
        rgb_output = self.generator.synthesis(w, constant_bc, seed=100)

        # Crop using the face prior
        c = rgb_output.shape[2] // 8
        rgb_output = rgb_output[:, :, c * 3:c * 7, c * 2:c * 6]

        # Downsample to 256x256 by average pooling if needed
        factor = rgb_output.shape[2] // 256
        if factor > 1:
            rgb_output = F.reshape(
                rgb_output,
                (-1, rgb_output.shape[1],
                 rgb_output.shape[2] // factor, factor,
                 rgb_output.shape[3] // factor, factor))
            rgb_output = F.mean(rgb_output, (3, 5))

        # LPIPS distance between consecutive image pairs
        rgb_output_1, rgb_output_2 = rgb_output[::2], rgb_output[1::2]
        dist = self.lpips_distance(
            nn.Variable.from_numpy_array(rgb_output_1.data),
            nn.Variable.from_numpy_array(rgb_output_2.data)) / (self.eps ** 2)
        distances.append(dist.d.squeeze())

    distances = np.concatenate(distances, 0)

    # Reject outliers below the 1st and above the 99th percentile
    lo = np.percentile(distances, 1, interpolation="lower")
    hi = np.percentile(distances, 99, interpolation="higher")
    filtered_dist = np.extract(
        np.logical_and(lo <= distances, distances <= hi), distances)
    print("PPL:", filtered_dist.mean())
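# Summary comment (added; not in the original source): this follows the
# StyleGAN-style Perceptual Path Length recipe. get_w presumably returns
# pairs of latents separated by a small step eps; the LPIPS distance between
# the two synthesized, face-cropped, 256x256 images is divided by eps^2 so the
# value approximates a squared local derivative, and the percentile clipping
# rejects outliers before averaging.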
def demo(opt):
    '''NNabla configuration'''
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    if opt.checkpoint == '':
        print("Please provide a trained model")
        return

    # opt.extension_module = 'cpu'
    if opt.extension_module != 'cpu':
        if opt.mixed_precision:
            ctx = get_extension_context(opt.extension_module,
                                        device_id="0", type_config="half")
        else:
            ctx = get_extension_context(opt.extension_module, device_id="0")
        nn.set_default_context(ctx)

    _, ext = os.path.splitext(opt.checkpoint)
    nn.set_auto_forward(True)

    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    if opt.demo is None:
        print("Please provide an input image/folder.")
        return

    if os.path.isdir(opt.demo):
        image_names = []
        ls = os.listdir(opt.demo)
        for file_name in sorted(ls):
            ext = file_name[file_name.rfind('.') + 1:].lower()
            if ext in image_ext:
                image_names.append(os.path.join(opt.demo, file_name))
    else:
        image_names = [opt.demo]

    for image_name in image_names:
        assert os.path.exists(image_name), "{} not found.".format(image_name)
        ret = detector.run(image_name)
        time_str = ''
        for stat in time_stats:
            time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
        print(time_str)
def get_estimates(self, input_path: str):
    ctx = get_extension_context(self.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    audio, _ = self.audio_adapter.load(input_path,
                                       sample_rate=self.sample_rate)
    if audio.shape[1] > 2:
        warnings.warn('Channel count > 2! '
                      'Only the first two channels will be processed!')
        audio = audio[:, :2]
    if audio.shape[1] == 1:
        print('Received a mono file, so duplicating channels')
        audio = np.repeat(audio, 2, axis=1)

    # Split and separate sources using a moving-window protocol for each
    # chunk of audio. The chunk duration must be lower for machines with
    # low memory.
    chunk_size = self.sample_rate * self.chunk_duration
    if (audio.shape[0] % chunk_size) == 0:
        nchunks = audio.shape[0] // chunk_size
    else:
        nchunks = (audio.shape[0] // chunk_size) + 1

    print('Separating...')
    estimates = {}
    for chunk_idx in trange(nchunks):
        cur_chunk = audio[
            chunk_idx * chunk_size:
            min((chunk_idx + 1) * chunk_size, audio.shape[0]), :]
        cur_estimates = separate(cur_chunk,
                                 model_path=str(self.model_file_path),
                                 niter=self.iterations,
                                 alpha=self.alpha,
                                 softmask=self.softmask,
                                 residual_model=self.residual_model)
        if not estimates:
            estimates = cur_estimates
        else:
            for key in cur_estimates:
                estimates[key] = np.concatenate(
                    (estimates[key], cur_estimates[key]), axis=0)
    return estimates
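# The nchunks computation above is a ceiling division; a compact equivalent
# (illustrative only, not from the original source):
#
#   import math
#   nchunks = math.ceil(audio.shape[0] / chunk_size)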
def separate_into_parts(self, input_path: str, output_path: Path):
    self.download_and_verify()
    ctx = get_extension_context(self.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    audio, _ = self.audio_adapter.load(input_path,
                                       sample_rate=self.sample_rate)
    if audio.shape[1] > 2:
        warnings.warn('Channel count > 2! '
                      'Only the first two channels will be processed!')
        audio = audio[:, :2]
    if audio.shape[1] == 1:
        print('Received a mono file, so duplicating channels')
        audio = np.repeat(audio, 2, axis=1)

    print('Separating...')
    estimates = separate(audio,
                         model_path=str(self.model_file_path),
                         niter=self.iterations,
                         alpha=self.alpha,
                         softmask=self.softmask,
                         residual_model=self.residual_model)
    output_path = Path(output_path)

    # Export all source MP3s in parallel
    pool = Pool()
    tasks = []
    for name, estimate in estimates.items():
        filename = f'{name}.mp3'
        print(f'Exporting {name} MP3...')
        task = pool.apply_async(
            self.audio_adapter.save,
            (os.path.join(output_path, filename), estimate,
             self.sample_rate, 'mp3', self.bitrate))
        tasks.append(task)
    pool.close()
    pool.join()
def test_grad_outputs(seed, ctx, auto_forward, type_grad_outputs):
    from nbla_test_utils import ArrayDiffStats

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    x = nn.Variable.from_numpy_array(rng.randn(b, c, h, w)).apply(need_grad=True)
    y = F.sigmoid(x)

    # Grad outputs
    if type_grad_outputs == int:
        g = rng.randint(-10, 10)
    elif type_grad_outputs == float:
        g = rng.randn()
    elif type_grad_outputs == np.ndarray:
        g = rng.randn(*y.shape)
    elif type_grad_outputs == nn.NdArray:
        g = nn.NdArray.from_numpy_array(rng.randn(*y.shape))

    # Zero-grad, forward, backward on the forward graph
    inputs = [x]
    for inp in inputs:
        inp.grad.fill(0)
    if not auto_forward:
        y.forward()
    y.backward(g)

    # Grad
    inputs = [x]
    outputs = [y]
    grad_outputs = [g]
    grads = nn.grad(outputs, inputs, grad_outputs)
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d, atol=1e-6), \
            str(ArrayDiffStats(inp.g, grad.d))
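# A minimal sketch of nn.grad with an explicit grad_outputs weight (added for
# illustration, not from the original source; assumes the default CPU context
# and the hypothetical name _sketch_grad_outputs). It mirrors the identity
# checked by the test above: nn.grad with grad_outputs equals var.backward(g).
def _sketch_grad_outputs():
    import numpy as np
    import nnabla as nn
    import nnabla.functions as F

    nn.set_auto_forward(True)
    x = nn.Variable.from_numpy_array(np.random.randn(2, 3)).apply(need_grad=True)
    y = F.sigmoid(x)
    g = np.random.randn(*y.shape)    # weight on the output gradient
    grads = nn.grad([y], [x], [g])   # dy/dx scaled by g
    x.grad.zero()
    y.backward(g)                    # the same quantity via var.backward
    assert np.allclose(x.g, grads[0].d, atol=1e-6)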
def test_multiple_objectives(seed, ctx, auto_forward):
    from nbla_test_utils import ArrayDiffStats

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Objective 0
    x0 = nn.Variable.from_numpy_array(rng.randn(b, c, h, w)).apply(need_grad=True)
    y0 = F.sigmoid(x0)
    # Objective 1
    x1 = nn.Variable.from_numpy_array(rng.randn(b, c, h, w)).apply(need_grad=True)
    y1 = F.tanh(x1)

    # Zero-grad, forward, backward on the forward graph
    g0 = nn.NdArray.from_numpy_array(rng.randn(*x0.shape))
    g1 = nn.NdArray.from_numpy_array(rng.randn(*x1.shape))
    z = y0 * nn.Variable(g0.shape).apply(data=g0) \
        + y1 * nn.Variable(g1.shape).apply(data=g1)
    inputs = [x0, x1]
    for inp in inputs:
        inp.grad.fill(0)
    if not auto_forward:
        z.forward()
    z.backward()

    # Grad
    inputs = [x0, x1]
    outputs = [y0, y1]
    grad_outputs = [g0, g1]
    grads = nn.grad(outputs, inputs, grad_outputs)
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d, atol=1e-6), \
            str(ArrayDiffStats(inp.g, grad.d))
def scope_function():
    # Turn off auto-forward mode
    nn.set_auto_forward(False)

    # Clear all parameters
    nn.clear_parameters()

    # Keep the current context
    ctx = nn.get_current_context()

    # Use cached arrays
    nn.prefer_cached_array(True)

    # Turn off re-computation
    nn.set_global_recompute(False)

    yield

    # Restore the context
    nn.set_default_context(ctx)
def demo():
    writer = SummaryWriter()
    nn.set_auto_forward(True)

    for n_iter in range(100):
        demo_scalar(writer, n_iter)

        # Output from the network (dummy image)
        x = nn.Variable.from_numpy_array(np.random.random([32, 3, 64, 64]))

        if n_iter % 10 == 0:
            demo_histogram(writer, n_iter)
            demo_image(writer, x, n_iter)
            demo_text(writer, n_iter)
            demo_pr_curve(writer, n_iter)

    # Export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
def test_resnet_expansion(seed, ctx, auto_forward, flag_grad_outputs):
    from nbla_test_utils import ArrayDiffStats
    nn.clear_parameters()

    # Settings
    nn.set_default_context(ctx)
    nn.set_auto_forward(auto_forward)
    b, c, h, w = 4, 3, 32, 32
    n_cls = 10
    rng = np.random.RandomState(seed)

    # Network
    x = nn.Variable.from_numpy_array(rng.randn(b, c, h, w))
    y = nn.Variable.from_numpy_array(rng.randint(0, n_cls, b).reshape(b, 1))
    p = SmallResNet(x)
    loss = F.mean(F.softmax_cross_entropy(p, y))

    # Zero-grad, forward, backward on the forward graph
    inputs = nn.get_parameters().values()
    for inp in inputs:
        inp.grad.fill(0)
    grad = nn.NdArray.from_numpy_array(np.asarray(rng.randn())) \
        if flag_grad_outputs else 1
    if not auto_forward:
        loss.forward()
    loss.backward(grad)

    # Grad
    grad_outputs = grad if flag_grad_outputs else None
    grads = nn.grad([loss], inputs, [grad_outputs])
    if not auto_forward:
        F.sink(*grads, one_input_grad=1).forward()

    # Check between results of var.backward and nn.grad
    backend = ctx.backend[0].split(":")[0]
    if backend == 'cuda':
        pytest.skip('CUDA Convolution N-D is only supported in CUDNN extension')
    for inp, grad in zip(inputs, grads):
        assert np.allclose(inp.g, grad.d, atol=1e-6), \
            str(ArrayDiffStats(inp.g, grad.d))
def get_sample_and_feedback(args, data_dict):
    """
    Let the controller predict one architecture and test its performance
    to get feedback. Here the feedback is the validation accuracy, which is
    reused to train the controller.
    """
    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    both_archs, log_probs, entropys = sample_from_controller(args)

    sample_entropy = entropys
    sample_log_prob = log_probs

    show_arch(both_archs)

    nn.set_auto_forward(False)
    val_acc = CNN_run(args, both_archs, data_dict)
    nn.set_auto_forward(True)

    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc
    if entropy_weight is not None:
        reward = F.add_scalar(F.mul_scalar(sample_entropy, entropy_weight),
                              reward).d

    sample_log_prob = F.mul_scalar(sample_log_prob, (1 / args.num_candidate))

    if args.use_variance_reduction:
        baseline = 0.0
        # Variance reduction with a moving baseline
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)
    return loss, val_acc, both_archs
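# Summary comment (added; not in the original source): the controller update
# above is the REINFORCE estimator. With a sampled architecture a ~ pi(a),
# reward R (validation accuracy, optionally entropy-regularized), and
# baseline b, the loss is
#
#     loss = -log pi(a) * (R - b)
#
# so gradient descent on this loss performs policy-gradient ascent on the
# expected reward.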
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--classifier-weight-path', type=str,
                        default='bangs/params_001300.h5',
                        help="Path to pretrained classifier parameters")
    parser.add_argument('--weights-path', type=str, default='./',
                        help="Path to store pretrained stylegan2 parameters")
    parser.add_argument('--batch-size', type=int, default=4,
                        help="Batch size of one forward pass of the generator")
    parser.add_argument('--num-images', type=int, default=50000,
                        help="Number of images used to generate the direction")
    parser.add_argument('--context', type=str, default="cudnn",
                        help="Extension context; cudnn is recommended")
    args = parser.parse_args()

    assert args.num_images > args.batch_size, \
        'Number of images must be more than the batch size'

    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    attribute_prediction_model = functools.partial(resnet_prediction,
                                                   nmaps=64, act=F.relu)
    generate_attribute_direction(args, attribute_prediction_model)
def create_static_mix(self, parts, input_path: str, output_path: Path):
    self.download_and_verify()
    ctx = get_extension_context(self.context)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    audio, _ = self.audio_adapter.load(input_path,
                                       sample_rate=self.sample_rate)
    if audio.shape[1] > 2:
        warnings.warn('Channel count > 2! '
                      'Only the first two channels will be processed!')
        audio = audio[:, :2]
    if audio.shape[1] == 1:
        # If we have mono, duplicate the channel,
        # as the input of OpenUnmix is always stereo
        print('Received a mono file, so duplicating channels')
        audio = np.repeat(audio, 2, axis=1)

    print('Separating...')
    estimates = separate(audio,
                         model_path=str(self.model_file_path),
                         niter=self.iterations,
                         alpha=self.alpha,
                         softmask=self.softmask,
                         residual_model=self.residual_model)

    # Sum only the requested stems into a single mix
    final_source = None
    for name, source in estimates.items():
        if not parts[name]:
            continue
        final_source = source if final_source is None else final_source + source

    print('Writing to MP3...')
    self.audio_adapter.save(output_path, final_source, self.sample_rate,
                            'mp3', self.bitrate)
def test_leaf_indexing_access():
    import nnabla.functions as F

    nn.set_auto_forward(False)
    shape_x = (3, 2)
    dx = np.random.rand(*shape_x)
    shape_y = (2, 2)
    dy = np.random.rand(*shape_y)

    x = nn.Variable.from_numpy_array(dx)
    y = nn.Variable.from_numpy_array(dy)
    x[0:2, :] = y
    z = F.identity(x)
    z.forward()
    d1 = x.d.copy()

    nn.set_auto_forward(True)
    x = nn.Variable.from_numpy_array(dx)
    y = nn.Variable.from_numpy_array(dy)
    x[0:2, :] = y
    z2 = F.identity(x)
    d2 = x.d.copy()

    nn.set_auto_forward(False)
    x = nn.Variable.from_numpy_array(dx)
    y = nn.Variable.from_numpy_array(dy)
    x[0:2, :] = y
    z3 = F.identity(x)
    z3.forward()
    d3 = x.d.copy()
    d4 = z3.d.copy()

    assert_allclose(d1, d2)
    assert_allclose(d2, d3)
    assert_allclose(d3, d4)
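# What the test above verifies (summary comment, not in the original source):
# assigning into a slice of a leaf Variable (x[0:2, :] = y) rewires x into a
# small graph, so reading x.d must give the same data whether that graph runs
# eagerly (auto_forward=True) or via an explicit forward() call.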
trainer.validate(epoch)

if comm.rank == 0:
    if (epoch % config['train']['save_param_step_interval'] == 0
            or epoch == config['train']['num_epochs'] - 1):
        trainer.save_checkpoint(config['model']['saved_models_dir'],
                                epoch, pixelcnn=args.pixelcnn_prior)


if __name__ == '__main__':
    parser = make_parser()
    args = parser.parse_args()

    config = read_yaml(os.path.join('configs', '{}.yaml'.format(args.data)))
    ctx = get_extension_context(config['extension_module'],
                                device_id=config['device_id'])
    nn.set_auto_forward(True)

    if args.data == 'mnist':
        data_iterator = mnist_iterator
    elif args.data == 'imagenet':
        data_iterator = imagenet_iterator
    elif args.data == 'cifar10':
        data_iterator = cifar10_iterator
    else:
        print('Dataset not recognized')
        exit(1)

    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(ctx)
    monitor = None
def main():
    # Args
    args = get_args()
    save_args(args)

    # Context
    ctx = extension_context(args.context,
                            device_id=args.device_id,
                            type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    # Data iterator
    di = data_iterator(args.img_path, args.batch_size,
                       imsize=(args.imsize, args.imsize),
                       num_samples=args.train_samples,
                       dataset_name=args.dataset_name)

    # Model
    generator = Generator(use_bn=args.use_bn, last_act=args.last_act,
                          use_wscale=args.not_use_wscale,
                          use_he_backward=args.use_he_backward)
    discriminator = Discriminator(use_ln=args.use_ln, alpha=args.leaky_alpha,
                                  use_wscale=args.not_use_wscale,
                                  use_he_backward=args.use_he_backward)

    # Solver
    solver_gen = S.Adam(alpha=args.learning_rate,
                        beta1=args.beta1, beta2=args.beta2)
    solver_dis = S.Adam(alpha=args.learning_rate,
                        beta1=args.beta1, beta2=args.beta2)

    # Monitor
    monitor = Monitor(args.monitor_path)
    monitor_loss_gen = MonitorSeries("Generator Loss", monitor, interval=10)
    monitor_loss_dis = MonitorSeries("Discriminator Loss", monitor, interval=10)
    monitor_p_fake = MonitorSeries("Fake Probability", monitor, interval=10)
    monitor_p_real = MonitorSeries("Real Probability", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training Time per Resolution",
                                      monitor, interval=1)
    monitor_image_tile = MonitorImageTileWithName(
        "Image Tile", monitor, num_images=4,
        normalize_method=lambda x: (x + 1.) / 2.)

    # TODO: use arguments
    resolution_list = [4, 8, 16, 32, 64, 128]
    channel_list = [512, 512, 256, 128, 64, 32]

    trainer = Trainer(di, generator, discriminator, solver_gen, solver_dis,
                      args.monitor_path,
                      monitor_loss_gen, monitor_loss_dis,
                      monitor_p_fake, monitor_p_real,
                      monitor_time, monitor_image_tile,
                      resolution_list, channel_list,
                      n_latent=args.latent, n_critic=args.critic,
                      save_image_interval=args.save_image_interval,
                      hyper_sphere=args.hyper_sphere,
                      l2_fake_weight=args.l2_fake_weight)

    # TODO: use images per resolution?
    trainer.train(args.epoch_per_resolution)
def main():
    args = get_args()

    # Get context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    image = io.imread(args.test_image)
    if image.ndim == 2:
        image = color.gray2rgb(image)
    elif image.shape[-1] == 4:
        image = image[..., :3]

    if args.context == 'cudnn':
        if not os.path.isfile(args.cnn_face_detction_model):
            # The block below downloads the CNN-based face-detection model
            # file provided by dlib and saves it in the directory where this
            # script is executed.
            print("Downloading the face detection CNN. Please wait...")
            url = "http://dlib.net/files/mmod_human_face_detector.dat.bz2"
            from nnabla.utils.data_source_loader import download
            download(url, url.split('/')[-1], False)
            # Get the decompressed data.
            data = bz2.BZ2File(url.split('/')[-1]).read()
            # Write to a .dat file.
            open(url.split('/')[-1][:-4], 'wb').write(data)
        face_detector = dlib.cnn_face_detection_model_v1(
            args.cnn_face_detction_model)
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[d.rect.left(), d.rect.top(),
                           d.rect.right(), d.rect.bottom()]
                          for d in detected_faces]
    else:
        face_detector = dlib.get_frontal_face_detector()
        detected_faces = face_detector(
            cv2.cvtColor(image[..., ::-1].copy(), cv2.COLOR_BGR2GRAY))
        detected_faces = [[d.left(), d.top(), d.right(), d.bottom()]
                          for d in detected_faces]

    if len(detected_faces) == 0:
        print("Warning: No faces were detected.")
        return None

    # Load FAN weights
    with nn.parameter_scope("FAN"):
        print("Loading FAN weights...")
        nn.load_parameters(args.model)

    # Load ResNetDepth weights
    if args.landmarks_type_3D:
        with nn.parameter_scope("ResNetDepth"):
            print("Loading ResNetDepth weights...")
            nn.load_parameters(args.resnet_depth_model)

    landmarks = []
    for i, d in enumerate(detected_faces):
        center = [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0]
        center[1] = center[1] - (d[3] - d[1]) * 0.12
        scale = (d[2] - d[0] + d[3] - d[1]) / args.reference_scale

        inp = crop(image, center, scale)
        inp = nn.Variable.from_numpy_array(inp.transpose((2, 0, 1)))
        inp = F.reshape(F.mul_scalar(inp, 1 / 255.0), (1,) + inp.shape)
        with nn.parameter_scope("FAN"):
            out = fan(inp, args.network_size)[-1]
        pts, pts_img = get_preds_fromhm(out, center, scale)
        pts, pts_img = F.reshape(pts, (68, 2)) * 4, F.reshape(pts_img, (68, 2))

        if args.landmarks_type_3D:
            heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
            for i in range(68):
                if pts.d[i, 0] > 0:
                    heatmaps[i] = draw_gaussian(heatmaps[i], pts.d[i], 2)
            heatmaps = nn.Variable.from_numpy_array(heatmaps)
            heatmaps = F.reshape(heatmaps, (1,) + heatmaps.shape)
            with nn.parameter_scope("ResNetDepth"):
                depth_pred = F.reshape(
                    resnet_depth(F.concatenate(inp, heatmaps, axis=1)),
                    (68, 1))
            pts_img = F.concatenate(
                pts_img, depth_pred * (1.0 / (256.0 / (200.0 * scale))),
                axis=1)
        landmarks.append(pts_img.d)
    visualize(landmarks, image, args.output)
                        help='Use TF trained weights converted to NNabla')
    parser.add_argument('--img_path', type=str, default='',
                        help='Image path for latent space projection')
    return parser


if __name__ == '__main__':
    parser = make_parser()
    args = parser.parse_args()

    config = read_yaml(os.path.join('configs', f'{args.data}.yaml'))
    ctx = get_extension_context(args.extension_module)
    nn.set_auto_forward(args.auto_forward or args.test)

    comm = CommunicatorWrapper(ctx)
    nn.set_default_context(ctx)

    monitor = None
    if comm is not None and comm.rank == 0:
        monitor = Monitor(args.monitor_path)

    start_time = time.time()

    few_shot_config = None
    if args.few_shot is not None:
        few_shot_config = read_yaml(os.path.join(
            'configs', args.few_shot + '.yaml'))
def test():
    args = get_inference_args()

    # Set the NNabla context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    # Enable NNabla dynamic (auto-forward) execution
    nn.set_auto_forward(True)

    for input_file in args.inputs:
        # Get audio data from the path; all formats recognized by FFMPEG
        # are accepted.
        audio_with_meta = AudioSegment.from_file(input_file)
        sample_rate = int(mediainfo(input_file)['sample_rate'])
        channel_sounds = audio_with_meta.split_to_mono()
        samples = [s.get_array_of_samples()
                   for idx, s in enumerate(channel_sounds) if idx < 2]
        fp_arr = np.array(samples).T.astype(np.float32)
        fp_arr /= np.iinfo(samples[0].typecode).max
        audio = fp_arr

        if audio.shape[1] > 2:
            # For multi-channel audio, consider only the first two channels
            warnings.warn('Channel count > 2! '
                          'Only the first two channels will be processed!')
            audio = audio[:, :2]

        if sample_rate != args.sample_rate:
            # Resample to the model sample rate if needed
            audio = resampy.resample(audio, sample_rate, args.sample_rate,
                                     axis=0)

        if audio.shape[1] == 1:
            # If we have mono, duplicate the channel,
            # as the input of OpenUnmix is always stereo
            audio = np.repeat(audio, 2, axis=1)

        # Split and separate sources using a moving-window protocol for each
        # chunk of audio. The chunk duration must be lower for machines with
        # low memory.
        chunk_size = sample_rate * args.chunk_dur
        if (audio.shape[0] % chunk_size) == 0:
            nchunks = audio.shape[0] // chunk_size
        else:
            nchunks = (audio.shape[0] // chunk_size) + 1

        estimates = {}
        for chunk_idx in trange(nchunks):
            cur_chunk = audio[
                chunk_idx * chunk_size:
                min((chunk_idx + 1) * chunk_size, audio.shape[0]), :]
            cur_estimates = separate(cur_chunk,
                                     model_path=args.model,
                                     niter=args.niter,
                                     alpha=args.alpha,
                                     softmask=args.softmask,
                                     residual_model=args.residual_model)
            if not estimates:
                estimates = cur_estimates
            else:
                for key in cur_estimates:
                    estimates[key] = np.concatenate(
                        (estimates[key], cur_estimates[key]), axis=0)

        if not args.outdir:
            model_path = Path(args.model)
            if not model_path.exists():
                output_path = Path(Path(input_file).stem + '_' + args.model)
            else:
                output_path = Path(Path(input_file).stem + '_' + model_path.stem)
        else:
            if len(args.inputs) > 1:
                output_path = Path(args.outdir) / Path(input_file).stem
            else:
                output_path = Path(args.outdir)
        output_path.mkdir(exist_ok=True, parents=True)

        for target, estimate in estimates.items():
            sf.write(str(output_path / Path(target).with_suffix('.wav')),
                     estimate, args.sample_rate)
def test():
    args = get_inference_args()

    # Set the NNabla context
    ctx = get_extension_context(args.context)
    nn.set_default_context(ctx)
    # Enable NNabla dynamic (auto-forward) execution
    nn.set_auto_forward(True)

    for input_file in args.inputs:
        # Handle an input audio path
        info = sf.info(input_file)
        start = int(args.start * info.samplerate)
        if args.duration > 0:
            # stop in soundfile is given in samples, not seconds
            stop = start + int(args.duration * info.samplerate)
        else:
            # Set to None to read the complete file
            stop = None
        audio, rate = sf.read(input_file, always_2d=True,
                              start=start, stop=stop)

        if audio.shape[1] > 2:
            warnings.warn('Channel count > 2! '
                          'Only the first two channels will be processed!')
            audio = audio[:, :2]

        if rate != args.sample_rate:
            # Resample to the model sample rate if needed
            audio = resampy.resample(audio, rate, args.sample_rate, axis=0)

        if audio.shape[1] == 1:
            # If we have mono, duplicate the channel,
            # as the input of OpenUnmix is always stereo
            audio = np.repeat(audio, 2, axis=1)

        estimates = separate(audio,
                             model_path=args.model,
                             niter=args.niter,
                             alpha=args.alpha,
                             softmask=args.softmask,
                             residual_model=args.residual_model)

        if not args.outdir:
            model_path = Path(args.model)
            if not model_path.exists():
                output_path = Path(Path(input_file).stem + '_' + args.model)
            else:
                output_path = Path(Path(input_file).stem + '_' + model_path.stem)
        else:
            if len(args.inputs) > 1:
                output_path = Path(args.outdir) / Path(input_file).stem
            else:
                output_path = Path(args.outdir)
        output_path.mkdir(exist_ok=True, parents=True)

        for target, estimate in estimates.items():
            sf.write(str(output_path / Path(target).with_suffix('.wav')),
                     estimate, args.sample_rate)
def main():
    # Args
    args = get_args()

    # Context
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    logger.info(ctx)
    nn.set_default_context(ctx)
    nn.set_auto_forward(True)

    # Monitor
    monitor = Monitor(args.monitor_path)

    # Validation
    logger.info("Start validation")
    num_images = args.valid_samples
    num_batches = num_images // args.batch_size

    # DataIterator
    di = data_iterator(args.img_path, args.batch_size,
                       imsize=(args.imsize, args.imsize),
                       num_samples=args.valid_samples,
                       dataset_name=args.dataset_name)

    # Generator
    gen = load_gen(args.model_load_path, use_bn=args.use_bn,
                   last_act=args.last_act,
                   use_wscale=args.not_use_wscale,
                   use_he_backward=args.use_he_backward)

    # Compute the metric
    if args.validation_metric == "ms-ssim":
        logger.info("Multi Scale SSIM")
        monitor_time = MonitorTimeElapsed("MS-SSIM-ValidationTime", monitor,
                                          interval=1)
        monitor_metric = MonitorSeries("MS-SSIM", monitor, interval=1)
        from ms_ssim import compute_metric
        score = compute_metric(gen, args.batch_size, num_images,
                               args.latent, args.hyper_sphere)
        monitor_time.add(0)
        monitor_metric.add(0, score)
    elif args.validation_metric == "swd":
        logger.info("Sliced Wasserstein Distance")
        monitor_time = MonitorTimeElapsed("SWD-ValidationTime", monitor,
                                          interval=1)
        monitor_metric = MonitorSeries("SWD", monitor, interval=1)
        nhoods_per_image = 128
        nhood_size = 7
        level_list = [128, 64, 32, 16]  # TODO: use argument
        dir_repeats = 4
        dirs_per_repeat = 128
        from sliced_wasserstein import compute_metric
        score = compute_metric(di, gen, args.latent, num_batches,
                               nhoods_per_image, nhood_size, level_list,
                               dir_repeats, dirs_per_repeat,
                               args.hyper_sphere)
        monitor_time.add(0)
        monitor_metric.add(0, score)  # averaged in the log
    else:
        logger.info("Set `validation-metric` as either `ms-ssim` or `swd`.")
    logger.info(score)
    logger.info("End validation")
def main(opt):
    '''NNabla configuration'''
    # os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    type_config = 'half' if opt.mixed_precision else 'float'
    comm = init_nnabla(ext_name=opt.extension_module, device_id='0',
                       type_config=type_config)
    nn.set_auto_forward(True)

    output_folder = os.path.join(
        opt.save_dir, "tmp.monitor.{}_{}".format(opt.arch, opt.num_layers))
    monitor = Monitor(output_folder)
    monitor_loss = None
    monitor_hm_loss = None
    monitor_wh_loss = None
    monitor_off_loss = None
    monitor_acc = None
    monitor_val_loss = None
    monitor_val_hm_loss = None
    monitor_val_wh_loss = None
    monitor_val_off_loss = None
    monitor_map = None
    monitor_time = None

    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    interval = 1
    if comm.rank == 0:
        monitor_loss = MonitorSeries(
            "Training Loss", monitor, interval=interval, verbose=False)
        monitor_hm_loss = MonitorSeries(
            "hm_loss", monitor, interval=interval, verbose=False)
        monitor_wh_loss = MonitorSeries(
            "wh_loss", monitor, interval=interval, verbose=False)
        monitor_off_loss = MonitorSeries(
            "off_loss", monitor, interval=interval, verbose=False)
        monitor_val_loss = MonitorSeries(
            "Validation Loss", monitor, interval=interval, verbose=False)
        monitor_val_hm_loss = MonitorSeries(
            "val_hm_loss", monitor, interval=interval, verbose=False)
        monitor_val_wh_loss = MonitorSeries(
            "val_wh_loss", monitor, interval=interval, verbose=False)
        monitor_val_off_loss = MonitorSeries(
            "val_off_loss", monitor, interval=interval, verbose=False)
        monitor_map = MonitorSeries(
            "Val mAP", monitor, interval=interval, verbose=False)
        monitor_time = MonitorTimeElapsed(
            "time", monitor, interval=1, verbose=False)

    # Data iterators
    seed = opt.seed
    rng = np.random.RandomState(seed)
    source_factory = get_data_source(opt.dataset)
    train_source = source_factory(opt, 'train', shuffle=True, rng=rng,
                                  mixed_precision=opt.mixed_precision,
                                  channel_last=opt.channel_last)
    train_loader = data_iterator(train_source, opt.batch_size,
                                 with_memory_cache=False,
                                 with_file_cache=False)
    train_loader = train_loader.slice(rng, comm.n_procs, slice_pos=comm.rank)

    val_source = source_factory(opt, 'val', shuffle=False, rng=rng,
                                mixed_precision=opt.mixed_precision,
                                channel_last=opt.channel_last)
    val_loader = data_iterator(val_source, opt.batch_size,
                               with_memory_cache=False,
                               with_file_cache=False)

    logger.info('Creating model...')
    logger.info(opt.heads)
    logger.info(f"batch size per gpu: {opt.batch_size}")
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt.num_layers,
                         training=True, channel_last=opt.channel_last,
                         pretrained_model_dir=opt.pretrained_model_dir)
    if opt.checkpoint != '':
        load_model(model, opt.checkpoint, clear=True)

    loss_func = CtdetLoss(opt)
    lr_sched = create_learning_rate_scheduler(
        opt.train_config.learning_rate_config)
    solver = S.Adam(alpha=lr_sched.get_lr())
    trainer = Trainer(
        model, loss_func, solver, train_loader, train_source,
        [monitor_loss, monitor_hm_loss, monitor_wh_loss, monitor_off_loss,
         monitor_val_loss, monitor_val_hm_loss, monitor_val_wh_loss,
         monitor_val_off_loss],
        opt, comm)

    root_dir = opt.save_dir
    checkpoint_dir = os.path.join(root_dir, output_folder, 'checkpoints')
    start_epoch = 0
    if opt.resume_from is not None:
        start_epoch = trainer.load_checkpoint(checkpoint_dir, opt.resume_from)
        logger.info('resuming from the epoch {}'.format(start_epoch))

    for epoch in range(start_epoch, opt.num_epochs):
        lr_sched.set_epoch(epoch)
        trainer.solver.set_learning_rate(lr_sched.get_lr())
        iteration = trainer.update(epoch)

        if comm.rank == 0:
            if epoch % opt.save_intervals == 0 or epoch == (opt.num_epochs - 1):
                monitor_time.add(epoch)
                trainer.save_checkpoint(checkpoint_dir, epoch)

        if epoch % opt.val_intervals == 0 or epoch == (opt.num_epochs - 1):
            model.training = False
            trainer.evaluate(val_loader, epoch)
            if not opt.val_calc_map:
                num_iters = val_loader.size
                pbar = trange(num_iters,
                              desc="[Test][exp_id:{} epoch:{}/{}]".format(
                                  opt.exp_id, epoch, opt.num_epochs),
                              disable=comm.rank > 0)
                if comm.rank == 0:
                    results = {}
                    for ind in pbar:
                        img_id = val_source.images[ind]
                        img_info = val_source.coco.loadImgs(ids=[img_id])[0]
                        img_path = os.path.join(val_source.img_dir,
                                                img_info['file_name'])
                        with nn.context_scope(comm.ctx_float):
                            ret = detector.run(img_path)
                        results[img_id] = ret['results']
                    val_map = val_source.run_eval(results, opt.save_dir,
                                                  opt.data_dir)
                    monitor_map.add(epoch, val_map)
            model.training = True

        if comm.n_procs > 1:
            # Required to prevent a timeout error in allreduce
            # at the first iteration of the next epoch.
            comm.comm.barrier()