def train_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    logger.log(99, 'Train with contexts {}'.format(available_contexts))

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:
        data_iterators = {'optimizer': {}, 'monitor': {}}
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)
    else:
        # Save parameters without training (0-epoch learning).
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
def profile_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    ext_module = import_extension_module(
        config.global_config.default_context.backend[0].split(':')[0])

    def synchronize():
        return ext_module.synchronize(
            device_id=config.global_config.default_context.device_id)

    result_array = [['time in ms']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        result_array = profile_optimizer(config, result_array, synchronize)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
    return True
def train_command(args):
    logger.log(99, 'Train with contexts {}'.format(available_contexts))

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:
        data_iterators = {'optimizer': {}, 'monitor': {}}
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)
    else:
        # Save parameters without training (0-epoch learning).
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
def infer_command(args):
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig()

    # To improve load performance
    os.environ['NNABLA_CUDNN_ALGORITHM_BY_HEURISTIC'] = '1'

    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        normalize = d.normalize

    input_file_index = 0
    inputs = []
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            input_filename = args.inputs[input_file_index]
            # Infer the dtype from the file name: files whose name contains
            # "int32" are read as int32, everything else as float32.
            if "int32" in input_filename:
                data = np.fromfile(input_filename, np.int32).reshape(
                    v.variable_instance.d.shape)
            else:
                data = np.fromfile(input_filename, np.float32).reshape(
                    v.variable_instance.d.shape)
            inputs.append((d, data))
            input_file_index += 1

    data = []
    variables = []
    for v, d in inputs:
        variables.append(v)
        data.append(d)
    result, outputs = _forward(args, 0, config, data, variables, False)
    for i, o in enumerate(outputs):
        if args.output is not None:
            (np.array(o).astype(np.float32)).tofile(
                "{}_{}.bin".format(args.output, i))

    return True
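# Hedged usage sketch (not part of the CLI): infer_command above reads each
# raw input with np.fromfile and picks int32 when the file name contains
# "int32", otherwise float32. The file names and shapes here are hypothetical;
# real inputs must match v.variable_instance.d.shape of the executor inputs.
def _write_example_inputs_sketch(outdir):
    import os
    import numpy as np
    x = np.random.rand(1, 3, 32, 32).astype(np.float32)
    x.tofile(os.path.join(outdir, 'input_x_float32.bin'))
    # "int32" in the name makes infer_command read this file as int32.
    labels = np.zeros((1, 1), dtype=np.int32)
    labels.tofile(os.path.join(outdir, 'input_y_int32.bin'))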
def _get_current_parameter(args):
    globname = os.path.join(args.outdir, 'results_current_*.nnp')
    exists = glob.glob(globname)

    if len(exists) > 0:
        ex_list = {}
        for ex in exists:
            # Extract the epoch number N from 'results_current_N.nnp'.
            n = int(ex.rsplit('_', 1)[1].rsplit('.', 1)[0])
            ex_list[n] = ex

        # Pick the snapshot with the largest epoch number.
        last_epoch = sorted(ex_list.keys())[-1]
        last_parameter = ex_list[last_epoch]
        logger.log(99, "Load parameter from [{}]".format(
            os.path.basename(last_parameter)))
        load.load([last_parameter], parameter_only=True)
        return last_epoch
    return 0
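# Minimal sketch of the epoch parsing that _get_current_parameter relies on:
# 'results_current_<epoch>.nnp' names map to integer epochs and the largest
# epoch wins. The file names below are illustrative only.
def _latest_epoch_sketch(paths):
    ex_list = {int(p.rsplit('_', 1)[1].rsplit('.', 1)[0]): p for p in paths}
    return sorted(ex_list.keys())[-1]

# _latest_epoch_sketch(['results_current_2.nnp', 'results_current_10.nnp'])
# returns 10, so 'results_current_10.nnp' is the snapshot that gets loaded.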
def infer_command(args):
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig()
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return

    normalize = True
    for d in info.datasets.values():
        normalize = d.normalize

    input_file_index = 0
    inputs = []
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            data = np.fromfile(args.inputs[input_file_index],
                               np.float32).reshape(v.variable_instance.d.shape)
            inputs.append((d, data))
            input_file_index += 1

    data = []
    variables = []
    for v, d in inputs:
        variables.append(v)
        data.append(d)
    result, outputs = forward(args, 0, config, data, variables, False)
    for i, o in enumerate(outputs):
        print(o)
        if args.output is not None:
            (np.array(o).astype(np.float32)).tofile(
                "{}_{}.bin".format(args.output, i))
def test_load_and_infer_improvement(nntxt_idx, parameter_format,
                                    dataset_sample_num):
    '''This case tests improvement features by comparing against the legacy
    implementation: the legacy loader cannot load or infer successfully,
    while the refactored one can.
    '''
    with generate_case_from_nntxt_str(NNTXT_IMPROVEMENT_CASES[nntxt_idx],
                                      parameter_format,
                                      dataset_sample_num) as nnp_file:
        with pytest.raises(ValueError) as excinfo:
            ref_info = ref_load(nnp_file)
            ref_result = partial(common_forward,
                                 forward_func=_ref_forward)(ref_info)
        print(excinfo)

        info = load.load(nnp_file)
        result = partial(common_forward, forward_func=_forward)(info)
def get_context(device_id):
    # For CLI app use: prefer the cudnn extension, fall back to CPU.
    try:
        context = 'cudnn'
        ctx = get_extension_context(context, device_id=device_id)
    except ModuleNotFoundError:
        context = 'cpu'
        ctx = get_extension_context(context, device_id=device_id)

    # For nnc use: if a config file exists, take the context from it.
    config_filename = 'net.nntxt'
    if os.path.isfile(config_filename):
        config_info = load([config_filename])
        ctx = config_info.global_config.default_context
    return ctx
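# Hedged usage sketch for get_context: resolve a context once and set it as
# nnabla's default before building any graph. Device id 0 is just an example.
def _use_context_sketch():
    import nnabla as nn
    ctx = get_context(0)  # falls back to CPU if the cudnn extension is absent
    nn.set_default_context(ctx)
    return ctx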
def profile_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    result_array = [['time in ms']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        result_array = profile_optimizer(config, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
def test_load_and_infer_equivalence(nntxt_idx, parameter_format,
                                    dataset_sample_num):
    '''These cases tend to test equivalence before and after refactoring the
    NNP load functions. The scope of the refactor includes the network part
    and the load function.

    This test first generates a .nnp file from nntxt_str according to the
    specified parameter_format, replaces the dataset's URI with a temporarily
    generated random dataset, and then performs an inference operation
    similar to what is done in cli/forward.py.
    '''
    with generate_case_from_nntxt_str(NNTXT_EQUIVALENCE_CASES[nntxt_idx],
                                      parameter_format,
                                      dataset_sample_num) as nnp_file:
        ref_info = ref_load(nnp_file)
        ref_result = partial(common_forward,
                             forward_func=_ref_forward)(ref_info)

        info = load.load(nnp_file)
        result = partial(common_forward, forward_func=_forward)(info)

        assert_tensor_equal(result, ref_result)
def load_model_from_utils_load_and_forward(nnp_file, batch_size):
    g = load.load(nnp_file, batch_size=batch_size).proto_graph
    inputs = [
        g.default_graph().variables[i].variable_instance
        for i in g.default_graph().inputs
    ]
    for i in inputs:
        i.d = np.random.random(i.d.shape)
    outputs = [
        g.default_graph().variables[i].variable_instance
        for i in g.default_graph().outputs
    ]
    y = F.sink(*outputs)
    y.forward()
    out = outputs[0].d
    return out
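# Hedged usage sketch for load_model_from_utils_load_and_forward: it fills
# every graph input with random data and returns the first output as a numpy
# array. The .nnp path passed in is hypothetical.
def _forward_random_sketch(nnp_file='model.nnp'):
    out = load_model_from_utils_load_and_forward(nnp_file, batch_size=1)
    print(out.shape, out.dtype)
    return out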
def infer_command(args):
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig()

    # To improve load performance
    os.environ['NNABLA_CUDNN_ALGORITHM_BY_HEURISTIC'] = '1'

    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)

    inputs = []
    for input_filename in args.inputs:
        if args.data_type == 'uint8':
            inputs.append(np.fromfile(input_filename, np.uint8))
        elif args.data_type == 'int32':
            inputs.append(np.fromfile(input_filename, np.int32))
        elif args.data_type == 'float32':
            # Fall back to a dtype hinted by the file name.
            if 'int32' in input_filename:
                inputs.append(np.fromfile(input_filename, np.int32))
            elif 'uint8' in input_filename:
                inputs.append(np.fromfile(input_filename, np.uint8))
            else:
                inputs.append(np.fromfile(input_filename, np.float32))
        else:
            logger.critical(
                'Data type must be one of "uint8", "int32", or "float32".')
            return False

    result, outputs = infer(info, inputs)
    for i, o in enumerate(outputs):
        if args.output is not None:
            (np.array(o).astype(np.float32)).tofile(
                "{}_{}.bin".format(args.output, i))

    return True
def test_resume_suspend_equivalence(nntxt_idx, parameter_format,
                                    dataset_sample_num, batch_size):
    '''These cases tend to test equivalence before and after refactoring.
    '''
    verbose = True
    a_few_iter = 10
    half_iter = 5
    output_network_topology = False

    with generate_case_from_nntxt_str(NNTXT_EQUIVALENCE_CASES[nntxt_idx],
                                      parameter_format,
                                      dataset_sample_num,
                                      batch_size) as nnp_file:
        with create_temp_with_dir(
                "saved_parameter.nnp") as saved_parameter_nnp:
            class Callback:
                pass

            class ModelSaver:
                def __init__(self, info):
                    self.info = info

                def __call__(self, config):
                    if config.iter != half_iter:
                        return
                    _save_parameters(saved_parameter_nnp, config,
                                     NNTXT_EQUIVALENCE_CASES[nntxt_idx])

            new_config = TrainConfig()
            new_config.start_iteration = 0
            new_config.end_iteration = a_few_iter
            new_config.save_optimizer_variable = False
            new_config.save_evaluation_variable = False
            new_cb = Callback()
            new_cb.forward = lambda x: x.target.forward(
                clear_no_need_grad=True)
            new_cb.backward = lambda x, b: x.target.backward()
            new_config.cb = new_cb
            new_config.impl = "ref"

            ref_result = []
            ref_info = load.load(nnp_file, batch_size=batch_size)
            print("load.load")
            if output_network_topology:
                for n, opt in ref_info.optimizers.items():
                    print(n)
                    opt.network.execute_on_proto(Verifier())
            new_config.on_iter = ModelSaver(ref_info)
            for cost, error in partial(train, config=new_config)(ref_info):
                ref_result.append((cost, error))

            new_config.on_iter = None
            new_config.start_iteration = half_iter
            new_config.end_iteration = a_few_iter
            new_config.impl = "new"
            result = []
            nn.clear_parameters()
            info = load.load(nnp_file, batch_size=batch_size,
                             exclude_parameter=True)
            print("load.load")

            # Here, `info` is different from the reference one, but the
            # optimizer is the same.
            load_train_state(saved_parameter_nnp, info)

            for cost, error in partial(train, config=new_config)(info):
                result.append((cost, error))

            compare_info(ref_info, info)

            for i, ((cost_ref, error_ref), (cost, error)) in enumerate(
                    zip(ref_result, result)):
                if verbose:
                    print("{}: cost: {} <--> {}".format(i, cost_ref, cost))
                    print("{}: error: {} <--> {}".format(i, error_ref, error))
                if i > new_config.start_iteration:
                    assert_allclose(np.array([cost_ref, error_ref]),
                                    np.array([cost, error]),
                                    rtol=1e-2, atol=1e-5,
                                    err_msg="Error: {}".format(nntxt_idx))
def test_workingmemory_layer():
    from nnabla.utils.load import load
    with create_temp_with_dir("network.nntxt") as fn:
        with open(fn, "w") as f:
            f.write(nntxt_mixup)
        load(fn)
def forward_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig()
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset or d.cache_dir == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    orders = {}
    # With CSV
    if os.path.splitext(args.dataset)[1] == '.csv':
        data_iterator = (lambda: data_iterator_csv_dataset(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize,
            with_memory_cache=False,
            with_file_cache=False))

        # Load dataset as CSV.
        filereader = FileReader(args.dataset)
        with filereader.open(textmode=True, encoding='utf-8-sig') as f:
            rows = [row for row in csv.reader(f)]
        row0 = rows.pop(0)
        if args.replace_path:
            root_path = os.path.dirname(args.dataset)
            root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
        else:
            root_path = '.'
        rows = [row for row in rows if len(row)]
        rows = list(map(lambda row: list(map(
            lambda i, x: x if row0[i][0] == '#' or is_float(x)
            else compute_full_path(root_path, x),
            range(len(row)), row)), rows))
        for i in range(len(rows)):
            orders[i] = i
    # With Cache
    elif os.path.splitext(args.dataset)[1] == '.cache':
        data_iterator = (lambda: data_iterator_cache(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize))

        # Get original CSV.
        original_csv = os.path.join(args.dataset, 'original.csv')
        try:
            # Load dataset as CSV.
            filereader = FileReader(original_csv)
            with filereader.open(textmode=True, encoding='utf-8-sig') as f:
                rows = [row for row in csv.reader(f)]
            row0 = rows.pop(0)
            root_path = '.'
            rows = list(map(lambda row: list(map(
                lambda x: x if is_float(x)
                else compute_full_path(root_path, x), row)), rows))
        except:
            print('Cannot open', original_csv)
            pass

        # Get original data order.
        order_csv = os.path.join(args.dataset, 'order.csv')
        try:
            filereader = FileReader(order_csv)
            with filereader.open(textmode=True) as f:
                for original, shuffled in [[int(x) for x in row]
                                           for row in csv.reader(f)]:
                    orders[original] = shuffled
        except:
            print('Cannot open', order_csv)
            for i in range(len(rows)):
                orders[i] = i
    else:
        print('Unsupported extension "{}" in "{}".'.format(
            os.path.splitext(args.dataset)[1], args.dataset))

    callback.update_status(('data.max', len(rows)))
    callback.update_status(('data.current', 0))
    callback.update_status('processing', True)

    result_csv_filename = os.path.join(args.outdir, args.outfile)
    with open(result_csv_filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    # Write the CSV header, extending the input header with
                    # one column per output dimension.
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            if e.repeat_evaluation_type == "std":
                                name = "Uncertainty(Std)"
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[orders[index + i]])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                callback.update_status(('data.current',
                                        min([index, len(rows)])))
                callback.update_forward_time()
                callback.update_status()

                logger.log(99, 'data {} / {}'.format(min([index, len(rows)]),
                                                     len(rows)))

    callback.process_evaluation_result(args.outdir, result_csv_filename)

    logger.log(99, 'Forward Completed.')
    progress(None)

    callback.update_status(('output_result.csv_header', ','.join(row0)))
    callback.update_status(('output_result.column_num', len(row0)))
    callback.update_status(('output_result.data_num', len(rows)))
    callback.update_status('finished')

    return True
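# Hedged driver sketch for the forward_command above, listing the args
# attributes it actually reads; the values are hypothetical stand-ins for
# what the real CLI obtains from argparse.
def _forward_args_sketch():
    import argparse
    return argparse.Namespace(
        config='net.nnp',             # model/config file
        param=None,                   # optional parameter file
        batch_size=-1,                # < 1 means "use the network's batch size"
        dataset='input.csv',          # a .csv or .cache dataset
        outdir='.',                   # where progress.txt and results go
        outfile='output_result.csv',  # result CSV name under outdir
        replace_path=False)           # resolve relative paths in the CSV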
def train_command(args):
    if single_or_rankzero():
        configure_progress(os.path.join(args.outdir, 'progress.txt'))

    info = load.load([args.config], exclude_parameter=True)

    # Check that no dataset URI is empty.
    dataset_error = False
    for dataset in info.datasets.values():
        if dataset.uri.strip() == '':
            dataset_error = True
    if dataset_error:
        logger.log(99, 'Fatal error. Dataset URI is empty.')
        return False

    class TrainConfig:
        pass
    config = TrainConfig()
    config.timelimit = -1
    if args.param:
        load.load([args.param], parameter_only=True)

    config.global_config = info.global_config
    config.training_config = info.training_config

    if single_or_rankzero():
        logger.log(99, 'Train with contexts {}'.format(available_contexts))

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    comm = current_communicator()
    config.training_config.iter_per_epoch //= comm.size if comm else 1
    max_iteration = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch

    global _save_parameter_info
    _save_parameter_info = {}
    _, config_ext = os.path.splitext(args.config)
    if config_ext == '.prototxt' or config_ext == '.nntxt':
        _save_parameter_info['config'] = args.config
    elif config_ext == '.nnp':
        with zipfile.ZipFile(args.config, 'r') as nnp:
            for name in nnp.namelist():
                _, ext = os.path.splitext(name)
                if ext == '.nntxt' or ext == '.prototxt':
                    nnp.extract(name, args.outdir)
                    _save_parameter_info['config'] = os.path.join(
                        args.outdir, name)

    result = False
    if max_iteration > 0:
        data_iterators = {'optimizer': {}, 'monitor': {}}
        rng = np.random.RandomState(comm.rank if comm else 0)
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
                if comm and comm.size > 1:
                    o.data_iterator = o.data_iterator.slice(
                        rng, comm.size, comm.rank)
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
                if comm and comm.size > 1:
                    m.data_iterator = m.data_iterator.slice(
                        rng, comm.size, comm.rank)
            result = _train(args, config)
    else:
        # Save parameters without training (0-epoch learning).
        logger.log(99, '0 epoch learning. (Just save parameter.)')
        if single_or_rankzero():
            _save_parameters(args, 'current', 0, True)
        result = True

    if single_or_rankzero():
        if result:
            logger.log(99, 'Training Completed.')
        else:
            logger.log(99, 'Training Incomplete.')
    if single_or_rankzero():
        progress(None)
    return True
def main():
    print(sys.argv)
    print(load([sys.argv[1]]))
def compare_with_cpu_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    class OptConfig:
        pass

    class MonConfig:
        pass

    # Load config with current context
    files = []
    files.append(args.config)
    with nn.parameter_scope('current'):
        info = load.load(files)
        parameters = get_parameters(grad_only=False)

    config = TrainConfig()
    config.global_config = info.global_config
    config.training_config = info.training_config

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Load config with CPU context
    files = []
    files.append(args.config2)
    with nn.parameter_scope('cpu'):
        info_cpu = load.load(files)
        cpu_parameters = get_parameters(grad_only=False)

    config_cpu = TrainConfig()
    config_cpu.global_config = info_cpu.global_config
    config_cpu.training_config = info_cpu.training_config

    config_cpu.optimizers = OrderedDict()
    for name, opt in info_cpu.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config_cpu.optimizers[name] = o

    config_cpu.monitors = OrderedDict()
    for name, mon in info_cpu.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config_cpu.monitors[name] = m

    result_array = [['1-Correl']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        for name, o in config_cpu.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        result_array = compare_optimizer(config, parameters, config_cpu,
                                         cpu_parameters, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'compare_with_cpu.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Compare with CPU Completed.')
    progress(None)
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass
    config = ForwardConfig()
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization
    data_iterator = (lambda: data_iterator_csv_dataset(
        uri=args.dataset,
        batch_size=config.networks[0].batch_size,
        shuffle=False,
        normalize=normalize,
        with_memory_cache=False,
        with_file_cache=False))

    # Load dataset as CSV.
    filereader = FileReader(args.dataset)
    with filereader.open(textmode=True) as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(map(lambda row: list(map(
        lambda x: x if is_float(x) else compute_full_path(root_path, x),
        row)), rows))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[index + i])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                logger.log(99, 'data {} / {}'.format(min([index, len(rows)]),
                                                     len(rows)))

    logger.log(99, 'Forward Completed.')
    progress(None)
    return True
def profile_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(args.config)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterators = []
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterators = []
        config.monitors[name] = m

    ext_module = import_extension_module(
        config.global_config.default_context.backend[0].split(':')[0])

    def synchronize():
        return ext_module.synchronize(
            device_id=config.global_config.default_context.device_id)

    result_array = [['time in ms']]

    callback.update_status('processing', True)

    # Profile Optimizer
    with ExitStack() as stack:
        # Create a data_iterator instance only once for each dataset
        # in the optimizers.
        optimizer_data_iterators = {}
        for name, o in config.optimizers.items():
            for di in o.optimizer.data_iterators.values():
                if di not in optimizer_data_iterators:
                    di_instance = stack.enter_context(di())
                    optimizer_data_iterators[di] = di_instance
                else:
                    di_instance = optimizer_data_iterators[di]
                o.data_iterators.append(di_instance)
        result_array = profile_optimizer(config, result_array, synchronize)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'profile.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Profile Completed.')
    progress(None)
    callback.update_status('finished')
    return True
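# Minimal sketch of the iterator de-duplication used in profile_command:
# each distinct data_iterator factory is entered into the ExitStack exactly
# once and the resulting instance is shared. `optimizers` is assumed to hold
# OptConfig-like objects as built above.
def _dedup_iterators_sketch(stack, optimizers):
    cache = {}
    for o in optimizers.values():
        for di in o.optimizer.data_iterators.values():
            if di not in cache:
                cache[di] = stack.enter_context(di())
            o.data_iterators.append(cache[di])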
def compare_with_cpu_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    class OptConfig:
        pass

    class MonConfig:
        pass

    # Load config with current context
    files = []
    files.append(args.config)
    with nn.parameter_scope('current'):
        info = load.load(files)
        parameters = get_parameters(grad_only=False)

    config = TrainConfig()
    config.global_config = info.global_config
    config.training_config = info.training_config

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Load config with CPU context
    files = []
    files.append(args.config2)
    with nn.parameter_scope('cpu'):
        info_cpu = load.load(files)
        cpu_parameters = get_parameters(grad_only=False)

    config_cpu = TrainConfig()
    config_cpu.global_config = info_cpu.global_config
    config_cpu.training_config = info_cpu.training_config

    config_cpu.optimizers = OrderedDict()
    for name, opt in info_cpu.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config_cpu.optimizers[name] = o

    config_cpu.monitors = OrderedDict()
    for name, mon in info_cpu.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config_cpu.monitors[name] = m

    result_array = [['1-Correl']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        for name, o in config_cpu.optimizers.items():
            o.data_iterator = stack.enter_context(o.optimizer.data_iterator())
        result_array = compare_optimizer(config, parameters, config_cpu,
                                         cpu_parameters, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'compare_with_cpu.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Compare with CPU Completed.')
    progress(None)
    return True
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class ForwardConfig:
        pass
    config = ForwardConfig()
    info = load.load(files, prepare_data_iterator=False)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    data_iterator = (lambda: data_iterator_csv_dataset(
        args.dataset, config.networks[0].batch_size, False,
        padding=True, normalize=normalize))

    # Load dataset as CSV.
    with open(args.dataset, 'rt') as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    # list() both maps so that `rows` stays subscriptable on Python 3.
    rows = list(map(lambda row: list(map(
        lambda x: x if is_float(x) else compute_full_path(root_path, x),
        row)), rows))

    with data_iterator() as di:
        index = 0
        while index < di.size:
            data = di.next()
            result, outputs = forward(args, index, config, data, di.variables)
            if index == 0:
                for name, dim in zip(result.names, result.dims):
                    if dim == 1:
                        row0.append(name)
                    else:
                        for d in range(dim):
                            row0.append(name + '__' + str(d))
            for i, output in enumerate(outputs):
                if index + i < len(rows):
                    rows[index + i].extend(output)
            index += len(outputs)
            logger.log(99, 'data {} / {}'.format(min([index, len(rows)]),
                                                 len(rows)))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(row0)
        writer.writerows(rows)

    logger.log(99, 'Forward Completed.')
    progress(None)
def test_load_and_train_equivalence(nntxt_idx, parameter_format,
                                    dataset_sample_num, batch_size):
    '''These cases tend to test equivalence before and after refactoring.
    The operation is similar to what is done in cli/train.py.
    '''
    # For debugging
    save_v = False
    output_network_topology = False
    verbose = False
    m_iter = 10

    class Callback:
        pass

    legacy_config = TrainConfig()
    legacy_config.on_iter = None
    legacy_config.save_optimizer_variable = False
    legacy_config.save_evaluation_variable = False
    legacy_config.start_iteration = 0
    legacy_config.end_iteration = 10
    legacy_config.enable_save_variable = save_v
    legacy_cb = Callback()
    legacy_cb.forward = lambda o: o.network.forward(o.forward_sequence)
    legacy_cb.backward = lambda o, b: o.network.backward(
        o.backward_sequence, b)
    legacy_config.cb = legacy_cb
    legacy_config.impl = "legacy"

    new_config = TrainConfig()
    new_config.on_iter = None
    new_config.save_optimizer_variable = False
    new_config.save_evaluation_variable = False
    new_config.start_iteration = 0
    new_config.end_iteration = 10
    new_config.enable_save_variable = save_v
    new_cb = Callback()
    new_cb.forward = lambda x: x.target.forward(clear_no_need_grad=True)
    new_cb.backward = lambda x, b: x.target.backward(clear_buffer=True)
    new_config.cb = new_cb
    new_config.impl = "new"

    with generate_case_from_nntxt_str(NNTXT_EQUIVALENCE_CASES[nntxt_idx],
                                      parameter_format,
                                      dataset_sample_num,
                                      batch_size) as nnp_file:
        ref_result = []
        result = []

        nn.clear_parameters()
        info = ref_load(nnp_file, batch_size=batch_size)
        for cost, error in partial(train, config=legacy_config)(info):
            ref_result.append((cost, error))

        nn.clear_parameters()
        info = load.load(nnp_file, batch_size=batch_size)
        if output_network_topology:
            for n, opt in info.optimizers.items():
                print(n)
                opt.network.execute_on_proto(Verifier())
        for cost, error in partial(train, config=new_config)(info):
            result.append((cost, error))

        for i, ((cost_ref, error_ref), (cost, error)) in enumerate(
                zip(ref_result, result)):
            if verbose:
                print("{}: cost: {} <--> {}".format(i, cost_ref, cost))
                print("{}: error: {} <--> {}".format(i, error_ref, error))
            assert_allclose(np.array([cost_ref, error_ref]),
                            np.array([cost, error]),
                            rtol=1e-2, atol=1e-3,
                            err_msg="Error: {}".format(nntxt_idx))
def test_load_and_save_equivalence(nntxt_idx, parameter_format,
                                   dataset_sample_num, batch_size,
                                   include_params, variable_batch_size):
    '''These cases tend to test equivalence before and after refactoring.
    '''
    verbose = True
    a_few_iter = 10
    half_iter = 5
    output_network_topology = True

    with generate_case_from_nntxt_str(NNTXT_EQUIVALENCE_CASES[nntxt_idx],
                                      parameter_format,
                                      dataset_sample_num,
                                      batch_size) as nnp_file:
        with create_temp_with_dir("saved.nnp") as saved_nnp_file:
            class Callback:
                pass

            class ModelSaver:
                def __init__(self, info):
                    self.info = info

                def __call__(self, config):
                    if config.iter != half_iter:
                        return
                    info = self.info
                    datasets = []
                    with ExitStack() as stack:
                        for d_name, d in info.datasets.items():
                            ds = {}
                            ds['name'] = d_name
                            ds['uri'] = d.uri
                            ds['cache_dir'] = d.cache_dir
                            di_instance = stack.enter_context(
                                d.data_iterator())
                            ds['variables'] = [
                                var_name for var_name in di_instance.variables
                            ]
                            ds['batch_size'] = di_instance.batch_size
                            ds['no_image_normalization'] = not d.normalize
                            ds['shuffle'] = di_instance._shuffle
                            datasets.append(ds)

                    dataset_assign = set()
                    for obj in itertools.chain(info.monitors.values(),
                                               info.executors.values(),
                                               info.optimizers.values()):
                        for pv in obj.dataset_assign.keys():
                            dataset_assign.add(pv.name)

                    contents = {
                        'global_config': {
                            'default_context':
                                info.global_config.default_context
                        },
                        'training_config': {
                            'max_epoch': info.training_config.max_epoch,
                            'iter_per_epoch':
                                info.training_config.iter_per_epoch,
                            'save_best': info.training_config.save_best
                        },
                        'networks': [{
                            'name': n_name,
                            'batch_size': n.batch_size,
                            'outputs': {
                                out: n.variables[out].variable_instance
                                for out in n.outputs
                            },
                            'names': {
                                inp: n.variables[inp].variable_instance
                                for inp in itertools.chain(
                                    n.inputs, n.outputs)
                            }
                        } for n_name, n in info.networks.items()],
                        'executors': [{
                            'name': e_name,
                            'network': e.network.name,
                            'data':
                                [pv.name for pv in e.dataset_assign.keys()],
                            'generator_variables':
                                [pv.name for pv in e.generator_assign.keys()],
                            'output':
                                [pv.name for pv in e.output_assign.keys()]
                        } for e_name, e in info.executors.items()],
                        'optimizers': [{
                            'name': o_name,
                            'solver': o.solver,
                            'network': o.network.name,
                            'data_variables': {
                                pv.name: d
                                for pv, d in o.dataset_assign.items()
                            },
                            'generator_variables':
                                [pv.name for pv in o.generator_assign.keys()],
                            'loss_variables':
                                [pv.name for pv in o.loss_variables],
                            'dataset': [
                                ds_name
                                for ds_name in o.data_iterators.keys()
                            ],
                            'weight_decay': o.weight_decay,
                            'lr_decay': o.lr_decay,
                            'lr_decay_interval': o.lr_decay_interval,
                            'update_interval': o.update_interval
                        } for o_name, o in info.optimizers.items()],
                        'datasets': datasets,
                        'monitors': [{
                            'name': m_name,
                            'network': m.network.name,
                            'data_variables': {
                                pv.name: d
                                for pv, d in m.dataset_assign.items()
                            },
                            'generator_variables':
                                [pv.name for pv in m.generator_assign.keys()],
                            'monitor_variables':
                                [pv.name for pv in m.monitor_variables],
                            'dataset': [
                                ds_name
                                for ds_name in m.data_iterators.keys()
                            ]
                        } for m_name, m in info.monitors.items()],
                    }

                    save.save(saved_nnp_file, contents,
                              include_params, variable_batch_size)

            new_config = TrainConfig()
            new_config.start_iteration = 0
            new_config.end_iteration = a_few_iter
            new_config.save_optimizer_variable = False
            new_config.save_evaluation_variable = False
            new_cb = Callback()
            new_cb.forward = lambda x: x.target.forward(
                clear_no_need_grad=True)
            new_cb.backward = lambda x, b: x.target.backward(
                clear_buffer=True)
            new_config.cb = new_cb
            new_config.impl = "ref"

            ref_result = []
            ref_info = load.load(nnp_file, batch_size=batch_size)
            if output_network_topology:
                for n, opt in ref_info.optimizers.items():
                    print(n)
                    opt.network.execute_on_proto(Verifier())
            new_config.on_iter = ModelSaver(ref_info)
            for cost, error in partial(train, config=new_config)(ref_info):
                ref_result.append((cost, error))

            new_config.on_iter = None
            new_config.start_iteration = half_iter
            new_config.end_iteration = a_few_iter
            new_config.impl = "new"
            result = []
            nn.clear_parameters()
            info = load.load(saved_nnp_file, batch_size=batch_size)
            if output_network_topology:
                for n, opt in info.optimizers.items():
                    print(n)
                    opt.network.execute_on_proto(Verifier())
            for cost, error in partial(train, config=new_config)(info):
                result.append((cost, error))

            compare_info(ref_info, info)

            for i, ((cost_ref, error_ref), (cost, error)) in enumerate(
                    zip(ref_result, result)):
                if verbose:
                    print("{}: cost: {} <--> {}".format(i, cost_ref, cost))
                    print("{}: error: {} <--> {}".format(i, error_ref, error))
                if i > new_config.start_iteration:
                    assert_allclose(np.array([cost_ref, error_ref]),
                                    np.array([cost, error]),
                                    rtol=1e-2, atol=1e-5,
                                    err_msg="Error: {}".format(nntxt_idx))
def train_command(args):
    callback.update_status(args)

    if single_or_rankzero():
        configure_progress(os.path.join(args.outdir, 'progress.txt'))

    info = load.load([args.config], prepare_data_iterator=None,
                     exclude_parameter=True)

    # Check that no dataset URI is empty.
    dataset_error = False
    for dataset in info.datasets.values():
        if dataset.uri.strip() == '':
            dataset_error = True
    if dataset_error:
        logger.log(99, 'Fatal error. Dataset URI is empty.')
        return False

    class TrainConfig:
        pass
    config = TrainConfig()
    config.timelimit = -1
    if args.param:
        load.load([args.param], parameter_only=True)

    config.timelimit = callback.get_timelimit(args)

    config.global_config = info.global_config
    config.training_config = info.training_config

    if single_or_rankzero():
        logger.log(99, 'Train with contexts {}'.format(available_contexts))

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterators = []
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterators = []
        config.monitors[name] = m

    # Training
    comm = current_communicator()
    config.training_config.iter_per_epoch //= comm.size if comm else 1
    max_iteration = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch

    global _save_parameter_info
    _save_parameter_info = {}
    _, config_ext = os.path.splitext(args.config)
    if config_ext == '.prototxt' or config_ext == '.nntxt':
        _save_parameter_info['config'] = args.config
    elif config_ext == '.nnp':
        with zipfile.ZipFile(args.config, 'r') as nnp:
            for name in nnp.namelist():
                _, ext = os.path.splitext(name)
                if ext == '.nntxt' or ext == '.prototxt':
                    nnp.extract(name, args.outdir)
                    _save_parameter_info['config'] = os.path.join(
                        args.outdir, name)

    result = False
    restart = False
    if max_iteration > 0:
        rng = np.random.RandomState(comm.rank if comm else 0)
        with ExitStack() as stack:
            # Create a data_iterator instance only once for each dataset
            # in the optimizers.
            optimizer_data_iterators = {}
            for name, o in config.optimizers.items():
                for di in o.optimizer.data_iterators.values():
                    if di not in optimizer_data_iterators:
                        di_instance = stack.enter_context(di())
                        if comm and comm.size > 1:
                            di_instance = di_instance.slice(
                                rng, comm.size, comm.rank)
                        optimizer_data_iterators[di] = di_instance
                    else:
                        di_instance = optimizer_data_iterators[di]
                    o.data_iterators.append(di_instance)

            # Create a data_iterator instance only once for each dataset
            # in the monitors.
            monitor_data_iterators = {}
            for name, m in config.monitors.items():
                for di in m.monitor.data_iterators.values():
                    if di not in monitor_data_iterators:
                        di_instance = stack.enter_context(di())
                        if comm and comm.size > 1:
                            di_instance = di_instance.slice(
                                rng, comm.size, comm.rank)
                        monitor_data_iterators[di] = di_instance
                    else:
                        di_instance = monitor_data_iterators[di]
                    m.data_iterators.append(di_instance)

            monitor_data_iterators.update(optimizer_data_iterators)
            result, restart = _train(args, config)
    else:
        # Save parameters without training (0-epoch learning).
        logger.log(99, '0 epoch learning. (Just save parameter.)')
        if single_or_rankzero():
            _save_parameters(args, None, 0, config, True)
        result = True

    if single_or_rankzero() and not restart:
        if result:
            logger.log(99, 'Training Completed.')
            callback.update_status('finished')
        else:
            logger.log(99, 'Training Incomplete.')
            callback.update_status('failed')
    if single_or_rankzero():
        progress(None)
    return True
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    class ForwardConfig:
        pass
    config = ForwardConfig()
    info = load.load(files, prepare_data_iterator=False)
    config.global_config = info.global_config
    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(e.network.name))
            return

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    data_iterator = (lambda: data_iterator_csv_dataset(
        args.dataset, config.networks[0].batch_size, False,
        normalize=normalize))

    # Load dataset as CSV.
    with open(args.dataset, 'rt') as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(map(lambda row: list(map(
        lambda x: x if is_float(x) else compute_full_path(root_path, x),
        row)), rows))

    with data_iterator() as di:
        index = 0
        while index < di.size:
            data = di.next()
            result, outputs = forward(args, index, config, data, di.variables)
            if index == 0:
                for name, dim in zip(result.names, result.dims):
                    if dim == 1:
                        row0.append(name)
                    else:
                        for d in range(dim):
                            row0.append(name + '__' + str(d))
            for i, output in enumerate(outputs):
                if index + i < len(rows):
                    rows[index + i].extend(output)
            index += len(outputs)
            logger.log(99, 'data {} / {}'.format(min([index, len(rows)]),
                                                 len(rows)))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(row0)
        writer.writerows(rows)

    logger.log(99, 'Forward Completed.')
    progress(None)