def train_command(args):
    """Load the training configuration, run training and save results.

    Args:
        args: Parsed CLI arguments. Uses ``args.config`` (network
            definition file), ``args.param`` (optional pre-trained
            parameter file) and ``args.outdir`` (output directory for
            progress log and parameters).
    """
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    # Config file first, then (optionally) the parameter file.
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    # Lightweight namespace objects used to carry loaded config plus the
    # per-optimizer/per-monitor data iterators opened below.
    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    logger.log(99, 'Train with contexts {}'.format(available_contexts))

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:
        # NOTE(review): removed an unused local
        # `data_iterators = {'optimizer': {}, 'monitor': {}}` that was
        # never read. All iterators are opened on a single ExitStack so
        # every one is closed when training finishes or raises.
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)
    else:
        # save parameters without training (0 epoch learning)
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
def train_command(args):
    """Load the training configuration, run training and save results.

    Args:
        args: Parsed CLI arguments. Uses ``args.config`` (network
            definition file), ``args.param`` (optional pre-trained
            parameter file) and ``args.outdir`` (output directory for
            progress log and parameters).
    """
    logger.log(99, 'Train with contexts {}'.format(available_contexts))
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    # Config file first, then (optionally) the parameter file.
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)

    # Lightweight namespace objects used to carry loaded config plus the
    # per-optimizer/per-monitor data iterators opened below.
    class TrainConfig:
        pass
    config = TrainConfig()
    info = load.load(files)

    config.global_config = info.global_config
    config.training_config = info.training_config

    class OptConfig:
        pass
    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    class MonConfig:
        pass
    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Training
    max_iter = config.training_config.max_epoch * \
        config.training_config.iter_per_epoch
    if max_iter > 0:
        # NOTE(review): removed an unused local
        # `data_iterators = {'optimizer': {}, 'monitor': {}}` that was
        # never read. All iterators are opened on a single ExitStack so
        # every one is closed when training finishes or raises.
        with ExitStack() as stack:
            for name, o in config.optimizers.items():
                o.data_iterator = stack.enter_context(
                    o.optimizer.data_iterator())
            for name, m in config.monitors.items():
                m.data_iterator = stack.enter_context(
                    m.monitor.data_iterator())
            train(args, config)
    else:
        # save parameters without training (0 epoch learning)
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    logger.log(99, 'Training Completed.')
    progress(None)
def _save_parameters(args, suffix, epoch, force=False):
    """Save current parameters as a ``.nnp`` snapshot under ``args.outdir``.

    Saves are throttled: unless *force* is set, a new snapshot is written
    only if the target file does not already exist and more than 180
    seconds or more than 10 epochs have passed since the last save for
    this *suffix*.

    Args:
        args: CLI arguments; only ``args.outdir`` is used.
        suffix: Snapshot tag; ``None`` or ``'best'`` map to the plain
            ``results.nnp`` name (unless a callback overrides the base).
        epoch (int): Current epoch, embedded in the default file name.
        force (bool): Save unconditionally when True.
    """
    global _save_parameter_info
    if suffix not in _save_parameter_info:
        # First save for this suffix: initialize throttling bookkeeping.
        _save_parameter_info[suffix] = {}
        _save_parameter_info[suffix]['epoch'] = 0
        _save_parameter_info[suffix]['time'] = 0
    current_time = time.time()
    timediff = current_time - _save_parameter_info[suffix]['time']
    epochdiff = epoch - _save_parameter_info[suffix]['epoch']

    # Older snapshots with the same suffix (any epoch); removed below
    # before the new file is written.
    globname = os.path.join(args.outdir, 'results_{}_*.nnp'.format(suffix))
    exists = glob.glob(globname)

    base = os.path.join(args.outdir, 'results_{}_{}'.format(suffix, epoch))
    # A project callback may override the output base name entirely.
    base_candidate = callback.result_base(base, suffix, args.outdir)
    if base_candidate is None:
        if suffix is None or suffix == 'best':
            base = os.path.join(args.outdir, 'results')
    else:
        base = base_candidate
    filename = base + '.nnp'

    if force or (not os.path.exists(filename) and
                 (timediff > 180.0 or epochdiff > 10)):
        # Remove existing nnp before saving new file.
        for exist in exists:
            os.unlink(exist)
        version_filename = base + '_version.txt'
        with open(version_filename, 'w') as file:
            file.write('{}\n'.format(nnp_version()))
        param_filename = base + '_param.protobuf'
        save_parameters(param_filename)
        # Bundle version tag, network config and parameters into one
        # .nnp archive (a plain zip file).
        with zipfile.ZipFile(filename, 'w') as nnp:
            nnp.write(version_filename, 'nnp_version.txt')
            nnp.write(_save_parameter_info['config'],
                      os.path.basename(_save_parameter_info['config']))
            nnp.write(param_filename, 'parameter.protobuf')
        # Staging files are now inside the archive; drop them.
        os.unlink(version_filename)
        os.unlink(param_filename)
        _save_parameter_info[suffix]['epoch'] = epoch
        _save_parameter_info[suffix]['time'] = current_time
        callback.save_train_snapshot()
def encode_param_command(args, **kwargs):
    """Encode text-dumped parameters into a single parameter file.

    Reads every regular file directly under ``args.indir`` (file names
    encode the parameter path with ``~`` standing in for ``/``), loads
    each one, then writes all parameters to ``args.param``.
    """
    # Scan the input directory; keep regular files only.
    in_files = []
    for entry in os.listdir(args.indir):
        if os.path.isfile(os.path.join(args.indir, entry)):
            in_files.append(entry)

    logger.log(99, 'Loading parameters...')
    for file_path in in_files:
        logger.log(99, file_path)
        # The stored name maps back to a parameter path: strip the
        # extension and restore '/' separators from '~'.
        param_name = os.path.splitext(file_path)[0].replace('~', '/')
        load_param_in_txt(param_name, os.path.join(args.indir, file_path))

    # Save parameters
    logger.log(99, 'Saving parameters...')
    save_parameters(args.param)
    logger.log(99, 'Encode Parameter Completed.')
def _save_parameters(args, suffix, epoch, force=False):
    """Save current parameters as a ``.nnp`` snapshot under ``args.outdir``.

    Saves are throttled: unless *force* is set, a new snapshot is written
    only if the target file does not already exist and more than 180
    seconds or more than 10 epochs have passed since the last save for
    this *suffix*.

    Args:
        args: CLI arguments; only ``args.outdir`` is used.
        suffix: Snapshot tag; ``'best'`` maps to the plain ``results.nnp``
            name.
        epoch (int): Current epoch, embedded in the default file name.
        force (bool): Save unconditionally when True.
    """
    global _save_parameter_info
    if suffix not in _save_parameter_info:
        # First save for this suffix: initialize throttling bookkeeping.
        _save_parameter_info[suffix] = {}
        _save_parameter_info[suffix]['epoch'] = 0
        _save_parameter_info[suffix]['time'] = 0
    current_time = time.time()
    timediff = current_time - _save_parameter_info[suffix]['time']
    epochdiff = epoch - _save_parameter_info[suffix]['epoch']

    # Older snapshots with the same suffix (any epoch); removed below
    # before the new file is written.
    globname = os.path.join(args.outdir, 'results_{}_*.nnp'.format(suffix))
    exists = glob.glob(globname)

    base = os.path.join(args.outdir, 'results_{}_{}'.format(suffix, epoch))
    if suffix == 'best':
        base = os.path.join(args.outdir, 'results')
    filename = base + '.nnp'

    # BUG FIX: previously written as
    #   `if not os.path.exists(filename) and (force or ...)`,
    # so `force=True` could never overwrite an existing snapshot. `force`
    # now bypasses both the existence check and the time/epoch throttle,
    # matching the other _save_parameters variants in this file.
    if force or (not os.path.exists(filename) and
                 (timediff > 180.0 or epochdiff > 10)):
        # Remove existing nnp files BEFORE writing the new one: with the
        # fixed force semantics, unlinking afterwards could delete a
        # just-written file whose name matches the glob.
        for exist in exists:
            os.unlink(exist)
        version_filename = base + '_version.txt'
        with open(version_filename, 'w') as file:
            file.write('{}\n'.format(nnp_version()))
        param_filename = base + '_param.protobuf'
        save_parameters(param_filename)
        # Bundle version tag, network config and parameters into one
        # .nnp archive (a plain zip file).
        with zipfile.ZipFile(filename, 'w') as nnp:
            nnp.write(version_filename, 'nnp_version.txt')
            nnp.write(_save_parameter_info['config'],
                      os.path.basename(_save_parameter_info['config']))
            nnp.write(param_filename, 'parameter.protobuf')
        # Staging files are now inside the archive; drop them.
        os.unlink(version_filename)
        os.unlink(param_filename)
        _save_parameter_info[suffix]['epoch'] = epoch
        _save_parameter_info[suffix]['time'] = current_time
def _save_parameters(args, suffix, epoch, train_config, force=False):
    """Save parameters (and periodically optimizer states) as a ``.nnp`` file.

    Saves are throttled: unless *force* is set, a new snapshot is written
    only if the target file does not already exist and more than 180
    seconds or more than 10 epochs have passed since the last save for
    this *suffix*. Optimizer states are bundled into the archive every
    ``_OPTIMIZER_CHECKPOINT_INTERVAL`` epochs when optimizers exist.

    Args:
        args: CLI arguments; only ``args.outdir`` is used.
        suffix: Snapshot tag; ``None`` or ``'best'`` map to the plain
            ``results.nnp`` name (unless a callback overrides the base).
        epoch (int): Current epoch, embedded in the default file name.
        train_config: Training configuration; ``train_config.optimizers``
            gates optimizer-state checkpointing.
        force (bool): Save unconditionally when True.
    """
    global _save_parameter_info
    if suffix not in _save_parameter_info:
        # First save for this suffix: initialize throttling bookkeeping.
        _save_parameter_info[suffix] = {}
        _save_parameter_info[suffix]['epoch'] = 0
        _save_parameter_info[suffix]['time'] = 0
    current_time = time.time()
    timediff = current_time - _save_parameter_info[suffix]['time']
    epochdiff = epoch - _save_parameter_info[suffix]['epoch']

    # Older snapshots with the same suffix (any epoch); removed below
    # before the new file is written.
    globname = os.path.join(args.outdir, 'results_{}_*.nnp'.format(suffix))
    exists = glob.glob(globname)

    base = os.path.join(args.outdir, 'results_{}_{}'.format(suffix, epoch))
    # A project callback may override the output base name entirely.
    base_candidate = callback.result_base(base, suffix, args.outdir)
    if base_candidate is None:
        if suffix is None or suffix == 'best':
            base = os.path.join(args.outdir, 'results')
    else:
        base = base_candidate
    filename = base + '.nnp'

    if force or (not os.path.exists(filename) and
                 (timediff > 180.0 or epochdiff > 10)):
        # Remove existing nnp before saving new file.
        for exist in exists:
            os.unlink(exist)
        version_filename = base + '_version.txt'
        with open(version_filename, 'w') as file:
            file.write('{}\n'.format(nnp_version()))
        param_filename = base + '_param.h5'
        save_parameters(param_filename)
        # Optimizer states are checkpointed only on interval epochs.
        # NOTE(review): _OPTIMIZER_CHECKPOINT_INTERVAL is a module-level
        # constant defined elsewhere in this file.
        need_save_opti = train_config.optimizers and epoch % _OPTIMIZER_CHECKPOINT_INTERVAL == 0
        if need_save_opti:
            opti_filenames = save_optimizer_states(base, '.h5', train_config)
        # Bundle version tag, network config, parameters and (optionally)
        # optimizer states into one .nnp archive (a plain zip file).
        with zipfile.ZipFile(filename, 'w') as nnp:
            nnp.write(version_filename, 'nnp_version.txt')
            nnp.write(_save_parameter_info['config'],
                      os.path.basename(_save_parameter_info['config']))
            nnp.write(param_filename, 'parameter.h5')
            if need_save_opti:
                for f in opti_filenames:
                    # Archive name: strip the "<base>_" prefix from the
                    # optimizer-state file path.
                    nnp.write(f, f[len(base) + 1:])
        # Staging files are now inside the archive; drop them.
        os.unlink(version_filename)
        os.unlink(param_filename)
        if need_save_opti:
            for f in opti_filenames:
                os.unlink(f)
        _save_parameter_info[suffix]['epoch'] = epoch
        _save_parameter_info[suffix]['time'] = current_time
        callback.save_train_snapshot()
def _evaluate(args, config, monitoring_report, best_error):
    """Run every monitor over one epoch of its dataset and report errors.

    Args:
        args: CLI arguments; ``args.outdir`` receives ``parameters.h5``.
        config: Training configuration holding ``monitors``,
            ``training_config`` and ``global_config``.
        monitoring_report (list): Appended with one "name: error" line
            per monitor.
        best_error: Best validation error seen so far (falsy on first
            call).

    Returns:
        tuple: ``(best_error, error_str)`` — the possibly-updated best
        error and a formatted summary string of all monitor errors.
    """
    error_str = ''
    valid_error = 0.0
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        di = mon.data_iterator
        dp_epoch = di.epoch
        # One full pass: loop until the iterator rolls over to the next
        # epoch.
        while dp_epoch == di.epoch:
            # Set data to variable
            datas = di.next()
            for v, d in m.dataset_assign.items():
                # Inputs of the first forward step get the default
                # context; all other variables keep ctx=None.
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     datas[di.variables.index(d)],
                                     ctx=dest_context)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context)

            # Sum error before forward to prepare input data while processing
            # on GPU. NOTE(review): summation is deferred by one iteration
            # (skipped when error_count == 0), so the final batch is
            # accumulated after the loop below.
            if error_count > 0:
                for v in m.monitor_variables:
                    error_sum_monitor += np.mean(v.variable_instance.d)
                progress(
                    'Evaluating "{0}"'.format(name) +
                    ' : error={0:0.6f}'.format(
                        error_sum_monitor / error_count),
                    di.position * 1.0 / di.size)
            error_count += 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        for v in m.monitor_variables:
            error_sum_monitor += np.mean(v.variable_instance.d)
        error = error_sum_monitor / error_count
        monitoring_report.append(' {}: {}\n'.format(name, error))
        # Build " {name=err, name=err}"-style summary incrementally.
        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error
    if error_str != '':
        error_str += '}'

    # Save Parameters: always when save_best is off; otherwise only when
    # this validation error ties or beats the best so far (or on the
    # first call, when best_error is falsy).
    if (not config.training_config.save_best) or \
            (not best_error) or \
            (best_error is not None and valid_error <= best_error):
        best_error = valid_error
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))
    return best_error, error_str
def _evaluate(args, config, monitoring_report, best_error):
    """Run every monitor over one epoch of its dataset and report errors.

    Args:
        args: CLI arguments; ``args.outdir`` receives ``parameters.h5``.
        config: Training configuration holding ``monitors``,
            ``training_config`` and ``global_config``.
        monitoring_report (list): Appended with one "name: error" line
            per monitor.
        best_error: Best validation error seen so far (falsy on first
            call).

    Returns:
        tuple: ``(best_error, error_str)`` — the possibly-updated best
        error and a formatted summary string of all monitor errors.
    """
    error_str = ''
    valid_error = 0.0
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        di = mon.data_iterator
        dp_epoch = di.epoch
        # One full pass: loop until the iterator rolls over to the next
        # epoch.
        while dp_epoch == di.epoch:
            # Set data to variable
            datas = di.next()
            for v, d in m.dataset_assign.items():
                # Inputs of the first forward step get the default
                # context; all other variables keep ctx=None.
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     datas[di.variables.index(d)],
                                     ctx=dest_context)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context)

            # Sum error before forward to prepare input data while processing
            # on GPU. NOTE(review): summation is deferred by one iteration
            # (skipped when error_count == 0), so the final batch is
            # accumulated after the loop below.
            if error_count > 0:
                for v in m.monitor_variables:
                    error_sum_monitor += np.mean(v.variable_instance.d)
                progress('Evaluating "{0}"'.format(
                    name) + ' : error={0:0.6f}'.format(
                    error_sum_monitor / error_count),
                    di.position * 1.0 / di.size)
            error_count += 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        for v in m.monitor_variables:
            error_sum_monitor += np.mean(v.variable_instance.d)
        error = error_sum_monitor / error_count
        monitoring_report.append(' {}: {}\n'.format(name, error))
        # Build " {name=err, name=err}"-style summary incrementally.
        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error
    if error_str != '':
        error_str += '}'

    # Save Parameters: always when save_best is off; otherwise only when
    # this validation error ties or beats the best so far (or on the
    # first call, when best_error is falsy).
    if (not config.training_config.save_best) or \
            (not best_error) or \
            (best_error is not None and valid_error <= best_error):
        best_error = valid_error
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))
    return best_error, error_str