# Shared imports assumed by the functions below; NNabla-side helpers such as
# let_data_to_variable, progress, logger, calc_norm_diff, _all_reduce,
# current_communicator, save_parameters, callback, nodeTimeCollector and
# current_path are expected to come from the surrounding module.
import itertools
import os
from collections import OrderedDict

import numpy as np


def _evaluate(config):
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        data_size = max([di.size for di in mon.data_iterators])
        batch_size = max([di.batch_size for di in mon.data_iterators])
        if config.save_evaluation_variable:
            # Keep intermediate buffers alive so they can be dumped below.
            for pv in m.net_variables.values():
                if pv not in itertools.chain(m.dataset_assign.keys(),
                                             m.generator_assign.keys()):
                    pv.variable_instance.persistent = True
        for i in range(data_size // batch_size):
            data = OrderedDict()
            for di in mon.data_iterators:
                data.update(zip(di.variables, di.next()))
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context
                let_data_to_variable(v.variable_instance, data[d],
                                     ctx=dest_context,
                                     data_name=d, variable_name=v.name)
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.variable_instance.d.shape),
                                     ctx=dest_context, variable_name=v.name)
            if config.iter >= config.start_iteration:
                # m.network.forward(m.forward_sequence)
                config.cb.forward(m)
            error_sum = 0.0
            for v in m.monitor_variables:
                error_sum += np.mean(v.variable_instance.d)
                # v.variable_instance.data.zero()
            error_sum_monitor += error_sum
            if config.save_evaluation_variable:
                for k, v in m.net_variables.items():
                    fn_d = os.path.join(
                        current_path, "logdata",
                        "{}-{}-{}-{}.npy".format(config.impl, name,
                                                 k.replace('/', '-'),
                                                 config.iter))
                    # The gradient filename also includes the iteration;
                    # otherwise dumps from later iterations would overwrite
                    # earlier ones.
                    fn_g = os.path.join(
                        current_path, "logdata",
                        "{}-{}-{}-{}-g.npy".format(config.impl, name,
                                                   k.replace('/', '-'),
                                                   config.iter))
                    np.save(fn_d, v.variable_instance.d)
                    np.save(fn_g, v.variable_instance.g)
                print("save evaluation, iter: {}, name: {}".format(
                    config.iter, name))
    return error_sum_monitor

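# Note on the persistent flags set in _evaluate above: in NNabla, intermediate
# Variable buffers may be cleared or reused between function calls during a
# forward pass, so marking the non-input variables persistent keeps their
# .d / .g contents valid long enough to be dumped with np.save afterwards.
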
def let_data():
    # Closure: `d`, `data`, and `v` are taken from the enclosing scope.
    if d not in data:
        raise ValueError('Data "' + d + '" is not found in dataset.')
    let_data_to_variable(v.variable_instance, data=data[d],
                         data_name=d, variable_name=v.name)

def _forward(args, index, config, data, variables, output_image=True):
    class ForwardResult:
        pass

    result = ForwardResult()
    result.dims = []
    result.types = []
    result.names = []

    output_index = 0
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            vind = variables.index(d)
            if v.variable_instance.d.shape != data[vind].shape:
                let_data_to_variable(
                    v.variable_instance,
                    np.reshape(data[vind], v.variable_instance.d.shape))
            else:
                let_data_to_variable(
                    v.variable_instance,
                    data[vind].astype(v.variable_instance.d.dtype))

        # Generate data
        for v, generator in e.generator_assign.items():
            v.variable_instance.d = generator(v.shape)

        # Forward recursive
        sum = [np.zeros(o.shape) for o in e.output_assign.keys()]
        for i in range(e.num_evaluations):
            e.network.forward(e.forward_sequence)
            if e.need_back_propagation:
                e.network.backward(e.backward_sequence)
            for o_index, o in enumerate(e.output_assign.keys()):
                if e.repeat_evaluation_type == "last":
                    sum[o_index] = o.variable_instance.d
                else:
                    sum[o_index] += o.variable_instance.d
        if e.repeat_evaluation_type == "last":
            avg = sum
        else:
            avg = [s / e.num_evaluations for s in sum]

        result_1, outputs_1 = _update_result(args, index, result, avg,
                                             output_index,
                                             e.output_assign.values(),
                                             output_image)
        if 'outputs' in locals():
            outputs = [output + output_1
                       for output, output_1 in zip(outputs, outputs_1)]
        else:
            outputs = outputs_1
        result = result_1
        output_index += len(avg)

    return result, outputs

def let_data():
    # Closure: `loaded_data`, `opt`, `d`, and `v` come from the enclosing scope.
    try:
        data = loaded_data[opt.data_iterator][
            opt.data_iterator.variables.index(d)]
    except (KeyError, ValueError):
        print(opt.data_iterator.variables)
        raise ValueError('Data "' + d + '" is not found in dataset.')
    let_data_to_variable(v.variable_instance, data=data)

def let_data():
    # Closure: `loaded_datas`, `opt`, `d`, and `v` come from the enclosing scope.
    try:
        data = loaded_datas[opt.data_iterator][
            opt.data_iterator.variables.index(d)]
    except (KeyError, ValueError):
        print(opt.data_iterator.variables)
        raise ValueError('Data "' + d + '" is not found in dataset.')
    let_data_to_variable(v.variable_instance, data=data)

def forward(args, index, config, data, variables):
    class ForwardResult:
        pass

    result = ForwardResult()
    result.dims = []
    result.types = []
    result.names = []

    output_index = 0
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            vind = variables.index(d)
            if v.variable_instance.d.shape != data[vind].shape:
                let_data_to_variable(
                    v.variable_instance,
                    np.reshape(data[vind], v.variable_instance.d.shape))
            else:
                let_data_to_variable(
                    v.variable_instance,
                    data[vind].astype(v.variable_instance.d.dtype))

        # Generate data
        for v, generator in e.generator_assign.items():
            v.variable_instance.d = generator(v.shape)

        # Forward recursive
        sum = [np.zeros(o.shape) for o in e.output_assign.keys()]
        for i in range(e.num_evaluations):
            e.network.forward(e.forward_sequence)
            if e.need_back_propagation:
                e.network.backward(e.backward_sequence)
            for o_index, o in enumerate(e.output_assign.keys()):
                if e.repeat_evaluation_type == "last":
                    sum[o_index] = o.variable_instance.d
                else:
                    sum[o_index] += o.variable_instance.d
        if e.repeat_evaluation_type == "last":
            avg = sum
        else:
            avg = [s / e.num_evaluations for s in sum]

        result_1, outputs_1 = update_result(args, index, result, avg,
                                            output_index,
                                            e.output_assign.values())
        if 'outputs' in locals():
            outputs = [output + output_1
                       for output, output_1 in zip(outputs, outputs_1)]
        else:
            outputs = outputs_1
        result = result_1
        output_index += len(outputs_1[0])

    return result, outputs

def _forward(args, index, config, data, variables, output_image=True):
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            vind = variables.index(d)
            if v.variable_instance.d.shape != data[vind].shape:
                let_data_to_variable(
                    v.variable_instance,
                    np.reshape(data[vind], v.variable_instance.d.shape),
                    data_name=d, variable_name=v.name)
            else:
                let_data_to_variable(
                    v.variable_instance,
                    data[vind].astype(v.variable_instance.d.dtype),
                    data_name=d, variable_name=v.name)

        # Generate data
        for v, generator in e.generator_assign.items():
            v.variable_instance.d = generator(v.variable_instance.d.shape)

        # Forward recursive
        sum = [
            np.zeros(o.variable_instance.d.shape,
                     dtype=o.variable_instance.d.dtype)
            for o in e.output_assign.keys()
        ]
        for i in range(e.num_evaluations):
            e.forward_target.forward(clear_buffer=True)
            if e.need_back_propagation:
                e.backward_target.backward(clear_buffer=True)
            for o_index, o in enumerate(e.output_assign.keys()):
                if e.repeat_evaluation_type == "last":
                    sum[o_index] = o.variable_instance.d
                else:
                    sum[o_index] += o.variable_instance.d
        if e.repeat_evaluation_type == "last":
            avg = sum
        else:
            avg = [s / e.num_evaluations for s in sum]
    return avg

def _forward(args, index, config, data, variables, output_image=True):
    class ForwardResult:
        pass

    result = ForwardResult()
    result.dims = []
    result.types = []
    result.names = []

    output_index = 0
    for e in config.executors:
        for v, d in e.dataset_assign.items():
            vind = variables.index(d)
            if v.variable_instance.d.shape != data[vind].shape:
                let_data_to_variable(
                    v.variable_instance,
                    np.reshape(data[vind], v.variable_instance.d.shape),
                    data_name=d, variable_name=v.name)
            else:
                let_data_to_variable(
                    v.variable_instance,
                    data[vind].astype(v.variable_instance.d.dtype),
                    data_name=d, variable_name=v.name)

        # Generate data
        for v, generator in e.generator_assign.items():
            v.variable_instance.d = generator(v.variable_instance.d.shape)

        # Forward recursive
        sum = [
            np.zeros(o.variable_instance.d.shape,
                     dtype=o.variable_instance.d.dtype)
            for o in e.output_assign.keys()
        ]
        sum_mux = [
            np.zeros(o.variable_instance.d.shape,
                     dtype=o.variable_instance.d.dtype)
            for o in e.output_assign.keys()
        ]
        for i in range(e.num_evaluations):
            e.forward_target.forward(clear_buffer=True)
            if e.need_back_propagation:
                e.backward_target.backward(clear_buffer=True)
            for o_index, o in enumerate(e.output_assign.keys()):
                if e.repeat_evaluation_type == "last":
                    sum[o_index] = o.variable_instance.d
                else:
                    sum[o_index] += o.variable_instance.d
                    sum_mux[o_index] += (o.variable_instance.d) ** 2
        if e.repeat_evaluation_type == "last":
            avg = sum
        elif e.repeat_evaluation_type == "std":
            std_result = [
                np.nan_to_num(
                    np.sqrt(x / e.num_evaluations -
                            (y / e.num_evaluations) ** 2))
                for x, y in zip(sum_mux, sum)
            ]
            avg = std_result
        else:
            avg = [s / e.num_evaluations for s in sum]

        result_1, outputs_1 = _update_result(args, index, result, avg,
                                             output_index,
                                             e.output_assign.values(),
                                             output_image)
        if 'outputs' in locals():
            outputs = [output + output_1
                       for output, output_1 in zip(outputs, outputs_1)]
        else:
            outputs = outputs_1
        result = result_1
        output_index += len(avg)

    return result, outputs

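# Why the sum_mux accumulator in _forward works for the "std"
# repeat_evaluation_type: the per-element standard deviation over
# num_evaluations forward passes is recovered from the two running sums via
# Var[x] = E[x^2] - E[x]^2 (np.nan_to_num guards against tiny negative
# variances from floating-point round-off). A quick self-contained check:
#
#     import numpy as np
#
#     samples = np.random.randn(5, 3, 4)      # 5 evaluations of a (3, 4) output
#     s = samples.sum(axis=0)                 # running sum (like `sum`)
#     s2 = (samples ** 2).sum(axis=0)         # running sum of squares (like `sum_mux`)
#     n = samples.shape[0]
#     std = np.nan_to_num(np.sqrt(s2 / n - (s / n) ** 2))
#     assert np.allclose(std, samples.std(axis=0))
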
def generate_data():
    # Closure: `v` and `generator` come from the enclosing scope.
    let_data_to_variable(v.variable_instance, data=generator(v.shape),
                         variable_name=v.name)

def _update(iter, config, cost):
    comm = current_communicator()

    loaded_data = {}
    is_first_optimizer = True

    def _sum_cost():
        if comm:
            # logger.log(99, "Calc cost with communicator")
            var = [nn.NdArray()]
            var[0].data = cost.sum_iteration
            _all_reduce(comm, var, division=False, inplace=True)
            cost.sum_epoch += var[0].data
            cost.num_iteration += comm.size
        else:
            cost.sum_epoch += cost.sum_iteration
            cost.num_iteration += 1

    for opt in config.optimizers.values():
        o = opt.optimizer
        # Load dataset
        di = opt.data_iterator
        if o.data_iterator not in loaded_data:
            loaded_data[o.data_iterator] = di.next()
        data = loaded_data[o.data_iterator]
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data[di.variables.index(d)],
                                 ctx=dest_context,
                                 data_name=d, variable_name=v.name)

        # Generate data
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.shape),
                                 ctx=dest_context, variable_name=v.name)

        # Monitor loss before forward to prepare input data while processing
        # on GPU
        if cost.variables:
            for l in cost.variables:
                cost.sum_iteration += np.mean(l.variable_instance.d)
                l.variable_instance.data.zero()
            if is_first_optimizer:
                is_first_optimizer = False
                _sum_cost()
                if single_or_rankzero():
                    progress("Training : cost={0:0.6f}".format(
                                 cost.sum_iteration),
                             (iter % config.training_config.iter_per_epoch) * 1.0 /
                             config.training_config.iter_per_epoch)
                cost.sum_iteration = 0.0

        # Forward
        o.network.forward(o.forward_sequence)

        # Backward
        o.network.backward(o.backward_sequence, iter % o.update_interval == 0)

        # Update
        if iter % o.update_interval == o.update_interval - 1:
            if o.weight_decay > 0:
                o.solver.weight_decay(o.weight_decay)

            if o.comm:  # Updated param with communicator
                params = [x.grad for x in o.parameters.values()]
                _all_reduce(o.comm, params, division=True, inplace=True)

            if o.scheduler is not None:
                o.solver.set_learning_rate(o.scheduler.get_learning_rate(iter))
            o.solver.update()

        # Sync w sometimes
        if iter % 10 == 9:  # TODO: change the interval
            if o.comm:
                params = [x.data for x in o.parameters.values()]
                _all_reduce(o.comm, params, division=True, inplace=True)

        # Reserve monitor loss
        cost.variables = o.loss_variables

    # Monitor loss at the end of iteration
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iteration += np.mean(l.variable_instance.d)
            l.variable_instance.data.zero()
        _sum_cost()
        cost.variables = None
        cost.sum_iteration = 0.0

    return cost

def compare_optimizer(config, parameters, config_cpu, parameters_cpu,
                      result_array):
    loaded_datas = {}
    for opt, opt_cpu in zip(config.optimizers.values(),
                            config_cpu.optimizers.values()):
        o = opt.optimizer
        o_cpu = opt_cpu.optimizer
        opts = [o, o_cpu]
        result_name = "optimizer '%s' with network '%s'" % (o.name,
                                                            o.network.name)
        result_dict = OrderedDict()

        logger.log(99, 'Comparing ' + result_name + ' ...')
        logger.log(
            99,
            'process(func, variable), norm_diff, current_context_std, cpu_std, diff_std')

        # Start comparison with same parameters
        for p, p_cpu in zip(parameters.values(), parameters_cpu.values()):
            p_cpu.d = p.d

        # Load dataset
        di = opt.data_iterator
        if di not in loaded_datas:
            loaded_datas[di] = di.next()
        datas = loaded_datas[di]

        for v, d in o.dataset_assign.items():
            let_data_to_variable(v.variable_instance,
                                 datas[di.variables.index(d)])
        for v, d in o_cpu.dataset_assign.items():
            let_data_to_variable(v.variable_instance,
                                 datas[di.variables.index(d)])

        # Generate data
        generated = {}
        for v, generator in o.generator_assign.items():
            generated[v.name] = generator(v.shape)
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generated[v.name], ctx=dest_context)
        for v, generator in o_cpu.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generated[v.name], ctx=dest_context)

        last_max_diff = 1e-5

        # Forward
        for func, func_cpu in zip(o.forward_sequence, o_cpu.forward_sequence):
            o.network.forward_function(func)
            o_cpu.network.forward_function(func_cpu)
            large_diff = False
            for v, v_cpu in zip(func.outputs, func_cpu.outputs):
                name = 'forward_function (%s, %s)' % (func.name, v.name)
                if v.variable_instance.d.shape != v_cpu.variable_instance.d.shape:
                    logger.log(99, 'Variable shape is different in %s (current_context=%s, cpu=%s)' % (
                        v.name, str(v.variable_instance.d.shape),
                        str(v_cpu.variable_instance.d.shape)))
                norm_diff, std1, std2, diff_std = calc_norm_diff(
                    v.variable_instance.d, v_cpu.variable_instance.d)
                logger.log(99, '%s, %f, %f, %f, %f' %
                           (name, norm_diff, std1, std2, diff_std))
                result_dict[name] = norm_diff
                if norm_diff > last_max_diff:
                    if norm_diff > last_max_diff * 10:
                        logger.log(99, ' current_context(data)=' +
                                   str(v.variable_instance.d.flatten()))
                        logger.log(99, ' cpu(data)=' +
                                   str(v_cpu.variable_instance.d.flatten()))
                        large_diff = True
                    last_max_diff = norm_diff
            if large_diff:
                logger.log(99, ' x_data:')
                for v, v_cpu in zip(func.inputs, func_cpu.inputs):
                    logger.log(99, ' current_context(%s.d)=%s' %
                               (v.name, str(v.variable_instance.d.flatten())))
                    logger.log(99, ' cpu(%s.d)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.d.flatten())))

        # Backward
        o.network.prepare_backward(o.backward_sequence)
        o_cpu.network.prepare_backward(o_cpu.backward_sequence)
        for seq, seq_cpu in zip(o.backward_sequence.sequence,
                                o_cpu.backward_sequence.sequence):
            o.network.backward_function(seq)
            o_cpu.network.backward_function(seq_cpu)
            large_diff = False
            for v, v_cpu in zip(seq.func.inputs, seq_cpu.func.inputs):
                if v.variable_instance.need_grad:
                    name = 'backward_function (%s, %s)' % (seq.func.name,
                                                           v.name)
                    norm_diff, std1, std2, diff_std = calc_norm_diff(
                        v.variable_instance.g, v_cpu.variable_instance.g)
                    logger.log(99, '%s, %f, %f, %f, %f' %
                               (name, norm_diff, std1, std2, diff_std))
                    result_dict[name] = norm_diff
                    if norm_diff > last_max_diff:
                        if norm_diff > last_max_diff * 10:
                            logger.log(99, ' current_context(diff)=' +
                                       str(v.variable_instance) +
                                       str(v.variable_instance.g.flatten()))
                            logger.log(99, ' cpu(diff)=' +
                                       str(v_cpu.variable_instance) +
                                       str(v_cpu.variable_instance.g.flatten()))
                            large_diff = True
                        last_max_diff = norm_diff
            if large_diff:
                logger.log(99, ' x_data:')
                for v, v_cpu in zip(seq.func.inputs, seq_cpu.func.inputs):
                    logger.log(99, ' current_context(%s.d)=%s' %
                               (v.name, str(v.variable_instance.d.flatten())))
                    logger.log(99, ' cpu(%s.d)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.d.flatten())))
                logger.log(99, ' y_diff:')
                for v, v_cpu in zip(seq.func.outputs, seq_cpu.func.outputs):
                    logger.log(99, ' current_context(%s.g)=%s' %
                               (v.name, str(v.variable_instance.g.flatten())))
                    logger.log(99, ' cpu(%s.g)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.g.flatten())))

        # Update (weight decay)
        if o.weight_decay > 0:
            o.solver.weight_decay(o.weight_decay)
            o_cpu.solver.weight_decay(o_cpu.weight_decay)

        # Update
        o.solver.update()
        o_cpu.solver.update()

        for (v, lr), (v_cpu, lr_cpu) in zip(
                o.parameter_learning_rate_multipliers.items(),
                o_cpu.parameter_learning_rate_multipliers.items()):
            if lr > 0:
                name = 'update (%s, %s)' % (o.solver.name, v.name)
                norm_diff, std1, std2, diff_std = calc_norm_diff(
                    v.variable_instance.d, v_cpu.variable_instance.d)
                logger.log(99, '%s, %f, %f, %f, %f' %
                           (name, norm_diff, std1, std2, diff_std))
                result_dict[name] = norm_diff

        result_array = add_result(result_name, result_dict, result_array)

    return result_array

def _evaluate(args, config, monitoring_report, best_error):
    error_str = ''
    valid_error = 0.0
    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        di = mon.data_iterator
        dp_epoch = di.epoch
        while dp_epoch == di.epoch:
            # Set data to variable
            datas = di.next()
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     datas[di.variables.index(d)],
                                     ctx=dest_context)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context)

            # Sum error before forward to prepare input data while processing
            # on GPU
            if error_count > 0:
                for v in m.monitor_variables:
                    error_sum_monitor += np.mean(v.variable_instance.d)
                progress('Evaluating "{0}"'.format(name) +
                         ' : error={0:0.6f}'.format(
                             error_sum_monitor / error_count),
                         di.position * 1.0 / di.size)
            error_count += 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        for v in m.monitor_variables:
            error_sum_monitor += np.mean(v.variable_instance.d)
        error = error_sum_monitor / error_count

        monitoring_report.append(' {}: {}\n'.format(name, error))

        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error
    if error_str != '':
        error_str += '}'

    # Save Parameters
    if (not config.training_config.save_best) or \
       (not best_error) or \
       (best_error is not None and valid_error <= best_error):
        best_error = valid_error
        save_parameters(os.path.join(args.outdir, 'parameters.h5'))

    return best_error, error_str

def _update(iter, config, cost):
    comm = current_communicator()

    loaded_data = {}
    is_first_optimizer = True

    def _sum_cost():
        if comm:
            # logger.log(99, "Calc cost with communicator")
            var = [nn.NdArray()]
            var[0].data = cost.sum_iteration
            _all_reduce(comm, var, division=False, inplace=True)
            cost.sum_epoch += var[0].data
            cost.num_iteration += comm.size
        else:
            cost.sum_epoch += cost.sum_iteration
            cost.num_iteration += 1

    def _get_reserved_variable(shape, reserved_variable_name, iter,
                               iter_per_epoch, max_epoch):
        if reserved_variable_name == "%iter":
            value = iter
        elif reserved_variable_name == "%max_iter":
            value = max_epoch * iter_per_epoch
        elif reserved_variable_name == "%epoch":
            value = iter // iter_per_epoch
        elif reserved_variable_name == "%epochf":
            value = iter * 1.0 / iter_per_epoch
        elif reserved_variable_name == "%max_epoch":
            value = max_epoch
        elif reserved_variable_name == "%progress":
            value = (iter * 1.0 / iter_per_epoch) / max_epoch
        else:
            raise ValueError(
                "Unknown reserved variable {}".format(reserved_variable_name))
        return value

    for opt in config.optimizers.values():
        o = opt.optimizer
        if (o.start_iter == 0 or iter + 1 >= o.start_iter) and \
           (o.end_iter == 0 or iter + 1 <= o.end_iter):
            # Load dataset
            data = OrderedDict()
            for di in opt.data_iterators:
                if di not in loaded_data:
                    loaded_data[di] = di.next()
                data.update(zip(di.variables, loaded_data[di]))

            for v, d in o.dataset_assign.items():
                dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                    0].inputs else None
                if d not in data and d[0] == "%":
                    value = _get_reserved_variable(
                        v.variable_instance.shape, d, iter,
                        config.training_config.iter_per_epoch,
                        config.training_config.max_epoch)
                    v.variable_instance.data.fill(value)
                elif d in data:
                    let_data_to_variable(v.variable_instance, data[d],
                                         ctx=dest_context,
                                         data_name=d, variable_name=v.name)
                else:
                    raise ValueError(
                        'Variable "{}" is not found in dataset "{}", optimizer "{}"'
                        .format(d, ', '.join(o.data_iterators.keys()), o.name))

            # Generate data
            for v, generator in o.generator_assign.items():
                dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context, variable_name=v.name)

            # Monitor loss before forward to prepare input data while
            # processing on GPU
            if cost.variables:
                for l in cost.variables:
                    cost.sum_iteration += np.mean(l.variable_instance.d)
                    # l.variable_instance.data.zero()
                if is_first_optimizer:
                    is_first_optimizer = False
                    _sum_cost()
                    if single_or_rankzero():
                        progress("Training : cost={0:0.6f}".format(
                                     cost.sum_iteration),
                                 (iter % config.training_config.iter_per_epoch) * 1.0 /
                                 config.training_config.iter_per_epoch)
                    cost.sum_iteration = 0.0

            with nodeTimeCollector.collect_cost_time(comm, iter):
                # Forward
                o.network.forward(o.forward_sequence)

                # Backward
                o.network.backward(o.backward_sequence,
                                   iter % o.update_interval == 0)

            # Update
            if iter % o.update_interval == o.update_interval - 1:
                if o.weight_decay > 0:
                    o.solver.weight_decay(o.weight_decay)

                if o.comm:  # Updated param with communicator
                    params = [x.grad for x in o.parameters.values()]
                    _all_reduce(o.comm, params, division=True, inplace=True)

                if o.scheduler is not None:
                    o.solver.set_learning_rate(
                        o.scheduler.get_learning_rate(iter))
                o.solver.update()

            # Sync w sometimes
            if iter % 10 == 9:  # TODO: change the interval
                if o.comm:
                    params = [x.data for x in o.parameters.values()]
                    _all_reduce(o.comm, params, division=True, inplace=True)

            # Reserve monitor loss
            cost.variables = o.loss_variables

    # Monitor loss at the end of epoch
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iteration += np.mean(l.variable_instance.d)
            # l.variable_instance.data.zero()
        _sum_cost()
        cost.variables = None
        cost.sum_iteration = 0.0

    return cost

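# Illustration of the reserved-variable scheme handled by _get_reserved_variable
# above (the helper is nested inside _update, so only the values are shown):
# with iter_per_epoch = 100 and max_epoch = 10, at iter = 250 the names
# resolve as
#   "%iter"      -> 250        "%max_iter"  -> 10 * 100 = 1000
#   "%epoch"     -> 250 // 100 = 2          "%epochf" -> 250 / 100 = 2.5
#   "%max_epoch" -> 10         "%progress"  -> (250 / 100) / 10 = 0.25
# and the resolved scalar is broadcast into the variable via data.fill(value).
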
def _update(config):
    loaded_data = {}
    sum_iteration = 0.0
    for opt in config.optimizers.values():
        o = opt.optimizer
        data = OrderedDict()
        for di in opt.data_iterators:
            if di not in loaded_data:
                loaded_data[di] = di.next()
            data.update(zip(di.variables, loaded_data[di]))
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context
            if d in data:
                let_data_to_variable(v.variable_instance, data[d],
                                     ctx=dest_context,
                                     data_name=d, variable_name=v.name)
            else:
                raise ValueError(
                    'Variable "{}" is not found in dataset "{}", optimizer "{}"'
                    .format(d, ', '.join(o.data_iterators.keys()), o.name))
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.variable_instance.d.shape),
                                 ctx=dest_context, variable_name=v.name)

        # Note: this zero_grad handling is the part specific to the new
        # version.
        if config.iter % o.update_interval == 0:
            o.solver.zero_grad()

        if config.save_optimizer_variable:
            # Keep intermediate buffers alive so they can be dumped below.
            for pv in o.net_variables.values():
                if pv not in itertools.chain(o.dataset_assign.keys(),
                                             o.generator_assign.keys()):
                    pv.variable_instance.persistent = True

        # Callback invoked on each iteration.
        if config.on_iter:
            config.on_iter(config)

        # o.network.forward(o.forward_sequence)
        # o.network.backward(o.backward_sequence, iter %
        #                    o.update_interval == 0)
        if config.iter >= config.start_iteration:
            config.cb.forward(o)
            config.cb.backward(o, config.iter % o.update_interval == 0)

        if config.save_optimizer_variable:
            for k, v in o.net_variables.items():
                fn_d = os.path.join(
                    current_path, "logdata",
                    "{}-{}-{}.npy".format(config.impl, k.replace('/', '-'),
                                          config.iter))
                fn_g = os.path.join(
                    current_path, "logdata",
                    "{}-{}-{}-g.npy".format(config.impl, k.replace('/', '-'),
                                            config.iter))
                np.save(fn_d, v.variable_instance.d)
                np.save(fn_g, v.variable_instance.g)
            # params = o.solver.get_parameters()
            # params = nn.get_parameters()
            # for k, v, in params.items():
            #     fn_d = os.path.join(current_path, "logdata", "{}-{}-{}.npy".format(
            #         config.impl, k.replace('/', '-'), config.iter))
            #     fn_g = os.path.join(current_path, "logdata", "{}-{}-{}-g.npy".format(
            #         config.impl, k.replace('/', '-'), config.iter))
            #     np.save(fn_d, v.d)
            #     np.save(fn_g, v.g)
            print("iter: {}".format(config.iter))

        if o.weight_decay > 0:
            o.solver.weight_decay(o.weight_decay)
        if o.scheduler is not None:
            o.solver.set_learning_rate(
                o.scheduler.get_learning_rate(config.iter))
        o.solver.update()

        variables = o.loss_variables
        for l in variables:
            sum_iteration += np.mean(l.variable_instance.d)
    return sum_iteration

def generate_data():
    # Closure: `v` and `generator` come from the enclosing scope.
    let_data_to_variable(v.variable_instance, data=generator(v.shape))

def _update(iter, config, cost):
    loaded_datas = {}
    is_first_optimizer = True

    for opt in config.optimizers.values():
        o = opt.optimizer
        # Load dataset
        di = opt.data_iterator
        if o.data_iterator not in loaded_datas:
            loaded_datas[o.data_iterator] = di.next()
        datas = loaded_datas[o.data_iterator]
        for v, d in o.dataset_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 datas[di.variables.index(d)],
                                 ctx=dest_context)

        # Generate data
        for v, generator in o.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generator(v.shape),
                                 ctx=dest_context)

        # Monitor loss before forward to prepare input data while processing
        # on GPU
        if cost.variables:
            for l in cost.variables:
                cost.sum_iter += np.mean(l.variable_instance.d)
            if is_first_optimizer:
                is_first_optimizer = False
                progress("Training : cost={0:0.6f}".format(cost.sum_iter),
                         (iter % config.training_config.iter_per_epoch) * 1.0 /
                         config.training_config.iter_per_epoch)
                cost.sum_epoch += cost.sum_iter
                cost.sum_iter = 0.0

        # Forward
        o.network.forward(o.forward_sequence)

        # Backward
        o.network.backward(o.backward_sequence, iter % o.update_interval == 0)

        # Update
        if iter % o.update_interval == o.update_interval - 1:
            if o.weight_decay > 0:
                o.solver.weight_decay(o.weight_decay)
            o.solver.update()
        if o.lr_decay != 1.0 and iter % o.lr_decay_interval == o.lr_decay_interval - 1:
            o.solver.set_learning_rate(o.solver.learning_rate() * o.lr_decay)

        # Reserve monitor loss
        cost.variables = o.loss_variables

    # Monitor loss at the end of iteration
    if iter % config.training_config.iter_per_epoch == config.training_config.iter_per_epoch - 1 and cost.variables:
        for l in cost.variables:
            cost.sum_iter += np.mean(l.variable_instance.d)
        cost.sum_epoch += cost.sum_iter
        cost.variables = None
        cost.sum_iter = 0.0

    return cost

def compare_optimizer(config, parameters, config_cpu, parameters_cpu,
                      result_array):
    loaded_data = {}
    for opt, opt_cpu in zip(config.optimizers.values(),
                            config_cpu.optimizers.values()):
        o = opt.optimizer
        o_cpu = opt_cpu.optimizer
        opts = [o, o_cpu]
        result_name = "optimizer '%s' with network '%s'" % (o.name,
                                                            o.network.name)
        result_dict = OrderedDict()

        logger.log(99, 'Comparing ' + result_name + ' ...')
        logger.log(
            99,
            'process(func, variable), norm_diff, current_context_std, cpu_std, diff_std')

        # Start comparison with same parameters
        for p, p_cpu in zip(parameters.values(), parameters_cpu.values()):
            p_cpu.d = p.d

        # Load dataset
        di = opt.data_iterator
        if di not in loaded_data:
            loaded_data[di] = di.next()
        data = loaded_data[di]

        for v, d in o.dataset_assign.items():
            let_data_to_variable(v.variable_instance,
                                 data[di.variables.index(d)],
                                 data_name=d, variable_name=v.name)
        for v, d in o_cpu.dataset_assign.items():
            let_data_to_variable(v.variable_instance,
                                 data[di.variables.index(d)],
                                 data_name=d, variable_name=v.name)

        # Generate data
        generated = {}
        for v, generator in o.generator_assign.items():
            generated[v.name] = generator(v.shape)
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generated[v.name], ctx=dest_context,
                                 variable_name=v.name)
        for v, generator in o_cpu.generator_assign.items():
            dest_context = config.global_config.default_context if not o.forward_sequence or v not in o.forward_sequence[
                0].inputs else None
            let_data_to_variable(v.variable_instance,
                                 data=generated[v.name], ctx=dest_context,
                                 variable_name=v.name)

        last_max_diff = 1e-5

        # Forward
        for func, func_cpu in zip(o.forward_sequence, o_cpu.forward_sequence):
            o.network.forward_function(func)
            o_cpu.network.forward_function(func_cpu)
            large_diff = False
            for v, v_cpu in zip(func.outputs, func_cpu.outputs):
                name = 'forward_function (%s, %s)' % (func.name, v.name)
                if v.variable_instance.d.shape != v_cpu.variable_instance.d.shape:
                    logger.log(99, 'Variable shape is different in %s (current_context=%s, cpu=%s)' % (
                        v.name, str(v.variable_instance.d.shape),
                        str(v_cpu.variable_instance.d.shape)))
                norm_diff, std1, std2, diff_std = calc_norm_diff(
                    v.variable_instance.d, v_cpu.variable_instance.d)
                logger.log(99, '%s, %f, %f, %f, %f' %
                           (name, norm_diff, std1, std2, diff_std))
                result_dict[name] = norm_diff
                if norm_diff > last_max_diff:
                    if norm_diff > last_max_diff * 10:
                        logger.log(99, ' current_context(data)=' +
                                   str(v.variable_instance.d.flatten()))
                        logger.log(99, ' cpu(data)=' +
                                   str(v_cpu.variable_instance.d.flatten()))
                        large_diff = True
                    last_max_diff = norm_diff
            if large_diff:
                logger.log(99, ' x_data:')
                for v, v_cpu in zip(func.inputs, func_cpu.inputs):
                    logger.log(99, ' current_context(%s.d)=%s' %
                               (v.name, str(v.variable_instance.d.flatten())))
                    logger.log(99, ' cpu(%s.d)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.d.flatten())))

        # Backward
        o.network.prepare_backward(o.backward_sequence)
        o_cpu.network.prepare_backward(o_cpu.backward_sequence)
        for seq, seq_cpu in zip(o.backward_sequence.sequence,
                                o_cpu.backward_sequence.sequence):
            o.network.backward_function(seq)
            o_cpu.network.backward_function(seq_cpu)
            large_diff = False
            for v, v_cpu in zip(seq.func.inputs, seq_cpu.func.inputs):
                if v.variable_instance.need_grad:
                    name = 'backward_function (%s, %s)' % (seq.func.name,
                                                           v.name)
                    norm_diff, std1, std2, diff_std = calc_norm_diff(
                        v.variable_instance.g, v_cpu.variable_instance.g)
                    logger.log(99, '%s, %f, %f, %f, %f' %
                               (name, norm_diff, std1, std2, diff_std))
                    result_dict[name] = norm_diff
                    if norm_diff > last_max_diff:
                        if norm_diff > last_max_diff * 10:
                            logger.log(99, ' current_context(diff)=' +
                                       str(v.variable_instance) +
                                       str(v.variable_instance.g.flatten()))
                            logger.log(99, ' cpu(diff)=' +
                                       str(v_cpu.variable_instance) +
                                       str(v_cpu.variable_instance.g.flatten()))
                            large_diff = True
                        last_max_diff = norm_diff
            if large_diff:
                logger.log(99, ' x_data:')
                for v, v_cpu in zip(seq.func.inputs, seq_cpu.func.inputs):
                    logger.log(99, ' current_context(%s.d)=%s' %
                               (v.name, str(v.variable_instance.d.flatten())))
                    logger.log(99, ' cpu(%s.d)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.d.flatten())))
                logger.log(99, ' y_diff:')
                for v, v_cpu in zip(seq.func.outputs, seq_cpu.func.outputs):
                    logger.log(99, ' current_context(%s.g)=%s' %
                               (v.name, str(v.variable_instance.g.flatten())))
                    logger.log(99, ' cpu(%s.g)=%s' % (
                        v_cpu.name, str(v_cpu.variable_instance.g.flatten())))

        # Update (weight decay)
        if o.weight_decay > 0:
            o.solver.weight_decay(o.weight_decay)
            o_cpu.solver.weight_decay(o_cpu.weight_decay)

        # Update
        o.solver.update()
        o_cpu.solver.update()

        for i, ((v, lr), (v_cpu, lr_cpu)) in enumerate(
                zip(o.parameter_learning_rate_multipliers.items(),
                    o_cpu.parameter_learning_rate_multipliers.items())):
            if lr > 0:
                name = 'update (%s, %s)' % (o.solver.name, v.name)
                norm_diff, std1, std2, diff_std = calc_norm_diff(
                    v.variable_instance.d, v_cpu.variable_instance.d)
                logger.log(99, '%s, %f, %f, %f, %f' %
                           (name, norm_diff, std1, std2, diff_std))
                result_dict[name] = norm_diff

        result_array = add_result(result_name, result_dict, result_array)

    return result_array

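# A plausible sketch of the calc_norm_diff helper used throughout
# compare_optimizer, inferred from its call sites (not necessarily the
# module's actual implementation): it compares a device-side and a CPU-side
# array and returns (norm_diff, std1, std2, diff_std).
#
#     import numpy as np
#
#     def calc_norm_diff(d1, d2):
#         d1 = d1.flatten().astype(np.float64)
#         d2 = d2.flatten().astype(np.float64)
#         diff = d1 - d2
#         # Relative L2 difference, guarded against a zero denominator.
#         denom = np.linalg.norm(d1)
#         norm_diff = (np.linalg.norm(diff) / denom
#                      if denom > 0 else np.linalg.norm(diff))
#         return norm_diff, np.std(d1), np.std(d2), np.std(diff)
#
# Under this reading, the 1e-5 seed for last_max_diff sets the smallest
# relative discrepancy worth flagging, and each flagged value raises the bar
# for subsequent functions in the sequence.
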
def _evaluate(args, config, monitoring_report, best_error, epoch):
    comm = current_communicator()
    error_str = ''
    valid_error = 0.0

    def _sum_error(sum, error):
        ret = None
        if comm:
            # logger.log(99, "Calc error with communicator")
            var = [nn.NdArray()]
            var[0].data = error
            _all_reduce(comm, var, division=False, inplace=True)
            ret = sum + var[0].data
        else:
            ret = sum + error
        return ret

    for name, mon in config.monitors.items():
        m = mon.monitor
        error_sum_monitor = 0.0
        error_count = 0
        data_size = max([di.size for di in mon.data_iterators])
        batch_size = max([di.batch_size for di in mon.data_iterators])

        for i in range(data_size // batch_size):
            # Load dataset
            data = OrderedDict()
            for di in mon.data_iterators:
                data.update(zip(di.variables, di.next()))

            # Set data to variable
            for v, d in m.dataset_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance, data[d],
                                     ctx=dest_context,
                                     data_name=d, variable_name=v.name)

            # Generate data
            for v, generator in m.generator_assign.items():
                dest_context = config.global_config.default_context if not m.forward_sequence or v not in m.forward_sequence[
                    0].inputs else None
                let_data_to_variable(v.variable_instance,
                                     data=generator(v.shape),
                                     ctx=dest_context, variable_name=v.name)

            # Sum error before forward to prepare input data while processing
            # on GPU
            if error_count > 0:
                error_sum = 0.0
                for v in m.monitor_variables:
                    error_sum += np.mean(v.variable_instance.d)
                    # v.variable_instance.data.zero()
                error_sum_monitor = _sum_error(error_sum_monitor, error_sum)
                if single_or_rankzero():
                    progress('Evaluating "{0}"'.format(name) +
                             ' : error={0:0.6f}'.format(
                                 error_sum_monitor / error_count),
                             di.position * 1.0 / di.size)
            error_count += comm.size if comm else 1

            # Forward recursive
            m.network.forward(m.forward_sequence)

        # Sum error at the end of dataset
        error_sum = 0.0
        for v in m.monitor_variables:
            error_sum += np.mean(v.variable_instance.d)
            # v.variable_instance.data.zero()
        error_sum_monitor = _sum_error(error_sum_monitor, error_sum)

        if error_count == 0:
            error = 0
        else:
            error = error_sum_monitor / error_count
        if np.isnan(error) or np.isinf(error):
            logger.log(99, 'Validation error is NaN')
            error = 0.0

        monitoring_report.append(' {}: {}\n'.format(name, error))
        callback.update_status((['monitoring_report', epoch, name], error))
        callback.update_status((['last', name], error))  # save last value

        if error_str != '':
            error_str += ', '
        else:
            error_str = ' {'
        error_str += '{}={:.6f}'.format(name, error)
        if name == 'valid_error':
            valid_error = error

    if error_str != '':
        error_str += '}'

    # Save Parameters
    if single_or_rankzero():
        if (not config.training_config.save_best) or \
           (not best_error) or \
           (best_error is not None and valid_error <= best_error):
            best_error = valid_error
            callback.update_status(('best.valid_error', best_error))
            callback.update_status(('best.epoch', epoch))
            _save_parameters(args, 'best', epoch, config, True)

    return best_error, error_str

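# On the distributed branch of _evaluate above: each of the comm.size workers
# evaluates its own batch in parallel (presumably the data iterators shard the
# dataset across ranks), so one _sum_error call all-reduces comm.size
# per-batch errors into error_sum_monitor. error_count therefore advances by
# comm.size rather than 1, which keeps error_sum_monitor / error_count a
# per-batch mean regardless of the number of workers.
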