def _hash_spec_names(args_specs, kwargs_specs):
    """
    Generate a hash from the InputSpec names in args/kwargs.

    Consider the following InputSpecs with the same shape/dtype but different names:
    1. [InputSpec([3,3], 'float32', 'x'), InputSpec([3,3], 'float32', 'x')]
    2. [InputSpec([3,3], 'float32', 'x'), InputSpec([3,3], 'float32', 'y')]

    Under @to_static, we should generate two different programs, not just one,
    because the former has one input ('x') while the latter has two inputs
    ('x', 'y').
    """
    spec_names = [
        spec.name for spec in flatten(args_specs)
        if isinstance(spec, paddle.static.InputSpec)
    ]
    spec_names += [
        spec.name for spec in flatten(kwargs_specs)
        if isinstance(spec, paddle.static.InputSpec)
    ]
    i, name_ids = 0, {}

    def to_idx(name):
        nonlocal i
        if name not in name_ids:
            name_ids[name] = i
            i += 1
        return name_ids[name]

    value = [to_idx(name) for name in spec_names]
    return tuple(value)
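# A minimal, self-contained sketch (not Paddle code) of the first-occurrence
# indexing used by _hash_spec_names above: repeated names collapse to the same
# index, so ['x', 'x'] hashes to (0, 0) while ['x', 'y'] hashes to (0, 1),
# giving the two InputSpec lists from the docstring different cache keys.
def _hash_names_sketch(names):
    name_ids = {}

    def to_idx(name):
        if name not in name_ids:
            name_ids[name] = len(name_ids)
        return name_ids[name]

    return tuple(to_idx(n) for n in names)


assert _hash_names_sketch(['x', 'x']) == (0, 0)
assert _hash_names_sketch(['x', 'y']) == (0, 1)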
def _convert_input(self, input, input_name, input_idx, is_default=False):

    def _to_variable(x, x_desc=None, x_name=None, x_idx=None):
        if isinstance(x, np.ndarray):
            out = fluid.data(
                name=x_name if x_idx is None else (x_name + "_" + str(x_idx)),
                shape=([None] + list(x.shape[1:]))
                if x_desc is None else x_desc.shape,
                dtype=x.dtype)
            # Record how to fetch this input, so that data can be extracted
            # from args and kwargs when running __call__.
            if is_default:  # for defaults
                if x_idx is None:  # input is plain
                    data_extracter = lambda args, kwargs: input
                else:  # input is a nested structure
                    data_extracter = lambda args, kwargs: flatten(input)[x_idx]
            elif input_idx is None:  # for named arg
                if x_idx is None:  # input is plain
                    data_extracter = lambda args, kwargs: kwargs[input_name]
                else:  # input is a nested structure
                    data_extracter = lambda args, kwargs: flatten(
                        kwargs[input_name])[x_idx]
            else:  # for positional arg
                if x_idx is None:  # input is plain
                    data_extracter = lambda args, kwargs: args[input_idx]
                else:  # input is a nested structure
                    data_extracter = lambda args, kwargs: flatten(
                        args[input_idx])[x_idx]
            self._inputs[out.name] = data_extracter
        else:
            out = x
        return out

    input_desc = model_self._data_descs.get(input_name, None)
    if not utils.is_sequence(input):
        return _to_variable(input, input_desc, input_name)

    flat_output = []
    if input_desc is None:
        for i, x in enumerate(flatten(input)):
            out = _to_variable(x, x_name=input_name, x_idx=i)
            flat_output.append(out)
    else:
        for i, x in enumerate(zip(flatten(input), flatten(input_desc))):
            out = _to_variable(*x, x_name=input_name, x_idx=i)
            flat_output.append(out)
    output = pack_sequence_as(input, flat_output)
    return output
def concrete_program_specify_input_spec(self, input_spec=None):
    """
    Returns the most recent ConcreteProgram instance of the decorated
    function while specifying input_spec. If self._function_spec already
    has an input_spec, the compatibility of the given input_spec and
    self._function_spec.input_spec is checked. If the given input_spec is
    None, this method uses self._function_spec.input_spec.

    Args:
        input_spec (list[InputSpec], optional): Describes the input of
            the translated function.
    """
    # If `input_spec` is specified, the length of program_cache will always be 1;
    # otherwise, return the last one.
    cached_program_len = len(self._program_cache)
    # If `input_spec` is specified, convert the dygraph layers into a static Program.
    if cached_program_len == 0:
        desired_input_spec = input_spec
        if self._function_spec.input_spec is not None:
            if input_spec is not None and not input_specs_compatible(
                    flatten(input_spec),
                    flatten(self._function_spec.input_spec)):
                raise ValueError(
                    "The `input_spec`: {} used to construct concrete_program conflicts with the `input_spec`: {} in `@paddle.jit.to_static`"
                    .format(input_spec, self._function_spec.input_spec))
            # NOTE(chenweihang): we should always translate the program based on
            # the `input_spec` decorated on forward if it is valid
            desired_input_spec = self._function_spec.input_spec
            if input_spec is not None:
                logging_utils.warn(
                    "\n\nYou have specified `input_spec` both in the function definition (higher priority) and `paddle.jit.save` (will be ignored).\n\n\t Using: {}\n\n\t Ignored: {}\n"
                    .format(desired_input_spec, input_spec))

        has_input_spec = (desired_input_spec is not None)
        if has_input_spec:
            concrete_program, _ = self.get_concrete_program(
                *desired_input_spec)
            return concrete_program
        else:
            raise ValueError(
                "No valid transformed program for {}.\n\t Please specify `input_spec` in `@paddle.jit.to_static` or feed an input tensor to call the decorated function once.\n"
                .format(self._function_spec))
    # If more than one program has been cached, return the most recently converted one by default.
    elif cached_program_len > 1:
        logging_utils.warn(
            "Current {} has more than one cached program: {}; the last traced program will be returned by default."
            .format(self._function_spec, cached_program_len))

    cache_key, (concrete_program,
                partial_layer) = self._program_cache.last()
    return concrete_program
def test_nest(self):
    dygraph_res = self._run(to_static=False)
    dygraph_res = flatten(dygraph_res)

    static_res = self._run(to_static=True)
    static_res = flatten(static_res)

    self.assertTrue(len(dygraph_res) == len(static_res))

    for dy_var, st_var in zip(dygraph_res, static_res):
        if isinstance(dy_var, fluid.core.VarBase):
            self.assertTrue(np.allclose(dy_var.numpy(), st_var.numpy()))
        else:
            self.assertEqual(dy_var, st_var)
def to_static_inputs_with_spec(self, input_with_spec, main_program):
    """
    Constructs feed layers for the main program from inputs that carry
    InputSpec information.

    Args:
        input_with_spec(tuple): input arguments with values replaced by InputSpec.
        main_program(Program): main program into which feed layers are inserted.
    """
    flat_input_spec = flatten(input_with_spec)

    inputs = []
    block = main_program.global_block()
    for i, var_spec in enumerate(flat_input_spec):
        if isinstance(var_spec, paddle.static.InputSpec):
            # TODO(Aurelius84): consider a more elegant way to name this
            feed_layer = block.create_var(
                name=var_spec.name or "feed_%s" % i,
                shape=var_spec.shape,
                dtype=var_spec.dtype,
                is_data=True,
                need_check_feed=False)
        else:
            feed_layer = var_spec
        inputs.append(feed_layer)

    return pack_sequence_as(input_with_spec, inputs)
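# Hedged illustration of the flatten / pack_sequence_as contract that
# to_static_inputs_with_spec relies on: flatten() walks the nest depth-first
# into a flat list, and pack_sequence_as() rebuilds the original structure
# from a flat list of (possibly replaced) leaves. These stand-ins handle only
# lists/tuples; the real helpers in paddle.fluid.layers.utils also cover
# dicts and other containers.
def flatten_sketch(nest):
    if isinstance(nest, (list, tuple)):
        return [leaf for item in nest for leaf in flatten_sketch(item)]
    return [nest]


def pack_sequence_as_sketch(structure, flat):
    it = iter(flat)

    def rebuild(s):
        if isinstance(s, (list, tuple)):
            return type(s)(rebuild(x) for x in s)
        return next(it)

    return rebuild(structure)


nest = ('a', ['b', ('c',)])
assert pack_sequence_as_sketch(nest, flatten_sketch(nest)) == nest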
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input([None, None], 'int64', name='words'),
        Input([None], 'int64', name='length')
    ]

    dataset = LacDataset(args)
    predict_dataset = LacDataLoader(args, place, phase="predict")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels, mode="predict")

    model.mode = "test"
    model.prepare(inputs=inputs)
    model.load(args.init_from_checkpoint, skip_mismatch=True)

    f = open(args.output_file, "wb")
    for data in predict_dataset.dataloader:
        if len(data) == 1:
            input_data = data[0]
        else:
            input_data = data
        results, length = model.test_batch(inputs=flatten(input_data))
        for i in range(len(results)):
            word_len = length[i]
            word_ids = results[i][:word_len]
            tags = [dataset.id2label_dict[str(id)] for id in word_ids]
            # the file is opened in binary mode, so encode before writing
            f.write(("\002".join(tags) + "\n").encode("utf-8"))
def _prepare(self, inputs):
    """
    Prepare inputs, outputs, attrs.
    """
    assert isinstance(inputs, (tuple, list))
    # Flatten inputs with nested structure into a single list.
    flatten_inputs = flatten(inputs)
    # Convert variables into VarBase and feed in training data.
    input_vars = []
    expected_place = framework._current_expected_place()
    for i, value in enumerate(flatten_inputs):
        if isinstance(value, np.ndarray):
            var = None
            if not framework._in_eager_mode_:
                var = core.VarBase(value=value,
                                   name=self._inputs[i].desc.name(),
                                   persistable=False,
                                   place=expected_place,
                                   zero_copy=True)
            else:
                var = core.eager.Tensor(value=value,
                                        name=self._inputs[i].desc.name(),
                                        persistable=False,
                                        place=expected_place,
                                        zero_copy=True)
        elif isinstance(value, (core.VarBase, core.eager.Tensor)):
            # NOTE(Aurelius84): If a CPUPlace var is used as input of multiple
            # Ops, it would be transformed into CUDAPlace multiple times, so
            # we move it to the expected place in advance to avoid this problem.
            if value.stop_gradient and not value.place._equals(
                    expected_place):
                var = value._copy_to(expected_place, False)
                var.stop_gradient = True
            else:
                var = value
            var.name = self._inputs[i].desc.name()
        else:
            continue
        input_vars.append(var)

    def create_out(var_id):
        var = self._outputs[var_id]
        assert isinstance(var, framework.Variable)
        var_desc = var.desc

        var_base = None
        if not framework._in_eager_mode_:
            var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                    var_desc.name(), var_desc.type(), False)
        else:
            var_base = core.eager.Tensor(var_desc.dtype(), var_desc.shape(),
                                         var_desc.name(), var_desc.type(),
                                         False)
        return var_base

    # Create VarBase to receive output data.
    out_vars = list(map(create_out, self._outputs.var_ids))

    return input_vars, out_vars
def test_nest_output(self):
    x = fluid.dygraph.to_variable(
        np.random.random((4, 8)).astype('float32'))

    net = LinearNetWithNestOut(8, 8)
    dy_outs = flatten(net(x))
    net = declarative(net, input_spec=[InputSpec([None, 8], name='x')])

    model_path = "net_with_nest_out/model"
    paddle.jit.save(net, model_path)

    load_net = paddle.jit.load(model_path)
    load_outs = flatten(load_net(x))

    self.assertTrue(len(dy_outs) == 4)
    for dy_out, load_out in zip(dy_outs, load_outs):
        self.assertTrue(np.allclose(dy_out.numpy(), load_out.numpy()))
def batch_data_generator():
    for data in self.dataset:
        data = flatten(data)
        partial_data = []
        for d in data:
            assert d.shape[0] % self.dp_world_size == 0, \
                "Please pad the dataset to a multiple of the data parallel size"
            partial_data.append(
                np.split(d, self.dp_world_size)[self.dp_rank])
        yield partial_data[:len(self.feed_list)]
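# Numpy-only sketch of the data-parallel sharding above: every flattened
# field is split evenly along the batch axis and each rank keeps its own
# contiguous shard. dp_world_size and dp_rank are illustrative values.
import numpy as np

field = np.arange(8).reshape(8, 1)  # batch dim divisible by dp_world_size
dp_world_size, dp_rank = 2, 1
shard = np.split(field, dp_world_size)[dp_rank]
assert shard.shape == (4, 1) and shard[0, 0] == 4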
def inputs(self):
    """
    Returns the input tensors of the most recently converted static program.
    """
    concrete_program = self.concrete_program
    inputs = [
        var for var in flatten(concrete_program.inputs)
        if isinstance(var, framework.Variable)
    ]
    return inputs
def __init__(self, function, input_spec=None):
    self._dygraph_function = function
    if input_spec is None:
        self._input_spec = None
        self._flat_input_spec = None
    else:
        self._input_spec = self._verify_input_spec(input_spec)
        self._flat_input_spec = flatten(self._input_spec)

    # parse the full argument names list.
    self._arg_names, self._default_kwargs = parse_arg_and_kwargs(function)
def test_run(self):
    # Ops must be added inside program_guard(); otherwise prim ops would be
    # appended to another block.
    with paddle.static.program_guard(self.main_program,
                                     self.startup_program):
        ad = Transform(self.main_program.block(0))
        orig_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(orig_ops), sorted(self.orig_ops))

        # Test orig2prim
        orig2prim(block=self.main_program.block(0))
        orig2prim_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(orig2prim_ops), sorted(self.orig2prim_ops))

        # Test linearize
        xs_dot, ys_dot = ad.linearize(self.orig_xs, self.orig_ys)
        linearize_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(linearize_ops), sorted(self.linearize_ops))
        flatten_xs_dot = flatten(xs_dot)
        for k, v in self.xs_shape_map.items():
            self.assertEqual(flatten_xs_dot[k].shape, v)
        flatten_ys_dot = flatten(ys_dot)
        for k, v in self.ys_shape_map.items():
            self.assertEqual(flatten_ys_dot[k].shape, v)

        # Test transpose
        ys_bar, xs_bar = ad.transpose(ys_dot, xs_dot, retain_fwd=False)
        transpose_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(transpose_ops), sorted(self.transpose_ops))
        flatten_xs_bar = flatten(xs_bar)
        for k, v in self.xs_shape_map.items():
            # The result of transpose may contain None, e.g. for gather op.
            if flatten_xs_bar[k] is not None:
                self.assertEqual(flatten_xs_bar[k].shape, v)
        flatten_ys_bar = flatten(ys_bar)
        for k, v in self.ys_shape_map.items():
            self.assertEqual(flatten_ys_bar[k].shape, v)

        # Test prim2orig
        prim2orig(block=self.main_program.block(0))
        prim2orig_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(prim2orig_ops), sorted(self.prim2orig_ops))
def _replace_value_with_input_spec(self, args):
    args_with_spec = []
    for idx, input_var in enumerate(flatten(args)):
        if isinstance(input_var, np.ndarray):
            input_var = paddle.static.InputSpec.from_numpy(input_var)
        elif isinstance(input_var, core.VarBase):
            input_var = paddle.static.InputSpec.from_tensor(input_var)

        args_with_spec.append(input_var)

    args_with_spec = pack_sequence_as(args, args_with_spec)
    return args_with_spec
def test_op(self):
    with paddle.static.program_guard(self.main_program,
                                     self.startup_program):
        op = self.layer_help.append_op(type=self.op_type,
                                       inputs=self.prim_input,
                                       outputs=self.prim_output,
                                       attrs=self.prim_attrs)

        jvp_out = _jvp(op, *self.jvp_args)
        jvp_out = flatten(jvp_out)
        for k, v in self.jvp_out_shape_map.items():
            self.assertEqual(jvp_out[k].shape, v.shape)

        # Some prim ops don't have a transpose rule.
        if hasattr(self, 'transpose_args'):
            transpose_out = _transpose(op, *self.transpose_args)
            transpose_out = flatten(transpose_out)
            for k, v in self.transpose_out_shape_map.items():
                self.assertEqual(transpose_out[k].shape, v.shape)

        all_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(all_ops), sorted(self.all_ops))
def args_to_input_spec(self, args, kwargs):
    """
    Converts input arguments into InputSpec.

    1. If input_spec is specified, use it to construct feed layers.
    2. If input_spec is None, consider all Tensors and numpy.ndarrays as feed layers.

    Args:
        args(tuple): tuple of input argument values of the function,
            containing default kwargs values.
        kwargs(dict): keyword arguments received by **kwargs.

    Return:
        Same nested structure as args, with values replaced by InputSpec.
    """
    input_with_spec = []

    if self._input_spec is not None:
        # Note: Because the value type and length of `kwargs` are uncertain,
        # this case is currently unsupported while `input_spec` is specified.
        if kwargs:
            raise ValueError(
                "{} got unexpected keyword arguments: {}. Cannot trace the function when `input_spec` is specified."
                .format(self._dygraph_function.__name__, kwargs))

        # Note: `args` may be longer than `input_spec`, because `args` may
        # contain non-tensor values merged from `kwargs` after
        # `unified_args_and_kwargs`; it must never be shorter.
        if len(args) < len(self._input_spec):
            raise ValueError(
                "Requires len(arguments) >= len(input_spec), but received len(args): {} < len(InputSpec): {}"
                .format(len(args), len(self._input_spec)))

        # replace arguments with the corresponding InputSpec.
        input_with_spec = convert_to_input_spec(args, self._input_spec)
    else:
        for idx, input_var in enumerate(flatten(args)):
            if isinstance(input_var, np.ndarray):
                input_var = paddle.static.InputSpec.from_numpy(input_var)
            elif isinstance(input_var, core.VarBase):
                input_var = paddle.static.InputSpec.from_tensor(input_var)

            input_with_spec.append(input_var)

        input_with_spec = pack_sequence_as(args, input_with_spec)

    # If a name is not specified in input_spec, add a default name
    # according to the argument names of the decorated function.
    input_with_spec = replace_spec_empty_name(self._arg_names,
                                              input_with_spec)
    return input_with_spec
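# Hedged example of the length contract above: args may be longer than
# input_spec (trailing non-tensor arguments are allowed), never shorter.
# The forward signature and spec below are illustrative only.
#
#   def forward(self, x, use_mask=True): ...
#   input_spec = [InputSpec([None, 8], 'float32', 'x')]
#   # args == (x_tensor, True): len(args)=2 >= len(input_spec)=1 -> OK
#   # args == ():               len(args)=0 <  len(input_spec)=1 -> ValueError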
def test_op(self):
    with paddle.static.program_guard(self.main_program,
                                     self.startup_program):
        op = self.layer_help.append_op(type=self.op_type,
                                       inputs=self.input,
                                       outputs=self.output,
                                       attrs=self.attrs)

        orig_out = _prim2orig(op, *self.prim2orig_args)
        all_ops = [op.type for op in self.main_program.block(0).ops]
        self.assertEqual(sorted(all_ops), sorted(self.all_ops))
        orig_out = flatten(orig_out)
        for k, v in self.out_map.items():
            self.assertEqual(k.shape, orig_out[v].shape)
def _replace_value_with_input_spec(self, args):
    args_with_spec = []
    for idx, input_var in enumerate(flatten(args)):
        if isinstance(input_var, np.ndarray):
            input_var = paddle.static.InputSpec.from_numpy(input_var)
            _set_spec_stop_gradient(input_var, True)
        elif isinstance(input_var, (core.VarBase, core.eager.Tensor)):
            stop_gradient = input_var.stop_gradient
            input_var = paddle.static.InputSpec.from_tensor(input_var)
            _set_spec_stop_gradient(input_var, stop_gradient)

        args_with_spec.append(input_var)

    args_with_spec = pack_sequence_as(args, args_with_spec)
    return args_with_spec
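# Hedged usage sketch for the conversion above (assumes a working paddle
# install; shapes are illustrative). The function above marks numpy-derived
# specs with stop_gradient=True and lets tensor-derived specs inherit the
# tensor's own flag via _set_spec_stop_gradient.
import numpy as np
import paddle

spec_from_np = paddle.static.InputSpec.from_numpy(
    np.ones([4, 8], dtype='float32'))
t = paddle.ones([4, 8], dtype='float32')
t.stop_gradient = False
spec_from_t = paddle.static.InputSpec.from_tensor(t)
# Both specs now describe shape (4, 8) and dtype float32.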
def _get_input_var_names(inputs, input_spec):
    name_none_error = "The %s's name is None. " \
        "When using jit.save, please set InputSpec's name in " \
        "to_static(input_spec=[]) and jit.save(input_spec=[]) " \
        "and make sure they are consistent."
    name_no_exists_error = "The tensor `%s` does not exist. " \
        "Please make sure the name of the InputSpec or example Tensor " \
        "in input_spec is the same as the name of the InputSpec in " \
        "`to_static` decorated on the Layer.forward method."
    result_list = []
    input_var_names = [
        var.name for var in flatten(inputs) if isinstance(var, Variable)
    ]
    if input_spec is None:
        # no prune
        return input_var_names
    else:
        # filter out non-tensor type spec infos.
        input_spec = [
            spec for spec in input_spec
            if isinstance(spec, paddle.static.InputSpec)
        ]

        if len(input_spec) == len(input_var_names):
            # no prune
            result_list = input_var_names
            # if an input spec name is not in input_var_names, only warn
            for spec in input_spec:
                if spec.name is None:
                    warnings.warn(name_none_error % spec)
                elif spec.name not in input_var_names:
                    warnings.warn(name_no_exists_error % spec.name)
        else:
            # prune
            for spec in input_spec:
                if spec.name is None:
                    # name is None, so the spec can only be an InputSpec
                    raise ValueError(name_none_error % spec)
                elif spec.name not in input_var_names:
                    # the spec may be an `InputSpec` or a `VarBase`
                    raise ValueError(name_no_exists_error % spec.name)
                else:
                    result_list.append(spec.name)

    return result_list
def sample_data_generator():
    batch_data = None
    for step, data in enumerate(self.dataset):
        data = flatten(data)
        if batch_data is None:
            batch_data = [[] for i in range(len(data))]
        for idx in range(len(data)):
            batch_data[idx].append(data[idx])
        if (step + 1) % self.batch_size == 0:
            partial_data = []
            for d in batch_data:
                array = np.array(d)
                partial_data.append(
                    np.split(array, self.dp_world_size)[self.dp_rank])
            yield partial_data[:len(self.feed_list)]
            batch_data = None
def hasher(self, version_field):
    from paddle.fluid.layers.utils import flatten

    md5 = hashlib.md5()
    for field in version_field._fields:
        elem = getattr(version_field, field)
        if not elem:
            continue
        if isinstance(elem, (list, tuple, dict)):
            flat_elem = flatten(elem)
            md5 = combine_hash(md5, tuple(flat_elem))
        else:
            raise RuntimeError(
                "Supported types are list, tuple and dict, but received {} with {}."
                .format(type(elem), elem))
    return md5.hexdigest()
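# Self-contained sketch of the hashing scheme in hasher(). combine_hash is
# not shown in the source; this stand-in assumes it folds the flattened
# tuple into the digest by hashing its repr.
import hashlib


def combine_hash_sketch(md5, flat_tuple):
    md5.update(repr(flat_tuple).encode('utf-8'))
    return md5


md5 = hashlib.md5()
md5 = combine_hash_sketch(md5, ('-O3', '-DNDEBUG'))  # e.g. flattened cc flags
print(md5.hexdigest())  # stable across runs for the same field values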
def _verify_input_spec(self, input_spec):
    """
    Verifies that `input_spec` and its element types are valid.
    """
    if not isinstance(input_spec, (tuple, list)):
        raise TypeError(
            "The type(input_spec) should be one of (tuple, list), but received {}."
            .format(type_name(input_spec)))
    input_spec = tuple(input_spec)
    for spec in flatten(input_spec):
        if not isinstance(spec, paddle.static.InputSpec):
            raise ValueError(
                "The type of each element in input_spec should be `InputSpec`, but received {}."
                .format(type_name(spec)))

    return input_spec
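# Hedged usage sketch for _verify_input_spec (assumes `func_spec` is a
# FunctionSpec instance and paddle is importable; calls shown as comments):
#
#   specs = [paddle.static.InputSpec([None, 8], 'float32', 'x')]
#   func_spec._verify_input_spec(specs)        # returns tuple(specs)
#   func_spec._verify_input_spec(specs[0])     # TypeError: not a tuple/list
#   func_spec._verify_input_spec([object()])   # ValueError: not an InputSpec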
def __init__(self, function, input_spec=None):
    self._dygraph_function = function
    if input_spec is None:
        self._input_spec = None
        self._flat_input_spec = None
    else:
        self._input_spec = self._verify_input_spec(input_spec)
        self._flat_input_spec = flatten(self._input_spec)

    # parse the full argument names list.
    self._arg_names, self._default_kwargs = parse_arg_and_kwargs(function)
    # parse *args
    self.varargs_name = parse_varargs_name(function)
    if self.varargs_name is not None and isinstance(function.__self__,
                                                    TranslatedLayer):
        self._arg_names += function.__self__._input_args_names
def _build(self):
    for mode in self._modes:
        serial_main_prog = self._serial_main_progs.get(mode, None)
        if serial_main_prog is not None:
            return

        losses = []
        metrics = []
        serial_main_prog = self._orig_main_prog.clone()
        serial_startup_prog = self._orig_startup_prog.clone()
        with static.program_guard(serial_main_prog, serial_startup_prog):
            inputs_spec = self.inputs_spec
            labels_spec = self.labels_spec if self.labels_spec else []
            inputs = [s._create_feed_layer() for s in inputs_spec]
            labels = [s._create_feed_layer() for s in labels_spec]
            outputs = to_list(self.model(*inputs))
            if mode != "predict" and self._loss:
                losses = to_list(self._loss(*(outputs + labels)))

            if mode != "predict":
                for metric in self._metrics:
                    metrics.extend(
                        to_list(metric.compute(*(outputs + labels))))

        default_ctx = get_default_distributed_context()
        if not default_ctx.has_annotation or self._default_strategy:
            inputs = [self._set_data_parallel(var) for var in inputs]
            labels = [self._set_data_parallel(var) for var in labels]

        feed_vars = {"inputs": inputs, "labels": labels}

        fetch_vars = {
            "outputs": flatten(outputs),
            "loss": losses,
            "metrics": metrics
        }

        self._dist_contexts[mode] = DistributedContext(
            serial_main_prog, serial_startup_prog, self._optimizer, losses,
            feed_vars, fetch_vars, self.cluster, self.strategy)
        self._dist_contexts[mode].gradient_scale = self._gradient_scale
def _prepare(self, inputs):
    """
    Prepare inputs, outputs, attrs.
    """
    assert isinstance(inputs, (tuple, list))
    # Flatten inputs with nested structure into a single list.
    flatten_inputs = flatten(inputs)
    # Convert variables into VarBase and feed in training data.
    input_vars = []
    for i, value in enumerate(flatten_inputs):
        if isinstance(value, np.ndarray):
            var = core.VarBase(value=value,
                               name=self._inputs[i].desc.name(),
                               persistable=False,
                               place=framework._current_expected_place(),
                               zero_copy=True)
        elif isinstance(value, core.VarBase):
            var = value
            var.name = self._inputs[i].desc.name()
        else:
            continue
        input_vars.append(var)
    # Create VarBase to receive output data.
    out_vars = []
    for idx in self._outputs.var_ids:
        var = self._outputs[idx]
        assert isinstance(var, framework.Variable)
        var_desc = var.desc
        var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                var_desc.name(), var_desc.type(), False)
        out_vars.append(var_base)
    # Hold forward variables.
    tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                 "program_out_scope",
                                 core.VarDesc.VarType.STEP_SCOPES, True)
    tmp_scope_vec.value().set_scope(self._inner_scope)

    return input_vars, out_vars, tmp_scope_vec
def _get_output_vars(outputs, output_spec):
    name_no_exists_error = "The tensor `%s` does not exist. " \
        "Please make sure the name of the example Tensor " \
        "in configs.output_spec is an output tensor of the " \
        "Layer.forward method."
    result_list = []
    output_vars_dict = OrderedDict()
    for var in flatten(outputs):
        if isinstance(var, Variable):
            output_vars_dict[var.name] = var
    if output_spec is None:
        result_list = output_vars_dict.values()
    elif len(output_spec) == len(output_vars_dict):
        result_list = output_vars_dict.values()
        for var in output_spec:
            if var.name not in output_vars_dict:
                warnings.warn(name_no_exists_error % var.name)
    else:
        for var in output_spec:
            if var.name not in output_vars_dict:
                raise ValueError(name_no_exists_error % var.name)
            else:
                result_list.append(output_vars_dict[var.name])
    return result_list
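# Plain-dict sketch of the prune branch above: when output_spec names a
# strict subset of the forward outputs, only those variables are kept, in
# spec order. Names here are illustrative.
outs = {'out_a': 'var_a', 'out_b': 'var_b', 'out_c': 'var_c'}
spec_names = ['out_c', 'out_a']
pruned = [outs[name] for name in spec_names if name in outs]
assert pruned == ['var_c', 'var_a']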
def tolist(self):
    """
    Flattens the nested sequences into a single flat list.
    """
    return flatten(self.__raw_input)
def do_predict(args):
    device = paddle.set_device("gpu" if args.use_cuda else "cpu")
    fluid.enable_dygraph(device) if args.eager_run else None

    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None, None], "int64", name="src_pos"),
        Input([None, args.n_head, None, None],
              "float32",
              name="src_slf_attn_bias"),
        Input([None, args.n_head, None, None],
              "float32",
              name="trg_src_attn_bias"),
    ]

    # define data
    dataset = Seq2SeqDataset(fpattern=args.predict_file,
                             src_vocab_fpath=args.src_vocab_fpath,
                             trg_vocab_fpath=args.trg_vocab_fpath,
                             token_delimiter=args.token_delimiter,
                             start_mark=args.special_token[0],
                             end_mark=args.special_token[1],
                             unk_mark=args.special_token[2],
                             byte_data=True)
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = dataset.get_vocab_summary()
    trg_idx2word = Seq2SeqDataset.load_dict(dict_path=args.trg_vocab_fpath,
                                            reverse=True,
                                            byte_data=True)
    batch_sampler = Seq2SeqBatchSampler(dataset=dataset,
                                        use_token_batch=False,
                                        batch_size=args.batch_size,
                                        max_length=args.max_length)
    data_loader = DataLoader(dataset=dataset,
                             batch_sampler=batch_sampler,
                             places=device,
                             collate_fn=partial(prepare_infer_input,
                                                bos_idx=args.bos_idx,
                                                eos_idx=args.eos_idx,
                                                src_pad_idx=args.eos_idx,
                                                n_head=args.n_head),
                             num_workers=0,
                             return_list=True)

    # define model
    model = paddle.Model(
        InferTransformer(args.src_vocab_size,
                         args.trg_vocab_size,
                         args.max_length + 1,
                         args.n_layer,
                         args.n_head,
                         args.d_key,
                         args.d_value,
                         args.d_model,
                         args.d_inner_hid,
                         args.prepostprocess_dropout,
                         args.attention_dropout,
                         args.relu_dropout,
                         args.preprocess_cmd,
                         args.postprocess_cmd,
                         args.weight_sharing,
                         args.bos_idx,
                         args.eos_idx,
                         beam_size=args.beam_size,
                         max_out_len=args.max_out_len), inputs)
    model.prepare()

    # load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")
    model.load(args.init_from_params)

    # TODO: use model.predict when variant length is supported
    f = open(args.output_file, "wb")
    for data in data_loader():
        finished_seq = model.test_batch(inputs=flatten(data))[0]
        finished_seq = np.transpose(finished_seq, [0, 2, 1])
        for ins in finished_seq:
            for beam_idx, beam in enumerate(ins):
                if beam_idx >= args.n_best:
                    break
                id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                word_list = [trg_idx2word[id] for id in id_list]
                sequence = b" ".join(word_list) + b"\n"
                f.write(sequence)
def do_predict(args):
    device = paddle.set_device("gpu" if args.use_gpu else "cpu")
    fluid.enable_dygraph(device) if args.eager_run else None

    # define model
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None], "int64", name="src_length"),
    ]

    # define dataloader
    dataset = Seq2SeqDataset(
        fpattern=args.infer_file,
        src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
        trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
        token_delimiter=None,
        start_mark="<s>",
        end_mark="</s>",
        unk_mark="<unk>")
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.vocab_prefix + "." + args.tar_lang, reverse=True)
    (args.src_vocab_size, args.trg_vocab_size, bos_id, eos_id,
     unk_id) = dataset.get_vocab_summary()
    batch_sampler = Seq2SeqBatchSampler(dataset=dataset,
                                        use_token_batch=False,
                                        batch_size=args.batch_size)
    data_loader = DataLoader(dataset=dataset,
                             batch_sampler=batch_sampler,
                             places=device,
                             collate_fn=partial(prepare_infer_input,
                                                bos_id=bos_id,
                                                eos_id=eos_id,
                                                pad_id=eos_id),
                             num_workers=0,
                             return_list=True)

    model_maker = AttentionInferModel if args.attention else BaseInferModel
    model = paddle.Model(model_maker(args.src_vocab_size,
                                     args.tar_vocab_size,
                                     args.hidden_size,
                                     args.hidden_size,
                                     args.num_layers,
                                     args.dropout,
                                     bos_id=bos_id,
                                     eos_id=eos_id,
                                     beam_size=args.beam_size,
                                     max_out_len=256),
                         inputs=inputs)

    model.prepare()

    # load the trained model
    assert args.reload_model, (
        "Please set reload_model to load the infer model.")
    model.load(args.reload_model)

    # TODO(guosheng): use model.predict when variant length is supported
    with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
        for data in data_loader():
            finished_seq = model.test_batch(inputs=flatten(data))[0]
            finished_seq = finished_seq[:, :, np.newaxis] if len(
                finished_seq.shape) == 2 else finished_seq
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    id_list = post_process_seq(beam, bos_id, eos_id)
                    word_list = [trg_idx2word[id] for id in id_list]
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
                    break
def _run_one_epoch(self, data_loader, callbacks, mode, metrics_name,
                   epoch=None):
    size = len(data_loader) if hasattr(data_loader, '__len__') else None
    logs = {
        'steps': size,
        'metrics_name': metrics_name,
    }

    if mode == 'train':
        assert epoch is not None, 'when mode is train, epoch must be given'
        callbacks.on_epoch_begin(epoch)

    for step, data in enumerate(data_loader):
        # Data may come from different types of data_loader and have
        # different formats, as follows:
        # 1. DataLoader in static graph:
        #    [[input1, input2, ..., label1, label2, ...]]
        # 2. DataLoader in dygraph:
        #    [input1, input2, ..., label1, label2, ...]
        # 3. custom iterator yielding concatenated inputs and labels:
        #    [input1, input2, ..., label1, label2, ...]
        # 4. custom iterator yielding separated inputs and labels:
        #    ([input1, input2, ...], [label1, label2, ...])
        # To handle all of these, flatten the (nested) list to a flat list.
        data = flatten(data)
        # LoDTensor.shape is callable, where LoDTensor comes from
        # DataLoader in static graph
        batch_size = data[0].shape()[0] if callable(
            data[0].shape) else data[0].shape[0]

        callbacks.on_batch_begin(mode, step, logs)
        if mode == 'train':
            outs = self.train(data[:len(self._inputs)],
                              data[len(self._inputs):])
        else:
            outs = self.eval(data[:len(self._inputs)],
                             data[len(self._inputs):])

        # losses
        loss = outs[0] if self._metrics else outs
        metrics = [[l[0] for l in loss]]

        # metrics
        for metric in self._metrics:
            res = metric.accumulate()
            metrics.extend(to_list(res))

        assert len(metrics_name) == len(metrics)
        for k, v in zip(metrics_name, metrics):
            logs[k] = v

        logs['step'] = step
        if mode == 'train' or self._adapter._merge_count.get(
                mode + '_batch', 0) <= 0:
            logs['batch_size'] = batch_size * ParallelEnv().nranks
        else:
            logs['batch_size'] = self._adapter._merge_count[mode + '_batch']

        callbacks.on_batch_end(mode, step, logs)
    self._reset_metrics()

    if mode == 'train':
        assert epoch is not None, 'when mode is train, epoch must be given'
        callbacks.on_epoch_end(epoch)

    return logs
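# Why a single flatten() call handles all four loader layouts listed in the
# comments above: the nesting depth differs, but the flat leaf order does
# not. _flat is a tiny local stand-in for paddle's flatten, for illustration.
def _flat(nest):
    if isinstance(nest, (list, tuple)):
        return [leaf for item in nest for leaf in _flat(item)]
    return [nest]


static_style = [['img', 'label']]        # layout 1
dygraph_style = ['img', 'label']         # layouts 2 and 3
separated = (['img'], ['label'])         # layout 4
for data in (static_style, dygraph_style, separated):
    assert _flat(data) == ['img', 'label']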