def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    dtype = input.dtype
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))
    rank = len(input.shape)
    axes = axis if axis is not None and axis != [] else range(rank)
    axes = [e if e >= 0 else e + rank for e in axes]
    inp_shape = input.shape if fluid.in_dygraph_mode() else layers.shape(input)
    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean(
        (input - mean)**2, dim=axis, keep_dim=keepdim, name=name)

    if unbiased:
        n = 1
        for i in axes:
            n *= inp_shape[i]
        if not fluid.in_dygraph_mode():
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0)
            factor = layers.where(n > 1.0, n / (n - 1.0), zero_const)
        else:
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    if out:
        layers.assign(input=tmp, output=out)
        return out
    else:
        return tmp
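A minimal usage sketch, assuming the 1.x-era fluid dygraph API and that the function above is in scope as var (the tensor values are illustrative):

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(
        np.arange(6, dtype='float32').reshape(2, 3))
    # Unbiased variance over axis 1; matches np.var(x, axis=1, ddof=1).
    v = var(x, axis=[1])
    print(v.numpy())  # -> [1. 1.]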
def predict(self, test_data, batch_size=1, num_workers=0):
    """
    FIXME: add more comments and usage
    Args:
        test_data (Dataset|DataLoader): An iterable data loader is used for
            predict. An instance of paddle.fluid.io.Dataset or
            paddle.fluid.io.Dataloader is recommended.
        batch_size (int): Integer number. The batch size of train_data and
            eval_data. When train_data and eval_data are both instances of
            Dataloader, this parameter will be ignored.
        num_workers (int): The number of subprocesses used to load data, 0 for
            no subprocess used and loading data in the main process. When
            train_data and eval_data are both instances of Dataloader, this
            parameter will be ignored.
    """
    if fluid.in_dygraph_mode():
        feed_list = None
    else:
        feed_list = [x.forward() for x in self._inputs + self._labels]

    if test_data is not None and isinstance(test_data, Dataset):
        test_sampler = DistributedBatchSampler(test_data,
                                               batch_size=batch_size)
        test_loader = DataLoader(test_data,
                                 batch_sampler=test_sampler,
                                 places=self._place,
                                 feed_list=feed_list,
                                 num_workers=num_workers,
                                 return_list=True)
    else:
        test_loader = test_data
    self._test_dataloader = test_loader

    loader = test_loader
    if not isinstance(test_loader, Iterable):
        loader = test_loader()

    outputs = None
    for data in tqdm.tqdm(loader):
        if not fluid.in_dygraph_mode():
            data = data[0]
        outs = self.test(*data)
        if outputs is None:
            outputs = outs
        else:
            outputs = [
                np.vstack([x, outs[i]]) for i, x in enumerate(outputs)
            ]

    self._test_dataloader = None
    if test_loader is not None and self._adapter._nranks > 1 \
            and isinstance(test_loader, DataLoader):
        outputs = [o[:len(test_loader.dataset)] for o in outputs]
    return outputs
def __call__(self, *args, **kwargs):
    """
    Supports to call the returned instance with input `args` and `kwargs` directly.

    Args:
        *args(tuple): tuple of all input arguments from original decorated function.
        **kwargs(dict): dict of all input keyword arguments from original decorated function.

    Return:
        Outputs of decorated function.
    """
    # 1. call dygraph function directly if not enable `declarative`
    if not self._program_trans.enable_to_static:
        # NOTE(liym27):
        # Here calls `warnings.warn` but not `logging_utils.warn` because by default warnings.warn(message)
        # will show up **only once**. StaticFunction.__call__ will run many times, it is appropriate to
        # display this warning message only once.
        warnings.warn(
            "The decorator '@paddle.jit.to_static' does NOT work when setting ProgramTranslator.enable to False. "
            "We will just return dygraph output. If you would like to get static graph output, please call API "
            "ProgramTranslator.enable(True)")
        return self._call_dygraph_function(*args, **kwargs)

    if not in_dygraph_mode():
        raise RuntimeError(
            "Failed to run the callable object {} decorated by '@paddle.jit.to_static', "
            "because it is NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the "
            "following API: paddle.disable_static().".format(
                self.dygraph_function))

    # 2. trace ops from dygraph layers and cache the generated program.
    args, kwargs = self._function_spec.unified_args_and_kwargs(args, kwargs)

    try:
        concrete_program, partial_program_layer = self.get_concrete_program(
            *args, **kwargs)

        # 3. synchronize self.training attribute.
        if isinstance(self._class_instance, layers.Layer):
            partial_program_layer.training = self._class_instance.training

        # 4. return outputs.
        try:
            return partial_program_layer(args)
        except Exception as e:
            if not hasattr(e, error.ERROR_DATA):
                # runtime error
                error.attach_error_data(e, in_runtime=True)
                raise
    except Exception as e:
        error_data = getattr(e, error.ERROR_DATA, None)
        if error_data:
            error_data.raise_new_exception()
        else:
            logging_utils.warn(
                "Please file an issue at 'https://github.com/PaddlePaddle/Paddle/issues'"
                " if you can't handle this {} yourself.".format(type(e)))
            raise e
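For context, a hedged sketch of the decorator flow that reaches this __call__, assuming the Paddle 2.x paddle.jit API (the function name add_one is illustrative):

import paddle

@paddle.jit.to_static
def add_one(x):
    return x + 1

x = paddle.ones([2, 2])
add_one(x)  # traced and cached as a static program on first call

# With the translator disabled, the warning branch above fires and the
# original dygraph function runs instead.
paddle.jit.ProgramTranslator().enable(False)
add_one(x)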
def optimizer_setting(parameter_list=None):
    total_images = IMAGENET1000
    step = int(math.ceil(float(total_images) / batch_size))
    epochs = [30, 60, 90]
    bd = [step * e for e in epochs]
    lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
    if fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay),
            parameter_list=parameter_list)
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay))

    return optimizer
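This variant reads module-level globals (IMAGENET1000, batch_size, base_lr, momentum_rate, l2_decay). A hedged sketch of the configuration it assumes and the schedule it produces; the values are illustrative:

# Hypothetical module-level configuration.
IMAGENET1000 = 1281167
batch_size = 256
base_lr = 0.1
momentum_rate = 0.9
l2_decay = 1e-4
# step = ceil(1281167 / 256) = 5005 iterations per epoch, so the learning
# rate decays 10x at iterations [150150, 300300, 450450], stepping through
# the values [0.1, 0.01, 0.001, 0.0001].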
def optimizer_setting(params, parameter_list=None):
    ls = params["learning_strategy"]
    if "total_images" not in params:
        total_images = 6149
    else:
        total_images = params["total_images"]

    batch_size = ls["batch_size"]
    step = int(math.ceil(float(total_images) / batch_size))
    bd = [step * e for e in ls["epochs"]]  # computed but unused below
    lr = params["lr"]
    num_epochs = params["num_epochs"]
    if fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay),
            parameter_list=parameter_list)
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay))

    return optimizer
def optimizer_setting(params, parameter_list=None):
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 1281167
        else:
            total_images = params["total_images"]

        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)

        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

        if fluid.in_dygraph_mode():
            optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                            parameter_list=parameter_list)
        else:
            optimizer = fluid.optimizer.SGD(learning_rate=0.01)
        # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
        #  optimizer = fluid.optimizer.Momentum(
        #  learning_rate=params["lr"],
        #  learning_rate=fluid.layers.piecewise_decay(
        #  boundaries=bd, values=lr),
        #  momentum=0.9,
        #  regularization=fluid.regularizer.L2Decay(1e-4))

    return optimizer
def check_type(op_str, x, y, binary_op):
    op = getattr(paddle, op_str)
    error_type = TypeError
    if isinstance(x, np.ndarray):
        x = paddle.to_tensor(x)
        y = paddle.to_tensor(y)
        error_type = BaseException
    if binary_op:
        if type_str_map['x'] != 'bool' or type_str_map['y'] != 'bool':
            unit_test.assertRaises(error_type, op, x=x, y=y)
        if not fluid.in_dygraph_mode():
            unit_test.assertRaises(error_type, op, x=x, y=y, out=1)
    else:
        if type_str_map['x'] != 'bool':
            unit_test.assertRaises(error_type, op, x=x)
        if not fluid.in_dygraph_mode():
            unit_test.assertRaises(error_type, op, x=x, out=1)
def __call__(self, *args, **kwargs):
    """
    Supports to call the returned instance with input `args` and `kwargs` directly.

    Args:
        *args(tuple): tuple of all input arguments from original decorated function.
        **kwargs(dict): dict of all input keyword arguments from original decorated function.

    Return:
        Outputs of decorated function.
    """
    # 1. call dygraph function directly if not enable `declarative`
    if not self._program_trans.enable_declarative:
        logging_utils.warn(
            "The decorator '@paddle.jit.to_static' does NOT work when setting ProgramTranslator.enable=False. "
            "We will just return dygraph output.")
        return self._call_dygraph_function(*args, **kwargs)

    if not in_dygraph_mode() and self._program_trans.enable_declarative:
        raise RuntimeError(
            "Failed to run the callable object {} decorated by '@paddle.jit.to_static', "
            "because it is NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the "
            "following API: paddle.disable_static().".format(
                self.dygraph_function))

    # 2. trace ops from dygraph layers and cache the generated program.
    args, kwargs = self._function_spec.unified_args_and_kwargs(args, kwargs)
    try:
        concrete_program, partial_program_layer = self.get_concrete_program(
            *args, **kwargs)

        # 3. synchronize self.training attribute.
        if isinstance(self._class_instance, layers.Layer):
            partial_program_layer.training = self._class_instance.training

        # 4. return outputs.
        return partial_program_layer(args)
    except Exception as e:
        if not hasattr(e, ERROR_DATA):
            # runtime error
            attach_error_data(e, in_runtime=True)
        error_data = getattr(e, ERROR_DATA, None)
        if error_data:
            new_exception = error_data.create_exception()
            if six.PY3:
                # NOTE(liym27):
                # 1. Why `raise new_exception from None`?
                #    In Python 3, by default, a new exception is raised with trace information of the caught
                #    exception. This only raises new_exception and hides unwanted implementation details from
                #    tracebacks of the caught exception.
                # 2. Use exec to bypass syntax error checking in Python 2.
                six.exec_("raise new_exception from None")
            else:
                raise new_exception
        else:
            raise
def calc_gradients(outputs, inputs, no_grad_set):
    if fluid.in_dygraph_mode():
        return fluid.dygraph.grad(outputs=outputs,
                                  inputs=inputs,
                                  no_grad_vars=no_grad_set,
                                  create_graph=True)
    else:
        return fluid.gradients(targets=outputs,
                               inputs=inputs,
                               no_grad_set=no_grad_set)
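A hedged usage sketch in dygraph mode, assuming the 1.x fluid API; create_graph=True keeps the graph so the returned gradients can themselves be differentiated:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.array([3.0], dtype='float32'))
    x.stop_gradient = False
    y = x * x
    dx = calc_gradients(outputs=[y], inputs=[x], no_grad_set=None)[0]
    print(dx.numpy())  # -> [6.], since d(x^2)/dx = 2x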
def forward(self, input):
    if fluid.in_dygraph_mode():
        out, _, _ = fluid.core.ops.instance_norm(input, self.scale, self.bias,
                                                 'epsilon', self.epsilon)
        return out
    else:
        return fluid.layers.instance_norm(
            input,
            epsilon=self.epsilon,
            param_attr=fluid.ParamAttr(self.scale.name),
            bias_attr=fluid.ParamAttr(self.bias.name))
def build_optimizer(layer, cfg, loss=None):
    learning_rate = 1e-3
    beta1 = 0.5
    beta2 = 0.999
    if fluid.in_dygraph_mode():
        return fluid.optimizer.Adam(learning_rate=learning_rate,
                                    beta1=beta1,
                                    beta2=beta2,
                                    parameter_list=layer.parameters())
    else:
        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                         beta1=beta1,
                                         beta2=beta2)
        optimizer.minimize(loss, parameter_list=layer.parameters())
        return optimizer
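A hedged dygraph usage sketch; cfg is unused by this helper, so any placeholder works, and fluid.dygraph.Linear stands in for a real network:

import paddle.fluid as fluid

with fluid.dygraph.guard():
    layer = fluid.dygraph.Linear(4, 2)
    opt = build_optimizer(layer, cfg=None)
    # Typical dygraph step: forward, loss.backward(),
    # opt.minimize(loss), layer.clear_gradients()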
def __init__(self):
    super(Model, self).__init__(self.__class__.__name__)
    self.mode = 'train'
    self._inputs = None
    self._labels = None
    self._loss_function = None
    self._loss_weights = None
    self._optimizer = None
    self._device = None
    self._test_dataloader = None

    # init backend
    if fluid.in_dygraph_mode():
        self._adapter = DynamicGraphAdapter(self)
    else:
        self._adapter = StaticGraphAdapter(self)
def start(self, places=None):
    """start Pyreader"""
    if places is None:
        places = F.cuda_places() if F.core.is_compiled_with_cuda() \
            else F.cpu_places()
    #assert self.pyreader is not None, 'use Dataset.features to build net first, then start dataset'

    def _gen():
        try:
            for idx, i in enumerate(self.generator()):
                yield i
        except Exception as e:
            log.exception(e)
            raise e

    r = F.io.PyReader(feed_list=self.placeholders(),
                      capacity=50,
                      iterable=True,
                      return_list=F.in_dygraph_mode())
    r.decorate_batch_generator(_gen, places=places)
    return r()
def optimizer_setting(params, parameter_list=None):
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 6149
        else:
            total_images = params["total_images"]
        # TODO(Yancey1989): using lr decay if it is ready.
        #batch_size = ls["batch_size"]
        #step = int(total_images / batch_size + 1)
        #bd = [step * e for e in ls["epochs"]]
        #base_lr = params["lr"]
        #lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

        if fluid.in_dygraph_mode():
            optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                            parameter_list=parameter_list)
        else:
            optimizer = fluid.optimizer.SGD(learning_rate=0.01)

    return optimizer
def optimizer_setting(params, parameter_list=None):
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 1281167
        else:
            total_images = params["total_images"]

        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)

        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

        if fluid.in_dygraph_mode():
            optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                            parameter_list=parameter_list)
        else:
            optimizer = fluid.optimizer.SGD(learning_rate=0.01)

    return optimizer
def prepare_distributed_context(place=None):
    if place is None:
        place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
            else fluid.CUDAPlace(0)

    strategy = ParallelStrategy()
    strategy.nranks = ParallelEnv().nranks
    strategy.local_rank = ParallelEnv().local_rank
    strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
    strategy.current_endpoint = ParallelEnv().current_endpoint

    if strategy.nranks < 2:
        return

    global _parallel_context_initialized

    if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace):

        def _init_context():
            communicator_prog = fluid.Program()
            init_communicator(communicator_prog, strategy.local_rank,
                              strategy.nranks, True, strategy.current_endpoint,
                              strategy.trainer_endpoints)
            exe = fluid.Executor(place)
            exe.run(communicator_prog)

        if fluid.in_dygraph_mode():
            fluid.disable_dygraph()
            _init_context()
            fluid.enable_dygraph(place)
        else:
            _init_context()

    else:
        # The original `assert ("Only support CUDAPlace for now.")` asserted a
        # truthy string, so it never fired; check the place type explicitly.
        assert isinstance(place, fluid.CUDAPlace), \
            "Only support CUDAPlace for now."

    _parallel_context_initialized = True
    return strategy
def func_hook_in_double_grad(self):
    def double_print_hook(grad):
        grad = grad * 2
        print(grad)
        return grad

    x = paddle.ones(shape=[1], dtype='float32')
    x.stop_gradient = False

    # The hook only works in backward; for the forward var x, the x.grad
    # generated in paddle.grad will not be processed by the hook.
    x.register_hook(double_print_hook)

    y = x * x
    # Since y = x * x, dx = 2 * x
    dx = paddle.grad(outputs=[y],
                     inputs=[x],
                     create_graph=True,
                     retain_graph=True)[0]

    z = y + dx
    self.assertTrue(x.grad is None)

    # If create_graph = True, the gradient of dx
    # would be backpropagated. Therefore,
    # z = x * x + dx = x * x + 2 * x, and
    # x.gradient() = 2 * x + 2 = 4.0
    # after changed by hook: 8.0

    # TODO(wuweilong): enable this case when DoubleGrad in eager mode is ready
    if fluid.in_dygraph_mode():
        pass
    else:
        z.backward()
        self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.])))
def forward(self, inputs, initial_states=None, sequence_length=None, **kwargs):
    if fluid.in_dygraph_mode():

        class OutputArray(object):
            def __init__(self, x):
                self.array = [x]

            def append(self, x):
                self.array.append(x)

        def _maybe_copy(state, new_state, step_mask):
            # TODO: use where_op
            new_state = fluid.layers.elementwise_mul(
                new_state, step_mask, axis=0) - fluid.layers.elementwise_mul(
                    state, (step_mask - 1), axis=0)
            return new_state

        flat_inputs = flatten(inputs)
        batch_size, time_steps = (flat_inputs[0].shape[self.batch_index],
                                  flat_inputs[0].shape[self.time_step_index])

        if initial_states is None:
            initial_states = self.cell.get_initial_states(
                batch_ref=inputs, batch_dim_idx=self.batch_index)

        if not self.time_major:
            inputs = map_structure(
                lambda x: fluid.layers.transpose(x, [1, 0] + list(
                    range(2, len(x.shape)))), inputs)

        if sequence_length is not None:
            mask = fluid.layers.sequence_mask(
                sequence_length,
                maxlen=time_steps,
                dtype=flatten(initial_states)[0].dtype)
            mask = fluid.layers.transpose(mask, [1, 0])

        if self.is_reverse:
            inputs = map_structure(
                lambda x: fluid.layers.reverse(x, axis=[0]), inputs)
            mask = fluid.layers.reverse(
                mask, axis=[0]) if sequence_length is not None else None

        states = initial_states
        outputs = []
        for i in range(time_steps):
            step_inputs = map_structure(lambda x: x[i], inputs)
            step_outputs, new_states = self.cell(step_inputs, states, **kwargs)
            if sequence_length is not None:
                new_states = map_structure(
                    partial(_maybe_copy, step_mask=mask[i]), states,
                    new_states)
            states = new_states
            if i == 0:
                outputs = map_structure(lambda x: OutputArray(x),
                                        step_outputs)
            else:
                map_structure(lambda x, x_array: x_array.append(x),
                              step_outputs, outputs)

        final_outputs = map_structure(
            lambda x: fluid.layers.stack(x.array, axis=self.time_step_index),
            outputs)

        if self.is_reverse:
            final_outputs = map_structure(
                lambda x: fluid.layers.reverse(x, axis=self.time_step_index),
                final_outputs)

        final_states = new_states
    else:
        final_outputs, final_states = fluid.layers.rnn(
            self.cell,
            inputs,
            initial_states=initial_states,
            sequence_length=sequence_length,
            time_major=self.time_major,
            is_reverse=self.is_reverse,
            **kwargs)

    return final_outputs, final_states
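Design note: in dygraph mode this forward unrolls the cell step by step in a Python loop, using _maybe_copy to freeze the states of sequences that have already ended, while in static-graph mode it defers to fluid.layers.rnn, which builds the whole loop as a single graph construct.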
def forward(self, inputs, initial_states=None, sequence_length=None, **kwargs):
    if F.in_dygraph_mode():

        class OutputArray(object):
            def __init__(self, x):
                self.array = [x]

            def append(self, x):
                self.array.append(x)

        def _maybe_copy(state, new_state, step_mask):
            # TODO: use where_op
            new_state = L.elementwise_mul(new_state, step_mask, axis=0) - \
                L.elementwise_mul(state, (step_mask - 1), axis=0)
            return new_state

        #logging.info("inputs shape: {}".format(inputs.shape))
        flat_inputs = U.flatten(inputs)
        #logging.info("flat inputs len: {}".format(len(flat_inputs)))
        #logging.info("flat inputs[0] shape: {}".format(flat_inputs[0].shape))
        batch_size, time_steps = (flat_inputs[0].shape[self.batch_index],
                                  flat_inputs[0].shape[self.time_step_index])
        #logging.info("batch_size: {}".format(batch_size))
        #logging.info("time_steps: {}".format(time_steps))

        if initial_states is None:
            initial_states = self.cell.get_initial_states(
                batch_ref=inputs, batch_dim_idx=self.batch_index)

        if not self.time_major:
            # If the first dim is not the time step, swap the first two dims
            # so that the time step comes first.
            inputs = U.map_structure(
                lambda x: L.transpose(x, [1, 0] + list(
                    range(2, len(x.shape)))), inputs)

        if sequence_length is not None:
            mask = L.sequence_mask(
                sequence_length,
                maxlen=time_steps,
                dtype=U.flatten(initial_states)[0].dtype)
            # Likewise, put the time step first.
            mask = L.transpose(mask, [1, 0])

        if self.is_reverse:
            # For a reversed RNN, reverse along the first (time) dim.
            inputs = U.map_structure(lambda x: L.reverse(x, axis=[0]), inputs)
            mask = L.reverse(mask, axis=[0]) \
                if sequence_length is not None else None

        states = initial_states
        outputs = []
        # Iterate over time steps.
        for i in range(time_steps):
            # Take this step's input.
            step_inputs = U.map_structure(lambda x: x[i], inputs)
            # Feed the current input and states to get the output and new states.
            step_outputs, new_states = self.cell(step_inputs, states, **kwargs)
            if sequence_length is not None:
                # Where masked, keep the old state values.
                # _maybe_copy: use new_states for unmasked positions and
                # states for masked ones.
                new_states = U.map_structure(
                    partial(_maybe_copy, step_mask=mask[i]), states,
                    new_states)
            states = new_states
            #logging.info("step_output shape: {}".format(step_outputs.shape))
            if i == 0:
                # First step: initialize one array per output.
                outputs = U.map_structure(lambda x: OutputArray(x),
                                          step_outputs)
            else:
                # Append each output to its corresponding array.
                U.map_structure(lambda x, x_array: x_array.append(x),
                                step_outputs, outputs)

        # Finally, stack along the time-step dim.
        final_outputs = U.map_structure(
            lambda x: L.stack(x.array, axis=self.time_step_index), outputs)
        #logging.info("final_outputs shape: {}".format(final_outputs.shape))

        if self.is_reverse:
            # If reversed, reverse the final outputs as well.
            final_outputs = U.map_structure(
                lambda x: L.reverse(x, axis=self.time_step_index),
                final_outputs)

        final_states = new_states
    else:
        final_outputs, final_states = L.rnn(
            self.cell,
            inputs,
            initial_states=initial_states,
            sequence_length=sequence_length,
            time_major=self.time_major,
            is_reverse=self.is_reverse,
            **kwargs)

    return final_outputs, final_states
def do_predict(args):
    device = set_device("gpu" if args.use_gpu else "cpu")
    fluid.enable_dygraph(device) if args.eager_run else None

    # define model
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None], "int64", name="src_length"),
    ]

    # define dataloader
    dataset = Seq2SeqDataset(
        fpattern=args.infer_file,
        src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
        trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
        token_delimiter=None,
        start_mark="<s>",
        end_mark="</s>",
        unk_mark="<unk>")
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.vocab_prefix + "." + args.tar_lang, reverse=True)
    (args.src_vocab_size, args.tar_vocab_size, bos_id, eos_id,
     unk_id) = dataset.get_vocab_summary()
    batch_sampler = Seq2SeqBatchSampler(dataset=dataset,
                                        use_token_batch=False,
                                        batch_size=args.batch_size)
    data_loader = DataLoader(dataset=dataset,
                             batch_sampler=batch_sampler,
                             places=device,
                             feed_list=None if fluid.in_dygraph_mode() else
                             [x.forward() for x in inputs],
                             collate_fn=partial(prepare_infer_input,
                                                bos_id=bos_id,
                                                eos_id=eos_id,
                                                pad_id=eos_id),
                             num_workers=0,
                             return_list=True)

    model_maker = AttentionInferModel if args.attention else BaseInferModel
    model = model_maker(args.src_vocab_size,
                        args.tar_vocab_size,
                        args.hidden_size,
                        args.hidden_size,
                        args.num_layers,
                        args.dropout,
                        bos_id=bos_id,
                        eos_id=eos_id,
                        beam_size=args.beam_size,
                        max_out_len=256)

    model.prepare(inputs=inputs)

    # load the trained model
    assert args.reload_model, (
        "Please set reload_model to load the infer model.")
    model.load(args.reload_model)

    # TODO(guosheng): use model.predict when it supports variant length
    with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
        for data in data_loader():
            finished_seq = model.test_batch(inputs=flatten(data))[0]
            finished_seq = finished_seq[:, :, np.newaxis] if len(
                finished_seq.shape) == 2 else finished_seq
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    id_list = post_process_seq(beam, bos_id, eos_id)
                    word_list = [trg_idx2word[id] for id in id_list]
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
                    break  # only write the top beam
def get_tracer_mode(self):
    assert fluid.in_dygraph_mode(), "Dygraph mode must be enabled"
def evaluate(self,
             eval_data,
             batch_size=1,
             log_freq=10,
             verbose=2,
             num_workers=0,
             callbacks=None):
    """
    FIXME: add more comments and usage
    Args:
        eval_data (Dataset|DataLoader): An iterable data loader is used for
            evaluation. An instance of paddle.fluid.io.Dataset or
            paddle.fluid.io.Dataloader is recommended.
        batch_size (int): Integer number. The batch size of train_data and
            eval_data. When train_data and eval_data are both instances of
            Dataloader, this parameter will be ignored.
        log_freq (int): The frequency, in number of steps, the eval logs
            are printed.
        verbose (int): The verbosity mode, should be 0, 1, or 2.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        num_workers (int): The number of subprocesses used to load data, 0 for
            no subprocess used and loading data in the main process. When
            train_data and eval_data are both instances of Dataloader, this
            parameter will be ignored.
        callbacks (Callback|None): A list of `Callback` instances to apply
            during evaluation. If None, `ProgBarLogger` and `ModelCheckpoint`
            are automatically inserted.
    """
    if fluid.in_dygraph_mode():
        feed_list = None
    else:
        feed_list = [x.forward() for x in self._inputs + self._labels]

    if eval_data is not None and isinstance(eval_data, Dataset):
        eval_sampler = DistributedBatchSampler(eval_data,
                                               batch_size=batch_size)
        eval_loader = DataLoader(eval_data,
                                 batch_sampler=eval_sampler,
                                 places=self._place,
                                 feed_list=feed_list,
                                 num_workers=num_workers,
                                 return_list=True)
    else:
        eval_loader = eval_data

    self._test_dataloader = eval_loader
    metrics_name = self._metrics_name()

    cbks = config_callbacks(callbacks,
                            model=self,
                            log_freq=log_freq,
                            verbose=verbose,
                            metrics=self._metrics_name())

    loader = eval_loader
    if not isinstance(eval_loader, Iterable):
        loader = eval_loader()

    eval_steps = len(loader) if hasattr(loader, '__len__') else None
    cbks.on_begin('eval', {
        'steps': eval_steps,
        'metrics_name': metrics_name
    })

    logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

    cbks.on_end('eval', logs)

    self._test_dataloader = None

    eval_result = {}
    for k in self._metrics_name():
        eval_result[k] = logs[k]

    return eval_result
def prepare(self,
            optimizer=None,
            loss_function=None,
            metrics=None,
            inputs=None,
            labels=None,
            device=None):
    """
    FIXME: add comments
    Args:
        optimizer (Optimizer|None): Optimizer must be set in training
            and should be an Optimizer instance. It can be None in eval
            and test mode.
        loss_function (Loss|None): Loss function must be set in training
            and should be a Loss instance. It can be None when there is
            no loss.
        metrics (Metric|list of Metric|None): If metrics is set, all
            metrics will be calculated and output in train/eval mode.
        inputs (Input|list|dict|None): Inputs, entry points of network,
            could be an Input layer, or a list of Input layers,
            or a dict (name: Input), or None. For static graph,
            inputs must be set. For dynamic graph, it could be None.
        labels (Input|list|None): Labels, entry points of network,
            could be an Input layer or a list of Input layers, or None.
            For static graph, if loss_function is set in Model.prepare(),
            it must be set. Otherwise, it could be None.
        device (str|None): Specify device type, 'CPU' or 'GPU'.
            If None, automatically select device according to
            installation package version.
    """
    if isinstance(device, fluid.CUDAPlace) or \
            (isinstance(device, six.string_types) and device.lower() == 'gpu') \
            or (device is None and fluid.is_compiled_with_cuda()):
        if isinstance(device, fluid.CUDAPlace):
            self._place = device
        else:
            self._place = fluid.CUDAPlace(ParallelEnv().dev_id) \
                if ParallelEnv().nranks > 1 else fluid.CUDAPlace(0)

        global _parallel_context_initialized
        if ParallelEnv().nranks > 1 and not _parallel_context_initialized:
            if fluid.in_dygraph_mode():
                fluid.disable_dygraph()
                fluid.enable_dygraph(self._place)
                fluid.dygraph.parallel.prepare_context()
            else:
                prepare_distributed_context(self._place)
            _parallel_context_initialized = True
    elif isinstance(device, fluid.CPUPlace):
        self._place = device
    elif (isinstance(device, six.string_types) and device.lower() == 'cpu') \
            or (device is None):
        self._place = fluid.CPUPlace()
    else:
        raise ValueError(
            "Expected device in ('gpu', 'cpu', fluid.CUDAPlace, "
            "fluid.CPUPlace, None), but got {}".format(device))

    self._optimizer = optimizer
    if loss_function:
        if not isinstance(loss_function, Loss):
            raise TypeError("'loss_function' must be a subclass of 'Loss'")
    self._loss_function = loss_function
    if not in_dygraph_mode():
        if not isinstance(inputs, (list, dict, Input)):
            raise TypeError(
                "'inputs' must be list or dict in static graph mode")
        if loss_function and not isinstance(labels, (list, Input)):
            raise TypeError("'labels' must be list in static graph mode")

    metrics = metrics or []
    for metric in to_list(metrics):
        assert isinstance(metric, Metric), \
            "{} is not a subclass of Metric".format(
                metric.__class__.__name__)
    self._metrics = to_list(metrics)

    self._inputs = to_list(inputs) if not isinstance(inputs, dict) else [
        inputs[n] for n in extract_args(self.forward) if n != 'self'
    ]
    self._labels = to_list(labels)

    if not in_dygraph_mode():
        self._adapter.prepare()
def train(self):
    if F.in_dygraph_mode():
        super(ErnieModel, self).train()
    self.training = True
    for l in self.sublayers():
        l.training = True
def eval(self):
    if F.in_dygraph_mode():
        super(ErnieModel, self).eval()
    self.training = False
    for l in self.sublayers():
        l.training = False
def __impl__(*args, **kwargs):
    if fluid.in_dygraph_mode():
        return func(*args, **kwargs)
    else:
        with fluid.dygraph.guard():
            return func(*args, **kwargs)
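This is the usual "run under dygraph either way" test helper. A hedged sketch of the enclosing decorator it implies; the name dygraph_guard_if_needed is hypothetical:

import functools
import paddle.fluid as fluid

def dygraph_guard_if_needed(func):
    @functools.wraps(func)
    def __impl__(*args, **kwargs):
        if fluid.in_dygraph_mode():
            return func(*args, **kwargs)
        else:
            with fluid.dygraph.guard():
                return func(*args, **kwargs)
    return __impl__

@dygraph_guard_if_needed
def make_ones():
    return fluid.layers.ones([2], dtype='float32').numpy()

print(make_ones())  # works whether or not dygraph was already enabled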
def test_func2(self):
    # After test_func1 executed, if fluid.dygraph.guard() in test_func1 safely exited,
    # fluid.in_dygraph_mode() should be false.
    self.assertEqual(fluid.in_dygraph_mode(), False)
def fit(self,
        train_data=None,
        eval_data=None,
        batch_size=1,
        epochs=1,
        eval_freq=1,
        log_freq=10,
        save_dir=None,
        save_freq=1,
        verbose=2,
        drop_last=False,
        shuffle=True,
        num_workers=0,
        callbacks=None):
    """
    FIXME: add more comments and usage
    Args:
        train_data (Dataset|DataLoader): An iterable data loader is used for
            train. An instance of paddle.fluid.io.Dataset or
            paddle.fluid.io.Dataloader is recommended.
        eval_data (Dataset|DataLoader): An iterable data loader is used for
            evaluation at the end of epoch. If None, will not do evaluation.
            An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader
            is recommended.
        batch_size (int): Integer number. The batch size of train_data and
            eval_data. When train_data and eval_data are both instances of
            Dataloader, this parameter will be ignored.
        epochs (int): Integer number. The number of epochs to train the model.
        eval_freq (int): The frequency, in number of epochs, an evaluation
            is performed.
        log_freq (int): The frequency, in number of steps, the training logs
            are printed.
        save_dir(str|None): The directory to save checkpoint during training.
            If None, will not save checkpoint.
        save_freq (int): The frequency, in number of epochs, to save checkpoint.
        verbose (int): The verbosity mode, should be 0, 1, or 2.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        drop_last (bool): Whether to drop the last incomplete batch of
            train_data when the dataset size is not divisible by the batch
            size. When train_data is an instance of Dataloader, this
            parameter will be ignored.
        shuffle (bool): Whether to shuffle train_data. When train_data is an
            instance of Dataloader, this parameter will be ignored.
        num_workers (int): The number of subprocesses used to load data, 0 for
            no subprocess used and loading data in the main process. When
            train_data and eval_data are both instances of Dataloader, this
            parameter will be ignored.
        callbacks (Callback|None): A list of `Callback` instances to apply
            during training. If None, `ProgBarLogger` and `ModelCheckpoint`
            are automatically inserted.
    """
    assert train_data is not None, "train_data must be given!"
    if fluid.in_dygraph_mode():
        feed_list = None
    else:
        feed_list = [x.forward() for x in self._inputs + self._labels]

    if isinstance(train_data, Dataset):
        train_sampler = DistributedBatchSampler(train_data,
                                                batch_size=batch_size,
                                                shuffle=shuffle,
                                                drop_last=drop_last)
        train_loader = DataLoader(train_data,
                                  batch_sampler=train_sampler,
                                  places=self._place,
                                  feed_list=feed_list,
                                  num_workers=num_workers,
                                  return_list=True)
    else:
        train_loader = train_data

    if eval_data is not None and isinstance(eval_data, Dataset):
        eval_sampler = DistributedBatchSampler(eval_data,
                                               batch_size=batch_size)
        eval_loader = DataLoader(eval_data,
                                 batch_sampler=eval_sampler,
                                 places=self._place,
                                 feed_list=feed_list,
                                 num_workers=num_workers,
                                 return_list=True)
    elif eval_data is not None:
        eval_loader = eval_data
    else:
        eval_loader = None

    do_eval = eval_loader is not None
    self._test_dataloader = eval_loader
    metrics_name = self._metrics_name()
    steps = len(train_loader) if hasattr(train_loader, '__len__') else None
    cbks = config_callbacks(callbacks,
                            model=self,
                            epochs=epochs,
                            steps=steps,
                            log_freq=log_freq,
                            save_freq=save_freq,
                            save_dir=save_dir,
                            verbose=verbose,
                            metrics=self._metrics_name())

    cbks.on_begin('train')
    for epoch in range(epochs):
        # FIXME: adapt to DataLoader
        loader = train_loader
        if not isinstance(train_loader, Iterable):
            loader = train_loader()
        logs = self._run_one_epoch(loader,
                                   cbks,
                                   'train',
                                   metrics_name,
                                   epoch=epoch)

        if do_eval and epoch % eval_freq == 0:
            # FIXME: adapt to DataLoader
            loader = eval_loader
            if not isinstance(eval_loader, Iterable):
                loader = eval_loader()
            eval_steps = len(loader) if hasattr(loader, '__len__') else None
            cbks.on_begin('eval', {
                'steps': eval_steps,
                'metrics_name': metrics_name
            })

            logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

            cbks.on_end('eval', logs)

    cbks.on_end('train', logs)
    self._test_dataloader = None
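Taken together, prepare/fit/evaluate/predict form the high-level loop of this Model class. A hedged end-to-end sketch; MyModel, CrossEntropy, train_ds, and val_ds are hypothetical stand-ins for a Model subclass, a Loss subclass, and two paddle.fluid.io.Dataset instances:

# Hypothetical workflow sketch for the hapi-style Model above.
model = MyModel()
model.prepare(
    optimizer=fluid.optimizer.Adam(parameter_list=model.parameters()),
    loss_function=CrossEntropy(),
    inputs=[Input([None, 784], 'float32', name='image')],
    labels=[Input([None, 1], 'int64', name='label')])
model.fit(train_ds, val_ds, batch_size=64, epochs=2, save_dir='checkpoints')
eval_result = model.evaluate(val_ds, batch_size=64)
preds = model.predict(val_ds, batch_size=64)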