def _apply(self, fn):
    """Apply ``fn`` to every parameter, gradient and buffer of this module tree.

    Child modules are handled first through their own ``_apply``. Afterwards
    each non-``None`` parameter is replaced by a new ``Parameter`` wrapping
    ``Tensor(fn(param))`` (keeping the ``requires_grad`` flag), its gradient
    (if any) is transformed the same way, and each non-``None`` buffer is
    replaced by ``Tensor(fn(buf))``.

    Args:
        fn: callable that receives a tensor and returns the transformed tensor.

    Returns:
        ``self``.
    """
    for child in self.children():
        child._apply(fn)

    for name, param in self._parameters.items():
        if param is None:
            continue
        assert isinstance(param, Parameter)
        assert param.is_leaf
        with flow.no_grad():
            # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
            converted = Tensor(fn(param))
        self._parameters[name] = Parameter(converted, param.requires_grad)

        old_grad = param.grad
        if old_grad is None:
            continue
        assert old_grad.is_leaf
        with flow.no_grad():
            # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
            converted_grad = Tensor(fn(old_grad))
        # The new gradient keeps the requires_grad flag of the old one.
        self._parameters[name].grad = converted_grad.requires_grad_(
            old_grad.requires_grad
        )

    for name, buf in self._buffers.items():
        if buf is None:
            continue
        # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring
        self._buffers[name] = Tensor(fn(buf))

    return self
def _apply(self, fn):
    """Apply ``fn`` to every parameter, gradient and buffer of this module tree.

    Child modules are processed first via their own ``_apply``. Each
    non-``None`` parameter is then replaced by ``Parameter(fn(param), ...)``
    carrying the original ``requires_grad`` flag, its gradient (when present)
    is replaced by ``fn(param.grad)``, and each non-``None`` buffer is
    replaced by ``fn(buf)``.

    Args:
        fn: callable that receives a tensor and returns the transformed tensor.

    Returns:
        ``self``.
    """
    for child in self.children():
        child._apply(fn)

    for name, param in self._parameters.items():
        if param is None:
            continue
        assert isinstance(param, Parameter)
        assert param.is_leaf
        with flow.no_grad():
            transformed = fn(param)
        self._parameters[name] = Parameter(transformed, param.requires_grad)

        old_grad = param.grad
        if old_grad is None:
            continue
        assert old_grad.is_leaf
        with flow.no_grad():
            transformed_grad = fn(old_grad)
        # The new gradient keeps the requires_grad flag of the old one.
        self._parameters[name].grad = transformed_grad.requires_grad_(
            old_grad.requires_grad
        )

    for name, buf in self._buffers.items():
        if buf is None:
            continue
        self._buffers[name] = fn(buf)

    return self
def _apply(self, fn, applied_dict=None):
    """Apply ``fn`` once to each distinct parameter/buffer of this module tree.

    Args:
        fn: callable that receives a tensor and returns the transformed tensor.
        applied_dict: mapping from an original tensor to its already
            transformed counterpart. It is shared with child modules so a
            tensor referenced from several places is only transformed once.
            A fresh dict is created when ``None``.

    Returns:
        ``self``.
    """
    if applied_dict is None:
        applied_dict = {}

    for child in self.children():
        child._apply(fn, applied_dict)

    def can_use_assign_copy(tensor, tensor_applied):
        # In-place assignment is only used when both tensors agree on is_local.
        return tensor.is_local == tensor_applied.is_local

    for name, param in self._parameters.items():
        if param is None:
            continue

        need_apply = param not in applied_dict
        if need_apply:
            assert isinstance(param, Parameter)
            assert param.is_leaf
            with flow.no_grad():
                param_applied = fn(param)
            param_applied.requires_grad = param.requires_grad

            if param.grad is not None:
                assert param.grad.is_leaf
                with flow.no_grad():
                    grad_applied = fn(param.grad)
                grad_applied.requires_grad = param.grad.requires_grad
                param_applied.grad = grad_applied
        else:
            param_applied = applied_dict[param]

        assign_in_place = can_use_assign_copy(param_applied, param)
        if need_apply and assign_in_place:
            # Keep the Parameter object, only swap its data.
            self._parameters[name].data = param_applied
            applied_dict[param] = param_applied
        elif need_apply:
            replacement = Parameter(param_applied, param.requires_grad)
            self._parameters[name] = replacement
            applied_dict[param] = replacement
        elif not assign_in_place:
            # Reuse the Parameter created when this tensor was first seen.
            self._parameters[name] = param_applied
        # Otherwise the parameter's data was already set in place earlier.

    for name, buf in self._buffers.items():
        if buf is None:
            continue
        if buf in applied_dict:
            self._buffers[name] = applied_dict[buf]
            continue
        buf_applied = fn(buf)
        self._buffers[name] = buf_applied
        applied_dict[buf] = buf_applied

    return self
def predict(config) -> None:
    """Predict the emotion of the input audio and display the result.

    Features are extracted with the method selected by
    ``config.feature_method`` (``"o"`` uses the ``of`` helpers, ``"l"`` the
    ``lf`` helper), the checkpoint is loaded into an LSTM or 1-D CNN model,
    and the predicted label is printed. The class probabilities are passed to
    ``utils.radar``.

    Args:
        config: configuration items. Attributes read here: ``feature_method``,
            ``audio_path`` (path of the input audio),
            ``predict_feature_path_opensmile`` /
            ``predict_feature_path_librosa``, ``model``, ``rnn_size``,
            ``n_kernels``, ``hidden_size``, ``class_labels``,
            ``checkpoint_path`` and ``checkpoint_name``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``"o"`` nor ``"l"``.
    """
    feature_method = config.feature_method
    # Fail early with a clear message instead of an UnboundLocalError later.
    if feature_method not in ("o", "l"):
        raise ValueError(
            "unsupported feature_method {!r}; expected 'o' or 'l'".format(
                feature_method
            )
        )

    if feature_method == "o":
        of.get_data(
            config,
            config.audio_path,
            config.predict_feature_path_opensmile,
            train=False,
        )
        test_feature = of.load_feature(
            config, config.predict_feature_path_opensmile, train=False
        )
    else:
        test_feature = lf.get_data(
            config,
            config.audio_path,
            config.predict_feature_path_librosa,
            train=False,
        )

    # Add a leading batch dimension of size 1.
    test_feature = test_feature.reshape(
        1, test_feature.shape[0], test_feature.shape[1]
    )
    test_feature = flow.tensor(test_feature, dtype=flow.float32, device="cuda")

    n_feats = test_feature.shape[2]
    n_classes = len(config.class_labels)
    if config.model == "lstm":
        model = lstm_ser(n_feats, config.rnn_size, n_classes, 1)
    else:
        model = cnn1d_ser(1, config.n_kernels, n_feats, config.hidden_size, n_classes)

    model.to("cuda")
    model_path = os.path.join(config.checkpoint_path, config.checkpoint_name)
    model.load_state_dict(flow.load(model_path))
    # Inference only: switch the module to evaluation mode.
    model.eval()

    # A bare ``flow.no_grad()`` call has no effect; it must be entered as a
    # context manager to disable gradient tracking.
    with flow.no_grad():
        logits = model(test_feature)
        result_prob = flow.softmax(logits, dim=1)

    result = np.argmax(logits.numpy())
    print("Recognition:", config.class_labels[int(result)])
    utils.radar(result_prob.numpy().squeeze(), config.class_labels)
def xavier_normal_(tensor, gain=1.0, *, data_format="NCHW"):
    r"""
    The interface is consistent with PyTorch.
    The documentation is referenced from: https://pytorch.org/docs/stable/nn.init.html.

    Fills the input `Tensor` in place using the normal-distribution variant of
    the method from `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010), also known as Glorot
    initialization. The values are sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan_in} + \text{fan_out}}}

    The call runs with gradient tracking disabled and delegates to
    ``tensor.xavier_normal_``.

    Args:
        tensor: an n-dimensional `flow.Tensor`
        gain: an optional scaling factor
        data_format: keyword-only; forwarded unchanged to
            ``tensor.xavier_normal_``

    Returns:
        Whatever ``tensor.xavier_normal_`` returns.

    Examples:
        >>> w = flow.empty(3, 5)
        >>> nn.init.xavier_normal_(w)
    """
    with flow.no_grad():
        initialized = tensor.xavier_normal_(gain, data_format=data_format)
    return initialized
def test_tensor_autograd_related_methods(test_case):
    """Check ``requires_grad``/``is_leaf`` flags, ``no_grad`` and ``backward``."""
    shape = (2, 3, 4, 5)

    def assert_grad_is_ones(tensor):
        # The gradient of a plain sum w.r.t. each operand is all ones.
        test_case.assertTrue(
            np.allclose(tensor.grad.numpy(), np.ones(shape), atol=1e-4, rtol=1e-4)
        )

    x = flow.Tensor(*shape)
    x.fill_(1.0)
    y = flow.Tensor(*shape)
    y.requires_grad = True
    y.fill_(2.0)
    z = x + y

    # Leaf inputs keep their own flags; the sum inherits requires_grad from y.
    test_case.assertFalse(x.requires_grad)
    test_case.assertTrue(x.is_leaf)
    test_case.assertTrue(y.requires_grad)
    test_case.assertTrue(y.is_leaf)
    test_case.assertTrue(z.requires_grad)
    test_case.assertFalse(z.is_leaf)

    # Results computed under no_grad are leaves that do not require grad.
    with flow.no_grad():
        m = x + y
    test_case.assertTrue(m.is_leaf)
    test_case.assertFalse(m.requires_grad)
    m.requires_grad = True

    v = flow.Tensor(*shape)
    v.requires_grad = True
    z.retain_grad()
    w = v + z

    upstream = flow.Tensor(*shape)
    upstream.fill_(1.0)
    w.backward(gradient=upstream, retain_graph=True)

    for tensor in (v, y, z):
        assert_grad_is_ones(tensor)
    test_case.assertIsNone(x.grad)

    # A second backward pass is run on the retained graph.
    w.backward(gradient=upstream, retain_graph=True)
def main(args):
    """Run the pretrained UNet over the test split and save visualizations.

    Args:
        args: parsed options. ``save_path`` (output directory) and
            ``pretrained_path`` (checkpoint) are read here; ``args`` is also
            forwarded to ``get_datadir_path``.
    """
    # makedirs(exist_ok=True) creates missing parent directories and avoids
    # the check-then-create race of ``exists`` + ``mkdir``.
    os.makedirs(args.save_path, exist_ok=True)

    net = UNet(n_channels=3, n_classes=1)
    net.load_state_dict(flow.load(args.pretrained_path))
    net.to("cuda")
    # Inference only: switch the network to evaluation mode.
    net.eval()

    x_test_dir, y_test_dir = get_datadir_path(args, split="test")
    test_dataset = Dataset(
        x_test_dir, y_test_dir, augmentation=get_test_augmentation(),
    )

    print("Begin Testing...")
    for i, (image, mask) in enumerate(tqdm(test_dataset)):
        with flow.no_grad():
            # Scale to [0, 1], move the last axis first, and add a batch axis.
            scaled = (image / 255.0).astype(np.float32)
            batch = flow.tensor(scaled, dtype=flow.float32)
            batch = batch.permute(2, 0, 1).unsqueeze(0).to("cuda")
            # Threshold the network output into a boolean mask.
            pred = net(batch).numpy() > 0.5

        save_picture_name = os.path.join(args.save_path, "test_image_" + str(i))
        visualize(
            save_picture_name,
            image=image,
            GT=mask[0, :, :],
            Pred=pred[0, 0, :, :],
        )
def build(self):
    """Load one validation batch and return ``(predictions, label)``.

    The whole step runs with gradient tracking disabled. The image is moved
    to ``args.device``, passed through ``self.alexnet``, and softmax is
    applied to the logits.
    """
    with flow.no_grad():
        images, labels = self.val_data_loader()
        logits = self.alexnet(images.to(args.device))
        predictions = logits.softmax()
    return predictions, labels
def sample_sequence(
    model,
    length,
    context,
    num_samples=1,
    temperature=1,
    top_k=1,
    top_p=0.0,
    device="cuda",
):
    """Extend ``context`` by ``length`` tokens, one token per step.

    At each step the logits of the last position are divided by
    ``temperature``, filtered by ``top_k_top_p_filtering``, and the token
    with the highest resulting probability is appended.

    Args:
        model: callable accepting ``(tokens, past_key_values=..., use_cache=True)``
            whose first two outputs are the logits and the new cache.
        length: number of tokens to generate.
        context: token ids of the prompt (anything ``flow.tensor`` accepts).
        num_samples: number of identical copies of the context to extend.
        temperature: divisor applied to the logits before filtering.
        top_k: forwarded to ``top_k_top_p_filtering``.
        top_p: forwarded to ``top_k_top_p_filtering``.
        device: device on which the context tensor is created.

    Returns:
        The tensor holding the context followed by the generated tokens,
        one row per sample.
    """
    context = flow.tensor(context, dtype=flow.long, device=device)
    generated = context.unsqueeze(0).repeat(num_samples, 1)
    past_key_values = None

    with flow.no_grad():
        for _ in trange(length):
            outputs = model(
                generated, past_key_values=past_key_values, use_cache=True
            )
            logits, past_key_values = outputs[:2]
            next_token_logits = logits[:, -1, :] / temperature
            filtered_logits = top_k_top_p_filtering(
                next_token_logits, top_k=top_k, top_p=top_p
            )
            next_token = filtered_logits.softmax(-1).argmax(-1)
            # next_token has one entry per sample. Adding the new axis last
            # gives a column that concatenates along dim=1 for any
            # num_samples; adding it first only worked for num_samples == 1.
            generated = flow.cat((generated, next_token.unsqueeze(-1)), dim=1)

    return generated
def step(self, closure: Callable = None):
    """Performs a single optimization step.

    Every parameter that has a gradient is updated through ``self._op`` using
    its ``exp_avg`` / ``exp_avg_sq`` state tensors, then the global step
    counter in ``self._state`` is incremented.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is given.
    """
    with flow.no_grad():
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            hyper_params = {
                "learning_rate_val": group["lr"],
                "scale": group["scale"],
                "weight_decay": group["weight_decay"],
                "beta1": group["betas"][0],
                "beta2": group["betas"][1],
                "epsilon": group["eps"],
            }
            for param in group.parameters:
                # Parameters without a gradient are skipped.
                if param.grad is None:
                    continue
                exp_avg = self._state[param]["exp_avg"]
                exp_avg_sq = self._state[param]["exp_avg_sq"]
                self._op(param, param.grad, exp_avg, exp_avg_sq, **hyper_params)

        next_step = self._state["step"] + 1
        self._state["step"] = next_step
    return loss
def step(self, closure: Callable = None):
    """Performs a single optimization step.

    Parameters with a gradient are updated with ``self._sgd`` when the
    group's momentum is ``0.0`` and with ``self._momentum_sgd`` otherwise.
    The global step counter in ``self._state`` is incremented afterwards.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is given.
    """
    with flow.no_grad():
        loss = closure() if closure is not None else None

        for group in self._param_groups:
            options = group.options
            lr = options["lr"]
            for param in group.parameters:
                # Parameters without a gradient are skipped.
                if param.grad is None:
                    continue

                momentum = options["momentum"]
                scale = options["scale"]
                if momentum == 0.0:
                    self._sgd(param, param.grad, learning_rate_val=lr, scale=scale)
                    continue

                self._momentum_sgd(
                    param,
                    param.grad,
                    self._state[param]["momentum_buf"],
                    learning_rate_val=lr,
                    scale=scale,
                    beta=momentum,
                )

        next_step = self._state["step"] + 1
        self._state["step"] = next_step
    return loss
def step(self, closure: Callable = None):
    """Performs a single optimization step.

    Each parameter with a gradient is updated through
    ``flow._C.dispatch_adagrad_update`` together with its ``sum`` state
    tensor, then the global step counter in ``self._state`` is incremented.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is given.
    """
    with flow.no_grad():
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            update_kwargs = {
                "learning_rate": group["lr"],
                "l2": group["weight_decay"],
                "epsilon": group["eps"],
                "lr_decay": group["lr_decay"],
                # The update receives the index of the step being taken.
                "train_step_val": self._state["step"] + 1,
            }
            for param in group.parameters:
                # Parameters without a gradient are skipped.
                if param.grad is None:
                    continue
                accumulated = self._state[param]["sum"]
                flow._C.dispatch_adagrad_update(
                    self._op, (param, param.grad, accumulated), **update_kwargs
                )

        next_step = self._state["step"] + 1
        self._state["step"] = next_step
    return loss
def pre_forward_hook(module, input):
    """Broadcast every buffer of ``module`` in place, without gradient tracking.

    Args:
        module: module whose ``buffers()`` are broadcast.
        input: unused here.
    """
    with flow.no_grad():
        module_buffers = list(module.buffers())
        if module_buffers:
            flow._C.stream_touch(module_buffers)  # for reusing soft syncs
        for buf in module_buffers:
            flow._C.broadcast(buf, inplace=True)
def recognize(args):
    """Decode every utterance listed in ``args.recog_json`` and save the results.

    A Transformer (encoder + decoder) is built from the hyper-parameters in
    ``args``, weights are loaded from ``args.model_path``, each utterance is
    decoded with ``model.recognize``, and the annotated results are written
    to ``args.result_label`` as UTF-8 encoded JSON.

    Args:
        args: parsed options. Besides the model hyper-parameters, ``dict``,
            ``model_path``, ``LFR_m``, ``LFR_n``, ``recog_json`` and
            ``result_label`` are read here; ``args`` is also forwarded to
            ``model.recognize``.
    """
    # The dictionary is parsed once and reused for the model and the check.
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)

    encoder = Encoder(
        args.d_input * args.LFR_m,
        args.n_layers_enc,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        pe_maxlen=args.pe_maxlen,
    )
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen,
    )
    model = Transformer(encoder, decoder)
    model.load_state_dict(flow.load(args.model_path))
    device = flow.device("cuda")
    model.eval()
    model.to(device)
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, "rb") as f:
        js = json.load(f)["utts"]

    # decode each utterance
    new_js = {}
    num_utts = len(js)
    with flow.no_grad():
        for idx, (name, utt) in enumerate(js.items(), 1):
            print("(%d/%d) decoding %s" % (idx, num_utts, name), flush=True)
            feats = kaldi_io.read_mat(utt["input"][0]["feat"])
            feats = build_LFR_features(feats, args.LFR_m, args.LFR_n)
            feats = flow.tensor(feats).to(dtype=flow.float32)
            feats_length = flow.tensor([feats.size(0)], dtype=flow.int64)
            nbest_hyps = model.recognize(
                feats.to(device), feats_length.to(device), char_list, args
            )
            new_js[name] = add_results_to_json(utt, nbest_hyps, char_list)

    with open(args.result_label, "wb") as f:
        f.write(
            json.dumps({"utts": new_js}, indent=4, sort_keys=True).encode("utf_8")
        )
def main(args):
    """Run the pretrained generator on one image pair and save the result.

    Prints the time spent loading the weights and the time spent on
    inference, then passes the output, input and target to ``save_images``.

    Args:
        args: parsed options; ``image_path`` and ``model_path`` are read here.
    """
    test_x, test_y = load_image(args.image_path)
    test_inp = to_tensor(test_x.astype(np.float32))
    test_target = to_tensor(test_y.astype(np.float32))

    generator = Generator().to("cuda")

    load_started = time.time()
    generator.load_state_dict(flow.load(args.model_path))
    load_finished = time.time()
    print("load params time : {}".format(load_finished - load_started))

    # The inference timer also covers switching to eval mode.
    infer_started = time.time()
    generator.eval()
    with flow.no_grad():
        gout = to_numpy(generator(test_inp), False)
    infer_finished = time.time()
    print("infer time : {}".format(infer_finished - infer_started))

    # save images
    save_images(
        gout,
        test_inp.numpy(),
        test_target.numpy(),
        path="./testimage.png",
        plot_size=1,
    )
def validate(val_list, model, criterion):
    """Compute the mean absolute error of the summed model output.

    For each ``(img, target)`` item the absolute difference between the sum
    of the model output and the sum of the target is accumulated, then
    divided by ``len(test_loader)``.

    Args:
        val_list: forwarded to ``listDataset``.
        model: network to evaluate; ``model.seen`` is passed to the dataset.
        criterion: unused here.

    Returns:
        The mean absolute error.
    """
    print("begin test")
    normalize = ST.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    # NOTE(review): shuffle=True / train=True look unusual for validation;
    # kept as is, confirm against listDataset.
    test_loader = listDataset(
        val_list,
        shuffle=True,
        transform=ST.Compose([ST.ToNumpy(), normalize]),
        train=True,
        seen=model.seen,
        batch_size=args.batch_size,
        num_workers=args.workers,
    )

    model.eval()
    mae = 0
    for img, target in test_loader:
        img = flow.Tensor(img, dtype=flow.float32, device="cuda")
        with flow.no_grad():
            output = model(img).to("cuda")
        predicted_sum = output.data.sum().numpy()
        mae += abs(predicted_sum - target.sum())

    mae = mae / len(test_loader)
    print(" * MAE {mae:.3f} ".format(mae=mae))
    return mae
def test(data_set, backbone, batch_size, nfolds=10, is_consistent=False):
    """Embed a verification set with ``backbone`` and evaluate it.

    Args:
        data_set: pair ``(data_list, issame_list)``. The embeddings of the
            first two entries of ``data_list`` are summed for evaluation.
        backbone: network producing embeddings for a batch of images.
        batch_size: number of rows fed to the backbone per call.
        nfolds: forwarded to ``evaluate`` as ``nrof_folds``.
        is_consistent: when true, each batch is converted with
            ``to_consistent`` (CPU placement, split along axis 0) before the
            forward pass and the output is read back with ``to_local``.

    Returns:
        ``(acc1, std1, acc2, std2, xnorm, embeddings_list)`` where ``acc1``
        and ``std1`` are always ``0.0``, ``acc2``/``std2`` are the mean and
        standard deviation of the fold accuracies, and ``xnorm`` is the mean
        L2 norm of all embedding rows.
    """
    logging.info("testing verification..")
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    if is_consistent:
        placement = flow.env.all_device_placement("cpu")
        sbp = flow.sbp.split(0)

    for data in data_list:
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            # The last batch is aligned to the end of the data so it stays
            # full-sized. Clamp the start so data with fewer rows than
            # batch_size does not produce a negative slice index.
            img = data[max(bb - batch_size, 0):bb]
            time0 = datetime.datetime.now()
            with flow.no_grad():
                if is_consistent:
                    img = img.to_consistent(placement=placement, sbp=sbp)
                net_out = backbone(img.to("cuda"))
            if is_consistent:
                _embeddings = net_out.to_local().numpy()
            else:
                _embeddings = net_out.detach().numpy()
            time_consumed += (datetime.datetime.now() - time0).total_seconds()

            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            # Only the trailing ``count`` rows are new for this batch.
            embeddings[ba:bb, :] = _embeddings[-count:, :]
            ba = bb
        embeddings_list.append(embeddings)

    # Mean L2 norm over every embedding row, computed row-wise in NumPy.
    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        _xnorm += np.linalg.norm(embed, axis=1).sum()
        _xnorm_cnt += embed.shape[0]
    _xnorm /= _xnorm_cnt

    acc1 = 0.0
    std1 = 0.0
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    logging.info(embeddings.shape)
    logging.info("infer time:%f" % time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(
        embeddings, issame_list, nrof_folds=nfolds
    )
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
def test(epoch):
    """Evaluate on ``testloader`` and update the global ``best_acc``.

    Args:
        epoch: unused here.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    num_batches = len(testloader)

    with flow.no_grad():
        for batch_idx, (torch_inputs, torch_targets) in enumerate(testloader):
            # Batches are converted to flow tensors through NumPy.
            inputs = flow.tensor(torch_inputs.numpy()).to(device)
            targets = flow.tensor(torch_targets.numpy()).to(device)

            loss, outputs = resnet18_eval_graph(inputs, targets)
            test_loss += loss.item()

            predicted = flow.argmax(outputs, 1).to(flow.int64)
            total += targets.size(0)
            correct += predicted.eq(targets).to(flow.int32).sum().item()

            status = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
                test_loss / (batch_idx + 1),
                100. * correct / total,
                correct,
                total,
            )
            progress_bar(batch_idx, num_batches, status)

    # Only the best accuracy is recorded; no checkpoint is written here.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        best_acc = acc
def forward(self, logits, target, mask=None):
    """LabelSmoothing Function with Mask

    The smoothed target distribution puts ``1 - self.smoothing`` on the
    target index and ``self.smoothing / (self.size - 1)`` elsewhere. The loss
    is the KL divergence between it and ``log_softmax(logits)``, zeroed at
    masked positions and divided by either the number of unmasked positions
    (``self.normalize_length``) or the number of flattened rows.

    Args:
        logits ([tensor]): logits with shape [batch, length, vocab_size]
        target ([tensor]): target with shape [batch, length]
        mask ([tensor], optional): mask tensor (bool) with shape [batch, length]

    Returns:
        The normalized loss.
    """
    assert logits.dim() == 3 and logits.size(-1) == self.size

    # Padding positions are always masked; an explicit mask is OR-ed in.
    pad_mask = target == self.padding_idx
    if mask is None:
        mask = pad_mask
    else:
        mask = (pad_mask.int() + mask.int()) > 0

    flat_logits = logits.reshape(-1, self.size)

    # The smoothed target is built without gradient tracking.
    with flow.no_grad():
        confidence = flat_logits.clone()
        confidence.fill_(self.smoothing / (self.size - 1))
        target_index = target.reshape(-1).unsqueeze(1)
        confidence = flow.scatter(confidence, 1, target_index, 1 - self.smoothing)

    log_probs = nn.LogSoftmax(dim=-1)(flat_logits)
    kl_div = nn.KLDivLoss(reduction="none", log_target=False)
    loss = flow.sum(kl_div(log_probs, confidence), dim=-1)

    total = flow.sum(mask == 0)
    if self.normalize_length:
        denom = total
    else:
        denom = flat_logits.size(0)

    loss = flow.masked_fill(loss, mask.reshape(-1), 0.0)
    return flow.sum(loss) / denom
def step(self, closure: Callable = None):
    """Performs a single optimization step.

    Parameters with a gradient are updated with ``self._centered_rmsprop``
    when the group's ``centered`` option is truthy and with ``self._rmsprop``
    otherwise. The global step counter in ``self._state`` is incremented
    afterwards.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is given.
    """
    with flow.no_grad():
        loss = closure() if closure is not None else None

        for group in self._param_groups:
            options = group.options
            hyper_params = {
                "learning_rate_val": options["lr"],
                "scale": options["scale"],
                "epsilon": options["eps"],
                "decay_rate": options["alpha"],
                "weight_decay": options["weight_decay"],
            }
            for param in group.parameters:
                # Parameters without a gradient are skipped.
                if param.grad is None:
                    continue
                square_avg = self._state[param]["square_avg"]
                if not options["centered"]:
                    self._rmsprop(param, param.grad, square_avg, **hyper_params)
                    continue
                grad_avg = self._state[param]["grad_avg"]
                self._centered_rmsprop(
                    param, param.grad, square_avg, grad_avg, **hyper_params
                )

        next_step = self._state["step"] + 1
        self._state["step"] = next_step
    return loss
def evaluate(self, data_loader, epoch=0):
    """Run the model over ``data_loader`` and report the average loss.

    Each batch is used both as input and as labels. The running and average
    loss are shown on the progress bar and the average is printed at the end.

    Args:
        data_loader: sized iterable of batches.
        epoch: epoch number used only for reporting.
    """
    self.model.eval()
    losses = AverageMeter("loss")
    data_iter = tqdm.tqdm(data_loader, desc="Evaluate: ", total=len(data_loader))

    for step, batch in enumerate(data_iter):
        with flow.no_grad():
            inputs = batch.cuda()
            labels = batch.cuda()
            outputs = self.model(inputs, labels=labels)

        # The first model output is the loss.
        losses.update(outputs[0].numpy().item())

        postfix = {
            "epoch": epoch,
            "step": step,
            "avg_loss": losses.avg,
            "loss": losses.val,
        }
        data_iter.set_postfix(postfix)

    print("Evaluating:%0d, avg_loss:%.4f" % (epoch, losses.avg))
def build(self):
    """Load one validation batch and return ``(predictions, label)``.

    The image is moved to ``"cuda"``. The forward pass through
    ``self.model`` and the softmax run with gradient tracking disabled.
    """
    images, labels = self.val_data_loader()
    images = images.to("cuda")
    with flow.no_grad():
        predictions = self.model(images).softmax()
    return predictions, labels
def step(self, closure: Callable = None):
    """Performs a single optimization step.

    Parameters with a gradient are updated through
    ``flow._C.dispatch_sgd_update`` when the group's momentum is ``0.0`` and
    through ``flow._C.dispatch_momentum_update`` otherwise. The momentum
    buffer is created as zeros on first use. The global step counter in
    ``self._state`` is incremented afterwards.

    Args:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is given.
    """
    with flow.no_grad():
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            lr = group["lr"]
            l2 = group["weight_decay"]
            for param in group.parameters:
                # Parameters without a gradient are skipped.
                if param.grad is None:
                    continue

                momentum = group["momentum"]
                if momentum == 0.0:
                    flow._C.dispatch_sgd_update(
                        self._sgd, (param, param.grad), learning_rate=lr, l2=l2
                    )
                    continue

                # Lazily allocate the momentum buffer for this parameter.
                if "momentum_buf" not in self._state[param]:
                    self._state[param]["momentum_buf"] = flow.zeros_like(param)
                momentum_buf = self._state[param]["momentum_buf"]
                flow._C.dispatch_momentum_update(
                    self._momentum_sgd,
                    (param, param.grad, momentum_buf),
                    learning_rate=lr,
                    l2=l2,
                    beta=momentum,
                )

        next_step = self._state["step"] + 1
        self._state["step"] = next_step
    return loss
def inference(self):
    """Load one validation batch and return ``(pred, label)``.

    Both tensors are moved to ``"cuda"``. The forward pass through
    ``self.model`` and the softmax run with gradient tracking disabled.
    """
    images, labels = self.val_data_loader()
    images = images.to("cuda")
    labels = labels.to("cuda")
    with flow.no_grad():
        pred = self.model(images).softmax()
    return pred, labels
def test(self):
    """Translate speech using StarGAN.

    For every target speaker in ``self.trg_speaker`` and every utterance of
    the source speaker, the padded spectral features are converted by
    ``self.G`` in windows of ``FRAMES`` frames, de-normalized, trimmed to the
    original length, decoded and synthesized. The result is written to
    ``self.result_dir``.
    """
    # Load the trained generator.
    self.restore_model(self.pretrain_models)
    norm = Normalizer()

    # Set data loader.
    d, speaker = TestSet(self.test_dir).test_data(self.src_speaker)

    for target in self.trg_speaker:
        print(target)
        assert target in speakers
        label_t = np.asarray([self.spk_enc.transform([target])[0]])

        with flow.no_grad():
            # The speaker label is the same for every segment of this target,
            # so it is built and moved to the device once.
            target_label = flow.Tensor(label_t).to(self.device)

            for filename, content in d.items():
                f0 = content["f0"]
                ap = content["ap"]
                sp_norm_pad = self.pad_coded_sp(content["coded_sp_norm"])

                convert_result = []
                last_start = sp_norm_pad.shape[1] - FRAMES
                for start_idx in range(0, last_start + 1, FRAMES):
                    one_seg = sp_norm_pad[:, start_idx:start_idx + FRAMES]
                    one_seg = flow.Tensor(one_seg).to(self.device)
                    # Add two leading axes of size 1 in front of the segment.
                    one_seg = one_seg.view(1, 1, one_seg.size(0), one_seg.size(1))
                    converted = self.G(one_seg, target_label).detach().cpu().numpy()
                    converted = np.squeeze(converted)
                    converted = norm.backward_process(converted, target)
                    convert_result.append(converted)

                convert_con = np.concatenate(convert_result, axis=1)
                # Drop the frames that were added by padding.
                convert_con = convert_con[:, 0:content["coded_sp_norm"].shape[1]]
                contigu = np.ascontiguousarray(convert_con.T, dtype=np.float64)
                decoded_sp = decode_spectral_envelope(
                    contigu, SAMPLE_RATE, fft_size=FFTSIZE
                )
                f0_converted = norm.pitch_conversion(f0, speaker, target)
                wav = synthesize(f0_converted, decoded_sp, ap, SAMPLE_RATE)

                name = f"{speaker}-{target}_{filename}"
                path = os.path.join(self.result_dir, name)
                print(f"[save]:{path}")
                sf.write(path, wav, SAMPLE_RATE)
def forward(self, indices):
    """Look up the rows of ``self.weight`` selected by ``indices``.

    When ``self.max_norm`` is set, ``flow._C.embedding_renorm_`` is first
    applied to the weight without gradient tracking. A plain gather is used
    when there is neither a padding index nor gradient scaling by frequency;
    otherwise ``flow._C.embedding`` is used.

    Args:
        indices: tensor of indices into the weight.

    Returns:
        The looked-up embeddings.
    """
    if self.max_norm is not None:
        with flow.no_grad():
            flow._C.embedding_renorm_(
                self.weight, indices, self.max_norm, self.norm_type
            )

    plain_lookup = self.padding_idx is None and not self.scale_grad_by_freq
    if plain_lookup:
        return flow._C.gather(self.weight, indices, axis=0)
    return flow._C.embedding(
        self.weight, indices, self.padding_idx, self.scale_grad_by_freq
    )
def test_no_grad(test_case):
    """Check ``flow.no_grad`` both as a context manager and as a decorator."""
    # Context-manager form: disabled inside, restored afterwards.
    with flow.no_grad():
        test_case.assertFalse(flow.is_grad_enabled())
    test_case.assertTrue(flow.is_grad_enabled())

    # Decorator form: disabled during the call, restored afterwards.
    @flow.no_grad()
    def run_without_grad():
        test_case.assertFalse(flow.is_grad_enabled())

    run_without_grad()
    test_case.assertTrue(flow.is_grad_enabled())
def test_consistent_tensor_autograd_related_methods(test_case):
    """Check autograd flags and ``backward`` for tensors made by ``to_consistent``."""
    placement = flow.placement("cuda", {0: 0})
    sbp = flow.sbp.split(0)
    shape = (2, 3, 4, 5)

    def to_consistent(local_tensor):
        return local_tensor.to_consistent(placement=placement, sbp=sbp)

    def assert_all_ones(array):
        test_case.assertTrue(
            np.allclose(array, np.ones(shape), atol=1e-4, rtol=1e-4)
        )

    l_x = flow.Tensor(*shape)
    test_case.assertFalse(l_x.requires_grad)
    test_case.assertTrue(l_x.is_leaf)

    l_y = flow.Tensor(*shape)
    l_y.requires_grad = True
    test_case.assertTrue(l_y.requires_grad)
    test_case.assertTrue(l_y.is_leaf)

    # Converting a tensor that requires grad yields a non-leaf tensor.
    x = to_consistent(l_x)
    test_case.assertTrue(x.is_leaf)
    y = to_consistent(l_y)
    test_case.assertFalse(y.is_leaf)

    z = x + y
    test_case.assertTrue(z.requires_grad)
    test_case.assertFalse(z.is_leaf)

    # Results computed under no_grad are leaves that do not require grad.
    with flow.no_grad():
        m = x + y
    test_case.assertTrue(m.is_leaf)
    test_case.assertFalse(m.requires_grad)

    l_v = flow.Tensor(*shape)
    l_v.requires_grad = True
    v = to_consistent(l_v)

    z.retain_grad()
    w = v + z

    grad = to_consistent(flow.ones(*shape))
    w.backward(gradient=grad)

    # Gradients reach the local source tensors.
    assert_all_ones(l_v.grad.numpy())
    assert_all_ones(l_y.grad.numpy())
    assert_all_ones(
        z.grad.to_consistent(sbp=flow.sbp.broadcast).to_local().numpy()
    )
    test_case.assertIsNone(l_x.grad)
def _load_from_state_dict(
    self,
    state_dict,
    prefix,
    local_metadata,
    strict,
    missing_keys,
    unexpected_keys,
    error_msgs,
):
    """Copy this module's own parameters and persistent buffers from ``state_dict``.

    Only entries belonging directly to this module are handled. Problems are
    reported by appending to the list arguments instead of raising.

    Args:
        state_dict: mapping from fully qualified names to tensors.
        prefix: name prefix of this module inside ``state_dict``.
        local_metadata: forwarded to the pre-hooks only.
        strict: when true, missing and unexpected keys are recorded.
        missing_keys: list extended with expected keys that are absent.
        unexpected_keys: list extended with keys under ``prefix`` that match
            neither a local tensor nor a submodule.
        error_msgs: list extended with shape-mismatch and copy-failure messages.
    """
    for hook in self._load_state_dict_pre_hooks.values():
        hook(
            state_dict,
            prefix,
            local_metadata,
            strict,
            missing_keys,
            unexpected_keys,
            error_msgs,
        )

    # Collect parameters first, then persistent buffers, skipping None entries.
    local_state = {}
    for name, tensor in self._parameters.items():
        if tensor is not None:
            local_state[name] = tensor
    for name, tensor in self._buffers.items():
        if name in self._non_persistent_buffers_set:
            continue
        if tensor is not None:
            local_state[name] = tensor

    for name, param in local_state.items():
        key = prefix + name
        if key not in state_dict:
            if strict:
                missing_keys.append(key)
            continue

        input_param = state_dict[key]
        if tuple(input_param.shape) != tuple(param.shape):
            error_msgs.append(
                "size mismatch for {}: copying a param with shape {} from "
                "checkpoint, the shape in current model is {}.".format(
                    key, input_param.shape, param.shape
                )
            )
            continue

        try:
            with flow.no_grad():
                param.copy_(input_param)
        except Exception as ex:
            # Copy failures are reported to the caller rather than raised.
            error_msgs.append(
                'While copying the parameter named "{}", whose dimensions in '
                "the model are {} and whose dimensions in the checkpoint are "
                "{}, an exception occurred : {}.".format(
                    key, param.shape, input_param.shape, ex.args
                )
            )

    if not strict:
        return

    for key in state_dict:
        if not key.startswith(prefix):
            continue
        # Only the first name component after the prefix is checked.
        input_name = key[len(prefix):].split(".", 1)[0]
        if input_name not in self._modules and input_name not in local_state:
            unexpected_keys.append(key)
def build(self, input_ids, input_masks, segment_ids):
    """Return the second output of ``self.bert`` for the inputs.

    All inputs are moved to ``args.device`` and the forward pass runs with
    gradient tracking disabled.

    Args:
        input_ids: first input tensor of the model.
        input_masks: second input tensor of the model.
        segment_ids: third input tensor of the model.

    Returns:
        The second element of the model output (sequence relationship scores).
    """
    target_device = args.device
    input_ids = input_ids.to(device=target_device)
    input_masks = input_masks.to(device=target_device)
    segment_ids = segment_ids.to(device=target_device)

    with flow.no_grad():
        # 1. forward the next_sentence_prediction and masked_lm model
        outputs = self.bert(input_ids, input_masks, segment_ids)
        _, seq_relationship_scores = outputs

    return seq_relationship_scores