def __init__(self,
             num_units,
             n_input,
             alpha,
             sigma_rec=0,
             activation='softplus',
             w_rec_init='diag',
             rng=None,
             reuse=None,
             name=None):
    """Configure the cell and build its initial input/recurrent weights.

    Args:
        num_units: number of hidden units.
        n_input: number of input features.
        alpha: stored as ``self._alpha`` and used to scale the recurrent
            noise as ``sqrt(2 / alpha) * sigma_rec``.
        sigma_rec: recurrent noise level before scaling.
        activation: one of 'softplus', 'tanh', 'relu', 'power', 'retanh'.
        w_rec_init: 'diag', 'randortho' or 'randgauss'.
        rng: optional ``np.random.RandomState``; a fresh one is created
            when None.
        reuse: forwarded to the parent constructor as ``_reuse``.
        name: forwarded to the parent constructor.

    Raises:
        ValueError: if ``activation`` or ``w_rec_init`` is not recognised.
    """
    super(LeakyRNNCell, self).__init__(_reuse=reuse, name=name)

    self._num_units = num_units
    self._w_rec_init = w_rec_init
    self._reuse = reuse

    # Each activation comes with its own scale for the initial weights.
    if activation == 'softplus':
        self._activation = lambda x: F.softplus(x)
        self._w_in_start = 1.0
        self._w_rec_start = 0.5
    elif activation == 'tanh':
        self._activation = lambda x: F.tanh(x)
        self._w_in_start = 1.0
        self._w_rec_start = 1.0
    elif activation == 'relu':
        self._activation = lambda x: F.relu(x)
        self._w_in_start = 1.0
        self._w_rec_start = 0.5
    elif activation == 'power':
        self._activation = lambda x: torch.square(F.relu(x))
        self._w_in_start = 1.0
        self._w_rec_start = 0.01
    elif activation == 'retanh':
        self._activation = lambda x: F.tanh(F.relu(x))
        self._w_in_start = 1.0
        self._w_rec_start = 0.5
    else:
        raise ValueError('Unknown activation')

    self._alpha = alpha
    self._sigma = np.sqrt(2 / alpha) * sigma_rec
    self.rng = np.random.RandomState() if rng is None else rng

    # Generate initialization matrix: input rows stacked on recurrent rows.
    n_hidden = self._num_units
    w_in0 = (self.rng.randn(n_input, n_hidden) / np.sqrt(n_input) *
             self._w_in_start)

    if self._w_rec_init == 'diag':
        w_rec0 = self._w_rec_start * np.eye(n_hidden)
    elif self._w_rec_init == 'randortho':
        w_rec0 = self._w_rec_start * tools.gen_ortho_matrix(n_hidden,
                                                            rng=self.rng)
    elif self._w_rec_init == 'randgauss':
        w_rec0 = (self._w_rec_start * self.rng.randn(n_hidden, n_hidden) /
                  np.sqrt(n_hidden))
    else:
        # Previously this fell through and failed with a NameError on w_rec0.
        raise ValueError('Unknown w_rec_init')

    matrix0 = np.concatenate((w_in0, w_rec0), axis=0)
    self.w_rnn0 = matrix0

    # The previous statement read ``self._initializer`` before it was ever
    # assigned and passed an unsupported ``dtype`` keyword to
    # ``nn.init.constant_``, so construction always failed.
    # NOTE(review): storing the float32 initial matrix here assumes later
    # code copies ``self._initializer`` into the weight parameter - confirm.
    self._initializer = torch.tensor(matrix0, dtype=torch.float32)
def KL(self, mean, std):
    """Return the summed, batch-averaged KL-style penalty.

    The element-wise term ``mean**2 + exp(std) - std - 1`` is averaged over
    dimension 0, halved, and the remaining entries are summed.
    NOTE(review): ``std`` enters through ``exp(std)``, so it appears to act
    as a log-variance rather than a standard deviation - confirm with callers.
    """
    elementwise = torch.square(mean) + torch.exp(std) - std - 1.0
    averaged = 0.5 * elementwise.mean(dim=0)
    return averaged.sum()
def MSE(x, y):
    """Return the mean of the squared absolute difference between x and y."""
    magnitude = (x - y).abs()
    return torch.square(magnitude).mean()
def test_tlutlinear():
    """Compare TLUTLinear against torch.nn.Linear on random data.

    Builds a reference ``torch.nn.Linear`` and a ``TLUTLinear`` sharing its
    weight and bias, runs both forward and backward, and prints statistics
    of the output and weight-gradient differences. Histograms are shown only
    when ``plot_en`` is set to True.
    """
    plot_en = False

    hwcfg = {
        "temporal": "w",
        "widtht": 4,
        "formati": "fxp",
        "widthi": 12,
        "quantilei": 1,
        "formatw": "fxp",
        "widthw": 12,
        "quantilew": 1,
        "cycle": None,
        "rounding": "round",
        "signmag": True
    }

    # The input format decides the dtype when it is a float format;
    # otherwise the weight format does, with float32 as the fallback.
    float_formats = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    dtype = float_formats.get(hwcfg["formati"])
    if dtype is None:
        dtype = float_formats.get(hwcfg["formatw"], torch.float32)

    def to_fixed_point(tensor, width):
        # Shift up, truncate and round, then shift back down.
        return torch.trunc(tensor << width).round() >> width

    batch = 16
    in_feature = 256
    out_feature = 256
    bias = True
    input_int_bit = 3

    x = ((torch.rand(batch, in_feature) - 0.5) * 2).to(device).type(dtype)
    if hwcfg["formati"] == "fxp":
        x = to_fixed_point(x, hwcfg["widthi"])
    x = x << input_int_bit

    fc = torch.nn.Linear(in_feature, out_feature, bias=bias,
                         dtype=dtype).to(device)
    if hwcfg["formatw"] == "fxp":
        fc.weight.data = to_fixed_point(fc.weight, hwcfg["widthw"])
        if bias:
            fc.bias.data = to_fixed_point(fc.bias, hwcfg["widthw"])

    fc_o = fc(x)

    ufc = TLUTLinear(in_feature, out_feature, bias=bias,
                     weight_ext=fc.weight, bias_ext=fc.bias,
                     hwcfg=hwcfg).to(device)
    ufc_o = ufc(x)
    print(ufc.hwcfg)

    fc_o.abs().mean().backward()
    ufc_o.abs().mean().backward()

    diff = (ufc_o - fc_o)
    print()
    print("diff max:", diff.max())
    print("diff min:", diff.min())
    print("diff mean:", diff.mean())
    print("diff rmse:", torch.sqrt(torch.mean(torch.square(diff))))

    diff_grad = (ufc.weight.grad - fc.weight.grad)
    print()
    print("diff grad max:", diff_grad.max())
    print("diff grad min:", diff_grad.min())
    print("diff grad mean:", diff_grad.mean())
    print("diff grad rmse:", torch.sqrt(torch.mean(torch.square(diff_grad))))

    if plot_en:
        # arguments are passed to np.histogram
        plt.hist(diff.cpu().detach().numpy().flatten(), bins='auto')
        plt.title("Histogram for output error")
        plt.show()

        # arguments are passed to np.histogram
        plt.hist(diff_grad.cpu().detach().numpy().flatten(), bins='auto')
        plt.title("Histogram for grad error")
        plt.show()
def loss(self, x, y):
    """Return the mean squared difference between ``self.f(x)`` and ``y``."""
    residual = self.f(x) - y
    return torch.square(residual).mean()
def forward(self, input):
    """Run the recurrence over every time step and return the final output.

    ``input`` is reshaped to ``[N, F, T]`` (with ``F = self.input_dim``) and
    permuted to ``[N, T, F]``. Each time step updates ``self.states``; after
    the last step the states are cleared and ``self.fc_out`` is applied to
    the last ``p``, followed by ``squeeze()``.
    """
    sequence = input.reshape(len(input), self.input_dim, -1)  # [N, F, T]
    sequence = sequence.permute(0, 2, 1)  # [N, T, F]

    for step_idx in range(sequence.shape[1]):
        x = sequence[:, step_idx, :]
        if len(self.states) == 0:  # hasn't initialized yet
            self.init_states(x)
        self.get_constants(x)

        # states[0] (the previous p) is not needed by the update below.
        h_prev = self.states[1]
        s_re_prev = self.states[2]
        s_im_prev = self.states[3]
        time_prev = self.states[4]
        b_u = self.states[5]
        b_w = self.states[6]
        frequency = self.states[7]

        # Input projections for every gate.
        x_i = torch.matmul(x * b_w[0], self.W_i) + self.b_i
        x_ste = torch.matmul(x * b_w[0], self.W_ste) + self.b_ste
        x_fre = torch.matmul(x * b_w[0], self.W_fre) + self.b_fre
        x_c = torch.matmul(x * b_w[0], self.W_c) + self.b_c
        x_o = torch.matmul(x * b_w[0], self.W_o) + self.b_o

        # Gates that combine the input projection with the previous hidden state.
        i = self.inner_activation(
            x_i + torch.matmul(h_prev * b_u[0], self.U_i))
        ste = self.inner_activation(
            x_ste + torch.matmul(h_prev * b_u[0], self.U_ste))
        fre = self.inner_activation(
            x_fre + torch.matmul(h_prev * b_u[0], self.U_fre))

        # Outer product of the two forget gates: [N, hidden, 1] * [N, 1, freq].
        ste = torch.reshape(ste, (-1, self.hidden_dim, 1))
        fre = torch.reshape(fre, (-1, 1, self.freq_dim))
        f = ste * fre

        c = i * self.activation(
            x_c + torch.matmul(h_prev * b_u[0], self.U_c))

        time = time_prev + 1
        omega = torch.tensor(2 * np.pi) * time * frequency
        re = torch.cos(omega)
        im = torch.sin(omega)

        c = torch.reshape(c, (-1, self.hidden_dim, 1))
        s_re = f * s_re_prev + c * re
        s_im = f * s_im_prev + c * im

        # Squared amplitude of the (real, imaginary) state pair.
        amplitude = torch.square(s_re) + torch.square(s_im)
        amplitude = torch.reshape(amplitude, (-1, self.freq_dim)).float()
        a_proj = torch.matmul(amplitude * b_u[0], self.U_a)
        a_proj = torch.reshape(a_proj, (-1, self.hidden_dim))
        a = self.activation(a_proj + self.b_a)

        o = self.inner_activation(
            x_o + torch.matmul(h_prev * b_u[0], self.U_o))

        h = o * a
        p = torch.matmul(h, self.W_p) + self.b_p

        # The last three slots are left empty here; get_constants is called
        # again at the start of the next step before they are read.
        self.states = [p, h, s_re, s_im, time, None, None, None]

    self.states = []
    return self.fc_out(p).squeeze()
def heat_map_function(y_dist, x_dist, y_scale, x_scale):
    """Return ``1 / (1 + (y_dist / y_scale)**2 + (x_dist / x_scale)**2)``.

    A small constant (1e-6) is added to each scale before dividing.
    """
    eps = 1e-6
    y_term = torch.square(y_dist / (eps + y_scale))
    x_term = torch.square(x_dist / (eps + x_scale))
    return 1 / (1 + (y_term + x_term))
def predict(testloader, spoof_labels, rppg_label, cnn_model, rnn_model,
            device):
    """Score every batch of ``testloader`` and save scores and rPPG plots.

    For each batch the CNN outputs ``D`` and ``T`` are combined (``T`` is
    kept where ``D >= 0.1``), fed through the RNN, and a score
    ``||F||^2 + LAMBDA * ||D[-1]||^2`` is computed. One CSV row per batch is
    appended to ``./training_log/val_scores.csv``, one plot per batch is
    saved under ``rppg_vis_oulu/``, and the collected scores are printed,
    saved to ``thresh_test.npy`` and passed to ``thresh_plot``.

    Args:
        testloader: iterable yielding ``(images, labels_D)`` batches.
        spoof_labels: per-sample labels; the last label of each
            ``BATCH_SIZE`` slice is used for the batch.
        rppg_label: indexable by batch index; plotted next to the prediction.
        cnn_model: model returning ``(D, T)`` for a batch of images.
        rnn_model: model called as ``rnn_model(F, hidden)``.
        device: device the models and tensors are moved to.
    """
    threshold = 0.1

    cnn_model = cnn_model.to(device)
    cnn_model.eval()
    rnn_model = rnn_model.to(device)
    rnn_model.eval()

    one = torch.ones(BATCH_SIZE, 1, 32, 32).to(device)
    zero = torch.zeros(BATCH_SIZE, 1, 32, 32).to(device)
    hidden = (torch.zeros(1, 1, 100, device=device),
              torch.zeros(1, 1, 100, device=device))

    score_data = list()
    for i, data in tqdm(enumerate(testloader, 0), total=len(testloader)):
        images, labels_D = data
        images, labels_D = images.to(device), labels_D.to(device)

        with torch.no_grad():
            D, T = cnn_model(images)

            # Non_rigid_registration_layer
            V = torch.where(D >= threshold, one, zero)
            U = T * V
            F = U

            hidden = repackage_hidden(hidden)
            outputs_F, hidden = rnn_model(F, hidden)
            outputs_F = outputs_F.view(50)

            norm_D = torch.linalg.norm(D[-1, :, :, :])
            norm_F = torch.linalg.norm(outputs_F)
            score = torch.square(norm_F) + (LAMBDA * torch.square(norm_D))

        # The label of the last sample in the batch stands for the batch.
        label = spoof_labels[(i * BATCH_SIZE):(i * BATCH_SIZE) +
                             BATCH_SIZE][-1]

        norm_D_np = norm_D.cpu().detach().numpy()
        norm_F_np = norm_F.cpu().detach().numpy()
        score_np = score.cpu().detach().numpy()

        with open('./training_log/val_scores.csv', 'a') as fd:
            csv_row = '\n' + \
                str(i) + ', ' + \
                str(label) + ', ' + \
                str(norm_D_np) + ', ' + \
                str(norm_F_np) + ', ' + \
                str(score_np)
            fd.write(csv_row)

        score_data.append([i, float(score_np), float(label)])

        fig, ax = plt.subplots()
        try:
            ax.plot(outputs_F.cpu().detach().numpy())
            ax.plot(rppg_label[i])
            plt.grid(True)
            plt.ylabel('rppg_signal')
            plt.title(
                str('Label : ' + CLASS_NAMES[int(label)] + ' | rPPG Norm: ' +
                    str(norm_F_np)))
            rppg_path = 'rppg_vis_oulu/' + str('val_batch_' + str(i) + '_' +
                                               CLASS_NAMES[int(label)])
            plt.savefig(rppg_path)
        finally:
            # One figure is created per batch; close it so figures do not
            # pile up in memory over a long loader.
            plt.close(fig)

    print(score_data)
    np.save('thresh_test.npy', score_data)
    thresh_plot(np.array(score_data))
def __call__(self, y_pred, y_true):
    """Return the mean squared error between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    return torch.square(residual).mean()
def mseloss(scores, labels):
    """Return the mean squared difference between scores and labels."""
    residual = torch.sub(scores, labels)
    return torch.square(residual).mean()
def dist(bbox1, bbox2):
    """Return the Euclidean distance between the first two entries of each box."""
    offset = bbox1[:2] - bbox2[:2]
    squared_length = torch.sum(torch.square(offset))
    return torch.sqrt(squared_length)
def g_loss_func(fake):
    """Return the mean squared distance of ``fake`` from 1.0."""
    gap = fake - 1.0
    return torch.square(gap).mean()
def d_loss_func(real, fake):
    """Return mean((real - 1)^2) + mean(fake^2)."""
    real_term = torch.mean(torch.square(real - 1.0))
    fake_term = torch.mean(torch.square(fake))
    return real_term + fake_term
def __init__(self,
             n_tasks: int,
             n_features: int,
             layer_sizes: Sequence[int] = [1000],
             weight_init_stddevs: OneOrMany[float] = 0.02,
             bias_init_consts: OneOrMany[float] = 1.0,
             weight_decay_penalty: float = 0.0,
             weight_decay_penalty_type: str = 'l2',
             dropouts: OneOrMany[float] = 0.5,
             activation_fns: OneOrMany[ActivationFn] = 'relu',
             n_classes: int = 2,
             residual: bool = False,
             **kwargs) -> None:
    """Create a MultitaskClassifier.

    In addition to the following arguments, this class also accepts
    all the keyword arguments from TensorGraph.

    Parameters
    ----------
    n_tasks: int
        number of tasks
    n_features: int
        number of features
    layer_sizes: list
        the size of each dense layer in the network.  The length of
        this list determines the number of layers.  An empty list gives a
        model that feeds the input straight into the output layer.
    weight_init_stddevs: list or float
        the standard deviation of the distribution to use for weight
        initialization of each layer.  The length of this list should
        equal len(layer_sizes).  Alternatively this may be a single
        value instead of a list, in which case the same value is used
        for every layer.
    bias_init_consts: list or float
        the value to initialize the biases in each layer to.  The
        length of this list should equal len(layer_sizes).
        Alternatively this may be a single value instead of a list, in
        which case the same value is used for every layer.
    weight_decay_penalty: float
        the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
        the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
        the dropout probability to use for each layer.  The length of
        this list should equal len(layer_sizes).  Alternatively this
        may be a single value instead of a list, in which case the
        same value is used for every layer.
    activation_fns: list or object
        the PyTorch activation function to apply to each layer.  The
        length of this list should equal len(layer_sizes).
        Alternatively this may be a single value instead of a list, in
        which case the same value is used for every layer.  Standard
        activation functions from torch.nn.functional can be specified
        by name.
    n_classes: int
        the number of classes
    residual: bool
        if True, the model will be composed of pre-activation residual
        blocks instead of a simple stack of dense layers.
    """
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_classes = n_classes
    n_layers = len(layer_sizes)

    # Broadcast scalar hyper-parameters to one value per hidden layer.
    if not isinstance(weight_init_stddevs, SequenceCollection):
        weight_init_stddevs = [weight_init_stddevs] * n_layers
    if not isinstance(bias_init_consts, SequenceCollection):
        bias_init_consts = [bias_init_consts] * n_layers
    if not isinstance(dropouts, SequenceCollection):
        dropouts = [dropouts] * n_layers
    if isinstance(activation_fns,
                  str) or not isinstance(activation_fns, SequenceCollection):
        activation_fns = [activation_fns] * n_layers
    activation_fns = [get_activation(f) for f in activation_fns]

    # Define the PyTorch Module that implements the model.

    class PytorchImpl(torch.nn.Module):

        def __init__(self):
            super(PytorchImpl, self).__init__()
            self.layers = torch.nn.ModuleList()
            prev_size = n_features
            for size, weight_stddev, bias_const in zip(
                    layer_sizes, weight_init_stddevs, bias_init_consts):
                layer = torch.nn.Linear(prev_size, size)
                torch.nn.init.normal_(layer.weight, 0, weight_stddev)
                torch.nn.init.constant_(layer.bias, bias_const)
                self.layers.append(layer)
                prev_size = size
            self.output_layer = torch.nn.Linear(prev_size,
                                                n_tasks * n_classes)
            torch.nn.init.xavier_uniform_(self.output_layer.weight)
            torch.nn.init.constant_(self.output_layer.bias, 0)

        def forward(self, x):
            prev_size = n_features
            next_activation = None
            # Start from the raw input so that ``y`` is defined even when
            # there are no hidden layers.
            y = x
            for size, layer, dropout, activation_fn, in zip(
                    layer_sizes, self.layers, dropouts, activation_fns):
                y = x
                # Pre-activation: the previous layer's activation is
                # applied just before the next dense layer.
                if next_activation is not None:
                    y = next_activation(x)
                y = layer(y)
                if dropout > 0.0 and self.training:
                    y = F.dropout(y, dropout)
                if residual and prev_size == size:
                    y = x + y
                x = y
                prev_size = size
                next_activation = activation_fn
            if next_activation is not None:
                y = next_activation(y)
            neural_fingerprint = y
            y = self.output_layer(y)
            logits = torch.reshape(y, (-1, n_tasks, n_classes))
            output = F.softmax(logits, dim=2)
            return (output, logits, neural_fingerprint)

    model = PytorchImpl()
    regularization_loss: Optional[Callable]
    # Only hidden-layer weights are penalised; with no hidden layers there
    # is nothing to stack, so no regulariser is installed.
    if weight_decay_penalty != 0 and len(model.layers) > 0:
        weights = [layer.weight for layer in model.layers]
        if weight_decay_penalty_type == 'l1':
            regularization_loss = lambda: weight_decay_penalty * torch.sum(
                torch.stack([torch.abs(w).sum() for w in weights]))
        else:
            regularization_loss = lambda: weight_decay_penalty * torch.sum(
                torch.stack([torch.square(w).sum() for w in weights]))
    else:
        regularization_loss = None
    super(MultitaskClassifier,
          self).__init__(model,
                         dc.models.losses.SoftmaxCrossEntropy(),
                         output_types=['prediction', 'loss', 'embedding'],
                         regularization_loss=regularization_loss,
                         **kwargs)
def MockTrain(args, test_dataloader, model, save_pred_file, device):
    """Run the model over the test data and collect input-gradient norms.

    The model is put in train mode. For each batch, the softmax output of
    class 0 is differentiated with respect to the embedding output and the
    squared norm of that gradient (over the last dimension) is recorded.
    Predictions and class probabilities are written, one line per example,
    to ``save_pred_file``.

    Returns:
        ``(mean loss per batch, accuracy over all examples, list of
        per-batch squared gradient norms)``.
    """
    # eval_test
    model.train()
    loss_total = 0
    correct_total = 0
    num_steps = 0
    num_examples = 0
    grads_in_norm_list = []

    with open(save_pred_file, "w") as f_test:
        for (input_ids, input_mask, segment_ids, label_ids, seq_lens,
             context_ids, context_lens) in test_dataloader:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # truncate to save space and computing resource
            max_seq_lens = max(seq_lens)[0]
            input_ids = input_ids[:, :max_seq_lens].to(device)
            input_mask = input_mask[:, :max_seq_lens].to(device)
            segment_ids = segment_ids[:, :max_seq_lens].to(device)
            label_ids = label_ids.to(device)
            seq_lens = seq_lens.to(device)

            # context fields
            context_ids = context_ids.to(device)

            # ok, we have to separate the embeddings out
            batch_loss, logits, _, embedding_output = model(
                input_ids,
                segment_ids,
                input_mask,
                seq_lens,
                device=device,
                labels=label_ids,
                context_ids=context_ids,
                context_lens=context_lens,
                include_headwise=args.head_sp_loss,
                headwise_weight=args.head_sp_loss_lambda)

            # mock gradient: back-propagate a one-hot signal for class 0
            logits = F.softmax(logits, dim=-1)
            sensitivity_class = 0
            sensitivity_grads = torch.zeros(logits.shape)
            sensitivity_grads[:, sensitivity_class] = 1.0
            sensitivity_grads = sensitivity_grads.to(device)

            grads_in = torch.autograd.grad(
                logits, embedding_output,
                grad_outputs=sensitivity_grads)[0]
            grads_in_norm_list.append(
                torch.square(torch.norm(grads_in, dim=-1)))

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            outputs = np.argmax(logits, axis=1)

            # One line per example: predicted class, then every probability.
            for prediction, probabilities in zip(outputs, logits):
                fields = [str(prediction)]
                fields.extend(str(ou) for ou in probabilities)
                f_test.write(" ".join(fields))
                f_test.write("\n")

            batch_correct = np.sum(outputs == label_ids)

            loss_total += batch_loss.mean().item()
            correct_total += batch_correct
            num_examples += input_ids.size(0)
            num_steps += 1

            model.zero_grad()

    test_loss = loss_total / num_steps
    test_accuracy = correct_total / num_examples
    return test_loss, test_accuracy, grads_in_norm_list
def var(self):
    """Return ``_S / (_n - 1)``, or ``_M`` squared while ``_n <= 1``.

    The fallback avoids dividing by zero when this value is used for state
    normalization before two samples have been seen.
    """
    if self._n > 1:
        return self._S / (self._n - 1)
    return t.square(self._M)
def run_epoch(experiment, network, optimizer, dataloader, config, use_tqdm = False, debug=False, plot=False):
    """Train ``network`` for one pass over ``dataloader``.

    For every batch the network's outputs (``means``, ``varfactors``,
    ``covarfactors``) are turned into a distribution over curve control
    points and over positions. The loss is a weighted sum of a position
    error, the negative log-likelihood of the observed positions, and a KL
    divergence against a prior, with weights taken from
    ``config["loss_weights"]`` (keys ``"position"``, ``"nll"``,
    ``"kl_divergence"``).

    Args:
        experiment: object with ``log_metric(name, value)``; only used when
            ``debug`` is False.
        network: module exposing ``bezier_order`` and ``output_dimension``.
        optimizer: optimizer stepped once per batch.
        dataloader: yields dicts with keys ``"track"``, ``"images"``,
            ``"session_times"``, ``"ego_positions"``, ``"ego_velocities"``.
        config: dict containing ``"loss_weights"``.
        use_tqdm: wrap the loader in a tqdm progress bar.
        debug: when True, metrics are not logged to ``experiment``.
        plot: together with ``debug``, show a diagnostic figure per batch.

    Returns:
        None.
    """
    # NOTE(review): the cum_* accumulators below are never updated or
    # returned in the code visible here.
    cum_loss = 0.0
    cum_param_loss = 0.0
    cum_position_loss = 0.0
    cum_velocity_loss = 0.0
    num_samples=0.0
    if use_tqdm:
        t = tqdm(enumerate(dataloader), total=len(dataloader))
    else:
        t = enumerate(dataloader)
    network.train()  # This is important to call before training!
    dataloaderlen = len(dataloader)
    dev = next(network.parameters()).device  # we are only doing single-device training for now, so this works fine.
    dtype = next(network.parameters()).dtype # we are only doing single-device training for now, so this works fine.
    loss_weights = config["loss_weights"]
    positionerror = loss_functions.SquaredLpNormLoss().type(dtype).to(dev)

    #_, _, _, _, _, _, sample_session_times,_,_ = dataloader.dataset[0]
    bezier_order = network.bezier_order
    d = network.output_dimension
    for (i, imagedict) in t:
        track_names = imagedict["track"]
        input_images = imagedict["images"].type(dtype).to(device=dev)
        batch_size = input_images.shape[0]
        session_times = imagedict["session_times"].type(dtype).to(device=dev)
        ego_positions = imagedict["ego_positions"].type(dtype).to(device=dev)
        ego_velocities = imagedict["ego_velocities"].type(dtype).to(device=dev)
        targets = ego_positions

        # Rescale each sample's session times by its total duration so the
        # curve parameter starts at 0 and ends at 1.
        dt = session_times[:,-1]-session_times[:,0]
        s_torch_cur = (session_times - session_times[:,0,None])/dt[:,None]

        # Least-squares fit to the targets; M is applied below to map
        # control points to curve points.
        M, controlpoints_fit = deepracing_models.math_utils.bezier.bezierLsqfit(targets, bezier_order, t = s_torch_cur)
        Msquare = torch.square(M)

        means, varfactors, covarfactors = network(input_images)
        # Factor with varfactors on the diagonal and covarfactors on the
        # first sub-diagonal; covars is that factor times its transpose.
        scale_trils = torch.diag_embed(varfactors) + torch.diag_embed(covarfactors, offset=-1)
        covars = torch.matmul(scale_trils, scale_trils.transpose(2,3))
        covars_expand = covars.unsqueeze(1).expand(batch_size, Msquare.shape[1], Msquare.shape[2], d, d)
        # Position covariance: control-point covariances weighted by M**2
        # and summed over dimension 2.
        poscovar = torch.sum(Msquare[:,:,:,None,None]*covars_expand, dim=2)
        posmeans = torch.matmul(M, means)

        initial_points = targets[:,0].unsqueeze(1)
        # NOTE(review): final_points is dt * initial velocity without the
        # initial point added, yet it is differenced against initial_points
        # on the next line - confirm this is intended.
        final_points = (dt[:,None]*ego_velocities[:,0]).unsqueeze(1)
        deltas = final_points - initial_points
        ds = torch.linspace(0.0,1.0,steps=means.shape[1])
        # The comprehension variable ``t`` is local to the comprehension and
        # does not rebind the batch iterator ``t`` above.
        straight_lines = torch.cat([initial_points + t.item()*deltas for t in ds], dim=1)
        priorscaletril = torch.diag_embed(torch.ones_like(straight_lines))
        # The prior is centred on the least-squares fit with an identity
        # scale; straight_lines only supplies the shape of that scale (and
        # the "Prior" curve in the debug plot).
        priorcurves = D.MultivariateNormal(controlpoints_fit, scale_tril=priorscaletril, validate_args=False)
        distcurves = D.MultivariateNormal(means, scale_tril=scale_trils, validate_args=False)
        distpos = D.MultivariateNormal(posmeans, covariance_matrix=poscovar, validate_args=False)

        position_error = positionerror(posmeans, targets)
        log_probs = distpos.log_prob(ego_positions)
        NLL = torch.mean(-log_probs)
        kl_divergences = D.kl_divergence(distcurves, priorcurves)
        mean_kl = torch.mean(kl_divergences)

        if debug and plot:
            fig, (ax1, ax2) = plt.subplots(1, 2, sharey=False)
            print("position_error: %f" % position_error.item() )
            images_np = np.round(255.0*input_images[0].detach().cpu().numpy().copy().transpose(0,2,3,1)).astype(np.uint8)
            #image_np_transpose=skimage.util.img_as_ubyte(images_np[-1].transpose(1,2,0))
            # oap = other_agent_positions[other_agent_positions==other_agent_positions].view(1,-1,60,2)
            # print(oap)
            ims = []
            # NOTE(review): this loop reuses ``i`` and therefore overwrites
            # the batch index for the remainder of the iteration.
            for i in range(images_np.shape[0]):
                ims.append([ax1.imshow(images_np[i])])
            ani = animation.ArtistAnimation(fig, ims, interval=250, blit=True, repeat=True)

            fit_points = torch.matmul(M, controlpoints_fit)
            prior_points = torch.matmul(M, straight_lines)

            # gt_points_np = ego_positions[0].detach().cpu().numpy().copy()
            gt_points_np = targets[0].detach().cpu().numpy().copy()
            pred_points_np = posmeans[0].detach().cpu().numpy().copy()
            pred_control_points_np = means[0].detach().cpu().numpy().copy()
            fit_points_np = fit_points[0].cpu().numpy().copy()
            fit_control_points_np = controlpoints_fit[0].cpu().numpy().copy()
            prior_points_np = prior_points[0].cpu().numpy().copy()
            prior_control_points_np = straight_lines[0].cpu().numpy().copy()

            ymin = np.min(np.hstack([gt_points_np[:,1], pred_points_np[:,1] ])) - 2.5
            ymax = np.max(np.hstack([gt_points_np[:,1], pred_points_np[:,1] ])) + 2.5
            xmin = np.min(np.hstack([gt_points_np[:,0], fit_points_np[:,0] ])) - 2.5
            # NOTE(review): unlike the other three limits, xmax gets no
            # 2.5 margin - confirm whether that is deliberate.
            xmax = np.max(np.hstack([gt_points_np[:,0], fit_points_np[:,0] ]))
            # The x limits are passed as (xmax, xmin).
            ax2.set_xlim(xmax,xmin)
            ax2.set_ylim(ymin,ymax)
            ax2.plot(gt_points_np[:,0],gt_points_np[:,1],'g+', label="Ground Truth Waypoints")
            ax2.plot(pred_points_np[:,0],pred_points_np[:,1],'r-', label="Predicted Bézier Curve")
            ax2.plot(prior_points_np[:,0],prior_points_np[:,1], label="Prior")
            # ax2.plot(fit_points_np[:,0],fit_points_np[:,1],'b-', label="Best-fit Bézier Curve")
            #ax2.scatter(fit_control_points_np[1:,0],fit_control_points_np[1:,1],c="b", label="Bézier Curve's Control Points")
            # ax2.plot(pred_points_np[:,1],pred_points_np[:,0],'r-', label="Predicted Bézier Curve")
            # ax2.scatter(pred_control_points_np[:,1],pred_control_points_np[:,0], c='r', label="Predicted Bézier Curve's Control Points")
            plt.legend()
            plt.show()

        # loss = position_error
        loss = loss_weights["position"]*position_error + loss_weights["nll"]*NLL + loss_weights["kl_divergence"]*mean_kl
        #loss = loss_weights["position"]*position_error
        optimizer.zero_grad()
        loss.backward()
        # Weight and bias updates.
        optimizer.step()

        # logging information
        current_position_loss_float = float(position_error.item())
        num_samples += 1.0
        if not debug:
            experiment.log_metric("current_position_loss", current_position_loss_float)
            experiment.log_metric("logprob", NLL.item())
            experiment.log_metric("kl_divergence", mean_kl.item())
        if use_tqdm:
            t.set_postfix({"current_position_loss" : current_position_loss_float})
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.

    Returns:
        The value returned by ``closure``, or ``None`` when no closure is
        given.

    Raises:
        RuntimeError: if a parameter has a sparse gradient.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            if grad.is_sparse:
                raise RuntimeError(
                    'cosangulargrad does not support sparse gradients, please consider SparseAdam instead'
                )

            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                # Exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p.data)
                # Exponential moving average of squared gradient values
                state['exp_avg_sq'] = torch.zeros_like(p.data)
                # Previous gradient
                state['previous_grad'] = torch.zeros_like(p.data)
                # temporary minimum value for comparison
                state['min'] = torch.zeros_like(p.data)
                # temporary difference between gradients for comparison
                state['diff'] = torch.zeros_like(p.data)
                # final cos value to be used
                state['final_cos_theta'] = torch.zeros_like(p.data)

            exp_avg = state['exp_avg']
            exp_avg_sq = state['exp_avg_sq']
            previous_grad = state['previous_grad']
            min_angle = state['min']
            diff = state['diff']
            final_cos_theta = state['final_cos_theta']
            beta1, beta2 = group['betas']

            state['step'] += 1

            if group['weight_decay'] != 0:
                # In place on purpose: the gradient tensor itself is
                # modified, as in the original implementation.
                grad.add_(p.data, alpha=group['weight_decay'])

            # Decay the first and second moment running average coefficient
            exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
            exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
            denom = exp_avg_sq.sqrt().add_(group['eps'])

            bias_correction1 = 1 - beta1**state['step']
            bias_correction2 = 1 - beta2**state['step']

            # Angle between the previous and the current gradient.
            tan_theta = torch.abs(
                (previous_grad - grad) / (1 + previous_grad * grad))
            cos_theta = 1 / torch.sqrt(1 + torch.square(tan_theta))
            angle = torch.atan(tan_theta) * (180 / math.pi)

            # Refresh the stored angle statistics when fewer entries exceed
            # the stored minimum than do not. Counting directly also covers
            # the case where every entry compares the same way; the old
            # fallback compared a tensor with the string "False" and so
            # never fired.
            exceeds = torch.gt(angle, min_angle)
            num_exceeding = int(exceeds.sum())
            if num_exceeding < exceeds.numel() - num_exceeding:
                min_angle = angle
                diff = torch.abs(previous_grad - grad)
                final_cos_theta = cos_theta.clone()

            # Calculating Angular coefficient
            angular_coeff = torch.tanh(torch.abs(final_cos_theta)) * 0.5 + 0.5

            state['previous_grad'] = grad.clone()
            state['min'] = min_angle.clone()
            state['diff'] = diff.clone()
            state['final_cos_theta'] = final_cos_theta.clone()

            # update momentum with angular_coeff
            exp_avg1 = exp_avg * angular_coeff

            step_size = group['lr'] * math.sqrt(
                bias_correction2) / bias_correction1

            p.data.addcdiv_(exp_avg1, denom, value=-step_size)

    return loss
def __call__(self, preds, targets):
    """Add this batch's squared error and element count to the running totals."""
    _check_same_shape(preds, targets)
    squared_error = torch.square(torch.sub(preds, targets))
    self.sum_squared_error += torch.sum(squared_error)
    self.total += targets.numel()
def calculate_loss(net, x, y_targ):
    """Return ``(net(x), sum of squared differences to y_targ)``."""
    prediction = net(x)
    squared_error = tr.square(tr.sub(prediction, y_targ))
    return prediction, tr.sum(squared_error)
def forward(self, x):
    """Concatenate x, x squared and the product over dim 3, along dim 3.

    For a last dimension holding (x, y) the result is (x, y, x^2, y^2, x*y).
    """
    squared = torch.square(x)  # (x, y) -> (x^2, y^2)
    product = torch.prod(x, 3).unsqueeze(3)  # (x, y) -> x*y
    return torch.cat((x, squared, product), 3)
def truth(x):
    """Paraboloid: return the sum of squares of x along dimension 1."""
    squared = torch.square(x)
    return squared.sum(dim=1)
def forward(self, x):
    """Concatenate x squared and the product over dim 2, along dim 2.

    For a last dimension holding (x, y) the result is (x^2, y^2, x*y).
    """
    squared = torch.square(x)
    product = torch.prod(x, 2).unsqueeze(2)
    return torch.cat((squared, product), 2)
def forward(self, input):
    """Apply a linear layer whose weight is centred and norm-scaled.

    Each row of ``self.weight`` has its mean subtracted; the result is
    divided by ``sqrt(sum of squares / self.dmin)`` and multiplied by
    ``self.scale`` before being used with ``self.bias`` in ``F.linear``.
    """
    row_means = torch.mean(self.weight, dim=1, keepdim=True)
    centered = self.weight - row_means
    energy = torch.sum(torch.square(centered), dim=[0, 1], keepdim=True)
    frob = torch.sqrt(energy * (1 / self.dmin))
    normalized = (centered / frob) * self.scale
    return F.linear(input, normalized, self.bias)
def forward(self, preds: Tensor, target: Tensor) -> Tensor:
    """Return the mean squared error between ``preds`` and ``target``."""
    residual = preds - target
    return torch.square(residual).mean()
def train(modelname,
          dataset,
          lr=.5 * 0.001,
          logging=False,
          epochs=100,
          show_ROC=False,
          saveResults=False,
          optimThreshould=False,
          filterFraction=0,
          savePrediction=False):
    """Train a reconstruction model on ``dataset`` and return its ROC AUC.

    The model reconstructs the feature matrix and the adjacency matrix; the
    training loss is the average of the two reconstruction norms. After
    training, each row gets a score equal to the mean of its feature and
    structure reconstruction norms, which is evaluated against ``labels``.

    Args:
        modelname: name passed to ``get_model`` and used as the results key.
        dataset: dataset name passed to ``get_data`` and used in output paths.
        lr: Adam learning rate.
        logging: when true, print progress every 5 epochs and stop once the
            loss has been identical for three epochs in a row; when equal to
            False, show a tqdm progress bar instead.
        epochs: maximum number of epochs.
        show_ROC: display the ROC figure.
        saveResults: write ``Results/<dataset>/results.json`` and ``roc.png``.
        optimThreshould: pick a threshold from the ROC curve and, when
            results are saved, store it.
        filterFraction: when non-zero, threshold the scores at the ROC
            threshold found at this fraction of the curve and report
            accuracy, precision and recall.
        savePrediction: with a non-zero ``filterFraction``, write the binary
            predictions to ``output.txt``.

    Returns:
        The ROC AUC of the per-row scores.
    """
    print()
    Adj_norm, Adj, X, labels = get_data(dataset)
    model = get_model(modelname, dataset, Adj_norm)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    loss_list = []
    iterable = range(epochs)
    if logging == False:  # noqa: E712 - kept as an equality test on purpose
        iterable = tqdm(range(epochs), desc=dataset)

    for i in iterable:
        optimizer.zero_grad()
        Att, A = model(X)

        feat_loss = torch.sqrt(torch.sum(torch.square(Att - X)))
        struct_loss = torch.sqrt(torch.sum(torch.square(A - Adj)))
        loss = torch.add(torch.mul(feat_loss, 0.5),
                         torch.mul(struct_loss, 0.5))

        loss.backward()
        optimizer.step()
        # NOTE(review): the result of this extra forward pass is unused; it
        # is kept because it may touch stateful layers or the RNG stream.
        model(X)
        loss_list.append(loss.item())

        if logging:
            if i > 3:
                # Stop once the loss has stalled for three epochs.
                if loss_list[-1] == loss_list[-2] and loss_list[
                        -2] == loss_list[-3]:
                    print("\n")
                    print("Epoch : ", i, " Loss: =", loss.item(),
                          "Struct_loss = ", struct_loss.item(),
                          "Feature_loss = ", feat_loss.item())
                    break
            if i % 5 == 0:
                print("Epoch : ", i, " Loss: =", loss.item(),
                      "Struct_loss = ", struct_loss.item(),
                      "Feature_loss = ", feat_loss.item())

    # Per-row score: mean of the feature and structure reconstruction norms.
    with torch.no_grad():
        feat_err = torch.square(model(X)[0] - X)
        fl = [torch.sqrt(torch.sum(row)) for row in feat_err]
        struct_err = torch.square(model(X)[1] - Adj)
        sl = [torch.sqrt(torch.sum(row)) for row in struct_err]
        diff = [(f_norm + s_norm) / 2 for f_norm, s_norm in zip(fl, sl)]

    fpr, tpr, thresholds = metrics.roc_curve(labels,
                                             diff,
                                             pos_label=None,
                                             drop_intermediate=False)

    threshold = thresholds[0]
    if optimThreshould:
        # NOTE(review): this minimises the distance to the ROC point
        # (fpr=1, tpr=0); confirm that is the intended corner.
        min_d = 10
        for idx in range(len(fpr)):
            distance = math.sqrt((1 - fpr[idx])**2 + (tpr[idx])**2)
            if distance < min_d:
                min_d = distance
                threshold = thresholds[idx]

    filterResult = []
    if filterFraction != 0:
        th_ind = (filterFraction) * len(fpr)
        filterThreshould = thresholds[int(th_ind)]
        filtered_prediction = [
            1 if score > filterThreshould else 0 for score in diff
        ]

        flat_labels = np.reshape(labels, (-1))
        prfs = precision_recall_fscore_support(flat_labels,
                                               filtered_prediction)
        filterResult.append(
            str(accuracy_score(flat_labels, filtered_prediction)))
        print("Total Nodes:", len(filtered_prediction))
        print("Labeled anomalies:", sum(labels))
        print("Anomalies predicted:", sum(filtered_prediction))
        print("prediction Accuracy: ",
              accuracy_score(flat_labels, filtered_prediction))
        print("precision", prfs[0])
        print("recall", prfs[1])

        if savePrediction:
            with open('output.txt', 'w') as filehandle:
                for listitem in filtered_prediction:
                    filehandle.write('%s\n' % listitem)

    auc_score = roc_auc_score(labels, diff)
    print(dataset + " AUC score : ", auc_score)

    if saveResults:
        file_path = "Results/" + dataset + "/results.json"
        data = {}
        data[modelname] = {"auc_score": 0.0, "model_summary": "fake summary"}

        # Load earlier results if there are any; an unreadable file falls
        # back to the default entry above.
        if path.exists(file_path) and path.isfile(file_path):
            with open(file_path) as f:
                try:
                    data = json.load(f)
                except (ValueError, OSError):
                    print("Issues in reading json file")

        if modelname not in data.keys():
            data[modelname] = {
                "auc_score": 0.0,
                "model_summary": "fake summary"
            }
        data[modelname]["auc_score"] = auc_score
        if optimThreshould:
            data[modelname]['threshold'] = str(threshold)
        if filterFraction != 0:
            data[modelname]['accuracy'] = str(filterResult[0])
        model_summary = summary(model, X, device='cpu', verbose=0)
        data[modelname]["model_summary"] = str(model_summary)

        # Serialise everything before opening the file for writing, so a
        # failure above cannot leave a truncated results file behind.
        payload = json.dumps(data)
        with open(file_path, 'w') as f:
            f.write(payload)

    fig = plt.figure()
    plt.title(dataset + " AUC_score=" + str(auc_score))
    plt.plot(fpr, tpr)
    if show_ROC:
        plt.show()
    if saveResults:
        fig.savefig('./Results' + '/' + dataset + '/roc.png')
    plt.close()
    return auc_score
def __init__(self,
             n_tasks: int,
             n_features: int,
             layer_sizes: Sequence[int] = [1000],
             weight_init_stddevs: OneOrMany[float] = 0.02,
             bias_init_consts: OneOrMany[float] = 1.0,
             weight_decay_penalty: float = 0.0,
             weight_decay_penalty_type: str = 'l2',
             dropouts: OneOrMany[float] = 0.5,
             activation_fns: OneOrMany[ActivationFn] = 'relu',
             uncertainty: bool = False,
             residual: bool = False,
             **kwargs) -> None:
    """Create a MultitaskRegressor.

    In addition to the following arguments, this class also accepts all
    the keyword arguments from TensorGraph.

    Parameters
    ----------
    n_tasks: int
        number of tasks
    n_features: int
        number of features
    layer_sizes: list
        the size of each dense layer in the network.  The length of
        this list determines the number of layers.  An empty list gives a
        model that feeds the input straight into the output layer.
    weight_init_stddevs: list or float
        the standard deviation of the distribution to use for weight
        initialization of each layer.  The length of this list should
        equal len(layer_sizes)+1.  The final element corresponds to the
        output layer.  Alternatively this may be a single value instead
        of a list, in which case the same value is used for every layer.
    bias_init_consts: list or float
        the value to initialize the biases in each layer to.  The
        length of this list should equal len(layer_sizes)+1.  The final
        element corresponds to the output layer.  Alternatively this may
        be a single value instead of a list, in which case the same
        value is used for every layer.
    weight_decay_penalty: float
        the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
        the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
        the dropout probability to use for each layer.  The length of
        this list should equal len(layer_sizes).  Alternatively this
        may be a single value instead of a list, in which case the
        same value is used for every layer.
    activation_fns: list or object
        the PyTorch activation function to apply to each layer.  The
        length of this list should equal len(layer_sizes).
        Alternatively this may be a single value instead of a list, in
        which case the same value is used for every layer.  Standard
        activation functions from torch.nn.functional can be specified
        by name.
    uncertainty: bool
        if True, include extra outputs and loss terms to enable the
        uncertainty in outputs to be predicted
    residual: bool
        if True, the model will be composed of pre-activation residual
        blocks instead of a simple stack of dense layers.

    Raises
    ------
    ValueError
        if ``uncertainty`` is True and any dropout rate is 0.
    """
    self.n_tasks = n_tasks
    self.n_features = n_features
    n_layers = len(layer_sizes)

    # Broadcast scalar hyper-parameters; weights and biases get one extra
    # entry for the output layer.
    if not isinstance(weight_init_stddevs, SequenceCollection):
        weight_init_stddevs = [weight_init_stddevs] * (n_layers + 1)
    if not isinstance(bias_init_consts, SequenceCollection):
        bias_init_consts = [bias_init_consts] * (n_layers + 1)
    if not isinstance(dropouts, SequenceCollection):
        dropouts = [dropouts] * n_layers
    if isinstance(activation_fns,
                  str) or not isinstance(activation_fns, SequenceCollection):
        activation_fns = [activation_fns] * n_layers
    activation_fns = [get_activation(f) for f in activation_fns]
    if uncertainty:
        if any(d == 0.0 for d in dropouts):
            raise ValueError(
                'Dropout must be included in every layer to predict uncertainty'
            )

    # Define the PyTorch Module that implements the model.

    class PytorchImpl(torch.nn.Module):

        def __init__(self):
            super(PytorchImpl, self).__init__()
            self.layers = torch.nn.ModuleList()
            prev_size = n_features
            for size, weight_stddev, bias_const in zip(
                    layer_sizes, weight_init_stddevs, bias_init_consts):
                layer = torch.nn.Linear(prev_size, size)
                torch.nn.init.normal_(layer.weight, 0, weight_stddev)
                torch.nn.init.constant_(layer.bias, bias_const)
                self.layers.append(layer)
                prev_size = size
            self.output_layer = torch.nn.Linear(prev_size, n_tasks)
            torch.nn.init.normal_(self.output_layer.weight, 0,
                                  weight_init_stddevs[-1])
            torch.nn.init.constant_(self.output_layer.bias,
                                    bias_init_consts[-1])
            self.uncertainty_layer = torch.nn.Linear(prev_size, n_tasks)
            # Initialise the uncertainty head itself. These two calls used
            # to target ``output_layer`` again, which overwrote its bias
            # with 0 and left the uncertainty head at PyTorch defaults.
            torch.nn.init.normal_(self.uncertainty_layer.weight, 0,
                                  weight_init_stddevs[-1])
            torch.nn.init.constant_(self.uncertainty_layer.bias, 0)

        def forward(self, inputs):
            x, dropout_switch = inputs
            prev_size = n_features
            next_activation = None
            # Start from the raw input so that ``y`` is defined even when
            # there are no hidden layers.
            y = x
            for size, layer, dropout, activation_fn, in zip(
                    layer_sizes, self.layers, dropouts, activation_fns):
                y = x
                # Pre-activation: the previous layer's activation is
                # applied just before the next dense layer.
                if next_activation is not None:
                    y = next_activation(x)
                y = layer(y)
                if dropout > 0.0 and dropout_switch:
                    y = F.dropout(y, dropout)
                if residual and prev_size == size:
                    y = x + y
                x = y
                prev_size = size
                next_activation = activation_fn
            if next_activation is not None:
                y = next_activation(y)
            neural_fingerprint = y
            output = torch.reshape(self.output_layer(y), (-1, n_tasks, 1))
            if uncertainty:
                log_var = torch.reshape(self.uncertainty_layer(y),
                                        (-1, n_tasks, 1))
                var = torch.exp(log_var)
                return (output, var, output, log_var, neural_fingerprint)
            else:
                return (output, neural_fingerprint)

    model = PytorchImpl()
    regularization_loss: Optional[Callable]
    # Only hidden-layer weights are penalised; with no hidden layers there
    # is nothing to stack, so no regulariser is installed.
    if weight_decay_penalty != 0 and len(model.layers) > 0:
        weights = [layer.weight for layer in model.layers]
        if weight_decay_penalty_type == 'l1':
            regularization_loss = lambda: weight_decay_penalty * torch.sum(
                torch.stack([torch.abs(w).sum() for w in weights]))
        else:
            regularization_loss = lambda: weight_decay_penalty * torch.sum(
                torch.stack([torch.square(w).sum() for w in weights]))
    else:
        regularization_loss = None
    loss: Union[dc.models.losses.Loss, LossFn]
    if uncertainty:
        output_types = ['prediction', 'variance', 'loss', 'loss', 'embedding']

        def loss(outputs, labels, weights):
            # ``outputs`` holds the two 'loss' outputs: the prediction and
            # the log-variance.
            output, labels = _make_pytorch_shapes_consistent(
                outputs[0], labels[0])
            diff = labels - output
            losses = diff * diff / torch.exp(outputs[1]) + outputs[1]
            w = weights[0]
            # Pad the weights with trailing singleton dimensions so they
            # broadcast against the per-element losses.
            if len(w.shape) < len(losses.shape):
                if isinstance(w, torch.Tensor):
                    shape = tuple(w.shape)
                else:
                    shape = w.shape
                shape = tuple(-1 if x is None else x for x in shape)
                w = w.reshape(shape + (1,) *
                              (len(losses.shape) - len(w.shape)))

            loss = losses * w
            loss = loss.mean()
            if regularization_loss is not None:
                loss += regularization_loss()
            return loss
    else:
        output_types = ['prediction', 'embedding']
        loss = dc.models.losses.L2Loss()
    super(MultitaskRegressor,
          self).__init__(model,
                         loss,
                         output_types=output_types,
                         regularization_loss=regularization_loss,
                         **kwargs)
def _build_fused(self, hp):
    """Build the recurrent layer, the linear readout and the cost.

    Reads ``self.x``, ``self.y``, ``self.c_mask`` and ``self.rng``; sets
    ``self.h``, ``self.cost_lsq``, ``self.y_hat`` and ``self.y_hat_loc``.

    NOTE(review): this function mixes ``torch``/``F`` calls with ``tf``
    graph-building calls.  Only the 'relu+' activation was repaired here; the
    remaining cross-framework calls are left as they were and need a
    framework decision before this can run end to end.

    Parameters
    ----------
    hp: dict
        hyperparameters.  Keys read here: 'n_input', 'n_rnn', 'n_output',
        'activation', 'rnn_type', 'loss_type', and, depending on
        'rnn_type', 'alpha', 'sigma_rec' and 'w_rec_init'.

    Raises
    ------
    NotImplementedError
        if ``hp['rnn_type']`` is not one of the handled cell types.
    """
    n_input = hp['n_input']
    n_rnn = hp['n_rnn']
    n_output = hp['n_output']

    # Activation functions
    if hp['activation'] == 'power':
        f_act = lambda x: torch.square(F.relu(x))
    elif hp['activation'] == 'retanh':
        f_act = lambda x: torch.tanh(F.relu(x))
    elif hp['activation'] == 'relu+':
        # Shifted ReLU.  The previous form, nn.relu(x + init.constant_(1.)),
        # could not be called: constant_ is an in-place initialiser that
        # needs a tensor and a value, and the sibling branches already use
        # F.relu.  Adding the Python scalar broadcasts over x.
        f_act = lambda x: F.relu(x + 1.0)
    else:
        f_act = getattr(F, hp['activation'])

    # Recurrent activity
    if hp['rnn_type'] == 'LeakyRNN':
        n_in_rnn = self.x.get_shape().as_list()[-1]
        # LeakyRNNCell receives the activation *name*, not f_act.
        cell = LeakyRNNCell(n_rnn,
                            n_in_rnn,
                            hp['alpha'],
                            sigma_rec=hp['sigma_rec'],
                            activation=hp['activation'],
                            w_rec_init=hp['w_rec_init'],
                            rng=self.rng)
    elif hp['rnn_type'] == 'LeakyGRU':
        cell = LeakyGRUCell(n_rnn,
                            hp['alpha'],
                            sigma_rec=hp['sigma_rec'],
                            activation=f_act)
    elif hp['rnn_type'] == 'LSTM':
        cell = tf.contrib.rnn.LSTMCell(n_rnn, activation=f_act)
    elif hp['rnn_type'] == 'GRU':
        cell = tf.contrib.rnn.GRUCell(n_rnn, activation=f_act)
    else:
        # NOTE(review): the message lists EILeakyGRU, which has no branch
        # above.
        raise NotImplementedError("""rnn_type must be one of LeakyRNN, LeakyGRU, EILeakyGRU, LSTM, GRU """)

    # Dynamic rnn with time major
    # NOTE(review): a torch dtype is passed here while the readout below
    # uses tf.float32 -- confirm which one dynamic_rnn expects.
    self.h, states = rnn.dynamic_rnn(cell,
                                     self.x,
                                     dtype=torch.float32,
                                     time_major=True)

    # Output
    with tf.variable_scope("output"):
        # Using default initialization `glorot_uniform_initializer`
        w_out = tf.get_variable('weights', [n_rnn, n_output],
                                dtype=tf.float32)
        b_out = tf.get_variable('biases', [n_output],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(
                                    0.0, dtype=tf.float32))

    # Flatten the leading axes so the readout is a single matmul.
    h_shaped = tf.reshape(self.h, (-1, n_rnn))
    y_shaped = tf.reshape(self.y, (-1, n_output))
    # y_hat_ shape (n_time*n_batch, n_unit)
    y_hat_ = tf.matmul(h_shaped, w_out) + b_out

    # The cost is stored in self.cost_lsq for both loss types.
    if hp['loss_type'] == 'lsq':
        # Least-square loss
        y_hat = tf.sigmoid(y_hat_)
        self.cost_lsq = tf.reduce_mean(
            tf.square((y_shaped - y_hat) * self.c_mask))
    else:
        y_hat = tf.nn.softmax(y_hat_)
        # Cross-entropy loss
        self.cost_lsq = tf.reduce_mean(
            self.c_mask * tf.nn.softmax_cross_entropy_with_logits(
                labels=y_shaped, logits=y_hat_))

    # Restore the leading axes, taking the second one from self.h.
    self.y_hat = tf.reshape(y_hat, (-1, tf.shape(self.h)[1], n_output))
    # Split off the first output unit from the remaining n_output - 1 units;
    # only the latter feed the population-vector readout.
    y_hat_fix, y_hat_ring = tf.split(self.y_hat, [1, n_output - 1], axis=-1)
    self.y_hat_loc = tf_popvec(y_hat_ring)
def l2_normalize(self, feat, eps=1e-12):
    """Scale ``feat`` to unit L2 norm along dimension 1.

    For a 2-D input each row is divided by its Euclidean norm.  Inputs with
    more than two dimensions are normalised along dimension 1 as well.

    Parameters
    ----------
    feat: torch.Tensor
        tensor with at least two dimensions.
    eps: float
        lower bound applied to the norm before dividing, so that an all-zero
        slice yields zeros rather than NaN.  Slices whose norm is at least
        ``eps`` are divided by their exact norm.

    Returns
    -------
    torch.Tensor
        tensor of the same shape as ``feat``.
    """
    # keepdim=True lets the norm broadcast against feat directly, which
    # replaces the transpose / divide / transpose sequence.
    norm = torch.sqrt(torch.sum(torch.square(feat), dim=1, keepdim=True))
    return feat / torch.clamp(norm, min=eps)