def accGradParameters(self, input, gradOutput, scale=1):
    """Accumulate parameter gradients for the wrapped network and the bias.

    The wrapped network receives ``[input, self.partition]`` together with
    ``gradOutput`` and ``scale``.  When a bias is present, the scaled product
    ``gradOutput^T @ self.addBuffer`` is added into ``self.gradBias`` at the
    columns selected by ``self.partition``.

    Returns:
        None. ``self.buffer`` and ``self.gradBias`` are updated in place.
    """
    self.network.accGradParameters([input, self.partition], gradOutput, scale)

    if self.bias is None:
        return

    # The scratch buffer is created lazily, with the same type as ``input``.
    if self.buffer is None:
        self.buffer = input.new()
    n_columns = gradOutput.size(1)
    self.buffer.resize_(n_columns)

    # buffer <- scale * (gradOutput^T @ addBuffer)
    torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer)
    self.buffer.mul_(scale)

    as_row = self.buffer.view(1, self.buffer.nelement())
    self.gradBias.index_add_(1, self.partition, as_row)
def _update_u_v(self):
    """Refresh the power-iteration vectors and re-normalise the weight.

    Reads ``<name>_u``, ``<name>_v`` and ``<name>_bar`` from ``self.module``,
    runs ``self.power_iterations`` rounds of power iteration on the weight
    flattened to ``(rows, -1)``, and stores ``w / sigma`` back on the module
    under ``self.name``, where ``sigma = u . (W v)``.
    """
    prefix = self.name
    u = getattr(self.module, prefix + "_u")
    v = getattr(self.module, prefix + "_v")
    w = getattr(self.module, prefix + "_bar")

    rows = w.data.shape[0]
    for _ in range(self.power_iterations):
        # Both updates work on raw ``.data`` tensors.
        v.data = l2normalize(torch.mv(torch.t(w.view(rows, -1).data), u.data))
        u.data = l2normalize(torch.mv(w.view(rows, -1).data, v.data))

    # sigma is computed on the non-``.data`` tensors, unlike the loop above.
    w_matrix = w.view(rows, -1)
    sigma = u.dot(w_matrix.mv(v))
    setattr(self.module, prefix, w / sigma.expand_as(w))
def updateOutput(self, input):
    """Compute a (batched) matrix-vector product into ``self.output``.

    ``input`` is a pair ``(M, v)``.  A 2-D ``M`` requires a 1-D ``v`` and
    yields ``M @ v``; a 3-D ``M`` requires a 2-D ``v`` and yields one product
    per batch entry.  When ``self.trans`` is truthy the matrix dimensions of
    ``M`` are transposed first.

    Returns:
        ``self.output``, resized and overwritten in place.
    """
    M, v = input
    ndim = M.ndimension()
    assert ndim == 2 or ndim == 3

    if ndim == 2:
        assert v.ndimension() == 1
        matrix = M.transpose(0, 1) if self.trans else M
        self.output.resize_(matrix.size(0))
        torch.mv(matrix, v, out=self.output)
        return self.output

    assert v.ndimension() == 2
    matrix = M.transpose(1, 2) if self.trans else M
    n_batch, n_rows = matrix.size(0), matrix.size(1)
    # bmm needs a trailing singleton dimension; it is dropped again afterwards.
    self.output.resize_(n_batch, n_rows, 1)
    column_vectors = v.view(v.size(0), v.size(1), 1)
    torch.bmm(matrix, column_vectors, out=self.output)
    self.output.resize_(n_batch, n_rows)
    return self.output
def evaluate(data_source, batch_size=10, window=args.window):
    """Evaluate the language model with a pointer/cache mixture.

    For every position the vocabulary softmax is mixed (weight
    ``args.lambdasm``) with a pointer distribution built from the last
    ``window`` hidden states and their next-word one-hot vectors, sharpened by
    ``args.theta``.  The negative log-probability of the target under that
    mixture is accumulated.

    Relies on module-level ``args``, ``model``, ``corpus``, ``get_batch``,
    ``one_hot`` and ``repackage_hidden``.  Note that the default for
    ``window`` is read from ``args`` once, when the function is defined.

    Returns:
        ``total_loss / len(data_source)``.
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # Progress report: position, corpus length, running perplexity.
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        # Hidden states of the last entry in rnn_outs are used as pointer keys.
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        # start_idx is the offset of this batch inside the concatenated history.
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            # Default to the plain vocabulary distribution.
            p = vocab_loss
            if start_idx + idx > window:
                # Only the previous `window` positions take part in the pointer.
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                # Attention-weighted sum of the one-hot next words.
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        # Keep only the most recent `window` entries for the next batch.
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
def gmm_batch_model(data):
    """Pyro model of a two-component Gaussian mixture over a data subsample.

    Registers the learnable parameters ``"p"`` (mixing weight, initial 0.3)
    and ``"sigma"`` (shared scale, initial 1.0); component means are fixed at
    -1 and +1.  Inside the ``"data"`` iarange a categorical assignment ``z``
    is sampled per point and ``data[batch]`` is observed under the resulting
    Normal.
    """
    mix_weight = pyro.param("p", Variable(torch.Tensor([0.3]), requires_grad=True))
    probs = torch.cat([mix_weight, 1 - mix_weight])
    sigma = pyro.param("sigma", Variable(torch.Tensor([1.0]), requires_grad=True))
    mus = Variable(torch.Tensor([-1, 1]))

    with pyro.iarange("data", len(data)) as batch:
        n = len(batch)
        batch_probs = probs.unsqueeze(0).expand(n, 2)
        z = pyro.sample("z", dist.Categorical(batch_probs))
        # z is expected to be an (n, 2) assignment matrix here.
        assert z.size() == (n, 2)
        mu = torch.mv(z, mus)
        pyro.observe("x", dist.Normal(mu, sigma.expand(n)), data[batch])
def predict(self, model, xtest):
    """Return the predictive mean and clipped variance at ``xtest``.

    Features come from ``self.get_features(model, xtest)``.  The mean is
    ``phi @ self.m`` and the variance is the column sum of
    ``phi^T * (K_inv @ phi^T)`` plus ``1 / self.beta``, clipped below at
    ``1e-5``.

    Raises:
        AssertionError: if ``K``, ``K_inv``, ``m``, ``alpha`` or ``beta`` is
            still ``None`` (the fitting step has not been run).
    """
    xtest = torch.Tensor(xtest).float()
    phi_x = self.get_features(model, xtest)

    # The optimisation step must have been run once before predicting.
    assert (
        self.K is not None
        and self.K_inv is not None
        and self.m is not None
        and self.alpha is not None
        and self.beta is not None
    )

    mu = torch.mv(phi_x, self.m)
    phi_t = phi_x.t()
    projected = torch.mm(self.K_inv, phi_t)
    s2 = torch.mul(phi_t, projected).sum(0).add(1 / self.beta)
    return mu, np.clip(s2, a_min=1e-5, a_max=np.inf)
def _scramble(self):
    """Draw the random shift and scrambling matrices for the Sobol state.

    A CPU generator seeded with ``self.seed`` is used when a seed is set;
    otherwise the default generator is used.  ``self.shift`` receives one
    integer per dimension built from ``MAXBIT`` random bits, and
    ``self.sobolstate`` is scrambled in place with random lower-triangular
    bit matrices.
    """
    generator: Optional[torch.Generator] = None
    if self.seed is not None:
        generator = torch.Generator()
        generator.manual_seed(self.seed)

    cpu = torch.device("cpu")

    # Shift vector: random bits combined with powers of two.
    shift_bits = torch.randint(
        2, (self.dimension, self.MAXBIT), device=cpu, generator=generator
    )
    bit_weights = torch.pow(2, torch.arange(0, self.MAXBIT, device=cpu))
    self.shift = torch.mv(shift_bits, bit_weights)

    # Lower-triangular bit matrices, stacked across dimensions.
    ltm_shape = (self.dimension, self.MAXBIT, self.MAXBIT)
    lower_triangular = torch.randint(
        2, ltm_shape, device=cpu, generator=generator
    ).tril()

    torch._sobol_engine_scramble_(self.sobolstate, lower_triangular, self.dimension)
def log_lh(rbm, spins, L, nsamples=10000):
    """Estimate the average log-likelihood of ``spins`` under ``rbm``.

    For up to ``nsamples`` configurations the unnormalised log-probability
    ``b . s + sum(log(1 + exp(c + W s)))`` is accumulated, each term divided
    by ``nsamples``, and the log partition function ``log_Zeta(rbm, L)`` is
    subtracted.

    Args:
        rbm: object exposing ``b``, ``c``, ``W`` and ``sample_h``.
        spins: indexable collection of visible configurations.
        L: forwarded unchanged to ``log_Zeta``.
        nsamples: maximum number of configurations used, and the divisor of
            every term.

    Returns:
        The estimate, or ``0`` if evaluating any configuration raised.
    """
    log_Z = log_Zeta(rbm, L)
    log_p_star = 0
    # The result is unused; the call is kept in case sample_h has side
    # effects (e.g. on random state) that callers depend on.
    rbm.sample_h(spins)
    for j in range(min(nsamples, len(spins))):
        try:
            visible_term = torch.dot(rbm.b, spins[j]) / nsamples
            hidden_term = torch.sum(
                torch.log(1 + torch.exp(rbm.c + torch.mv(rbm.W, spins[j])))
            ) / nsamples
            log_p_star += visible_term + hidden_term
        except Exception:
            # Best-effort fallback, as before.  Narrowed from a bare
            # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
            return 0
    return log_p_star - log_Z
def prs_model(beta_hat, obs_error):
    """Pyro model relating latent effects to observed ``beta_hat``.

    Samples a Bernoulli indicator vector ``z`` (probability ``p_causal``,
    length ``N``) and latent effects ``beta_latent`` from
    ``Normal(GENETIC_MEAN, GENETIC_SD)``, then conditions a multivariate
    normal with mean ``obs_error @ (beta * z)`` and covariance
    ``obs_error * sigma_sq_e`` on ``beta_hat``.

    Uses the module-level ``p_causal``, ``N``, ``GENETIC_MEAN``,
    ``GENETIC_SD`` and ``sigma_sq_e``.
    """
    indicator_prior = dist.Bernoulli(torch.tensor([p_causal] * N))
    z = pyro.sample('z', dist.Independent(indicator_prior, 1))

    effect_prior = dist.Normal(GENETIC_MEAN, GENETIC_SD)
    beta = pyro.sample('beta_latent', dist.Independent(effect_prior, 1))

    likelihood = dist.MultivariateNormal(
        torch.mv(obs_error, beta * z),
        covariance_matrix=obs_error * sigma_sq_e,
    )
    beta_hat = pyro.sample('beta_hat', likelihood, obs=beta_hat)
    return beta_hat
def calc_linearized_pairwise_ranking_loss(last_layer, pairwise_prefs, demo_cnts, confidence=1):
    """Log-likelihood of pairwise preferences under a linear reward.

    Each demonstration's return is ``confidence * demo_cnts @ w``, where
    ``w`` is the squeezed weight of ``last_layer`` (the bias is not used).
    Every ``(i, j)`` in ``pairwise_prefs`` contributes a two-way cross-entropy
    term whose target is index 1, i.e. demonstration ``j``.

    Returns:
        The negated summed cross-entropy, or ``tensor([-inf])`` when the
        return of the first demonstration is negative (positivity prior).
    """
    # No gradients are needed anywhere in this computation.
    with torch.no_grad():
        weights = last_layer.weight.data.squeeze()
        demo_returns = confidence * torch.mv(demo_cnts, weights)

        # positivity prior
        if demo_returns[0] < 0.0:
            return torch.Tensor([-float("Inf")])

        n_pairs = len(pairwise_prefs)
        # Each row holds the pair of returns (return_i, return_j).
        outputs = torch.zeros(n_pairs, 2)
        for row, (i, j) in enumerate(pairwise_prefs):
            outputs[row, 0] = demo_returns[i]
            outputs[row, 1] = demo_returns[j]
        labels = torch.ones(n_pairs).long()

        criterion = nn.CrossEntropyLoss(reduction='sum')
        return -criterion(outputs, labels)
def newton_grad(loss, model):
    """Replace the gradients of ``model.get_param_g()`` by a Newton step.

    After back-propagating ``loss``, a Hessian containing only the diagonal
    second derivatives is assembled, ``H^-1 @ grad`` is written into element 0
    of each parameter's ``.grad``, and the gradients are clipped to norm 1.

    Only element 0 of each gradient is read and written, so the parameters
    are presumably one-element tensors.
    """
    loss.backward(retain_graph=True)
    gs = model.get_param_g()
    n_params = len(gs)

    grad = torch.zeros(n_params)
    for idx, param in enumerate(gs):
        grad[idx] = param.grad.data[0]

    # Second-order pass: only the diagonal entries are filled in.
    hessian = torch.zeros(n_params, n_params)
    first_order = torch.autograd.grad(loss, gs, create_graph=True)
    for idx, dl_dg in enumerate(first_order):
        second_order = torch.autograd.grad(dl_dg, gs, retain_graph=True)
        hessian[idx][idx] = second_order[idx].data[0]

    newton_step = torch.mv(torch.inverse(hessian), grad)
    for idx, param in enumerate(gs):
        param.grad.data[0] = newton_step[idx]

    nn.utils.clip_grad_norm(gs, 1)
def mv(self, vs):
    """Multiply this block-structured matrix by the parameter vector ``vs``.

    For every layer the weight part of ``vs`` (and the bias part, when the
    layer has a bias) is flattened, multiplied by that layer's block in
    ``self.data``, and reshaped back to the weight/bias sizes.

    Returns:
        A ``PVector`` over ``vs.layer_collection`` holding the products.
    """
    vs_dict = vs.get_dict_representation()
    out_dict = dict()
    layers = self.generator.layer_collection.layers

    for layer_id, layer in layers.items():
        has_bias = layer.bias is not None

        flat = vs_dict[layer_id][0].view(-1)
        if has_bias:
            flat = torch.cat([flat, vs_dict[layer_id][1].view(-1)])

        product = torch.mv(self.data[layer_id], flat)

        # NOTE(review): ``.size`` is unpacked as an attribute (not called),
        # so ``layer.weight`` / ``layer.bias`` are presumably layer
        # descriptors rather than tensors -- confirm.
        weight_part = product[:layer.weight.numel()].view(*layer.weight.size)
        if has_bias:
            bias_part = product[layer.weight.numel():].view(*layer.bias.size)
            out_dict[layer_id] = (weight_part, bias_part)
        else:
            out_dict[layer_id] = (weight_part,)

    return PVector(layer_collection=vs.layer_collection, dict_repr=out_dict)
def test_torch_ista(seeds, adj, size, alpha, rho, iters):
    """Run ``iters`` thresholded-gradient iterations for every seed node.

    Builds ``Q = D^-1/2 (D - (1 - alpha)/2 (D + A)) D^-1/2`` from the
    adjacency matrix, then for each seed iterates
    ``q <- max(q - grad - rad, 0)`` and ``grad <- grad + Q q``.  Tensors are
    moved with ``.hammerblade()`` when ``args.hammerblade`` is set.

    Returns:
        The per-seed results ``q * sqrt(d)``, concatenated as columns.
    """
    out = torch.empty(0)

    # Degree vectors / matrices
    degrees = np.asarray(adj.sum(axis=-1)).squeeze()
    deg_sqrt_np = np.sqrt(degrees)
    deg_inv_sqrt_np = 1 / deg_sqrt_np
    D = sparse.diags(degrees)
    Dn_sqrt = sparse.diags(deg_inv_sqrt_np)

    # Normalized matrix
    Q = D - ((1 - alpha) / 2) * (D + adj)
    Q = Dn_sqrt @ Q @ Dn_sqrt

    # Convert to torch (float32 for the dense vectors)
    Q = spy_sparse2torch_sparse(Q)
    d_sqrt = torch.from_numpy(deg_sqrt_np).float()
    dn_sqrt = torch.from_numpy(deg_inv_sqrt_np).float()
    zero = torch.zeros(size)

    use_hb = args.hammerblade
    if use_hb:
        Q = Q.hammerblade()
        d_sqrt = d_sqrt.hammerblade()
        dn_sqrt = dn_sqrt.hammerblade()
        zero = zero.hammerblade()

    for seed in tqdm(seeds):
        indicator = np.zeros(adj.shape[0])
        indicator[seed] = 1
        s = torch.from_numpy(indicator).float()
        if args.hammerblade:
            s = s.hammerblade()

        q = zero
        rad = rho * alpha * d_sqrt
        grad = (-alpha * dn_sqrt * s).clone()

        for _ in range(iters):
            q = torch.max(q - grad - rad, zero)
            grad = grad + torch.mv(Q, q)

        column = torch.mul(q, d_sqrt).cpu().view(size, 1)
        out = torch.cat((out, column), 1)

    return out
def resampling(self, weights, particles):
    """Greedily pick ``self.num_particles`` particles.

    The first pick maximises ``gram(particles, particles) @ weights``.  Each
    later pick maximises that same score minus the summed gram values
    between every particle and the picks made so far.

    Returns:
        A float tensor of shape ``(num_particles, dim)`` with the picks.
    """
    n = self.num_particles
    selected = torch.zeros([n, self.dim], dtype=torch.float)

    flat_weights = weights.view(-1)
    weight_gram = torch.mv(self.gram_matrix(particles, particles), flat_weights)
    selected[0] = particles[torch.argmax(weight_gram)]

    for p in range(1, n):
        # Penalise similarity to everything already selected.
        penalty = torch.sum(self.gram_matrix(particles, selected[0:p]), 1)
        selected[p] = particles[torch.argmax(weight_gram - penalty)]

    return selected
def forward(self, *input):
    """Apply the affine map ``weights @ x + bias`` to the first argument.

    Args:
        *input: positional inputs; only ``input[0]`` is used.

    Returns:
        The layer output, also stored as ``self.forward_pass``.  The input is
        stored as ``self.input_from_layer``.
    """
    layer_input = input[0]
    self.input_from_layer = layer_input

    # Matrix-vector product with the weights, plus the bias.
    self.forward_pass = torch.mv(self.weights, layer_input) + self.bias
    return self.forward_pass
def update_inv_jacobian_approx(B, deltaZ, deltaG):
    """Rank-one update of the inverse-Jacobian approximation (eq. 10 in the paper).

    Computes ``B + ((dZ - B dG) / (dZ . B dG + 1e-10)) (dZ^T B)``.

    :param B: nxn inverse Jacobian approximation
    :param deltaZ: n vector
    :param deltaG: n vector
    :return: the updated nxn approximation
    """
    B_dg = torch.mv(B, deltaG)  # n

    # The small constant guards against a vanishing denominator.
    denominator = torch.dot(deltaZ, B_dg) + 1e-10
    column = torch.unsqueeze((deltaZ - B_dg) / denominator, 1)  # n x 1
    row = torch.matmul(torch.unsqueeze(deltaZ, 0), B)  # 1 x n

    return B + torch.matmul(column, row)  # n x n
def oracle(self, w, x, y):
    """Objective and subgradient of the L1-penalised task error.

    The objective is ``task_error(w, x, y) + 0.5 * mu * ||w||_1``.  The
    returned direction is ``(2 / n_samples) * x^T (x w - y)`` plus
    ``0.5 * mu * sign(w)``, where ``sign(0) = 0`` is the subgradient choice
    at zero.

    Args:
        w: weight column; ``torch.mm(x, w)`` is squeezed on its last
            dimension, so it presumably has shape (n_features, 1).
        x: design matrix.
        y: targets, one per row of ``x``.

    Returns:
        dict with ``'obj'`` (objective value) and ``'dw'`` (same shape as
        ``w``).
    """
    self._validate_inputs(w, x, y)
    mu = self.hparams.mu

    obj = self.task_error(w, x, y) + 0.5 * mu * torch.sum(torch.abs(w))

    # Subgradient of |w|.  torch.sign keeps the dtype and device of ``w``;
    # the previous torch.zeros(w.size()) buffer was always a default-dtype
    # CPU tensor, which breaks the masked assignment for a CUDA ``w``.
    du = torch.sign(w.detach())

    residual = torch.mm(x, w).squeeze(-1) - y
    dw = 2 / self.hparams.n_samples * torch.mv(x.t(), residual)
    dw = dw.unsqueeze(-1)
    dw += 0.5 * mu * du

    return {'obj': obj, 'dw': dw}
def step(self, g, y, dy):
    """Advance the system by ``self.l`` integration sub-steps of size ``self.dt``.

    Each sub-step decays the phase ``self.x``, evaluates Gaussian basis
    activations at the new phase, forms the forcing term from ``self.w`` and
    integrates the second-order system pulling ``y`` towards the goal ``g``.
    ``self.i`` is incremented once per sub-step.

    Args:
        g: goal; flattened to one dimension before use.
        y: current position.
        dy: current velocity.

    Returns:
        ``(y, dy, dz)`` after the last sub-step.
    """
    g = g.reshape(-1, 1)[:, 0]
    z = dy * self.tau
    dt = self.dt

    for _ in range(self.l):
        # Phase decay
        phase_rate = (-self.a_x * self.x) / self.tau
        self.x = self.x + phase_rate * dt

        # Basis activations and normalised forcing term
        psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2))
        forcing = torch.mv(self.w, psi) * self.x * (g - self.y0) / torch.sum(psi)

        # Derivatives use the state from before this sub-step's update.
        accel = self.a_z * (self.b_z * (g - y) - z) + forcing
        dz = accel / self.tau
        dy = z / self.tau

        y = y + dy * dt
        z = z + dz * dt
        self.i += 1

    return y, dy, dz
def forward(self, x, y):
    """Negative-sampling loss between an image embedding and output embeddings.

    The image for ``x`` is passed through ``self.cnn``, flattened, and mapped
    by ``self.fc1`` to the input vector.  The positive and negative lookups
    from ``self.prepare_inputs`` are embedded with ``self.WO``.

    Returns:
        ``-logsigmoid(vO . vI) - sum(logsigmoid(-samples @ vI))``.
    """
    # The first lookup (for x) is not used by this image-based variant.
    _, y_lookup, neg_lookup = self.prepare_inputs(x, y)

    features = self.cnn(self.get_image(x)).view(-1)
    vI = self.fc1(features)
    vO = self.WO(y_lookup)
    samples = self.WO(neg_lookup)

    pos_score = F.logsigmoid(t.dot(vO, vI))
    neg_score = F.logsigmoid(-t.mv(samples, vI))
    return -pos_score - t.sum(neg_score)
def _oscar_prox_jacobian(y_star, dout=None):
    """Jacobian of the OSCAR proximal operator at ``y_star``.

    Entries of ``y_star`` sharing the same absolute value form a group.  For
    a non-zero entry ``i``, ``J[i, j] = sign(y_i) * sign(y_j) / |group|`` for
    every ``j`` in its group; all other entries are zero.

    Args:
        y_star: 1-D tensor (converted with ``.numpy()``).
        dout: optional vector to multiply the Jacobian with.

    Returns:
        ``J @ dout`` when ``dout`` is given, otherwise ``J`` itself.
    """
    values = y_star.numpy()
    dim = values.shape[0]
    J = torch.zeros(dim, dim)

    _, group_of, group_sizes = np.unique(
        np.abs(values), return_inverse=True, return_counts=True
    )
    signs = np.sign(values)

    for i in range(dim):
        # Rows belonging to zero entries stay all-zero.
        if values[i] == 0:
            continue
        for j in range(dim):
            if group_of[i] == group_of[j]:
                J[i, j] = signs[i] * signs[j] / group_sizes[group_of[i]]

    if dout is None:
        return J
    return torch.mv(J, dout)
def sample(self, x, parameters):
    """Draw binary labels for the inputs ``x``.

    *** NOTE: the precision of the weights is ignored; ``parameters["w_mu"]``
    is treated as the true weight vector.  This is not a draw from the
    predictive distribution. ***

    :param x: input values to predict at
    :param parameters: model parameters; only ``"w_mu"`` is read and nothing
        is updated
    :return: y: tensor of sampled labels
    """
    activations = torch.mv(x, parameters["w_mu"])
    probabilities = self.act(activations)
    return Bernoulli(probabilities).sample()
def dphi_dq(self, dV=None, mH=None):
    """Return ``2 * factor * (mH @ dV) + dV`` for the soft-abs metric.

    With ``alpha = self.metric.msoftabsalpha``, ``gg = dV . dV``,
    ``agg = alpha * gg`` and ``t = tanh(agg)``:

    * ``|agg| >= 1e-4``: ``factor = (n / gg) * (1 - agg / t) + alpha * t``
    * ``|agg| <  1e-4``: ``factor = n * alpha * agg / 3 + alpha * t``

    where ``n = len(dV)``.

    Args:
        dV: 1-D gradient tensor.
        mH: matrix to multiply with ``dV``; computed as ``self.mH(dV)`` when
            omitted.

    Returns:
        A tensor shaped like ``dV``.
    """
    # ``is None`` instead of ``== None``: comparing a tensor with ``==`` is
    # an element-wise operation, not an identity test.
    if mH is None:
        mH = self.mH(dV)

    alpha = self.metric.msoftabsalpha
    n = len(dV)
    gg = torch.dot(dV, dV)
    agg = alpha * gg
    # Convert to a Python float first so numpy never sees a torch tensor.
    t = float(numpy.tanh(float(agg)))

    out = torch.mv(mH, dV)
    if abs(agg) < 1e-4:
        # Algebraically the same as the former (n / (3 * gg)) * agg * agg,
        # because agg = alpha * gg, but it no longer divides by gg and so
        # stays finite when dV is exactly zero.
        # NOTE(review): the Taylor expansion of 1 - agg / tanh(agg) is
        # -agg**2 / 3, so this term may have been meant to be negative to
        # match the other branch -- confirm against the derivation.
        factor = n * alpha * agg / 3 + alpha * t
    else:
        # Fix: the multiplication sign after ``2`` was missing, so this
        # branch tried to call the integer 2 and raised TypeError.
        factor = (n / gg) * (1 - agg / t) + alpha * t

    out = out * 2 * factor
    out = out + dV
    return out
def test_jacobian_plowrank():
    """Check the low-rank representation against its dense tensor.

    For every task in ``nonlinear_tasks`` the diagonal, Frobenius norm,
    trace, matrix-vector product, quadratic form and scalar multiplication
    of the low-rank matrix are compared with the same quantities computed
    from ``get_dense_tensor()``.
    """
    for get_task in nonlinear_tasks:
        loader, lc, parameters, model, function, n_output = get_task()
        model.train()
        generator = Jacobian(
            layer_collection=lc,
            model=model,
            loader=loader,
            function=function,
            n_output=n_output,
        )
        lowrank = PMatLowRank(generator)
        dw = random_pvector(lc, device=device)
        dense = lowrank.get_dense_tensor()

        # get_diag
        check_tensors(torch.diag(dense), lowrank.get_diag(), eps=1e-4)

        # frobenius norm
        frob_lowrank = lowrank.frobenius_norm()
        frob_dense = (dense**2).sum()**.5
        check_ratio(frob_dense, frob_lowrank)

        # trace
        trace_lowrank = lowrank.trace()
        trace_dense = torch.trace(dense)
        check_ratio(trace_lowrank, trace_dense)

        # mv
        mv_dense = torch.mv(dense, dw.get_flat_representation())
        check_tensors(mv_dense, lowrank.mv(dw).get_flat_representation())

        # vTMv
        check_ratio(torch.dot(mv_dense, dw.get_flat_representation()),
                    lowrank.vTMv(dw))

        # TODO: test solve
        # TODO: test inv

        # add, sub, rmul
        check_tensors(1.23 * lowrank.get_dense_tensor(),
                      (1.23 * lowrank).get_dense_tensor())
def attr_level_matching(self, compare_result, word_embeddings_rnn, field_embedding, token_mask):
    '''
    Aggregate token-level comparison results into one attribute-level vector.

    Token importance is the softmax over the tokens of the dot product
    between each row of word_embeddings_rnn and field_embedding, multiplied
    by token_mask after the softmax (so the weights are not re-normalised).
    The weighted sum of compare_result over the tokens is returned,
    flattened to two dimensions.
    '''
    dims = word_embeddings_rnn.size()
    n_batch = dims[0]

    flat_embeddings = word_embeddings_rnn.view(n_batch * dims[1], -1)
    scores = torch.mv(flat_embeddings, field_embedding).view(n_batch, 1, -1)

    weights = F.softmax(scores, dim=2).view(n_batch, -1)
    weights = (weights * token_mask.float()).view(n_batch, 1, -1)

    pooled = torch.bmm(weights, compare_result)
    return pooled.view(pooled.size()[0], -1)
def step(self, step_input, Hidden_State, Cell_State, rHidden_State, rCell_State):
    """Run one time step of the graph-convolution LSTM and the plain LSTM.

    The graph branch concatenates the outputs of ``self.gc_list[0..K-1]``
    with the hidden state, gates a neighbour-weighted cell state, and yields
    a new hidden/cell pair.  The plain branch is a standard LSTM update on
    ``step_input``.  Both hidden states are then blended using the variances
    of ``step_input`` and of the graph-convolution features.

    Returns:
        ``(Hidden_State, Cell_State, gc, rHidden_State, rCell_State, pred)``.
    """
    # ---- GC-LSTM ----
    x = step_input
    gc = self.gc_list[0](x)
    for hop in range(1, self.K):
        gc = torch.cat((gc, self.gc_list[hop](x)), 1)

    combined = torch.cat((gc, Hidden_State), 1)
    forget_gate = torch.sigmoid(self.fl(combined))
    input_gate = torch.sigmoid(self.il(combined))
    output_gate = torch.sigmoid(self.ol(combined))
    candidate = torch.tanh(self.Cl(combined))

    # The cell state is re-weighted by the tiled last adjacency matrix
    # applied to the neighbour weights.
    tiled_adjacency = Variable(
        self.A_list[-1].repeat(self.output_dim, self.output_dim),
        requires_grad=False,
    ).to(self.device)
    NC = torch.mul(Cell_State, torch.mv(tiled_adjacency, self.Neighbor_weight))

    Cell_State = forget_gate * NC + input_gate * candidate
    Hidden_State = output_gate * torch.tanh(Cell_State)

    # ---- LSTM ----
    rcombined = torch.cat((step_input, rHidden_State), 1)
    r_forget = torch.sigmoid(self.rfl(rcombined))
    r_input = torch.sigmoid(self.ril(rcombined))
    r_output = torch.sigmoid(self.rol(rcombined))
    r_candidate = torch.tanh(self.rCl(rcombined))
    rCell_State = r_forget * rCell_State + r_input * r_candidate
    rHidden_State = r_output * torch.tanh(rCell_State)

    # ---- Kalman filtering ----
    var1, var2 = torch.var(step_input), torch.var(gc)
    numerator = Hidden_State * var1 * self.c + rHidden_State * var2
    pred = numerator / (var1 + var2 * self.c)

    return Hidden_State, Cell_State, gc, rHidden_State, rCell_State, pred
def oracle(self, w, x, y):
    """Objective value and closed-form minimiser of the ridge problem.

    The objective is ``task_error(w, x, y) + 0.5 * mu * ||w||^2``.  The
    returned solution is ``(x^T x + 0.5 * n * mu * I)^-1 x^T y``, with ``n``
    the number of rows of ``x``.

    Args:
        w: current weights; used only for the objective value.
        x: design matrix.
        y: targets, one per row of ``x``.

    Returns:
        dict with ``'obj'`` and ``'sol'`` (shape ``(n_features, 1)``).
    """
    self._validate_inputs(w, x, y)

    # regularization hyper-parameter
    mu = self.hparams.mu

    obj = self.task_error(w, x, y) + 0.5 * mu * torch.square(torch.norm(w))

    n, n_features = x.size(0), x.size(1)
    x_t = x.transpose(0, 1)
    # Build the identity with the dtype and device of ``x``; a default
    # torch.eye is a float32 CPU tensor and cannot be added to a CUDA Gram
    # matrix.
    identity = torch.eye(n_features, dtype=x.dtype, device=x.device)
    intermediary = torch.inverse(torch.mm(x_t, x) + 0.5 * n * mu * identity)

    sol = torch.mv(torch.mm(intermediary, x_t), y)
    sol = sol.view(n_features, 1)

    return {'obj': obj, 'sol': sol}
def forward(ctx, Mu, Var, beta, Y, H, U, isCuda):
    """Forward pass of the custom cost; saves its inputs for backward.

    For every ``(j, k)`` slice the observation covariance
    ``SigmaY = I / beta + H diag(Var[:, j, k]) H^T`` is formed.  Its
    log-determinant and the product ``SigmaY^-1 dY`` are accumulated and
    combined with the ``Var``/``dU`` terms and the reconstruction error of
    ``U`` into a scalar cost.

    Args:
        ctx: autograd context; receives ``(Mu, Var, beta, Y, H, U)``.
        Mu, Var, U: tensors of size (N, T, B), as unpacked from ``Var``.
        Y: tensor of size (M, T, B).
        beta: precision; passed to ``torch.log``, so it must be a tensor.
        H: matrix multiplied with ``Var[:, j, k]`` and its own transpose;
            presumably (M, N).
        isCuda: when true, the temporaries are moved to the GPU.

    Returns:
        ``0.5 * (cost1 + cost2) / (T * N * B)``.
    """
    (N, T, B) = Var.size()
    (M, T, B) = Y.size()
    invVar = torch.reciprocal(Var)
    hMu = multip(H, Mu)
    dU = U - Mu
    dY = Y - hMu

    logdetY = torch.FloatTensor([0])
    invSigmaYdY = torch.zeros(M, T, B)
    Id = torch.eye(M)
    if isCuda:
        Id = Id.cuda()
        invSigmaYdY = invSigmaYdY.cuda()
        logdetY = logdetY.cuda()

    for j in range(T):
        for k in range(B):
            SigmaY = (1 / beta) * Id + torch.mm(
                (H * Var[:, j, k]), H.transpose(0, 1))
            # torch.eig is deprecated and has been removed from current
            # PyTorch.  For the symmetric positive-definite SigmaY built
            # above, the sum of the log-eigenvalues equals its
            # log-determinant.
            logdetY = logdetY + torch.logdet(SigmaY)
            invSigmaY = torch.inverse(SigmaY)
            invSigmaYdY[:, j, k] = torch.mv(invSigmaY, dY[:, j, k])

    invVardU = torch.mul(invVar, dU)
    ctx.save_for_backward(Mu, Var, beta, Y, H, U)

    cost2 = logdetY + torch.sum(torch.mul(dY, invSigmaYdY)) - torch.sum(
        torch.log(Var)) - torch.sum(torch.mul(dU, invVardU))

    # NOTE(review): 2 * 3.14 approximates 2*pi.  It only shifts the cost by
    # a constant; left unchanged so reported values stay comparable.
    logtwopi = torch.log(torch.FloatTensor([2 * 3.14]))
    if isCuda:
        logtwopi = logtwopi.cuda()

    cost1 = M * T * B * torch.log(
        beta) - N * T * B * logtwopi - beta * torch.sum(
            (Y - multip(H, U)).pow(2))
    cost = 0.5 * (cost1 + cost2)
    return cost / (T * N * B)
def calc_values(self, model, xtrain, ytrain, alpha, beta):
    """Fit the Bayesian linear model on features of ``xtrain``.

    Stores the following on the instance for later use (e.g. prediction):

    * ``phi_train``: features from ``self.get_features(model, xtrain)``
    * ``D``: number of feature columns
    * ``K = alpha * I + beta * phi^T phi`` and ``K_inv``
    * ``m = beta * K_inv phi^T y``
    * ``alpha`` and ``beta``

    Args:
        model: forwarded unchanged to ``self.get_features``.
        xtrain: 2-D array-like exposing ``.shape``.
        ytrain: array-like of targets.
        alpha: scalar weight of the identity term.
        beta: scalar weight of the ``phi^T phi`` term.
    """
    xtrain = torch.Tensor(xtrain).float().view([-1, xtrain.shape[1]])
    ytrain = torch.Tensor(ytrain).float().view([-1, 1])

    self.phi_train = self.get_features(model, xtrain)
    self.D = self.phi_train.shape[1]

    phi_t = self.phi_train.t()
    # K = alpha * I + beta * phi^T phi.  Written with keyword scaling
    # factors instead of the deprecated positional-scalar addmm overload.
    # In addmm's own naming, ``beta`` scales the added matrix and ``alpha``
    # scales the product, hence the crossed names.
    self.K = torch.addmm(torch.eye(self.D), phi_t, self.phi_train,
                         beta=alpha, alpha=beta)
    self.K_inv = torch.inverse(self.K)
    self.m = torch.mv(torch.mm(self.K_inv, phi_t),
                      ytrain.view((-1, ))).mul(beta)
    self.alpha = alpha
    self.beta = beta
def extendXYScale(self, xScale=None, yScale=None):
    """Fold input/output scalers into the first and last layer weights.

    After this call the model can be used without the scalers and gives the
    same result.  The tensors returned by ``self.state_dict()`` are modified
    in place.

    Args:
        xScale: optional ``(xmean, xstd)`` numpy arrays whose first axis has
            length 1.
        yScale: optional ``(ymean, ystd)`` numpy arrays of the same form.
    """
    state_dct = self.state_dict()
    nlyr = len(self.layers)  # layers are addressed as main.0 .. main.(nlyr - 1)

    if xScale is not None:
        w0 = state_dct['main.0.weight']
        b0 = state_dct['main.0.bias']
        xmean, xstd = xScale
        # The bias absorbs the mean shift before the weights are rescaled.
        shift = torch.from_numpy(np.squeeze(xmean / xstd, axis=0)).float()
        b0 -= torch.mv(w0, shift)
        w0 /= torch.from_numpy(xstd).float()

    if yScale is not None:
        last = nlyr - 1
        wf = state_dct['main.%d.weight' % last]
        bf = state_dct['main.%d.bias' % last]
        ymean, ystd = yScale
        wf *= torch.from_numpy(ystd.T).float()
        bf *= torch.from_numpy(np.squeeze(ystd, axis=0)).float()
        bf += torch.from_numpy(np.squeeze(ymean, axis=0)).float()
def forward(self, query_src, query_lengths, res_src, res_lengths, state=None):
    """Score a batch of (query, response) pairs.

    Both sequences are encoded with the shared ``self.encoder``.  The first
    element of each final encoder state is flattened to ``(batch, -1)``, the
    enabled interaction features (difference, element-wise product, bilinear,
    inner product) are concatenated, and the result is scored by either an
    MLP or a linear (``'LR'``) scorer.

    Args:
        query_src: query tokens; the second dimension is the batch size.
        query_lengths: lengths forwarded to the encoder.
        res_src: response tokens.
        res_lengths: lengths forwarded to the encoder.
        state: unused.

    Returns:
        ``(scores, query_encoder_final)`` with ``scores`` squeezed.

    Raises:
        ValueError: if ``self.score_fn_type`` is neither ``'MLP'`` nor
            ``'LR'``.
    """
    _, batch = query_src.size()

    # encode query and response
    query_encoder_final, query_memory_bank = self.encoder(query_src, lengths=query_lengths)
    res_encoder_final, res_memory_bank = self.encoder(res_src, lengths=res_lengths)

    # Normalise both final states to tuples.  Fix: the non-tuple branch used
    # to assign ``res_encoder_final`` and leave ``res_encoder_hidden``
    # undefined.
    if isinstance(query_encoder_final, tuple):
        query_encoder_hidden = query_encoder_final
        res_encoder_hidden = res_encoder_final
    else:
        query_encoder_hidden = (query_encoder_final, )
        res_encoder_hidden = (res_encoder_final, )

    # Fix: use the local ``batch`` (``batch_size`` was never defined) and
    # correct the misspelt ``query_enocder_hidden``.
    query_encoder_hidden = query_encoder_hidden[0].contiguous().view(batch, -1)
    res_encoder_hidden = res_encoder_hidden[0].contiguous().view(batch, -1)

    # get extended features
    extended_feats = []
    if self.substract_flag:
        extended_feats.append(query_encoder_hidden - res_encoder_hidden)
    if self.dot_flag:
        extended_feats.append(torch.mul(query_encoder_hidden, res_encoder_hidden))
    if self.bilinear_flag:
        extended_feats.append(self.bilinear(query_encoder_hidden, res_encoder_hidden))
    if self.inner_prod_flag:
        inner_prod_feat = torch.bmm(query_encoder_hidden.unsqueeze(1),
                                    res_encoder_hidden.unsqueeze(2))
        extended_feats.append(inner_prod_feat.squeeze(-1))

    # concatenate features
    combine_feats = torch.cat(extended_feats, dim=-1)

    if self.score_fn_type == 'MLP':
        outputs = self.score_fn(combine_feats)
    elif self.score_fn_type == 'LR':
        outputs = torch.mv(combine_feats, self.score_fn) + self.bias
    else:
        raise ValueError("{} is not valid for SingleArch, ".format(self.score_fn_type))

    return outputs.squeeze(), query_encoder_final
def __init__(self):
    """Generate a synthetic linear-regression data set.

    ``X`` is a (100, 3) design matrix whose first column is all ones and
    whose other two columns are standard-normal.  ``y`` is ``X @ [1, 2, 3]``
    plus Gaussian noise with standard deviation 0.5.  ``X``, ``y`` and the
    generating weights are stored on the instance.
    """
    # Ground-truth weights, shape: torch.Size([3])
    true_weight = torch.Tensor([1, 2, 3])

    # Design matrix, shape: torch.Size([100, 3])
    intercept = torch.ones(100, 1)
    features = torch.randn(100, 2)
    design = torch.cat([intercept, features], 1)

    # Inner product of the data with the weights, plus random noise.
    # Target shape: torch.Size([100])
    clean = torch.mv(design, true_weight)
    targets = clean + torch.randn(100) * 0.5

    self.X = design
    self.y = targets
    self.original_weight = true_weight
def energy(self, V, tuned=False):
    """Mean free energy F(V) of the visible configurations ``V``.

    ``exp(-F(V)) / Z`` is the model probability of ``V``.  The partition
    function ``Z`` is not computed, so the value is only meaningful for
    comparing data sets; lower is better.

    Args:
        V: batch of visible vectors, one per row.
        tuned: when true, use the ``*_g`` / ``*_r`` parameter set
            (``visible_bias_g``, ``hidden_bias_r``, ``w_r``) instead of the
            base parameters.

    Returns:
        The mean of ``-V @ v_b - sum(log(1 + exp(V @ w + h_b)))``.
    """
    if tuned:
        v_b, h_b, w = self.visible_bias_g, self.hidden_bias_r, self.w_r
    else:
        v_b, h_b, w = self.visible_bias, self.hidden_bias, self.w

    hidden_input = torch.mm(V, w) + h_b
    visible_term = torch.mv(V, v_b)
    hidden_term = torch.sum(torch.log(1 + torch.exp(hidden_input)), dim=-1)

    free_energy = -visible_term - hidden_term
    return torch.mean(free_energy)
def fit(self, path):
    """Fit the model with the analytic least-squares solution.

    Data is loaded from ``path`` through ``FingerprintsDataset`` and turned
    into ``(X, y)`` by ``self.prepare_data``.  The coefficients
    ``(X^T X)^-1 X^T y`` are passed to ``self.set_params``, and a completion
    message is both logged and printed.
    """
    dataset = FingerprintsDataset(path)
    loader = DataLoader(dataset=dataset,
                        batch_size=1,
                        collate_fn=fingerprints_collate_fn)
    X, y = self.prepare_data(loader)

    # Normal equations: beta = (X^T X)^-1 X^T y
    gram_inverse = torch.inverse(torch.mm(X.t(), X))
    beta = torch.mv(torch.mm(gram_inverse, X.t()), y)
    self.set_params(beta)

    msg = 'fit model "{}" finished.'.format(self.__class__.__name__)
    logger.info(msg)
    print(msg)
def forward(self, input, hx=None):
    """Compute one step of a GRU-style cell for a single (unbatched) input.

    ``weight_ih``, ``weight_hh`` and ``bias`` are indexed 0/1/2 for the
    update gate ``z``, the reset gate ``r`` and the candidate state ``n``.
    The candidate uses a ReLU activation.

    Args:
        input: 1-D input vector.
        hx: 1-D previous hidden state.  Defaults to a zero vector; passing
            ``None`` used to fail inside ``torch.mv``.

    Returns:
        The new hidden state ``(1 - z) * n + z * hx``.
    """
    weight_ih = self.weight_ih
    weight_hh = self.weight_hh
    bias = self.bias

    if hx is None:
        # Zero state with the dtype and device of the recurrent weights.
        hx = weight_hh[0].new_zeros(weight_hh[0].size(1))

    z = (torch.mv(weight_ih[0], input) + torch.mv(weight_hh[0], hx) + bias[0]).sigmoid()
    r = (torch.mv(weight_ih[1], input) + torch.mv(weight_hh[1], hx) + bias[1]).sigmoid()
    # The reset gate is applied to the hidden state before the recurrent product.
    n = (torch.mv(weight_ih[2], input) + torch.mv(weight_hh[2], hx * r) + bias[2]).relu()

    h = (torch.ones_like(z) - z) * n + z * hx
    return h
def select_action(self, context: torch.Tensor) -> int:
    """Select an action for the given context.

    While ``self.t`` is below ``n_actions * init_pulls`` the actions are
    cycled through in order.  Afterwards one coefficient vector per action
    is drawn from the posterior and the action with the highest predicted
    reward (a constant 1 is appended to the context) is chosen.

    Args:
        context (torch.Tensor): The context vector to select action for.

    Returns:
        int: The action to take (returned as an integer tensor).
    """
    self.t += 1
    if self.t < self.n_actions * self.init_pulls:
        return torch.tensor(self.t % self.n_actions,
                            device=self.device,
                            dtype=torch.int)

    # One noise variance per action, drawn from an inverse gamma.
    sampled_vars = [
        self.b[i] * invgamma.rvs(self.a[i]) for i in range(self.n_actions)
    ]
    var = torch.tensor(sampled_vars, device=self.device, dtype=torch.float)

    try:
        draws = [
            np.random.multivariate_normal(self.mu[i], var[i] * self.cov[i])
            for i in range(self.n_actions)
        ]
        beta = torch.tensor(np.stack(draws)).to(self.device).to(torch.float)
    except np.linalg.LinAlgError:
        # Fall back to standard-normal coefficients.
        fallback = [
            torch.distributions.MultivariateNormal(
                torch.zeros(self.context_dim + 1),
                torch.eye(self.context_dim + 1),
            ).sample() for i in range(self.n_actions)
        ]
        beta = torch.stack(fallback).to(self.device).to(torch.float)

    augmented = torch.cat([context.view(-1), torch.ones(1)])
    values = torch.mv(beta, augmented)
    return torch.argmax(values).to(torch.int)
def backward(ctx, grad_output):
    """Backward pass of ``add_vector * alpha + beta * (matrix @ vector)``.

    Gradients are produced only for the inputs flagged in
    ``ctx.needs_input_grad``: the added vector (scaled by ``ctx.alpha``),
    the matrix (outer product of ``grad_output`` and ``vector``) and the
    vector (``matrix^T @ grad_output``), the latter two scaled by
    ``ctx.beta``.

    Returns:
        A 6-tuple whose first three entries are the gradients and whose last
        three are ``None``.
    """
    matrix, vector = ctx.saved_variables
    needs_grad = ctx.needs_input_grad
    grad_add_vector = None
    grad_matrix = None
    grad_vector = None

    if needs_grad[0]:
        grad_add_vector = maybe_unexpand(grad_output, ctx.add_vector_size)
        if ctx.alpha != 1:
            grad_add_vector = grad_add_vector.mul(ctx.alpha)

    if needs_grad[1]:
        grad_matrix = torch.ger(grad_output, vector)
        if ctx.beta != 1:
            grad_matrix *= ctx.beta

    if needs_grad[2]:
        grad_vector = torch.mv(matrix.t(), grad_output)
        if ctx.beta != 1:
            grad_vector *= ctx.beta

    return grad_add_vector, grad_matrix, grad_vector, None, None, None
def backward(ctx, grad_output):
    """Backward pass of ``add_matrix * alpha + beta * outer(vector1, vector2)``.

    Gradients are produced only for the inputs flagged in
    ``ctx.needs_input_grad``: the added matrix (scaled by ``ctx.alpha``),
    ``vector1`` (``grad_output @ vector2``) and ``vector2``
    (``vector1 @ grad_output``), the latter two scaled by ``ctx.beta``.

    Returns:
        A 6-tuple whose first three entries are the gradients and whose last
        three are ``None``.
    """
    vector1, vector2 = ctx.saved_variables
    needs_grad = ctx.needs_input_grad
    grad_add_matrix = None
    grad_vector1 = None
    grad_vector2 = None

    if needs_grad[0]:
        grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
        if ctx.alpha != 1:
            grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

    if needs_grad[1]:
        grad_vector1 = torch.mv(grad_output, vector2)
        if ctx.beta != 1:
            grad_vector1 *= ctx.beta

    if needs_grad[2]:
        # TODO: maybe it's better to do transpose + mv + transpose
        as_row = vector1.unsqueeze(0)
        grad_vector2 = torch.mm(as_row, grad_output).squeeze(0)
        if ctx.beta != 1:
            grad_vector2 *= ctx.beta

    return grad_add_matrix, grad_vector1, grad_vector2, None, None, None
def matmul(tensor1, tensor2, out=None):
    r"""Matrix product of two tensors.

    The behavior depends on the dimensionality of the tensors as follows:

    - If both tensors are 1-dimensional, the dot product (scalar) is returned.
    - If both arguments are 2-dimensional, the matrix-matrix product is returned.
    - If the first argument is 1-dimensional and the second argument is 2-dimensional,
      a 1 is prepended to its dimension for the purpose of the matrix multiply.
      After the matrix multiply, the prepended dimension is removed.
    - If the first argument is 2-dimensional and the second argument is 1-dimensional,
      the matrix-vector product is returned.
    - If both arguments are at least 1-dimensional and at least one argument is
      N-dimensional (where N > 2), then a batched matrix multiply is returned.  If the first
      argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the
      batched matrix multiply and removed after.  If the second argument is 1-dimensional, a
      1 is appended to its dimension for the purpose of the batched matrix multiply and removed after.
      The non-matrix (i.e. batch) dimensions are :ref:`broadcasted <broadcasting-semantics>` (and thus
      must be broadcastable).  For example, if :attr:`tensor1` is a
      :math:`(j \times 1 \times n \times m)` tensor and :attr:`tensor2` is a :math:`(k \times m \times p)`
      tensor, :attr:`out` will be an :math:`(j \times k \times n \times p)` tensor.

    .. note::

        The 1-dimensional dot product version of this function does not support an :attr:`out` parameter.

    Arguments:
        tensor1 (Tensor): the first tensor to be multiplied
        tensor2 (Tensor): the second tensor to be multiplied
        out (Tensor, optional): the output tensor

    Raises:
        ValueError: if :attr:`out` is given for the 1-D dot-product case, or
            if no branch matches the dimensionality of the arguments (e.g. a
            0-dimensional argument).
    """
    dim_tensor1 = tensor1.dim()
    dim_tensor2 = tensor2.dim()
    # vector . vector -> scalar; there is no tensor for `out` to receive it.
    if dim_tensor1 == 1 and dim_tensor2 == 1:
        if out is None:
            return torch.dot(tensor1, tensor2)
        else:
            raise ValueError("out must be None for 1-d tensor matmul, returns a scalar")
    # matrix @ vector
    if dim_tensor1 == 2 and dim_tensor2 == 1:
        if out is None:
            return torch.mv(tensor1, tensor2)
        else:
            return torch.mv(tensor1, tensor2, out=out)
    # vector @ matrix: treat the vector as a 1 x n matrix, then drop that row
    # dimension from the result in place.
    elif dim_tensor1 == 1 and dim_tensor2 == 2:
        if out is None:
            return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
        else:
            return torch.mm(tensor1.unsqueeze(0), tensor2, out=out).squeeze_(0)
    # matrix @ matrix
    elif dim_tensor1 == 2 and dim_tensor2 == 2:
        if out is None:
            return torch.mm(tensor1, tensor2)
        else:
            return torch.mm(tensor1, tensor2, out=out)
    elif dim_tensor1 >= 3 and (dim_tensor2 == 1 or dim_tensor2 == 2):
        # optimization: use mm instead of bmm by folding tensor1's batch into
        # its leading matrix dimension.

        if dim_tensor2 == 1:
            tensor2 = tensor2.unsqueeze(-1)

        size1 = tensor1.size()
        size2 = tensor2.size()
        output_size = size1[:-1] + size2[-1:]

        # fold the batch into the first dimension
        tensor1 = tensor1.contiguous().view(-1, size1[-1])

        # A non-contiguous `out` is not written by mm directly; the result is
        # computed into a fresh tensor and attached to `out` via set_ below.
        if out is None or not out.is_contiguous():
            output = torch.mm(tensor1, tensor2)
        else:
            output = torch.mm(tensor1, tensor2, out=out)

        output = output.view(output_size)

        if dim_tensor2 == 1:
            output = output.squeeze(-1)

        # Point `out` at the reshaped result so the caller's tensor ends up
        # with the final size.
        if out is not None:
            out.set_(output)
            return out

        return output
    elif (dim_tensor1 >= 1 and dim_tensor2 >= 1) and (dim_tensor1 >= 3 or dim_tensor2 >= 3):
        # ensure each tensor size is at least 3-dimensional
        tensor1_exp_size = torch.Size((1,) * max(3 - tensor1.dim(), 0) + tensor1.size())
        # rhs needs to be a separate case since we can't freely expand 1s on the rhs, but can on lhs
        if dim_tensor2 == 1:
            tensor2 = tensor2.unsqueeze(1)
        tensor2_exp_size = torch.Size((1,) * max(3 - tensor2.dim(), 0) + tensor2.size())

        # expand the batch portion (i.e. cut off matrix dimensions and expand rest)
        expand_batch_portion = torch._C._infer_size(tensor1_exp_size[:-2], tensor2_exp_size[:-2])

        # flatten expanded batches
        tensor1_expanded = tensor1.expand(*(expand_batch_portion + tensor1_exp_size[-2:])) \
            .contiguous().view(reduce(mul, expand_batch_portion), *tensor1_exp_size[-2:])
        tensor2_expanded = tensor2.expand(*(expand_batch_portion + tensor2_exp_size[-2:])) \
            .contiguous().view(reduce(mul, expand_batch_portion), *tensor2_exp_size[-2:])

        # reshape batches back into result
        total_expansion = expand_batch_portion + (tensor1_exp_size[-2], tensor2_exp_size[-1])

        # Remove the dimension that was added for a 1-D argument.
        def maybeSqueeze(tensor):
            if dim_tensor1 == 1:
                return tensor.squeeze(-2)
            elif dim_tensor2 == 1:
                return tensor.squeeze(-1)
            else:
                return tensor

        # Same `out` handling as in the folded-mm branch above.
        if out is None or not out.is_contiguous():
            output = torch.bmm(tensor1_expanded, tensor2_expanded)
        else:
            output = torch.bmm(tensor1_expanded, tensor2_expanded, out=out)

        output = maybeSqueeze(output.view(total_expansion))

        if out is not None:
            out.set_(output)
            return out

        return output

    # Reached only when no branch above matched.
    raise ValueError("both arguments to __matmul__ need to be at least 1D, "
                     "but they are {}D and {}D".format(dim_tensor1, dim_tensor2))