Example #1
    def accGradParameters(self, input, gradOutput, scale=1):
        self.network.accGradParameters([input, self.partition], gradOutput, scale)
        if self.bias is not None:
            if self.buffer is None:
                self.buffer = input.new()
            self.buffer.resize_(gradOutput.size(1))
            torch.mv(gradOutput.t(), self.addBuffer, out=self.buffer).mul_(scale)
            self.gradBias.index_add_(
                1, self.partition, self.buffer.view(1, self.buffer.nelement())
            )
Example #2
    def _update_u_v(self):
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))

        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
        sigma = u.dot(w.view(height, -1).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))
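Note: the power-iteration snippet above relies on an l2normalize helper that is not shown. A minimal sketch of such a function (an assumption, not taken from the original example) could be:

def l2normalize(v, eps=1e-12):
    # Normalize a 1-D tensor to unit L2 norm, guarding against division by zero.
    return v / (v.norm() + eps)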
Example #3
    def updateOutput(self, input):
        M, v = input
        assert M.ndimension() == 2 or M.ndimension() == 3

        if M.ndimension() == 2:
            assert v.ndimension() == 1
            if self.trans:
                M = M.transpose(0, 1)
            self.output.resize_(M.size(0))
            torch.mv(M, v, out=self.output)
        else:
            assert v.ndimension() == 2
            if self.trans:
                M = M.transpose(1, 2)
            self.output.resize_(M.size(0), M.size(1), 1)
            torch.bmm(M, v.view(v.size(0), v.size(1), 1), out=self.output).resize_(M.size(0), M.size(1))

        return self.output
Example #4
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
Example #5
def gmm_batch_model(data):
    p = pyro.param("p", Variable(torch.Tensor([0.3]), requires_grad=True))
    p = torch.cat([p, 1 - p])
    sigma = pyro.param("sigma", Variable(torch.Tensor([1.0]), requires_grad=True))
    mus = Variable(torch.Tensor([-1, 1]))
    with pyro.iarange("data", len(data)) as batch:
        n = len(batch)
        z = pyro.sample("z", dist.Categorical(p.unsqueeze(0).expand(n, 2)))
        assert z.size() == (n, 2)
        mu = torch.mv(z, mus)
        pyro.observe("x", dist.Normal(mu, sigma.expand(n)), data[batch])
Example #6
    def predict(self, model, xtest):
        xtest = torch.Tensor(xtest).float()
        phi_x = self.get_features(model, xtest)

        # make sure to run once the optimizing step before making predictions.
        assert self.K is not None \
               and self.K_inv is not None \
               and self.m is not None \
               and self.alpha is not None \
               and self.beta is not None

        mu = torch.mv(phi_x, self.m)
        s2 = torch.mul(phi_x.t(),
                       torch.mm(self.K_inv,
                                phi_x.t())).sum(0).add(1 / self.beta)

        return mu, np.clip(s2, a_min=1e-5, a_max=np.inf)
Example #7
    def _scramble(self):
        g: Optional[torch.Generator] = None
        if self.seed is not None:
            g = torch.Generator()
            g.manual_seed(self.seed)

        cpu = torch.device("cpu")

        # Generate shift vector
        shift_ints = torch.randint(2, (self.dimension, self.MAXBIT), device=cpu, generator=g)
        self.shift = torch.mv(shift_ints, torch.pow(2, torch.arange(0, self.MAXBIT, device=cpu)))

        # Generate lower triangular matrices (stacked across dimensions)
        ltm_dims = (self.dimension, self.MAXBIT, self.MAXBIT)
        ltm = torch.randint(2, ltm_dims, device=cpu, generator=g).tril()

        torch._sobol_engine_scramble_(self.sobolstate, ltm, self.dimension)
Example #8
def log_lh(rbm, spins, L, nsamples=10000):
    log_Z = log_Zeta(rbm, L)
    log_p_star = 0
    _, h = rbm.sample_h(spins)
    for j in range(min(nsamples, len(spins))):
        try:
            pre_log = log_p_star
            log_p_star += torch.dot(rbm.b, spins[j]) / nsamples + torch.sum(
                torch.log(1 + torch.exp(rbm.c +
                                        torch.mv(rbm.W, spins[j])))) / nsamples


#      if j % 1000==0: print("In cycle ",j," with free_energy_log: ", log_p_star )
        except:
            #      print("Error in: ",j," cycle.")
            return 0
    return log_p_star - log_Z
Example #9
def prs_model(beta_hat, obs_error):
    z = pyro.sample(
        'z',
        dist.Independent(dist.Bernoulli(torch.tensor([p_causal]*N)), 1)
    )
    beta = pyro.sample(
        'beta_latent',
        dist.Independent(dist.Normal(GENETIC_MEAN,
                                     GENETIC_SD), 1)
    )
    beta_hat = pyro.sample(
        'beta_hat',
        dist.MultivariateNormal(torch.mv(obs_error, beta*z),
                                covariance_matrix=obs_error*sigma_sq_e),
        obs=beta_hat
    )
    return beta_hat
Example #10
def calc_linearized_pairwise_ranking_loss(last_layer,
                                          pairwise_prefs,
                                          demo_cnts,
                                          confidence=1):
    '''use (i,j) indices and precomputed feature counts to do faster pairwise ranking loss'''
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assume that we are on a CUDA machine, then this should print a CUDA device:
    #print(device)
    #don't need any gradients
    with torch.no_grad():

        #do matrix multiply with last layer of network and the demo_cnts
        #print(list(reward_net.fc2.parameters()))
        linear = last_layer.weight.data  #not using bias
        #print(linear)
        #print(bias)
        weights = linear.squeeze(
        )  #append bias and weights from last fc layer together
        #print('weights',weights)
        #print('demo_cnts', demo_cnts)
        demo_returns = confidence * torch.mv(demo_cnts, weights)

        #positivity prior
        if demo_returns[0] < 0.0:
            return torch.Tensor([-float("Inf")])

        loss_criterion = nn.CrossEntropyLoss(reduction='sum')  #sum up losses
        cum_log_likelihood = 0.0
        outputs = torch.zeros(len(pairwise_prefs),
                              2)  #each row is a new pair of returns
        for p, ppref in enumerate(pairwise_prefs):
            i, j = ppref
            outputs[p, :] = torch.tensor([demo_returns[i], demo_returns[j]])
        labels = torch.ones(len(pairwise_prefs)).long()

        #outputs = outputs.unsqueeze(0)
        #print(outputs)
        #print(labels)
        cum_log_likelihood = -loss_criterion(outputs, labels)
        #if labels == 0:
        #    log_likelihood = torch.log(return_i/(return_i + return_j))
        #else:
        #    log_likelihood = torch.log(return_j/(return_i + return_j))
        #print("ll",log_likelihood)
        #cum_log_likelihood += log_likelihood
    return cum_log_likelihood
Example #11
def newton_grad(loss, model):
    loss.backward(retain_graph=True)
    gs = model.get_param_g()
    grad = torch.zeros(len(gs))
    for i, g in enumerate(gs):
        grad[i] = g.grad.data[0]
    hessian = torch.zeros(len(gs), len(gs))
    # compute second order grad
    dl_dgs = torch.autograd.grad(loss, gs, create_graph=True)
    for i, dl_dg in enumerate(dl_dgs):
        ddg = torch.autograd.grad(dl_dg, gs, retain_graph=True)
        hessian[i][i] = ddg[i].data[0]

    invh_grad = torch.mv(torch.inverse(hessian), grad)
    for i, g in enumerate(gs):
        g.grad.data[0] = invh_grad[i]
    nn.utils.clip_grad_norm(gs, 1)
Example #12
    def mv(self, vs):
        vs_dict = vs.get_dict_representation()
        out_dict = dict()
        for layer_id, layer in self.generator.layer_collection.layers.items():
            v = vs_dict[layer_id][0].view(-1)
            if layer.bias is not None:
                v = torch.cat([v, vs_dict[layer_id][1].view(-1)])
            mv = torch.mv(self.data[layer_id], v)
            mv_tuple = (mv[:layer.weight.numel()].view(*layer.weight.size), )
            if layer.bias is not None:
                mv_tuple = (
                    mv_tuple[0],
                    mv[layer.weight.numel():].view(*layer.bias.size),
                )
            out_dict[layer_id] = mv_tuple
        return PVector(layer_collection=vs.layer_collection,
                       dict_repr=out_dict)
Example #13
def test_torch_ista(seeds, adj, size, alpha, rho, iters):
    out = torch.empty(0)

    # Compute degree vectors/matrices
    d       = np.asarray(adj.sum(axis=-1)).squeeze()
    d_sqrt  = np.sqrt(d)
    dn_sqrt = 1 / d_sqrt

    D       = sparse.diags(d)
    Dn_sqrt = sparse.diags(dn_sqrt)

    # Normalized adjacency matrix
    Q = D - ((1 - alpha) / 2) * (D + adj)
    Q = Dn_sqrt @ Q @ Dn_sqrt

    # Convert numpy float64 data to torch float32 tensor
    Q = spy_sparse2torch_sparse(Q)
    d_sqrt = torch.from_numpy(d_sqrt).float()
    dn_sqrt = torch.from_numpy(dn_sqrt).float()
    zero = torch.zeros(size)
    if args.hammerblade:
        Q = Q.hammerblade()
        d_sqrt = d_sqrt.hammerblade()
        dn_sqrt = dn_sqrt.hammerblade()
        zero = zero.hammerblade()

    for seed in tqdm(seeds):
        s = np.zeros(adj.shape[0])
        s[seed] = 1
        s = torch.from_numpy(s).float()
        if args.hammerblade:
            s = s.hammerblade()
        q = zero       
        rad = rho * alpha * d_sqrt
        grad0 = -alpha * dn_sqrt * s
        grad = grad0.clone()
        for _ in range(iters):
            q = torch.max(q - grad - rad, zero)
            temp = torch.mv(Q, q)
            grad = grad + temp
        temp = torch.mul(q, d_sqrt).cpu()
        temp = temp.view(size, 1)
        out = torch.cat((out, temp), 1)
        
    return out
Example #14
    def resampling(self, weights, particles):
        n = self.num_particles
        x = torch.zeros([n, self.dim], dtype=torch.float)

        weights = weights.view(-1)
        gram = self.gram_matrix(particles, particles)
        weight_gram = torch.mv(gram, weights)
        ind = torch.argmax(weight_gram)
        x[0] = particles[ind]

        for p in range(1, n):
            x_t = x[0:p]
            gram = self.gram_matrix(particles, x_t)
            gram = torch.sum(gram, 1)
            ind = torch.argmax(weight_gram - gram)
            x[p] = particles[ind]

        return x
Example #15
    def forward(self, *input):
        """
        The forward pass
        
        Args: 
            - input: The input from the previous layer
        Returns:
            - forward_pass: The output of the function
        """

        # Input from the layer
        self.input_from_layer = input[0]
        # Calculating the output, which is basically the multiplication
        # of the weights with the input layer and adding the bias
        self.forward_pass = torch.mv(self.weights,
                                     self.input_from_layer) + self.bias

        return self.forward_pass
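The forward pass above is just the affine map y = W·x + b. A standalone sketch with illustrative tensor shapes (not taken from the original layer class):

import torch

W = torch.randn(3, 5)   # weights: 3 outputs, 5 inputs
x = torch.randn(5)      # input vector
b = torch.randn(3)      # bias
y = torch.mv(W, x) + b  # matrix-vector product plus bias
print(y.shape)          # torch.Size([3])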
Example #16
def update_inv_jacobian_approx(B, deltaZ, deltaG):
    """
    This is eq 10 in the paper
    :param B: nxn inv jacobian approx
    :param deltaZ: n vector
    :param deltaG: n vector
    """
    Bdg = torch.mv(B, deltaG)  # nx1

    rational = deltaZ - Bdg
    rational = rational / (torch.dot(deltaZ, Bdg) + 1e-10)

    deltaZT = torch.unsqueeze(deltaZ, 0)  # 1xn
    notrational = torch.matmul(deltaZT, B)  # 1xn
    rational = torch.unsqueeze(rational, 1)  # nx1
    update = torch.matmul(rational, notrational)
    nextB = B + update  # n x n
    return nextB
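A quick usage sketch for the update above, with made-up shapes (illustrative only, not from the original repository):

import torch

n = 4
B = torch.eye(n)         # initial inverse-Jacobian approximation
deltaZ = torch.randn(n)  # step in z
deltaG = torch.randn(n)  # change in the residual g
B_next = update_inv_jacobian_approx(B, deltaZ, deltaG)
print(B_next.shape)      # torch.Size([4, 4])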
Example #17
    def oracle(self, w, x, y):
        self._validate_inputs(w, x, y)
        # TODO: Compute objective value
        obj = self.task_error(
            w, x, y) + 0.5 * self.hparams.mu * torch.sum(torch.abs(w))
        # TODO: compute subgradient
        du = torch.zeros(w.size())

        du[w > 0] = 1
        du[w < 0] = -1

        inter_mat = torch.mm(x, w).squeeze(-1) - y

        dw = 2 / self.hparams.n_samples * torch.mv(x.t(), inter_mat)
        dw = dw.unsqueeze(-1)
        dw += 0.5 * self.hparams.mu * du

        return {'obj': obj, 'dw': dw}
Example #18
    def step(self, g, y, dy):
        g = g.reshape(-1, 1)[:, 0]
        z = dy * self.tau
        dt = self.dt
        for _ in range(self.l):
            dx = (-self.a_x * self.x) / self.tau
            self.x = self.x + dx * dt
            psi = torch.exp(-self.h * torch.pow((self.x - self.c), 2))
            fx = torch.mv(self.w,
                          psi) * self.x * (g - self.y0) / torch.sum(psi)
            dz = self.a_z * (self.b_z * (g - y) - z) + fx
            dy = z
            dz = dz / self.tau
            dy = dy / self.tau
            y = y + dy * dt
            z = z + dz * dt
        self.i += 1
        return y, dy, dz
Example #19
    def forward(self, x, y):
        x_lookup, y_lookup, neg_lookup = self.prepare_inputs(x, y)
        image = self.get_image(x)
        image = self.cnn(image)
        image = image.view(-1)
        vI = self.fc1(image)

        vO = self.WO(y_lookup)
        # vI = self.WI(x_lookup)
        samples = self.WO(neg_lookup)

        # vI = self.alpha * vI + self.beta * image

        pos_score = F.logsigmoid(t.dot(vO, vI))
        neg_score = F.logsigmoid(-t.mv(samples, vI))

        loss = -pos_score - t.sum(neg_score)
        return loss
Example #20
def _oscar_prox_jacobian(y_star, dout=None):
    y_star = y_star.numpy()
    dim = y_star.shape[0]
    J = torch.zeros(dim, dim)

    _, inv, counts = np.unique(np.abs(y_star),
                               return_inverse=True,
                               return_counts=True)

    for i in range(dim):
        for j in range(dim):
            if (inv[i] == inv[j] and y_star[i] != 0):
                J[i, j] = (np.sign(y_star[i]) * np.sign(y_star[j]) /
                           counts[inv[i]])
    if dout is not None:
        return torch.mv(J, dout)
    else:
        return J
Example #21
    def sample(self, x, parameters):
        """
        Create some logistic regression data. *** NOTE: This ignores the precisions of each of the values of w, and
        simply assumes the true (unknown) weight is w; this is different to finding the predictive distribution!! ***

        :param x: input values to predict at
        :param parameters: model parameters (will not be updated)
        :param hyperparameters: model hyperparameters (will also not be updated)
        :return: y: tensor of parameter labels
        """
        w_nat_means = parameters["w_mu"]

        z = torch.mv(x, w_nat_means)
        p = self.act(z)

        output_dist = Bernoulli(p)
        y = output_dist.sample()
        return y
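The sampler above draws Bernoulli labels with success probability act(x·w). A standalone sketch of the same idea with explicit tensors (names are illustrative, assuming a sigmoid activation):

import torch

x = torch.randn(10, 3)                          # inputs
w_mu = torch.randn(3)                           # mean weights
p = torch.sigmoid(torch.mv(x, w_mu))            # per-example success probability
y = torch.distributions.Bernoulli(p).sample()   # sampled labels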
Example #22
    def dphi_dq(self, dV=None, mH=None):
        if mH is None:
            mH = self.mH(dV)
        gg = torch.dot(dV, dV)
        agg = self.metric.msoftabsalpha * gg
        t = float(numpy.tanh(agg))
        out = torch.mv(mH, dV)

        if abs(agg) < 1e-4:
            out = out * 2 * (
                (len(dV) /
                 (3 * gg)) * agg * agg + self.metric.msoftabsalpha * t)
        else:
            out = out * 2 * ((len(dV) / gg) *
                             (1 - agg / t) + self.metric.msoftabsalpha * t)
        out = out + dV

        return (out)
Example #23
def test_jacobian_plowrank():
    for get_task in nonlinear_tasks:
        loader, lc, parameters, model, function, n_output = get_task()
        model.train()
        generator = Jacobian(layer_collection=lc,
                             model=model,
                             loader=loader,
                             function=function,
                             n_output=n_output)
        PMat_lowrank = PMatLowRank(generator)
        dw = random_pvector(lc, device=device)
        dense_tensor = PMat_lowrank.get_dense_tensor()

        # Test get_diag
        check_tensors(torch.diag(dense_tensor),
                      PMat_lowrank.get_diag(),
                      eps=1e-4)

        # Test frobenius
        frob_PMat = PMat_lowrank.frobenius_norm()
        frob_direct = (dense_tensor**2).sum()**.5
        check_ratio(frob_direct, frob_PMat)

        # Test trace
        trace_PMat = PMat_lowrank.trace()
        trace_direct = torch.trace(dense_tensor)
        check_ratio(trace_PMat, trace_direct)

        # Test mv
        mv_direct = torch.mv(dense_tensor, dw.get_flat_representation())
        check_tensors(mv_direct,
                      PMat_lowrank.mv(dw).get_flat_representation())

        # Test vTMV
        check_ratio(torch.dot(mv_direct, dw.get_flat_representation()),
                    PMat_lowrank.vTMv(dw))

        # Test solve TODO
        # Test inv TODO

        # Test add, sub, rmul

        check_tensors(1.23 * PMat_lowrank.get_dense_tensor(),
                      (1.23 * PMat_lowrank).get_dense_tensor())
Example #24
    def attr_level_matching(self, compare_result, word_embeddings_rnn, field_embedding, token_mask):
        '''
             Get attribute level comparison result by aggregating token level comparison result.
             Field_embedding and word_embeddings_rnn are used to distinguish importance weights of different tokens
        '''
        size = word_embeddings_rnn.size()
        word_embeddings_rnn = word_embeddings_rnn.view(size[0] * size[1], -1)

        attention = torch.mv(word_embeddings_rnn, field_embedding)
        attention = attention.view(size[0], 1, -1)
        attention = F.softmax(attention, dim=2)

        attention = attention.view(attention.size()[0], -1)
        attention = attention * token_mask.float()
        attention = attention.view(size[0], 1, -1)

        compare_att_sum = torch.bmm(attention, compare_result)
        compare_att_sum = compare_att_sum.view(compare_att_sum.size()[0], -1)
        return compare_att_sum
Example #25
    def step(self, step_input, Hidden_State, Cell_State, rHidden_State,
             rCell_State):
        # GC-LSTM
        x = step_input
        gc = self.gc_list[0](x)
        for i in range(1, self.K):
            gc = torch.cat((gc, self.gc_list[i](x)), 1)

        combined = torch.cat((gc, Hidden_State), 1)
        f = torch.sigmoid(self.fl(combined))
        i = torch.sigmoid(self.il(combined))
        o = torch.sigmoid(self.ol(combined))
        C = torch.tanh(self.Cl(combined))

        NC = torch.mul(
            Cell_State,
            torch.mv(
                Variable(self.A_list[-1].repeat(self.output_dim,
                                                self.output_dim),
                         requires_grad=False).to(self.device),
                self.Neighbor_weight))
        Cell_State = f * NC + i * C  # [batch_size, out_features]
        Hidden_State = o * torch.tanh(Cell_State)  # [batch_size, out_features]

        # LSTM
        rcombined = torch.cat((step_input, rHidden_State),
                              1)  # [batch_size, in_features + out_features]
        # rfl: nn.Linear([in_features + out_features, out_features])
        rf = torch.sigmoid(self.rfl(rcombined))  # [batch_size, out_features]
        ri = torch.sigmoid(self.ril(rcombined))
        ro = torch.sigmoid(self.rol(rcombined))
        rC = torch.tanh(self.rCl(rcombined))
        rCell_State = rf * rCell_State + ri * rC  # [batch_size, out_features]
        rHidden_State = ro * torch.tanh(
            rCell_State)  # [batch_size, out_features]

        # Kalman Filtering
        var1, var2 = torch.var(step_input), torch.var(gc)

        pred = (Hidden_State * var1 * self.c + rHidden_State * var2) / \
               (var1 + var2 * self.c)  # [batch_size, out_features]

        return Hidden_State, Cell_State, gc, rHidden_State, rCell_State, pred
Example #26
    def oracle(self, w, x, y):
        self._validate_inputs(w, x, y)
        # regularization hyper-parameter
        mu = self.hparams.mu

        # Compute objective value
        obj = self.task_error(w, x, y) + 0.5 * mu * torch.square(torch.norm(w))

        # compute close form solution
        n = x.size(0)
        n_features = x.size(1)

        intermediary = torch.inverse(
            torch.mm(x.transpose(0, 1), x) +
            0.5 * n * mu * torch.eye(n_features))
        sol = torch.mv(torch.mm(intermediary, x.transpose(0, 1)), y)
        sol = sol.view(n_features, 1)

        return {'obj': obj, 'sol': sol}
Example #27
    def forward(ctx, Mu, Var, beta, Y, H, U, isCuda):
        # print(Mu,Var,beta,Y,H,U)
        (N, T, B) = Var.size()
        (M, T, B) = Y.size()
        invVar = torch.reciprocal(Var)
        hMu = multip(H, Mu)
        # hMu=torch.bmm(H.repeat(B,1,1),Mu.permute(2,0,1)).permute(1,2,0)
        dU = U - Mu
        dY = Y - hMu
        logdetY = torch.FloatTensor([0])
        invSigmaYdY = (torch.zeros(M, T, B))
        Id = torch.eye(M)
        if isCuda:
            Id = Id.cuda()
            invSigmaYdY = invSigmaYdY.cuda()
            logdetY = logdetY.cuda()

        for j in range(T):
            for k in range(B):
                SigmaY = (1 / beta) * Id + torch.mm(
                    (H * Var[:, j, k]), H.transpose(0, 1))
                logdetY = logdetY + torch.sum(
                    torch.log(torch.eig(
                        SigmaY, eigenvectors=False)[0][:, 0]))  # Reached here
                invSigmaY = torch.inverse(SigmaY)
                invSigmaYdY[:, j, k] = torch.mv(invSigmaY, dY[:, j, k])

        invVardU = torch.mul(invVar, dU)
        #print('Var is: ', Var)
        #print('logdetY:', logdetY)

        ctx.save_for_backward(Mu, Var, beta, Y, H, U)
        cost2 = logdetY + torch.sum(torch.mul(dY, invSigmaYdY)) - torch.sum(
            torch.log(Var)) - torch.sum(torch.mul(dU, invVardU))
        logtwopi = torch.log(torch.FloatTensor([2 * 3.14]))
        if isCuda:
            logtwopi = logtwopi.cuda()
        cost1 = M * T * B * torch.log(
            beta) - N * T * B * logtwopi - beta * torch.sum(
                (Y - multip(H, U)).pow(2))

        cost = 0.5 * (cost1 + cost2)
        return cost / (T * N * B)
Example #28
    def calc_values(self, model, xtrain, ytrain, alpha, beta):
        xtrain = torch.Tensor(xtrain).float().view([-1, xtrain.shape[1]])
        ytrain = torch.Tensor(ytrain).float().view([-1, 1])

        # Updates the basis matrices and stores them for later use
        #  (like predicting)
        self.phi_train = self.get_features(model, xtrain)

        self.D = self.phi_train.shape[1]

        # self.K = torch.addmm(alpha ** 2, torch.eye(self.D), beta, self.phi_train.t(), self.phi_train)
        self.K = torch.addmm(alpha, torch.eye(self.D), beta,
                             self.phi_train.t(), self.phi_train)
        self.K_inv = torch.inverse(self.K)
        self.m = torch.mv(torch.mm(self.K_inv, self.phi_train.t()),
                          ytrain.view((-1, ))).mul(beta)

        self.alpha = alpha
        self.beta = beta
Example #29
    def extendXYScale(self, xScale=None, yScale=None):
        """Extend the scale: we extend this model so that if the scaler is removed from the input
        pipeline, the result does not change. We do this by modifying the weights, although a
        linear layer should also work.
        """
        state_dct = self.state_dict()
        nlyr = len(self.layers)  # so I just use 0 and this number - 1
        if xScale is not None:
            w0 = state_dct['main.0.weight']
            b0 = state_dct['main.0.bias']
            xmean, xstd = xScale
            b0 -= torch.mv(w0, torch.from_numpy(np.squeeze(xmean / xstd, axis=0)).float())
            w0 /= torch.from_numpy(xstd).float()
        if yScale is not None:
            wf = state_dct['main.%d.weight' % (nlyr - 1)]
            bf = state_dct['main.%d.bias' % (nlyr - 1)]
            ymean, ystd = yScale
            wf *= torch.from_numpy(ystd.T).float()
            bf *= torch.from_numpy(np.squeeze(ystd, axis=0)).float()
            bf += torch.from_numpy(np.squeeze(ymean, axis=0)).float()
Example #30
        def forward(self, query_src, query_lengths, res_src, res_lengths, state=None):
            _, batch_size = query_src.size()
            # encoder query and res
            query_encoder_final, query_memory_bank = self.encoder(query_src, lengths=query_lengths)
            res_encoder_final, res_memory_bank = self.encoder(res_src, lengths=res_lengths)

            if not isinstance(query_encoder_final, tuple):
                query_encoder_hidden = (query_encoder_final, )
                res_encoder_hidden = (res_encoder_final, )
            else:
                query_encoder_hidden = query_encoder_final
                res_encoder_hidden = res_encoder_final

            query_encoder_hidden = query_encoder_hidden[0].contiguous().view(batch_size, -1)
            res_encoder_hidden = res_encoder_hidden[0].contiguous().view(batch_size, -1)

            # get extended features
            extended_feats = []
            if self.substract_flag:
                substract_feat = query_encoder_hidden - res_encoder_hidden
                extended_feats.append(substract_feat)
            if self.dot_flag:
                dot_feat = torch.mul(query_encoder_hidden, res_encoder_hidden)
                extended_feats.append(dot_feat)
            if self.bilinear_flag:
                bilinear_feat = self.bilinear(query_encoder_hidden, res_encoder_hidden)
                extended_feats.append(bilinear_feat)
            if self.inner_prod_flag:
                inner_prob_feat = torch.bmm(query_encoder_hidden.unsqueeze(1),
                                       res_encoder_hidden.unsqueeze(2))
                extended_feats.append(inner_prob_feat.squeeze(-1))

            # concatenate features
            combine_feats = torch.cat(extended_feats, dim=-1)

            if self.score_fn_type == 'MLP':
                outputs = self.score_fn(combine_feats)
            elif self.score_fn_type == 'LR':
                outputs = torch.mv(combine_feats, self.score_fn) + self.bias
            else:
                raise ValueError("{} is not valid for SingleArch, ".format(self.score_fn_type))

            return outputs.squeeze(), query_encoder_final
Example #31
    def __init__(self):
        # Original weights
        original_weight = torch.Tensor([1, 2, 3])
        # Shape: torch.Size([3])

        # Prepare the X data
        # Shape: torch.Size([100, 3])
        X = torch.cat([torch.ones(100, 1), torch.randn(100, 2)], 1)

        # Compute the dot product of the data and the weights.
        dot = torch.mv(X, original_weight)

        # Add random noise to the dot product to get y.
        # Shape: torch.Size([100])
        y = dot + torch.randn(100) * 0.5

        self.X = X
        self.y = y
        self.original_weight = original_weight
Example #32
    def energy(self, V, tuned=False):
        """
        Calculate F(V) in exp(-F(V))/Z which gives the probability of vector V.
        The output is not the real probability since we don't know the partition function,
        but we can use it to compare between data.
        Lower value means better. Don't @ me.
        """
        if tuned:
            v_b = self.visible_bias_g
            h_b = self.hidden_bias_r
            w = self.w_r
        else:
            v_b = self.visible_bias
            h_b = self.hidden_bias
            w = self.w

        X = torch.mm(V, w) + h_b
        F = -torch.mv(V, v_b) - torch.sum(torch.log(1 + torch.exp(X)), dim=-1)
        return torch.mean(F)
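For a batch of visible vectors V, the free energy above is F(V) = -V·b_v - sum_j log(1 + exp((V W + b_h)_j)), row by row. A standalone sketch on random tensors (shapes and names are illustrative, not from the original RBM class):

import torch

n_visible, n_hidden, batch = 6, 4, 3
V = torch.bernoulli(torch.full((batch, n_visible), 0.5))  # binary visible units
w = torch.randn(n_visible, n_hidden)
v_b = torch.randn(n_visible)
h_b = torch.randn(n_hidden)

X = torch.mm(V, w) + h_b
# log1p(exp(x)) is the same quantity as log(1 + exp(x)) used above
F = -torch.mv(V, v_b) - torch.sum(torch.log1p(torch.exp(X)), dim=-1)
print(torch.mean(F))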
Example #33
    def fit(self, path):
        """
        Fit the model using analytic solution.
        """
        fp = FingerprintsDataset(path)

        loader = DataLoader(dataset=fp,
                            batch_size=1,
                            collate_fn=fingerprints_collate_fn)

        X, y = self.prepare_data(loader)
        A = torch.inverse(torch.mm(X.t(), X))
        beta = torch.mv(torch.mm(A, X.t()), y)

        self.set_params(beta)

        msg = 'fit model "{}" finished.'.format(self.__class__.__name__)
        logger.info(msg)
        print(msg)
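The fit above is the ordinary least-squares solution beta = (X^T X)^{-1} X^T y. A small sketch (illustrative only) checking that computation against torch.linalg.lstsq on random data:

import torch

X = torch.randn(50, 3)
y = torch.randn(50)

beta_normal_eq = torch.mv(torch.mm(torch.inverse(torch.mm(X.t(), X)), X.t()), y)
beta_lstsq = torch.linalg.lstsq(X, y.unsqueeze(1)).solution.squeeze(1)
print(torch.allclose(beta_normal_eq, beta_lstsq, atol=1e-4))  # expected: True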
Example #34
    def forward(self, input, hx=None):
        weight_ih = self.weight_ih
        bias = self.bias
        weight_hh = self.weight_hh

        z = (torch.mv(weight_ih[0], input) + torch.mv(weight_hh[0], hx) +
             bias[0]).sigmoid()
        r = (torch.mv(weight_ih[1], input) + torch.mv(weight_hh[1], hx) +
             bias[1]).sigmoid()
        n = (torch.mv(weight_ih[2], input) + torch.mv(weight_hh[2], hx * r) +
             bias[2]).relu()
        h = (torch.ones_like(z) - z) * n + z * hx

        return h
Example #35
    def select_action(self, context: torch.Tensor) -> int:
        """Select an action based on given context.

        Selecting action with highest predicted reward computed through
        betas sampled from posterior.

        Args:
            context (torch.Tensor): The context vector to select action for.

        Returns:
            int: The action to take.
        """
        self.t += 1
        if self.t < self.n_actions * self.init_pulls:
            return torch.tensor(self.t % self.n_actions,
                                device=self.device,
                                dtype=torch.int)
        var = torch.tensor(
            [
                self.b[i] * invgamma.rvs(self.a[i])
                for i in range(self.n_actions)
            ],
            device=self.device,
            dtype=torch.float,
        )
        try:
            beta = (torch.tensor(
                np.stack([
                    np.random.multivariate_normal(self.mu[i],
                                                  var[i] * self.cov[i])
                    for i in range(self.n_actions)
                ])).to(self.device).to(torch.float))
        except np.linalg.LinAlgError as e:  # noqa F841
            beta = ((torch.stack([
                torch.distributions.MultivariateNormal(
                    torch.zeros(self.context_dim + 1),
                    torch.eye(self.context_dim + 1),
                ).sample() for i in range(self.n_actions)
            ])).to(self.device).to(torch.float))
        values = torch.mv(beta, torch.cat([context.view(-1), torch.ones(1)]))
        action = torch.argmax(values).to(torch.int)
        return action
Example #36
    def backward(ctx, grad_output):
        matrix, vector = ctx.saved_variables
        grad_add_vector = grad_matrix = grad_vector = None

        if ctx.needs_input_grad[0]:
            grad_add_vector = maybe_unexpand(grad_output, ctx.add_vector_size)
            if ctx.alpha != 1:
                grad_add_vector = grad_add_vector.mul(ctx.alpha)

        if ctx.needs_input_grad[1]:
            grad_matrix = torch.ger(grad_output, vector)
            if ctx.beta != 1:
                grad_matrix *= ctx.beta

        if ctx.needs_input_grad[2]:
            grad_vector = torch.mv(matrix.t(), grad_output)
            if ctx.beta != 1:
                grad_vector *= ctx.beta

        return grad_add_vector, grad_matrix, grad_vector, None, None, None
Example #37
    def backward(ctx, grad_output):
        vector1, vector2 = ctx.saved_variables
        grad_add_matrix = grad_vector1 = grad_vector2 = None

        if ctx.needs_input_grad[0]:
            grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
            if ctx.alpha != 1:
                grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

        if ctx.needs_input_grad[1]:
            grad_vector1 = torch.mv(grad_output, vector2)
            if ctx.beta != 1:
                grad_vector1 *= ctx.beta

        if ctx.needs_input_grad[2]:
            # TODO: maybe it's better to do transpose + mv + transpose
            grad_vector2 = torch.mm(vector1.unsqueeze(0), grad_output).squeeze(0)
            if ctx.beta != 1:
                grad_vector2 *= ctx.beta

        return grad_add_matrix, grad_vector1, grad_vector2, None, None, None
Example #38
def matmul(tensor1, tensor2, out=None):
    r"""Matrix product of two tensors.

    The behavior depends on the dimensionality of the tensors as follows:

    - If both tensors are 1-dimensional, the dot product (scalar) is returned.
    - If both arguments are 2-dimensional, the matrix-matrix product is returned.
    - If the first argument is 1-dimensional and the second argument is 2-dimensional,
      a 1 is prepended to its dimension for the purpose of the matrix multiply.
      After the matrix multiply, the prepended dimension is removed.
    - If the first argument is 2-dimensional and the second argument is 1-dimensional,
      the matrix-vector product is returned.
    - If both arguments are at least 1-dimensional and at least one argument is
      N-dimensional (where N > 2), then a batched matrix multiply is returned.  If the first
      argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the
      batched matrix multiply and removed after.  If the second argument is 1-dimensional, a
      1 is appended to its dimension for the purpose of the batched matrix multiply and removed after.
      The non-matrix (i.e. batch) dimensions are :ref:`broadcasted <broadcasting-semantics>` (and thus
      must be broadcastable).  For example, if :attr:`tensor1` is a
      :math:`(j \times 1 \times n \times m)` tensor and :attr:`tensor2` is a :math:`(k \times m \times p)`
      tensor, :attr:`out` will be a :math:`(j \times k \times n \times p)` tensor.

    .. note::

        The 1-dimensional dot product version of this function does not support an :attr:`out` parameter.

    Arguments:
        tensor1 (Tensor): the first tensor to be multiplied
        tensor2 (Tensor): the second tensor to be multiplied
        out (Tensor, optional): the output tensor
    """
    dim_tensor1 = tensor1.dim()
    dim_tensor2 = tensor2.dim()
    if dim_tensor1 == 1 and dim_tensor2 == 1:
        if out is None:
            return torch.dot(tensor1, tensor2)
        else:
            raise ValueError("out must be None for 1-d tensor matmul, returns a scalar")
    if dim_tensor1 == 2 and dim_tensor2 == 1:
        if out is None:
            return torch.mv(tensor1, tensor2)
        else:
            return torch.mv(tensor1, tensor2, out=out)
    elif dim_tensor1 == 1 and dim_tensor2 == 2:
        if out is None:
            return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
        else:
            return torch.mm(tensor1.unsqueeze(0), tensor2, out=out).squeeze_(0)
    elif dim_tensor1 == 2 and dim_tensor2 == 2:
        if out is None:
            return torch.mm(tensor1, tensor2)
        else:
            return torch.mm(tensor1, tensor2, out=out)
    elif dim_tensor1 >= 3 and (dim_tensor2 == 1 or dim_tensor2 == 2):
        # optimization: use mm instead of bmm by folding tensor1's batch into
        # its leading matrix dimension.

        if dim_tensor2 == 1:
            tensor2 = tensor2.unsqueeze(-1)

        size1 = tensor1.size()
        size2 = tensor2.size()
        output_size = size1[:-1] + size2[-1:]

        # fold the batch into the first dimension
        tensor1 = tensor1.contiguous().view(-1, size1[-1])

        if out is None or not out.is_contiguous():
            output = torch.mm(tensor1, tensor2)
        else:
            output = torch.mm(tensor1, tensor2, out=out)

        output = output.view(output_size)

        if dim_tensor2 == 1:
            output = output.squeeze(-1)

        if out is not None:
            out.set_(output)
            return out

        return output
    elif (dim_tensor1 >= 1 and dim_tensor2 >= 1) and (dim_tensor1 >= 3 or dim_tensor2 >= 3):
        # ensure each tensor size is at least 3-dimensional
        tensor1_exp_size = torch.Size((1,) * max(3 - tensor1.dim(), 0) + tensor1.size())
        # rhs needs to be a separate case since we can't freely expand 1s on the rhs, but can on lhs
        if dim_tensor2 == 1:
            tensor2 = tensor2.unsqueeze(1)
        tensor2_exp_size = torch.Size((1,) * max(3 - tensor2.dim(), 0) + tensor2.size())

        # expand the batch portion (i.e. cut off matrix dimensions and expand rest)
        expand_batch_portion = torch._C._infer_size(tensor1_exp_size[:-2], tensor2_exp_size[:-2])

        # flatten expanded batches
        tensor1_expanded = tensor1.expand(*(expand_batch_portion + tensor1_exp_size[-2:])) \
            .contiguous().view(reduce(mul, expand_batch_portion), *tensor1_exp_size[-2:])
        tensor2_expanded = tensor2.expand(*(expand_batch_portion + tensor2_exp_size[-2:])) \
            .contiguous().view(reduce(mul, expand_batch_portion), *tensor2_exp_size[-2:])

        # reshape batches back into result
        total_expansion = expand_batch_portion + (tensor1_exp_size[-2], tensor2_exp_size[-1])

        def maybeSqueeze(tensor):
            if dim_tensor1 == 1:
                return tensor.squeeze(-2)
            elif dim_tensor2 == 1:
                return tensor.squeeze(-1)
            else:
                return tensor

        if out is None or not out.is_contiguous():
            output = torch.bmm(tensor1_expanded, tensor2_expanded)
        else:
            output = torch.bmm(tensor1_expanded, tensor2_expanded, out=out)

        output = maybeSqueeze(output.view(total_expansion))

        if out is not None:
            out.set_(output)
            return out

        return output

    raise ValueError("both arguments to __matmul__ need to be at least 1D, "
                     "but they are {}D and {}D".format(dim_tensor1, dim_tensor2))