Example #1
import torch
import torch.nn.functional as F


def power_iteration(W, u_, update=True, eps=1e-12):
    # Lists holding the singular vectors and singular values found so far
    us, vs, svs = [], [], []
    Wt = W.t()
    for i, u in enumerate(u_):
        # Run one step of the power iteration
        with torch.no_grad():
            v = torch.matmul(u, W)
            # Run Gram-Schmidt to subtract components of all other singular vectors
            v = F.normalize(gram_schmidt(v, vs), eps=eps)
            # Add to the list
            vs += [v]
            # Update the other singular vector
            u = torch.matmul(v, Wt)
            # Run Gram-Schmidt to subtract components of all other singular vectors
            u = F.normalize(gram_schmidt(u, us), eps=eps)
            # Add to the list
            us += [u]
            if update:
                # Write the new estimate back into the persistent buffer
                u_[i].copy_(u)
        # Compute this singular value and add it to the list
        svs += [torch.squeeze(torch.matmul(torch.matmul(v, Wt), u.t()))]
    return svs, us, vs
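The example above calls a gram_schmidt helper that is not included in the snippet. A minimal sketch of what it is assumed to do (project out of x the components along the previously found singular vectors, in the style of BigGAN's spectral-norm code); proj and gram_schmidt here are assumptions, not part of the original:

import torch


def proj(x, y):
    # Projection of row vector y onto the direction of row vector x, both shaped (1, d)
    return torch.mm(y, x.t()) * x / torch.mm(x, x.t())


def gram_schmidt(x, ys):
    # Remove from x its component along each previously found vector in ys
    for y in ys:
        x = x - proj(y, x)
    return x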
Example #2
def matmul(self, y):
    """Matrix-multiply this tensor with ``y`` by delegating to torch.matmul."""
    return torch.matmul(self, y)
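A minimal usage sketch, assuming the method above is bound to a torch.Tensor-like class so that x.matmul(y) simply forwards to torch.matmul(x, y):

import torch

x = torch.randn(3, 4)
y = torch.randn(4, 5)
# torch.Tensor defines matmul the same way, so both calls give the same (3, 5) result
print(torch.allclose(x.matmul(y), torch.matmul(x, y)))  # True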
Example #3

import torch
import paddle
import paddorch

# Build identical random inputs for both frameworks
torch.manual_seed(0)
a = torch.randn(70839, 64)
b = torch.randn(64, 64, requires_grad=True)

# Flattened index of the largest element of the (70839, 64) product under PyTorch
print(torch.argmax(torch.matmul(a, b)))

# Recreate the same tensors in paddorch (a paddle-backed, torch-like wrapper) and repeat
a2 = paddorch.Tensor(a.detach().cpu().numpy())
b2 = paddorch.Tensor(b.detach().cpu().numpy())
print(paddle.argmax(paddorch.matmul(a2, b2)))
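The example only compares the argmax of the two products. A small extension (a sketch, assuming paddorch/paddle tensors expose a .numpy() method as paddle tensors do) that also checks the values elementwise:

import numpy as np

out_torch = torch.matmul(a, b).detach().cpu().numpy()
out_paddle = paddorch.matmul(a2, b2).numpy()  # .numpy() assumed, as on paddle tensors
print(np.allclose(out_torch, out_paddle, atol=1e-5))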

Example #4
def train_moco(epoch, train_loader, model, model_ema, contrast, criterion,
               optimizer, sw, opt):
    """
    one epoch training for moco
    """

    n_batch = train_loader.dataset.total // opt.batch_size
    no_update_debug = False
    if no_update_debug:
        model.eval()
        contrast.eval()
    else:
        model.train()
    model_ema.eval()

    def set_bn_train(m):
        classname = m.__class__.__name__
        if classname.find("BatchNorm") != -1:
            m.train()

    if not no_update_debug:
        model_ema.apply(set_bn_train)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    epoch_loss_meter = AverageMeter()
    prob_meter = AverageMeter()
    graph_size = AverageMeter()
    gnorm_meter = AverageMeter()
    max_num_nodes = 0
    max_num_edges = 0

    end = time.time()
    if no_update_debug:
        graph_q, graph_k = train_loader.dataset[0]
        graph_q2, graph_k2 = train_loader.dataset[1]
        graph_q = dgl.batch([graph_q, graph_q2])
        graph_k = dgl.batch([graph_k, graph_k2])

    for idx, batch in enumerate(train_loader):
        data_time.update(time.time() - end)
        if not no_update_debug:
            graph_q, graph_k = batch

        # graph_q.to(torch.device(opt.gpu))
        # graph_k.to(torch.device(opt.gpu))

        bsz = graph_q.batch_size

        if opt.moco:
            # ===================Moco forward=====================
            feat_q = model(graph_q)
            with torch.no_grad():
                feat_k = model_ema(graph_k)

            out = contrast(feat_q, feat_k)

            prob = out[:, 0].mean()
        else:
            # ===================Negative sampling forward=====================
            feat_q = model(graph_q)
            feat_k = model(graph_k)

            out = torch.matmul(feat_k, feat_q.t()) / opt.nce_t
            prob = out[range(graph_q.batch_size),
                       range(graph_q.batch_size)].mean()

        assert feat_q.shape == (graph_q.batch_size, opt.hidden_size)

        # ===================backward=====================
        optimizer.zero_grad()
        loss = criterion(out)

        if not no_update_debug:
            loss.backward()

        grad_norm = clip_grad_norm(model.parameters(), opt.clip_norm)

        global_step = epoch * n_batch + idx
        lr_this_step = opt.learning_rate * warmup_linear(
            global_step / (opt.epochs * n_batch), 0.1)
        # Apply the warmup/decay-scaled learning rate to every parameter group
        for param_group in optimizer.param_groups:
            param_group["lr"] = lr_this_step
        optimizer.step()
        if no_update_debug:
            print(loss.item())
        # ===================meters=====================
        loss_meter.update(loss.item(), bsz)
        epoch_loss_meter.update(loss.item(), bsz)
        prob_meter.update(prob.item(), bsz)
        graph_size.update(
            (graph_q.number_of_nodes() + graph_k.number_of_nodes()) / 2.0 /
            bsz, 2 * bsz)

        gnorm_meter.update(grad_norm, 1)
        max_num_nodes = max(max_num_nodes, graph_q.number_of_nodes())
        max_num_edges = max(max_num_edges, graph_q.number_of_edges())

        if opt.moco:
            if not no_update_debug:
                moment_update(model, model_ema, opt.alpha)

        batch_time.update(time.time() - end)
        end = time.time()
        # del graph_q, graph_k, feat_q, feat_k

        # print info
        if (idx + 1) % opt.print_freq == 0:
            mem = psutil.virtual_memory()
            print("Train: [{0}][{1}/{2}]\t"
                  "BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                  "DT {data_time.val:.3f} ({data_time.avg:.3f})\t"
                  "loss {loss.val:.3f} ({loss.avg:.3f})\t"
                  "prob {prob.val:.3f} ({prob.avg:.3f})\t"
                  "GS {graph_size.val:.3f} ({graph_size.avg:.3f})\t"
                  "mem {mem:.3f}".format(
                      epoch,
                      idx + 1,
                      n_batch,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=loss_meter,
                      prob=prob_meter,
                      graph_size=graph_size,
                      mem=mem.used / 1024**3,
                  ))

        # tensorboard logger
        if (idx + 1) % opt.tb_freq == 0:
            global_step = epoch * n_batch + idx
            sw.add_scalar("moco_loss", loss_meter.avg, global_step)
            sw.add_scalar("moco_prob", prob_meter.avg, global_step)
            sw.add_scalar("graph_size", graph_size.avg, global_step)
            sw.add_scalar("graph_size/max", max_num_nodes, global_step)
            sw.add_scalar("graph_size/max_edges", max_num_edges, global_step)
            sw.add_scalar("gnorm", gnorm_meter.avg, global_step)
            sw.add_scalar("learning_rate", optimizer.param_groups[0]["lr"],
                          global_step)
            loss_meter.reset()
            prob_meter.reset()
            graph_size.reset()
            gnorm_meter.reset()
            max_num_nodes, max_num_edges = 0, 0
    return epoch_loss_meter.avg
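The training loop above relies on imports (time, psutil, dgl) and helpers (AverageMeter, clip_grad_norm, warmup_linear, moment_update) that are not shown. Minimal sketches of the last two, under the assumption that they follow the usual BERT-style triangular warmup schedule and the MoCo momentum-encoder EMA update; the exact definitions in the original codebase may differ:

import torch


def warmup_linear(x, warmup=0.002):
    # Linear warmup to the peak learning rate, then linear decay to zero.
    # x is the fraction of total training steps completed so far.
    if x < warmup:
        return x / warmup
    return max((x - 1.0) / (warmup - 1.0), 0.0)


def moment_update(model, model_ema, m):
    # Momentum (EMA) update of the key encoder: ema = m * ema + (1 - m) * model
    with torch.no_grad():
        for p, p_ema in zip(model.parameters(), model_ema.parameters()):
            p_ema.data.mul_(m).add_(p.detach().data, alpha=1 - m)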