Example #1
0
    def __init__(self,
                 hidden_dim,
                 num_gc_layers,
                 alpha=0.5,
                 beta=1.,
                 gamma=.1):
        """Build the encoder and the local/global (and optional prior) heads.

        Args:
            hidden_dim: Width handed to ``Encoder``; multiplied by
                ``num_gc_layers`` to obtain ``self.embedding_dim``.
            num_gc_layers: Layer count handed to ``Encoder``.
            alpha: Stored as ``self.alpha`` (not used inside this method).
            beta: Stored as ``self.beta`` (not used inside this method).
            gamma: Stored as ``self.gamma`` (not used inside this method).

        Note:
            Reads the module-level names ``args`` (for ``args.prior``) and
            ``dataset_num_features``; both must exist before construction.
        """
        super(GcnInfomax, self).__init__()

        # Plain scalar settings kept on the instance.
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.prior = args.prior

        emb_dim = hidden_dim * num_gc_layers
        self.embedding_dim = emb_dim

        # Sub-modules are created in the same order as before so parameter
        # registration (and any seeded initialisation) is unaffected.
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)
        self.local_d = FF(emb_dim)
        self.global_d = FF(emb_dim)

        # The prior discriminator only exists when the run was configured
        # with a truthy ``args.prior``.
        if self.prior:
            self.prior_d = PriorDiscriminator(emb_dim)

        self.init_emb()
Example #2
0
    def __init__(self,
                 hidden_dim,
                 num_gc_layers,
                 alpha=0.5,
                 beta=1.,
                 gamma=.1,
                 proj_dim=96):
        """Build the encoder, discriminator heads, BCE loss and projection head.

        Args:
            hidden_dim: Width handed to ``Encoder``; multiplied by
                ``num_gc_layers`` to obtain ``self.embedding_dim``.
            num_gc_layers: Layer count handed to ``Encoder``.
            alpha: Stored as ``self.alpha`` (not used inside this method).
            beta: Stored as ``self.beta`` (not used inside this method).
            gamma: Stored as ``self.gamma`` (not used inside this method).
            proj_dim: Input and output width of both linear layers in
                ``self.projection_head``. Defaults to 96, the value that was
                previously hard-coded, so existing callers are unaffected.

        Note:
            Reads the module-level names ``args`` (for ``args.prior``) and
            ``dataset_num_features``; both must exist before construction.
        """
        super(GcnInfomax, self).__init__()

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.prior = args.prior

        self.embedding_dim = hidden_dim * num_gc_layers
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)

        self.local_d = FF(self.embedding_dim)
        self.global_d = FF(self.embedding_dim)

        # The prior discriminator only exists when ``args.prior`` is truthy.
        if self.prior:
            self.prior_d = PriorDiscriminator(self.embedding_dim)

        self.init_emb()

        self.sigmoid = nn.Sigmoid()
        self.bceloss = nn.BCELoss()
        # NOTE(review): 96 presumably mirrors the default embedding_dim
        # (hidden_dim * num_gc_layers); pass ``proj_dim`` explicitly when the
        # encoder is configured differently -- confirm against forward().
        self.projection_head = nn.Sequential(nn.Linear(proj_dim, proj_dim),
                                             nn.ReLU(inplace=True),
                                             nn.Linear(proj_dim, proj_dim))
Example #3
0
    def __init__(self,
                 hidden_dim,
                 num_gc_layers,
                 alpha=0.5,
                 beta=1.,
                 gamma=.1):
        """Store the hyper-parameters and build the encoder.

        Args:
            hidden_dim: Width handed to ``Encoder``; multiplied by
                ``num_gc_layers`` to obtain ``self.embedding_dim``.
            num_gc_layers: Layer count handed to ``Encoder``.
            alpha: Stored as ``self.alpha`` (not used inside this method).
            beta: Stored as ``self.beta`` (not used inside this method).
            gamma: Stored as ``self.gamma`` (not used inside this method).

        Note:
            Reads the module-level names ``args`` (for ``args.prior``) and
            ``dataset_num_features``; both must exist before construction.
        """
        super(simclr, self).__init__()

        # Plain scalar settings kept on the instance.
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.prior = args.prior

        emb_dim = hidden_dim * num_gc_layers
        self.embedding_dim = emb_dim
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)

        self.init_emb()
Example #4
0
  def __init__(self, hidden_dim, num_gc_layers, alpha=0.5, beta=1., gamma=.1):
    """Build the encoder/decoder pair and the local/global heads.

    Args:
      hidden_dim: Width handed to ``Encoder`` and ``Decoder``; multiplied by
        ``num_gc_layers`` to obtain ``self.embedding_dim``.
      num_gc_layers: Layer count handed to ``Encoder``.
      alpha: Stored as ``self.alpha`` (not used inside this method).
      beta: Stored as ``self.beta`` (not used inside this method).
      gamma: Stored as ``self.gamma`` (not used inside this method).

    Note:
      Reads the module-level names ``args`` (for ``args.prior``) and
      ``dataset_num_features``; both must exist before construction.
    """
    super(GcnInfomax, self).__init__()

    # Plain scalar settings kept on the instance.
    self.alpha, self.beta, self.gamma = alpha, beta, gamma
    self.prior = args.prior

    emb_dim = hidden_dim * num_gc_layers
    self.embedding_dim = emb_dim

    # Sub-modules are created in the same order as before so parameter
    # registration (and any seeded initialisation) is unaffected.
    self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)
    self.decoder = Decoder(hidden_dim, hidden_dim, dataset_num_features)
    self.local_d = FF(emb_dim)
    self.global_d = FF(emb_dim)

    self.init_emb()
Example #5
0
    def __init__(self,
                 hidden_dim,
                 num_gc_layers,
                 alpha=0.5,
                 beta=1.,
                 gamma=.1):
        """Build the encoder and a two-layer projection head.

        Args:
            hidden_dim: Width handed to ``Encoder``; multiplied by
                ``num_gc_layers`` to obtain ``self.embedding_dim``.
            num_gc_layers: Layer count handed to ``Encoder``.
            alpha: Stored as ``self.alpha`` (not used inside this method).
            beta: Stored as ``self.beta`` (not used inside this method).
            gamma: Stored as ``self.gamma`` (not used inside this method).

        Note:
            Reads the module-level names ``args`` (for ``args.prior``) and
            ``dataset_num_features``; both must exist before construction.
        """
        super(simclr, self).__init__()

        # Plain scalar settings kept on the instance.
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.prior = args.prior

        emb_dim = hidden_dim * num_gc_layers
        self.embedding_dim = emb_dim
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)

        # Linear -> ReLU -> Linear, every layer embedding_dim wide.
        first_linear = nn.Linear(emb_dim, emb_dim)
        activation = nn.ReLU(inplace=True)
        second_linear = nn.Linear(emb_dim, emb_dim)
        self.proj_head = nn.Sequential(first_linear, activation, second_linear)

        self.init_emb()
Example #6
0
    dataset_eval = TUDataset(path, name=DS, aug='none',
        stro_aug='none').shuffle()
    print(len(dataset))
    print(dataset.get_num_feature())
    try:
        dataset_num_features = dataset.get_num_feature()
    except:
        dataset_num_features = 1

    dataloader = DataLoader(dataset, batch_size=batch_size)
    dataloader_eval = DataLoader(dataset_eval, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    emb_dim = args.hidden_dim * args.num_gc_layers

    online_encoder = Encoder(dataset_num_features, args.hidden_dim, args.num_gc_layers)
    model = MoCo(online_encoder, dim=emb_dim).to(device)
    # print(model)
    optimizer = torch.optim.Adam(model.encoder_q.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(args.num_gc_layers))
    print('================')

    model.eval()
    emb, y = model.encoder_q.get_embeddings(dataloader_eval)
    # print(emb.shape, y.shape)
Example #7
0
def _compact_augmented_graph(aug_data, batch, node_num):
    """Drop edge-less nodes from an augmented batch and renumber its edges.

    Mutates ``aug_data`` in place: ``x`` and ``batch`` keep only the nodes
    that still appear in ``edge_index``, and ``edge_index`` is rewritten with
    the compacted node ids, with self-loops removed.

    Args:
        aug_data: Object exposing ``x`` and a 2-row ``edge_index`` tensor.
        batch: Per-node graph assignment of the un-augmented batch; indexed
            with the surviving node ids to build ``aug_data.batch``.
        node_num: Number of nodes in the un-augmented batch.
    """
    src, dst = aug_data.edge_index.tolist()

    # Set membership keeps this O(nodes + edges) instead of scanning the
    # edge array once per node.
    connected = set(src)
    connected.update(dst)
    kept = [n for n in range(node_num) if n in connected]
    new_index = {old: new for new, old in enumerate(kept)}

    # An explicit long tensor keeps the indexing valid when nothing survives.
    keep = torch.tensor(kept, dtype=torch.long)
    aug_data.x = aug_data.x[keep]
    aug_data.batch = batch[keep]

    edges = [[new_index[s], new_index[d]] for s, d in zip(src, dst) if s != d]
    # reshape(-1, 2) yields a well-formed (2, 0) edge_index when no edge is
    # left; a bare transpose of an empty 1-D tensor would raise instead.
    aug_data.edge_index = torch.tensor(
        edges, dtype=torch.long).reshape(-1, 2).t()


def train(args, DS, gpu, num_gc_layers=4, epoch=40, batch=64):
    """Train a BYOL model on a TU dataset and append the accuracies to a log.

    Each batch yields the original graphs plus a weak and a strong
    augmentation; the model is called on the two augmented views and its
    return value is used as the loss. Every 10 epochs the online encoder's
    embeddings are scored with ``evaluate_embedding``.

    Args:
        args: Namespace providing ``lr``, ``aug``, ``stro_aug``,
            ``hidden_dim``, ``local``, ``prior`` and ``DS``.
        DS: Dataset name; also the sub-directory of ``data/`` that is used.
        gpu: Only written to the log line.
        num_gc_layers: Layer count passed to ``Encoder`` and ``BYOL``.
        epoch: Number of training epochs.
        batch: Batch size for both data loaders.

    Returns:
        None. Progress is printed and the collected accuracies are appended
        to ``logs/log_BYOL_<args.DS>_<args.aug>_<args.stro_aug>``.
    """
    import os  # local import: only needed to create the log directory

    accuracies = {'val': [], 'test': []}
    epochs = epoch
    log_interval = 10
    batch_size = batch
    lr = args.lr
    path = osp.join(osp.dirname(osp.realpath(__file__)), 'data', DS)

    dataset = TUDataset(path, name=DS, aug=args.aug,
                        stro_aug=args.stro_aug).shuffle()
    dataset_eval = TUDataset(path, name=DS, aug='none',
                             stro_aug='none').shuffle()
    print(len(dataset))
    # Resolve the feature count before printing it so that the fallback is
    # actually reachable when the dataset cannot report one.
    try:
        dataset_num_features = dataset.get_num_feature()
    except Exception:
        dataset_num_features = 1
    print(dataset_num_features)

    dataloader = DataLoader(dataset, batch_size=batch_size)
    dataloader_eval = DataLoader(dataset_eval, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    online_encoder = Encoder(dataset_num_features, args.hidden_dim,
                             num_gc_layers)
    model = BYOL(online_encoder,
                 args.hidden_dim,
                 num_gc_layers,
                 use_momentum=False).to(device)
    # Only the online encoder's parameters are handed to the optimizer.
    optimizer = torch.optim.Adam(model.online_encoder.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(num_gc_layers))
    print('================')

    # Pre-training embedding pass kept from the original flow; its result is
    # currently not evaluated.
    model.eval()
    emb, y = model.online_encoder.get_embeddings(dataloader_eval)

    # Augmentations after which edge-less nodes are dropped and the graph is
    # re-indexed.
    weak_reindex_augs = ('dnodes', 'subgraph', 'random2', 'random3',
                         'random4')
    stro_reindex_augs = ('stro_dnodes', 'stro_subgraph', 'random2',
                         'random3', 'random4')

    for epoch_idx in range(1, epochs + 1):
        loss_all = 0
        model.train()
        for data in dataloader:
            data, data_weak_aug, data_stro_aug = data
            optimizer.zero_grad()

            node_num, _ = data.x.size()
            data = data.to(device)

            if args.aug in weak_reindex_augs:
                _compact_augmented_graph(data_weak_aug, data.batch, node_num)

            if args.stro_aug in stro_reindex_augs:
                _compact_augmented_graph(data_stro_aug, data.batch, node_num)

            data_weak_aug = data_weak_aug.to(device)
            data_stro_aug = data_stro_aug.to(device)

            loss = model(data_weak_aug.x, data_weak_aug.edge_index,
                         data_weak_aug.batch, data_weak_aug.num_graphs,
                         data_stro_aug.x, data_stro_aug.edge_index,
                         data_stro_aug.batch, data_stro_aug.num_graphs)

            loss.backward()
            optimizer.step()

            # Accumulate a plain float so the running total does not carry
            # autograd history from one step to the next.
            loss_all += loss.item()

        print('Epoch {}, Loss {}'.format(epoch_idx,
                                         loss_all / len(dataloader)))

        if epoch_idx % log_interval == 0:
            model.eval()
            emb, y = model.online_encoder.get_embeddings(dataloader_eval)
            acc_val, acc = evaluate_embedding(emb, y)
            accuracies['val'].append(acc_val)
            accuracies['test'].append(acc)

    tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
    # Create the log directory up front so a finished run is not lost to a
    # missing folder.
    os.makedirs('logs', exist_ok=True)
    with open(
            'logs/log_BYOL_' + args.DS + '_' + args.aug + '_' + args.stro_aug,
            'a+') as f:
        s = json.dumps(accuracies)
        # The "layers" field previously referenced an undefined name; it is
        # the number of graph-convolution layers.
        f.write(
            '{},bs:{},epoch:{},layers:{},{},gpu:{},{},{},{},{},{}\n'.format(
                args.DS, batch, epochs, num_gc_layers, tpe, gpu,
                num_gc_layers, epochs, log_interval, lr, s))
        f.write('\n')