def __init__(self, in_dim, hidden_dim, out_dim, num_heads, num_classes=2, use_gdc=True):
    super(BasicSummarizerWithGDC, self).__init__()
    self.CNT = 0
    # Left-padded ALBERT tokenizer feeding an LSTM sentence encoder.
    self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    self.tokenizer.padding_side = 'left'
    self.embedder = LSTM(self.tokenizer.vocab_size)
    self.gat_classifier = GATClassifier(in_dim, hidden_dim, out_dim, num_heads, num_classes)
    self.use_gdc = use_gdc
    if self.use_gdc:
        # Graph Diffusion Convolution: exact personalized-PageRank diffusion
        # (alpha=0.05), then keep only the top-5 edges per node.
        self.gdc = T.GDC(self_loop_weight=1,
                         normalization_in='sym',
                         normalization_out='col',
                         diffusion_kwargs=dict(method='ppr', alpha=0.05),
                         sparsification_kwargs=dict(method='topk', k=5, dim=0),
                         exact=True)
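# Hedged usage sketch (not part of the original class): how the GDC transform
# built in __init__ would typically be applied to a sentence graph before it
# reaches the GAT classifier. `sentence_feats` and `edge_index` are
# hypothetical stand-ins for whatever the summarizer's pipeline produces.
from torch_geometric.data import Data

def apply_gdc(model, sentence_feats, edge_index):
    graph = Data(x=sentence_feats, edge_index=edge_index)
    if model.use_gdc:
        # Densify via exact PPR diffusion, then re-sparsify to the 5
        # strongest edges per node.
        graph = model.gdc(graph)
    return graph.x, graph.edge_index, graph.edge_attr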
import argparse
import os.path as osp

import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv

parser = argparse.ArgumentParser()
parser.add_argument('--use_gdc', action='store_true', help='Use GDC preprocessing.')
args = parser.parse_args()

dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
data = dataset[0]

args.use_gdc = True  # GDC is forced on here, regardless of the CLI flag.
if args.use_gdc:
    gdc = T.GDC(self_loop_weight=1,
                normalization_in='sym',
                normalization_out='col',
                diffusion_kwargs=dict(method='ppr', alpha=0.05),
                sparsification_kwargs=dict(method='topk', k=128, dim=0),
                exact=True)
    data = gdc(data)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # GDC already normalizes the adjacency, so GCN's own symmetric
        # normalization is disabled when it is used.
        self.conv1 = GCNConv(dataset.num_features, 16, cached=True,
                             normalize=not args.use_gdc)
        self.conv2 = GCNConv(16, dataset.num_classes, cached=True,
                             normalize=not args.use_gdc)
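# Hedged sanity check (not in the original script): with exact PPR diffusion
# the transition matrix is dense, and 'topk' sparsification with k=128, dim=0
# keeps 128 incoming edges per node, so the edge count after GDC is roughly
# num_nodes * 128.
print(f'nodes: {data.num_nodes}, edges after GDC: {data.edge_index.size(1)}')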
def main(args):
    # `set_seeds`, `random_split`, and `PositionwiseFeedForward` are
    # project-local helpers assumed to be imported at the top of the file.
    set_seeds(args.rand_seed)
    dataset = args.data
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
    data = dataset[0]

    if args.use_gdc:
        gdc = T.GDC(self_loop_weight=1,
                    normalization_in='sym',
                    normalization_out='col',
                    diffusion_kwargs=dict(method='ppr', alpha=args.ppr),  # orig 0.05
                    sparsification_kwargs=dict(method='topk', k=args.topk, dim=0),
                    exact=True)
        data = gdc(data)

    if args.shuffle:
        print('Performing shuffle... over {} using model {}'.format(args.data, args.model))
        # Re-draw train/val/test masks of the original sizes with a fixed seed.
        train_dev_test_tuple = (data.train_mask.sum().item(),
                                data.val_mask.sum().item(),
                                data.test_mask.sum().item())
        train_mask, val_mask, test_mask = random_split(
            N=data.train_mask.shape[0],
            train_dev_test_tuple=train_dev_test_tuple,
            random_seed=args.rand_seed)
        data.train_mask = train_mask
        data.val_mask = val_mask
        data.test_mask = test_mask
    else:
        print('Standard splitting... over {} using model {}'.format(args.data, args.model))

    class Net(torch.nn.Module):
        """Two-layer GCN; GCN normalization is disabled when GDC is used."""

        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = GCNConv(dataset.num_features, args.hid_dim, cached=True,
                                 normalize=not args.use_gdc)
            self.conv2 = GCNConv(args.hid_dim, dataset.num_classes, cached=True,
                                 normalize=not args.use_gdc)

        def forward(self):
            x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr
            x = F.relu(self.conv1(x, edge_index, edge_weight))
            x = F.dropout(x, training=self.training)
            x = self.conv2(x, edge_index, edge_weight)
            return F.log_softmax(x, dim=1)

    class NetLayerNorm_FF(torch.nn.Module):
        """GCN with a Transformer-style block: residual, LayerNorm, feed-forward."""

        def __init__(self):
            super(NetLayerNorm_FF, self).__init__()
            self.conv1 = GCNConv(dataset.num_features, args.hid_dim, cached=True,
                                 normalize=not args.use_gdc)
            if dataset.num_features != args.hid_dim:
                self.res_fc = nn.Linear(dataset.num_features, args.hid_dim, bias=False)
            else:
                self.res_fc = None
            self.layer_norm_1 = torch.nn.LayerNorm(args.hid_dim)
            self.layer_norm_2 = torch.nn.LayerNorm(args.hid_dim)
            self.ff_layer = PositionwiseFeedForward(model_dim=args.hid_dim,
                                                    d_hidden=4 * args.hid_dim)
            self.conv2 = GCNConv(args.hid_dim, dataset.num_classes, cached=True,
                                 normalize=not args.use_gdc)

        def forward(self):
            x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr
            convx = self.conv1(x, edge_index, edge_weight)
            # Project the input for the residual path when dimensions differ.
            res_x = self.res_fc(x) if self.res_fc is not None else x
            norm_x = F.dropout(self.layer_norm_1(convx), training=self.training)
            norm_x = norm_x + res_x
            x_ff = self.ff_layer(norm_x)
            x_ff = F.dropout(x_ff, training=self.training)
            x = norm_x + x_ff
            x = self.layer_norm_2(x)
            x = self.conv2(x, edge_index, edge_weight)
            return F.log_softmax(x, dim=1)

    class DeepNet(torch.nn.Module):
        """Deeper GCN: `layers` convolutions joined by residual connections."""

        def __init__(self, layers=args.layers):
            super(DeepNet, self).__init__()
            self.conv1 = GCNConv(dataset.num_features, args.hid_dim, cached=True,
                                 normalize=not args.use_gdc)
            if dataset.num_features != args.hid_dim:
                self.res_fc = nn.Linear(dataset.num_features, args.hid_dim, bias=False)
            else:
                self.res_fc = None
            self.multi_conv_layers = nn.ModuleList()
            for i in range(2, layers):
                layer_i = GCNConv(args.hid_dim, args.hid_dim, cached=True,
                                  normalize=not args.use_gdc)
                self.multi_conv_layers.append(layer_i)
            self.conv2 = GCNConv(args.hid_dim, dataset.num_classes, cached=True,
                                 normalize=not args.use_gdc)

        def forward(self):
            x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr
            # Project the input for the residual path when dimensions differ.
            res_x = self.res_fc(x) if self.res_fc is not None else x
            x = F.relu(self.conv1(x, edge_index, edge_weight))
            x = x + res_x
            for layer_i in self.multi_conv_layers:
                x_temp = x
                x = F.relu(layer_i(x, edge_index, edge_weight))
                x = x + x_temp
            x = F.dropout(x, training=self.training)
            x = self.conv2(x, edge_index, edge_weight)
            return F.log_softmax(x, dim=1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if args.model == 'Net':
        model, data = Net().to(device), data.to(device)
    elif args.model == 'NetFF':
        model, data = NetLayerNorm_FF().to(device), data.to(device)
    elif args.model == 'Deep':
        model, data = DeepNet().to(device), data.to(device)
        print('Deep model layer number = {}'.format(args.layers))
    else:
        raise ValueError('model %s not supported' % args.model)
    print(model)

    # Only perform weight-decay on the first convolution; any remaining
    # parameters (LayerNorm, feed-forward, residual and middle convolutions
    # in NetFF/Deep) are trained without weight decay.
    param_groups = [
        dict(params=model.conv1.parameters(), weight_decay=args.weight_decay),
        dict(params=model.conv2.parameters(), weight_decay=0),
    ]
    extra_params = [p for name, p in model.named_parameters()
                    if not name.startswith(('conv1.', 'conv2.'))]
    if extra_params:
        param_groups.append(dict(params=extra_params, weight_decay=0))
    optimizer = torch.optim.Adam(param_groups, lr=args.lr)

    def train():
        model.train()
        optimizer.zero_grad()
        F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
        optimizer.step()

    @torch.no_grad()
    def test():
        model.eval()
        logits, accs = model(), []
        for _, mask in data('train_mask', 'val_mask', 'test_mask'):
            pred = logits[mask].max(1)[1]
            acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
            accs.append(acc)
        return accs

    best_val_acc = test_acc = 0
    for epoch in range(1, 301):
        train()
        train_acc, val_acc, tmp_test_acc = test()
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
        log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
        print(log.format(epoch, train_acc, best_val_acc, test_acc))
    print('model {}: data: {}: test_acc: {}'.format(args.model, args.data, test_acc))
    return test_acc
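# Hedged sketch of the CLI this main() expects. The flag names mirror the
# attributes read above (args.rand_seed, args.data, args.model, args.use_gdc,
# args.ppr, args.topk, args.shuffle, args.hid_dim, args.layers, args.lr,
# args.weight_decay); the defaults are illustrative assumptions, not the
# repo's actual values.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--rand_seed', type=int, default=42)
    parser.add_argument('--data', type=str, default='Cora')
    parser.add_argument('--model', type=str, default='Net',
                        choices=['Net', 'NetFF', 'Deep'])
    parser.add_argument('--use_gdc', action='store_true')
    parser.add_argument('--ppr', type=float, default=0.05)
    parser.add_argument('--topk', type=int, default=128)
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--hid_dim', type=int, default=16)
    parser.add_argument('--layers', type=int, default=4)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=5e-4)
    main(parser.parse_args())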
# Reference accuracies (%) across dataset / unseen-class settings:
# RECT-L     66.30  68.20  74.60  71.20  75.30
# GCN        51.80  55.70  55.80  57.10  59.80
# NodeFeats  61.40  61.40  57.50  57.50  73.10
import argparse
import copy
import os.path as osp

import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import RECT_L

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='Cora',
                    choices=['Cora', 'CiteSeer', 'PubMed'])
parser.add_argument('--unseen-classes', type=int, nargs='*', default=[1, 2, 3])
args = parser.parse_args()

path = osp.join(osp.dirname(osp.realpath(__file__)), '../data/Planetoid')
train_mask_original = Planetoid(path, args.dataset)[0].train_mask.clone()

transform = T.Compose([
    T.NormalizeFeatures(),
    T.SVDFeatureReduction(200),
    T.GDC(),
])
dataset = Planetoid(path, args.dataset, transform=transform)
data = dataset[0]

# Hide the unseen classes from the training set to create the zero-shot split.
zs_data = T.RemoveTrainingClasses(args.unseen_classes)(copy.copy(data))

model = RECT_L(200, 200, normalize=False, dropout=0.0)
# Replace the raw labels of training nodes with semantic (class-prototype)
# vectors, which RECT-L regresses onto.
zs_data.y = model.get_semantic_labels(zs_data.x, zs_data.y, zs_data.train_mask)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, zs_data = model.to(device), zs_data.to(device)

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
model.train()
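# Hedged continuation (the excerpt stops right after model.train()): a
# training loop in the spirit of the PyTorch Geometric RECT example,
# regressing the training nodes' embeddings onto their semantic labels.
# The epoch count is an assumption, not taken from the original.
for epoch in range(1, 201):
    optimizer.zero_grad()
    out = model(zs_data.x, zs_data.edge_index, zs_data.edge_attr)
    loss = criterion(out[zs_data.train_mask], zs_data.y)
    loss.backward()
    optimizer.step()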