def __init__(
    self,
    output_dim=32,
    node_input_dim=32,
    node_hidden_dim=32,
    edge_input_dim=32,
    edge_hidden_dim=32,
    num_step_message_passing=6,
    lstm_as_gate=False,
):
    """Message-passing encoder: NNConv messages with a recurrent update gate.

    `output_dim` is accepted for interface compatibility; hidden sizes control
    the node/edge projections, and `lstm_as_gate` swaps the GRU gate for an LSTM.
    """
    super(UnsupervisedMPNN, self).__init__()
    self.num_step_message_passing = num_step_message_passing
    # Project raw node features into the hidden space.
    self.lin0 = nn.Linear(node_input_dim, node_hidden_dim)
    # Edge network maps each edge feature to a (hidden x hidden) message matrix.
    edge_net = nn.Sequential(
        nn.Linear(edge_input_dim, edge_hidden_dim),
        nn.ReLU(),
        nn.Linear(edge_hidden_dim, node_hidden_dim * node_hidden_dim),
    )
    self.conv = NNConv(
        in_feats=node_hidden_dim,
        out_feats=node_hidden_dim,
        edge_func=edge_net,
        aggregator_type="sum",
    )
    self.lstm_as_gate = lstm_as_gate
    # Recurrent cell applied after every message-passing step.
    if lstm_as_gate:
        self.lstm = nn.LSTM(node_hidden_dim, node_hidden_dim)
    else:
        self.gru = nn.GRU(node_hidden_dim, node_hidden_dim)
def __init__(self, input_nc, ndf=64, n_layers=5):
    """Spectral-norm PatchGAN discriminator with a CAM attention head.

    Args:
        input_nc: number of channels of the input image.
        ndf: base number of filters.
        n_layers: depth control; `n_layers - 3` intermediate strided stages.
    """
    super(Discriminator, self).__init__()
    # BUG FIX: the stem conv previously hard-coded 3 input channels,
    # silently ignoring `input_nc`; it now honors the parameter.
    model = [nn.ReflectionPad2d(1),
             nn.utils.spectral_norm(nn.Conv2d(input_nc, ndf, 4, 2, 0, bias=True)),
             nn.LeakyReLU(0.2, True)]
    # Stride-2 down-sampling stages, doubling the channel count each time.
    for i in range(1, n_layers - 2):
        mult = 2 ** (i - 1)
        model += [nn.ReflectionPad2d(1),
                  nn.utils.spectral_norm(
                      nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 2, 0, bias=True)),
                  nn.LeakyReLU(0.2, True)]
    # Final stride-1 stage before the classification conv.
    mult = 2 ** (n_layers - 2 - 1)
    model += [nn.ReflectionPad2d(1),
              nn.utils.spectral_norm(
                  nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 1, 0, bias=True)),
              nn.LeakyReLU(0.2, True)]
    # Class Activation Map: global-avg/global-max logits plus a 1x1 fuse conv.
    mult = 2 ** (n_layers - 2)
    self.gap_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
    self.gmp_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
    self.conv1x1 = nn.Conv2d(ndf * mult * 2, ndf * mult, 1, 1, bias=True)
    self.leaky_relu = nn.LeakyReLU(0.2, True)
    # Patch-level real/fake head.
    self.pad = nn.ReflectionPad2d(1)
    self.conv = nn.utils.spectral_norm(nn.Conv2d(ndf * mult, 1, 4, 1, 0, bias=False))
    self.model = nn.Sequential(*model)
def __init__(self, num_classes=1000):
    """AlexNet: conv feature extractor, adaptive average pool, FC classifier."""
    super(AlexNet, self).__init__()
    # (in_ch, out_ch, kernel, stride, padding, max-pool after this conv?)
    conv_cfg = [
        (3, 64, 11, 4, 2, True),
        (64, 192, 5, 1, 2, True),
        (192, 384, 3, 1, 1, False),
        (384, 256, 3, 1, 1, False),
        (256, 256, 3, 1, 1, True),
    ]
    feature_layers = []
    for in_ch, out_ch, k, s, p, pool in conv_cfg:
        feature_layers.append(
            nn.Conv2d(in_ch, out_ch, kernel_size=k, stride=s, padding=p))
        feature_layers.append(nn.ReLU(inplace=True))
        if pool:
            feature_layers.append(nn.MaxPool2d(kernel_size=3, stride=2))
    self.features = nn.Sequential(*feature_layers)
    # Fixed 6x6 spatial output regardless of input resolution.
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    )
def __init__(self, in_channels, se_channels):
    """Squeeze-and-excitation gate: bottleneck MLP ending in a sigmoid."""
    super(SELayer, self).__init__()
    self.in_channels = in_channels
    self.se_channels = se_channels
    # Compress to `se_channels`, expand back, squash to (0, 1) gating weights.
    gate = [
        nn.Linear(in_channels, se_channels),
        nn.ELU(),
        nn.Linear(se_channels, in_channels),
        nn.Sigmoid(),
    ]
    self.encoder_decoder = nn.Sequential(*gate)
def __init__(
    self,
    hidden_size=64,
    num_layer=2,
    readout="avg",
    layernorm: bool = False,
    set2set_lstm_layer: int = 3,
    set2set_iter: int = 6,
):
    """Stacked-GCN encoder with a configurable graph-level readout."""
    super(UnsupervisedGCN, self).__init__()
    # ReLU on every layer except the last.
    gcn_layers = []
    for i in range(num_layer):
        act = F.relu if i + 1 < num_layer else None
        gcn_layers.append(
            GCNLayer(
                in_feats=hidden_size,
                out_feats=hidden_size,
                activation=act,
                residual=False,
                batchnorm=False,
                dropout=0.0,
            )
        )
    self.layers = nn.ModuleList(gcn_layers)
    if readout == "avg":
        self.readout = AvgPooling()
    elif readout == "set2set":
        # Set2Set doubles the feature size; project back to hidden_size.
        self.readout = Set2Set(hidden_size, n_iters=set2set_iter,
                               n_layers=set2set_lstm_layer)
        self.linear = nn.Linear(2 * hidden_size, hidden_size)
    elif readout == "root":
        # HACK: process outside the model part; identity readout here.
        self.readout = lambda _, x: x
    else:
        raise NotImplementedError
    self.layernorm = layernorm
    if layernorm:
        self.ln = nn.LayerNorm(hidden_size, elementwise_affine=False)
def __init__(self, num_classes=1000, aux_logits=True, transform_input=False,
             inception_blocks=None):
    """Inception v3 backbone (paddle/dygraph port of the torchvision model).

    Args:
        num_classes: size of the final FC layer.
        aux_logits: if True, build the auxiliary classifier on the Mixed_6e output.
        transform_input: stored flag; input normalization handled in forward.
        inception_blocks: optional 7-element list of block classes overriding
            the defaults (conv, A, B, C, D, E, aux) — mirrors torchvision's API.
    """
    super(Inception3, self).__init__()
    if inception_blocks is None:
        inception_blocks = [
            BasicConv2d, InceptionA, InceptionB, InceptionC, InceptionD,
            InceptionE, InceptionAux
        ]
    assert len(inception_blocks) == 7
    conv_block = inception_blocks[0]
    inception_a = inception_blocks[1]
    inception_b = inception_blocks[2]
    inception_c = inception_blocks[3]
    inception_d = inception_blocks[4]
    inception_e = inception_blocks[5]
    inception_aux = inception_blocks[6]
    self.aux_logits = aux_logits
    self.transform_input = transform_input
    # Stem: 299x299x3 -> 35x35x192.
    self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2)
    self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3)
    self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1)
    self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1)
    self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3)
    # Inception stages; channel counts follow the torchvision reference.
    self.Mixed_5b = inception_a(192, pool_features=32)
    self.Mixed_5c = inception_a(256, pool_features=64)
    self.Mixed_5d = inception_a(288, pool_features=64)
    self.Mixed_6a = inception_b(288)
    self.Mixed_6b = inception_c(768, channels_7x7=128)
    self.Mixed_6c = inception_c(768, channels_7x7=160)
    self.Mixed_6d = inception_c(768, channels_7x7=160)
    self.Mixed_6e = inception_c(768, channels_7x7=192)
    if aux_logits:
        self.AuxLogits = inception_aux(768, num_classes)
    self.Mixed_7a = inception_d(768)
    self.Mixed_7b = inception_e(1280)
    self.Mixed_7c = inception_e(2048)
    self.fc = nn.Linear(2048, num_classes)
    # Truncated-normal weight init, ported from torchvision.
    # for m in self.modules():
    # NOTE(review): `_sub_layers` is paddle-dygraph's child registry — unlike
    # torch's modules() it is NOT recursive; confirm nested layers are covered.
    for name, m in self._sub_layers.items():
        if isinstance(m, dygraph.Conv2D) or isinstance(m, dygraph.Linear):
            import scipy.stats as stats
            # Per-layer stddev override (see InceptionAux); default 0.1.
            stddev = m.stddev if hasattr(m, 'stddev') else 0.1
            X = stats.truncnorm(-2, 2, scale=stddev)
            values = torch.as_tensor(
                X.rvs(np.prod(m.weight.shape)).astype("float32"))
            values = values.view(*m.weight.shape)
            with torch.no_grad():
                # Paddle fluid assign writes the sampled values in place.
                fluid.layers.assign(values, m.weight)
        elif isinstance(m, dygraph.BatchNorm):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
def __init__(self, in_channels, num_classes, conv_block=None):
    """Auxiliary classifier head used for Inception v3 deep supervision."""
    super(InceptionAux, self).__init__()
    if conv_block is None:
        conv_block = BasicConv2d
    # 1x1 channel reduction, then a 5x5 conv collapsing the 5x5 feature map.
    self.conv0 = conv_block(in_channels, 128, kernel_size=1)
    self.conv1 = conv_block(128, 768, kernel_size=5)
    self.fc = nn.Linear(768, num_classes)
    # Custom init std-devs, picked up by the parent model's
    # truncated-normal initializer via hasattr(m, 'stddev').
    self.conv1.stddev = 0.01
    self.fc.stddev = 0.001
def __init__(self, num_layers, input_dim, hidden_dim, output_dim, use_selayer):
    """MLP layers construction

    Parameters
    ----------
    num_layers: int
        The number of linear layers
    input_dim: int
        The dimensionality of input features
    hidden_dim: int
        The dimensionality of hidden units at ALL layers
    output_dim: int
        The number of classes for prediction
    use_selayer: bool
        If True, use SELayer instead of BatchNorm1d between hidden layers
    """
    super(MLP, self).__init__()
    self.linear_or_not = True  # default is linear model
    self.num_layers = num_layers
    self.output_dim = output_dim
    if num_layers < 1:
        raise ValueError("number of layers should be positive!")
    elif num_layers == 1:
        # Linear model
        self.linear = nn.Linear(input_dim, output_dim)
    else:
        # Multi-layer model
        self.linear_or_not = False
        self.linears = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()
        self.linears.append(nn.Linear(input_dim, hidden_dim))
        for _ in range(num_layers - 2):
            self.linears.append(nn.Linear(hidden_dim, hidden_dim))
        self.linears.append(nn.Linear(hidden_dim, output_dim))
        # One normalization (or SE) module per hidden layer.
        for _ in range(num_layers - 1):
            self.batch_norms.append(
                SELayer(hidden_dim, int(np.sqrt(hidden_dim)))
                if use_selayer else nn.BatchNorm1d(hidden_dim))
def __init__(self, latent_dim=16, style_dim=64, num_domains=2):
    """Mapping network: a shared MLP trunk plus one unshared head per domain."""
    super().__init__()
    # Shared trunk: input projection followed by three Linear+ReLU pairs.
    trunk = [nn.Linear(latent_dim, 512), nn.ReLU()]
    for _ in range(3):
        trunk.extend([nn.Linear(512, 512), nn.ReLU()])
    self.shared = nn.Sequential(*trunk)
    # Independent per-domain heads, each ending in the style projection.
    heads = []
    for _ in range(num_domains):
        heads.append(
            nn.Sequential(
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, style_dim),
            )
        )
    self.unshared = nn.ModuleList(heads)
def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512):
    """Style encoder: shared conv trunk, one linear style head per domain."""
    super().__init__()
    dim_in = 2 ** 14 // img_size
    trunk = [nn.Conv2d(3, dim_in, 3, 1, 1)]
    # Down-sampling residual blocks, doubling channels up to max_conv_dim.
    num_blocks = int(np.log2(img_size)) - 2
    for _ in range(num_blocks):
        dim_out = min(dim_in * 2, max_conv_dim)
        trunk.append(ResBlk(dim_in, dim_out, downsample=True))
        dim_in = dim_out
    trunk += [
        nn.LeakyReLU(0.2),
        nn.Conv2d(dim_out, dim_out, 4, 1, 0),  # collapses the final 4x4 map
        nn.LeakyReLU(0.2),
    ]
    self.shared = nn.Sequential(*trunk)
    self.unshared = nn.ModuleList(
        [nn.Linear(dim_out, style_dim) for _ in range(num_domains)]
    )
def __init__(
    self,
    num_layers,
    num_mlp_layers,
    input_dim,
    hidden_dim,
    output_dim,
    final_dropout,
    learn_eps,
    graph_pooling_type,
    neighbor_pooling_type,
    use_selayer,
):
    """GIN-based graph encoder.

    Parameters
    ----------
    num_layers: int
        The number of linear layers in the neural network
    num_mlp_layers: int
        The number of linear layers in each MLP
    input_dim: int
        The dimensionality of input features
    hidden_dim: int
        The dimensionality of hidden units at ALL layers
    output_dim: int
        The number of classes for prediction
    final_dropout: float
        dropout ratio on the final linear layer
    learn_eps: boolean
        If True, learn epsilon to distinguish center nodes from neighbors;
        if False, aggregate neighbors and center nodes altogether.
    graph_pooling_type: str
        how to aggregate entire nodes in a graph (sum, mean or max)
    neighbor_pooling_type: str
        how to aggregate neighbors (sum, mean, or max)
    use_selayer: bool
        use SELayer instead of BatchNorm1d after each GIN layer
    """
    super(UnsupervisedGIN, self).__init__()
    self.num_layers = num_layers
    self.learn_eps = learn_eps
    # GIN layers; the first MLP maps input_dim -> hidden_dim, the rest
    # stay in the hidden space.
    self.ginlayers = torch.nn.ModuleList()
    self.batch_norms = torch.nn.ModuleList()
    for idx in range(self.num_layers - 1):
        in_dim = input_dim if idx == 0 else hidden_dim
        mlp = MLP(num_mlp_layers, in_dim, hidden_dim, hidden_dim, use_selayer)
        self.ginlayers.append(
            GINConv(
                ApplyNodeFunc(mlp, use_selayer),
                neighbor_pooling_type,
                0,
                self.learn_eps,
            )
        )
        self.batch_norms.append(
            SELayer(hidden_dim, int(np.sqrt(hidden_dim)))
            if use_selayer else nn.BatchNorm1d(hidden_dim)
        )
    # One prediction head per layer (jumping-knowledge style): the head for
    # layer 0 reads the raw input features, the rest read hidden features.
    self.linears_prediction = torch.nn.ModuleList()
    for idx in range(num_layers):
        head_in = input_dim if idx == 0 else hidden_dim
        self.linears_prediction.append(nn.Linear(head_in, output_dim))
    self.drop = nn.Dropout(final_dropout)
    if graph_pooling_type == "sum":
        self.pool = SumPooling()
    elif graph_pooling_type == "mean":
        self.pool = AvgPooling()
    elif graph_pooling_type == "max":
        self.pool = MaxPooling()
    else:
        raise NotImplementedError
def __init__(self, input_nc, output_nc, ngf=64, n_blocks=6, img_size=256, light=False):
    """UGATIT-style generator with CAM attention and AdaILN up-sampling blocks.

    Args:
        input_nc: channels of the input image (previously hard-coded to 3).
        output_nc: channels of the generated image (previously hard-coded to 3).
        ngf: base filter count.
        n_blocks: number of AdaILN residual blocks in the up-sampling path.
        img_size: input resolution (sizes the non-light gamma/beta MLP).
        light: if True, derive gamma/beta from pooled features (less memory).
    """
    super(ResnetGenerator, self).__init__()
    self.n_res = n_blocks
    self.light = light
    down_layer = [
        nn.ReflectionPad2d(3),
        # BUG FIX: honor input_nc instead of the hard-coded 3 channels.
        nn.Conv2d(input_nc, ngf, 7, 1, 0, bias=False),
        nn.InstanceNorm2d(ngf, affine=True),
        nn.ReLU(inplace=True),
        # Down-Sampling (two stride-2 stages).
        nn.ReflectionPad2d(1),
        nn.Conv2d(ngf, ngf * 2, 3, 2, 0, bias=False),
        nn.InstanceNorm2d(ngf * 2, affine=True),
        nn.ReLU(inplace=True),
        nn.ReflectionPad2d(1),
        nn.Conv2d(ngf * 2, ngf * 4, 3, 2, 0, bias=False),
        nn.InstanceNorm2d(ngf * 4, affine=True),
        nn.ReLU(inplace=True),
        # Down-Sampling Bottleneck (fixed at 4 residual blocks).
        ResNetBlock(ngf * 4),
        ResNetBlock(ngf * 4),
        ResNetBlock(ngf * 4),
        ResNetBlock(ngf * 4),
    ]
    # Class Activation Map heads.
    self.gap_fc = nn.Linear(ngf * 4, 1, bias=False)
    self.gmp_fc = nn.Linear(ngf * 4, 1, bias=False)
    self.conv1x1 = nn.Conv2d(ngf * 8, ngf * 4, 1, 1, bias=True)
    self.relu = nn.ReLU(inplace=True)
    # Gamma, Beta block feeding the AdaILN residual blocks.
    if self.light:
        fc = [nn.Linear(ngf * 4, ngf * 4, bias=False), nn.ReLU(True),
              nn.Linear(ngf * 4, ngf * 4, bias=False), nn.ReLU(True)]
    else:
        # img_size * img_size * ngf // 4 == (img_size/4)^2 * (ngf*4):
        # the flattened feature map after the two stride-2 down-samplings.
        fc = [nn.Linear(img_size * img_size * ngf // 4, ngf * 4, bias=False),
              nn.ReLU(True),
              nn.Linear(ngf * 4, ngf * 4, bias=False),
              nn.ReLU(True)]
    self.gamma = nn.Linear(ngf * 4, ngf * 4, bias=False)
    self.beta = nn.Linear(ngf * 4, ngf * 4, bias=False)
    # Up-Sampling Bottleneck.
    for i in range(self.n_res):
        setattr(self, "ResNetAdaILNBlock_" + str(i + 1), ResNetAdaILNBlock(ngf * 4))
    up_layer = [
        nn.Upsample(scale_factor=2, mode="nearest"),
        nn.ReflectionPad2d(1),
        nn.Conv2d(ngf * 4, ngf * 2, 3, 1, 0, bias=False),
        ILN(ngf * 2),
        nn.ReLU(inplace=True),
        nn.Upsample(scale_factor=2, mode="nearest"),
        nn.ReflectionPad2d(1),
        nn.Conv2d(ngf * 2, ngf, 3, 1, 0, bias=False),
        ILN(ngf),
        nn.ReLU(inplace=True),
        nn.ReflectionPad2d(3),
        # BUG FIX: honor output_nc instead of the hard-coded 3 channels.
        nn.Conv2d(ngf, output_nc, 7, 1, 0, bias=False),
        nn.Tanh(),
    ]
    self.down_layer = nn.Sequential(*down_layer)
    self.fc = nn.Sequential(*fc)
    self.up_layer = nn.Sequential(*up_layer)
def main(args):
    """Parity-check driver: load a pretrained torch checkpoint into the
    paddle port, run one training step, and print the max absolute
    differences between paddle outputs/gradients and the recorded torch
    reference tensors.
    """
    # Seed every RNG source for reproducibility.
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            # Reuse the pretraining args, overriding run-specific fields
            # from the current command line.
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 0
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    # Positional embeddings are sin/cos pairs, so the size must be even.
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)

    if args.finetune:
        # Labeled dataset + stratified 10-fold split for finetuning.
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)
    elif args.dataset == "dgl":
        # Pretraining on the pre-built DGL graph collection.
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    # NOTE(review): `torch.utils.data.graph.Dataloader` is not a stock
    # PyTorch API — presumably the paddorch shim; confirm against the port.
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.DataLoader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None

    import gcc.models.graph_encoder
    # Deterministic comparison requires identical forward passes.
    gcc.models.graph_encoder.final_dropout = 0  ##disable dropout
    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size, n_data, args.nce_k, args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast.cuda(args.gpu)
    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion.cuda(args.gpu)
    if args.gpu >= 0:
        model = model.cuda(args.gpu)
        model_ema = model_ema.cuda(args.gpu)
    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer.cuda(args.gpu)
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )

        def clear_bn(m):
            # Reset running stats so finetuning starts from fresh statistics.
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)
    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    # Always load the fixed torch reference checkpoint for the parity check.
    if True:
        # print("=> loading checkpoint '{}'".format(args.resume))
        # checkpoint = torch.load(args.resume, map_location="cpu")
        import torch as th
        checkpoint = th.load("torch_models/ckpt_epoch_100.pth",
                             map_location=th.device('cpu'))
        # Reference tensors recorded from the original torch run:
        # [ (inputs), (feat_q, out, loss), {name: grad} ].
        torch_input_output_grad = th.load(
            "torch_models/torch_input_output_grad.pt",
            map_location=th.device('cpu'))
        from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
        # Convert torch state dicts to paddle layout, then load.
        print("loading.............. model")
        paddle_state_dict = load_pytorch_pretrain_model(
            model, checkpoint["model"])
        model.load_state_dict(paddle_state_dict)
        print("loading.............. contrast")
        paddle_state_dict2 = load_pytorch_pretrain_model(
            contrast, checkpoint["contrast"])
        contrast.load_state_dict(paddle_state_dict2)
        print("loading.............. model_ema")
        paddle_state_dict3 = load_pytorch_pretrain_model(
            model_ema, checkpoint["model_ema"])
        if args.moco:
            model_ema.load_state_dict(paddle_state_dict3)
        print("=> loaded successfully '{}' (epoch {})".format(
            args.resume, checkpoint["epoch"]))
        del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()
    # Fresh optimizer with a reduced LR for the single comparison step.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=args.learning_rate * 0.1,
        betas=(args.beta1, args.beta2),
        weight_decay=args.weight_decay,
    )

    # One training step on a fixed two-graph batch, then diff against torch.
    for _ in range(1):
        graph_q, graph_k = train_dataset[0]
        graph_q2, graph_k2 = train_dataset[1]
        graph_q, graph_k = dgl.batch([graph_q, graph_q2
                                      ]), dgl.batch([graph_k, graph_k2])
        input_output_grad = []
        input_output_grad.append([graph_q, graph_k])
        model.train()
        model_ema.eval()
        feat_q = model(graph_q)
        with torch.no_grad():
            feat_k = model_ema(graph_k)
        out = contrast(feat_q, feat_k)
        loss = criterion(out)
        optimizer.zero_grad()
        loss.backward()
        input_output_grad.append([feat_q, out, loss])
        print("loss:", loss.numpy())
        optimizer.step()
        moment_update(model, model_ema, args.alpha)
        # Compare forward outputs against the recorded torch reference.
        print(
            "max diff feat_q:",
            np.max(
                np.abs(torch_input_output_grad[1][0].detach().numpy() -
                       feat_q.numpy())))
        print(
            "max diff out:",
            np.max(
                np.abs(torch_input_output_grad[1][1].detach().numpy() -
                       out.numpy())))
        print(
            "max diff loss:",
            np.max(
                np.abs(torch_input_output_grad[1][2].detach().numpy() -
                       loss.numpy())))
        # Compare per-parameter gradients; torch Linear weights are stored
        # transposed relative to paddle, hence the .T for linear weights.
        name2grad = dict()
        for name, p in dict(model.named_parameters()).items():
            if p.grad is not None:
                name2grad[name] = p.grad
                torch_grad = torch_input_output_grad[2][name].numpy()
                if "linear" in name and "weight" in name:
                    torch_grad = torch_grad.T
                max_grad_diff = np.max(np.abs(p.grad - torch_grad))
                print("max grad diff:", name, max_grad_diff)
        input_output_grad.append(name2grad)
def __init__(
    self,
    positional_embedding_size=32,
    max_node_freq=8,
    max_edge_freq=8,
    max_degree=128,
    freq_embedding_size=32,
    degree_embedding_size=32,
    output_dim=32,
    node_hidden_dim=32,
    edge_hidden_dim=32,
    num_layers=6,
    num_heads=4,
    num_step_set2set=6,
    num_layer_set2set=3,
    norm=False,
    gnn_model="mpnn",
    degree_input=False,
    lstm_as_gate=False,
):
    """Graph encoder wrapping one of three GNN backbones ("mpnn", "gat", "gin")
    with a Set2Set readout and a linear projection to `output_dim`.

    Node inputs are positional embeddings (optionally concatenated with a
    degree embedding) plus one extra scalar feature; edge inputs are the
    frequency embedding plus one scalar.
    """
    super(GraphEncoder, self).__init__()
    # Input widths: +1 accounts for the extra scalar node/edge feature.
    if degree_input:
        node_input_dim = positional_embedding_size + degree_embedding_size + 1
    else:
        node_input_dim = positional_embedding_size + 1
    # node_input_dim = (
    #     positional_embedding_size + freq_embedding_size + degree_embedding_size + 3
    # )
    edge_input_dim = freq_embedding_size + 1
    if gnn_model == "mpnn":
        self.gnn = UnsupervisedMPNN(
            output_dim=output_dim,
            node_input_dim=node_input_dim,
            node_hidden_dim=node_hidden_dim,
            edge_input_dim=edge_input_dim,
            edge_hidden_dim=edge_hidden_dim,
            num_step_message_passing=num_layers,
            lstm_as_gate=lstm_as_gate,
        )
    elif gnn_model == "gat":
        self.gnn = UnsupervisedGAT(
            node_input_dim=node_input_dim,
            node_hidden_dim=node_hidden_dim,
            edge_input_dim=edge_input_dim,
            num_layers=num_layers,
            num_heads=num_heads,
        )
    elif gnn_model == "gin":
        self.gnn = UnsupervisedGIN(
            num_layers=num_layers,
            num_mlp_layers=2,
            input_dim=node_input_dim,
            hidden_dim=node_hidden_dim,
            output_dim=output_dim,
            # NOTE(review): `final_dropout` is NOT a parameter of this
            # method — it resolves to a module-level global that callers
            # set externally (the parity script assigns
            # gcc.models.graph_encoder.final_dropout = 0). If it is never
            # assigned, selecting gnn_model="gin" raises NameError. Confirm
            # this is intentional rather than a missing parameter.
            final_dropout=final_dropout,
            learn_eps=False,
            graph_pooling_type="sum",
            neighbor_pooling_type="sum",
            use_selayer=False,
        )
    self.gnn_model = gnn_model
    self.max_node_freq = max_node_freq
    self.max_edge_freq = max_edge_freq
    self.max_degree = max_degree
    self.degree_input = degree_input

    # self.node_freq_embedding = nn.Embedding(
    #     num_embeddings=max_node_freq + 1, embedding_dim=freq_embedding_size
    # )
    if degree_input:
        # Degrees are clamped to max_degree elsewhere, hence +1 slots.
        self.degree_embedding = nn.Embedding(
            num_embeddings=max_degree + 1, embedding_dim=degree_embedding_size
        )

    # self.edge_freq_embedding = nn.Embedding(
    #     num_embeddings=max_edge_freq + 1, embedding_dim=freq_embedding_size
    # )

    # Set2Set produces 2*hidden features; project back down to output_dim.
    self.set2set = Set2Set(node_hidden_dim, num_step_set2set, num_layer_set2set)
    self.lin_readout = nn.Sequential(
        nn.Linear(2 * node_hidden_dim, node_hidden_dim),
        nn.ReLU(),
        nn.Linear(node_hidden_dim, output_dim),
    )
    self.norm = norm
def main(args):
    """Training entry point (paddle port): pretrain with MoCo-style contrast
    or finetune with a linear head, with checkpoint resume and periodic
    saving. Returns the final validation F1 when finetuning.
    """
    # Seed every RNG source for reproducibility.
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            # Reuse the pretraining args, overriding run-specific fields
            # from the current command line.
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 1
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    # Checkpointed args may store the LR as a string; normalize to float.
    learning_rate = float(args.learning_rate)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    # Positional embeddings are sin/cos pairs, so the size must be even.
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)

    if args.finetune:
        # Labeled dataset + stratified 10-fold split for finetuning.
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)
    elif args.dataset == "dgl":
        # Pretraining on the pre-built DGL graph collection.
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    # NOTE(review): `torch.utils.data.graph.Dataloader` is not a stock
    # PyTorch API — presumably the paddorch shim; confirm against the port.
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.graph.Dataloader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None

    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        # model_ema.load_state_dict(model.state_dict()) ##complete copy of model
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size, n_data, args.nce_k, args.nce_t,
                          use_softmax=True)
    # NOTE(review): the `x = x` statements below are no-op placeholders for
    # the torch `.cuda()` moves; paddle selects the device globally.
    if args.gpu >= 0:
        contrast = contrast
    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion
    if args.gpu >= 0:
        model = model
        model_ema = model_ema
    import paddle
    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer
        # Gradient clipping is configured on the paddle optimizer itself.
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
            grad_clip=paddle.nn.clip.ClipGradByValue(max=1))

        def clear_bn(m):
            # Reset running stats so finetuning starts from fresh statistics.
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)
    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        # Finetuning clips by value; pretraining clips by global norm.
        if args.finetune:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByValue(max=1),
            )
        else:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByNorm(args.clip_norm))
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if args.finetune:  ##if finetune model exists, continue resume that
            # NOTE(review): isdir() on a ".pth" path — paddle checkpoints may
            # be directories, but confirm this should not be isfile()/exists().
            if os.path.isdir(args.model_folder + "/current.pth"):
                args.resume = args.model_folder + "/current.pth"
                print("change resume model to finetune model path:", args.resume)
                ##find last end epoch
                import glob
                ckpt_epoches = glob.glob(args.model_folder + "/ckpt_epoch*.pth")
                if len(ckpt_epoches) > 0:
                    # Resume after the highest saved epoch number.
                    args.start_epoch = sorted([
                        int(
                            os.path.basename(x).replace(".pth", "").replace(
                                "ckpt_epoch_", "")) for x in ckpt_epoches
                    ])[-1] + 1
                    print("starting epoch:", args.start_epoch)
                args.epochs = args.epochs + args.start_epoch - 1
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume, map_location="cpu")
        # checkpoint = torch.load(args.resume)
        # args.start_epoch = checkpoint["epoch"] + 1
        model.load_state_dict(checkpoint["model"])
        # optimizer.load_state_dict(checkpoint["optimizer"])
        contrast.load_state_dict(checkpoint["contrast"])
        if args.moco:
            model_ema.load_state_dict(checkpoint["model_ema"])
        print("=> loaded successfully '{}' ".format(args.resume))
        if args.finetune:
            if "output_layer" in checkpoint:
                output_layer.load_state_dict(checkpoint["output_layer"])
                print("loaded output layer")
        # del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()

    # tensorboard
    # logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)
    sw = LogWriter(logdir=args.tb_folder)
    import gc
    gc.enable()
    for epoch in range(args.start_epoch, args.epochs + 1):
        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        # NOTE(review): bare `except:` swallows every error (including
        # KeyboardInterrupt) and skips the epoch — consider narrowing to
        # `except Exception:` and logging the traceback.
        try:
            if args.finetune:
                loss, _ = train_finetune(
                    epoch,
                    train_loader,
                    model,
                    output_layer,
                    criterion,
                    optimizer,
                    output_layer_optimizer,
                    sw,
                    args,
                )
            else:
                loss = train_moco(
                    epoch,
                    train_loader,
                    model,
                    model_ema,
                    contrast,
                    criterion,
                    optimizer,
                    sw,
                    args,
                )
        except:
            print("Error in Epoch", epoch)
            continue
        time2 = time.time()
        print("epoch {}, total time {:.2f}".format(epoch, time2 - time1))

        # save model
        if epoch % args.save_freq == 0:
            print("==> Saving...")
            state = {
                "opt": vars(args).copy(),
                "model": model.state_dict(),
                "contrast": contrast.state_dict(),
                "optimizer": optimizer.state_dict()
            }
            if args.moco:
                state["model_ema"] = model_ema.state_dict()
            if args.finetune:
                state['output_layer'] = output_layer.state_dict()
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
            # help release GPU memory
            # del state

        # saving the model
        print("==> Saving...")
        state = {
            "opt": vars(args).copy(),
            "model": model.state_dict(),
            "contrast": contrast.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        if args.moco:
            state["model_ema"] = model_ema.state_dict()
        if args.finetune:
            state['output_layer'] = output_layer.state_dict()
        save_file = os.path.join(args.model_folder, "current.pth")
        torch.save(state, save_file)
        if epoch % args.save_freq == 0:
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
        # help release GPU memory
        # del state
        if args.gpu >= 0:
            torch.cuda.empty_cache()

        if args.finetune:
            valid_loss, valid_f1 = test_finetune(epoch, valid_loader, model,
                                                 output_layer, criterion, sw,
                                                 args)
            print("epoch %d| valid f1: %.3f" % (epoch, valid_f1))
        # del model,model_ema,train_loader
        gc.collect()
    # NOTE(review): valid_f1 is only bound when args.finetune is set;
    # pretraining runs will raise NameError here — confirm intended usage.
    return valid_f1
def __init__(self, input_nc, output_nc, ngf=64, n_blocks=6, img_size=256, light=False):
    """UGATIT generator: down-sampling encoder, CAM attention, AdaILN
    residual blocks, and an up-sampling decoder.
    """
    assert (n_blocks >= 0)
    super(ResnetGenerator, self).__init__()
    self.input_nc = input_nc
    self.output_nc = output_nc
    self.ngf = ngf
    self.n_blocks = n_blocks
    self.img_size = img_size
    self.light = light

    # Encoder stem: 7x7 conv keeping spatial size.
    encoder = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, ngf, kernel_size=7, stride=1, padding=0, bias=False),
        nn.InstanceNorm2d(ngf, affine=True),
        nn.ReLU(True),
    ]

    # Two stride-2 down-sampling stages, doubling channels each time.
    n_downsampling = 2
    for stage in range(n_downsampling):
        mult = 2 ** stage
        encoder += [
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2,
                      padding=0, bias=False),
            nn.InstanceNorm2d(ngf * mult * 2, affine=True),
            nn.ReLU(True),
        ]

    # Residual bottleneck at the lowest resolution.
    mult = 2 ** n_downsampling
    encoder += [ResnetBlock(ngf * mult, use_bias=False) for _ in range(n_blocks)]

    # Class Activation Map: avg/max attention logits plus a 1x1 fuse conv.
    self.gap_fc = nn.Linear(ngf * mult, 1, bias=False)
    self.gmp_fc = nn.Linear(ngf * mult, 1, bias=False)
    self.conv1x1 = nn.Conv2d(ngf * mult * 2, ngf * mult, kernel_size=1,
                             stride=1, bias=True)
    self.relu = nn.ReLU(True)

    # Gamma/Beta MLP for AdaILN; the light variant pools features first
    # so its input stays at ngf*mult instead of the full flattened map.
    if self.light:
        fc_layers = [
            nn.Linear(ngf * mult, ngf * mult, bias=False),
            nn.ReLU(True),
            nn.Linear(ngf * mult, ngf * mult, bias=False),
            nn.ReLU(True),
        ]
    else:
        fc_layers = [
            nn.Linear(img_size // mult * img_size // mult * ngf * mult,
                      ngf * mult, bias=False),
            nn.ReLU(True),
            nn.Linear(ngf * mult, ngf * mult, bias=False),
            nn.ReLU(True),
        ]
    self.gamma = nn.Linear(ngf * mult, ngf * mult, bias=False)
    self.beta = nn.Linear(ngf * mult, ngf * mult, bias=False)

    # AdaILN residual blocks applied before up-sampling.
    for blk in range(n_blocks):
        setattr(self, 'UpBlock1_' + str(blk + 1),
                ResnetAdaILNBlock(ngf * mult, use_bias=False))

    # Decoder: nearest upsample + conv + ILN per stage, then the output conv.
    decoder = []
    for stage in range(n_downsampling):
        mult = 2 ** (n_downsampling - stage)
        decoder += [
            nn.Upsample(scale_factor=2, mode='nearest'),
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=1,
                      padding=0, bias=False),
            ILN(int(ngf * mult / 2)),
            nn.ReLU(True),
        ]
    decoder += [
        nn.ReflectionPad2d(3),
        nn.Conv2d(ngf, output_nc, kernel_size=7, stride=1, padding=0, bias=False),
        nn.Tanh(),
    ]

    self.DownBlock = nn.Sequential(*encoder)
    self.FC = nn.Sequential(*fc_layers)
    self.UpBlock2 = nn.Sequential(*decoder)
def __init__(self, style_dim, num_features):
    """AdaIN: instance-normalize, then scale/shift with style-derived params."""
    super().__init__()
    # Affine-free normalization; the scale and shift come from the style code.
    self.norm = nn.InstanceNorm2d(num_features, affine=False)
    # Projects the style vector to (gamma, beta), hence 2 * num_features.
    self.fc = nn.Linear(style_dim, num_features * 2)