Example #1
    def __init__(
        self,
        output_dim=32,
        node_input_dim=32,
        node_hidden_dim=32,
        edge_input_dim=32,
        edge_hidden_dim=32,
        num_step_message_passing=6,
        lstm_as_gate=False,
    ):
        super(UnsupervisedMPNN, self).__init__()

        self.num_step_message_passing = num_step_message_passing
        self.lin0 = nn.Linear(node_input_dim, node_hidden_dim)
        edge_network = nn.Sequential(
            nn.Linear(edge_input_dim, edge_hidden_dim),
            nn.ReLU(),
            nn.Linear(edge_hidden_dim, node_hidden_dim * node_hidden_dim),
        )
        self.conv = NNConv(
            in_feats=node_hidden_dim,
            out_feats=node_hidden_dim,
            edge_func=edge_network,
            aggregator_type="sum",
        )
        self.lstm_as_gate = lstm_as_gate
        if lstm_as_gate:
            self.lstm = nn.LSTM(node_hidden_dim, node_hidden_dim)
        else:
            self.gru = nn.GRU(node_hidden_dim, node_hidden_dim)
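The constructor above wires an edge-conditioned NNConv message function to a GRU (or LSTM) node-state update. A minimal forward sketch of how such a loop is usually driven (an assumption based on these layer definitions and the usual import torch.nn.functional as F, not necessarily this repo's exact code):

    def forward(self, g, n_feat, e_feat):
        out = F.relu(self.lin0(n_feat))              # project node features to node_hidden_dim
        h = out.unsqueeze(0)                         # recurrent hidden state (seq_len = 1)
        c = torch.zeros_like(h)                      # cell state, only needed by the LSTM gate
        for _ in range(self.num_step_message_passing):
            m = F.relu(self.conv(g, out, e_feat))    # edge-conditioned message passing (NNConv)
            if self.lstm_as_gate:
                out, (h, c) = self.lstm(m.unsqueeze(0), (h, c))
            else:
                out, h = self.gru(m.unsqueeze(0), h)
            out = out.squeeze(0)
        return out                                   # per-node representations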
Example #2
    def __init__(self, input_nc, ndf=64, n_layers=5):
        super(Discriminator, self).__init__()
        model = [nn.ReflectionPad2d(1),
                 nn.utils.spectral_norm(nn.Conv2d(3, ndf, 4, 2, 0, bias=True)),
                 nn.LeakyReLU(0.2, True)]

        for i in range(1, n_layers - 2):
            mult = 2 ** (i - 1)
            model += [nn.ReflectionPad2d(1),
                      nn.utils.spectral_norm(nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 2, 0, bias=True)),
                      nn.LeakyReLU(0.2, True)]

        mult = 2 ** (n_layers - 2 - 1)
        model += [nn.ReflectionPad2d(1),
                  nn.utils.spectral_norm(nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 1, 0, bias=True)),
                  nn.LeakyReLU(0.2, True)]

        # Class Activation Map
        mult = 2 ** (n_layers - 2)
        self.gap_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
        self.gmp_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
        self.conv1x1 = nn.Conv2d(ndf * mult * 2, ndf * mult, 1, 1, bias=True)
        self.leaky_relu = nn.LeakyReLU(0.2, True)

        self.pad = nn.ReflectionPad2d(1)
        self.conv = nn.utils.spectral_norm(nn.Conv2d(ndf * mult, 1, 4, 1, 0, bias=False))

        self.model = nn.Sequential(*model)
Example #3
File: alexnet.py Project: zzz2010/Contrib
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
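A forward pass consistent with the layers defined above (a sketch of the standard AlexNet wiring, assumed rather than copied from this file):

    def forward(self, x):
        x = self.features(x)       # convolutional feature extractor
        x = self.avgpool(x)        # force a fixed 6x6 spatial size
        x = torch.flatten(x, 1)    # flatten to (N, 256 * 6 * 6)
        return self.classifier(x)  # dropout + fully connected head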
Example #4
    def __init__(self, in_channels, se_channels):
        super(SELayer, self).__init__()

        self.in_channels = in_channels
        self.se_channels = se_channels

        self.encoder_decoder = nn.Sequential(
            nn.Linear(in_channels, se_channels),
            nn.ELU(),
            nn.Linear(se_channels, in_channels),
            nn.Sigmoid(),
        )
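Only the gating MLP is defined here. A minimal sketch of how such a squeeze-and-excitation gate is typically applied to an (N, C) feature matrix (an assumption, not necessarily this project's exact forward):

    def forward(self, x):
        # "Squeeze": average over the sample/node axis to get one value per channel.
        s = self.encoder_decoder(x.mean(dim=0))
        # "Excite": rescale every row by the learned per-channel gate in [0, 1].
        return x * s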
Example #5
File: gcn.py Project: PaddlePaddle/Contrib
 def __init__(
     self,
     hidden_size=64,
     num_layer=2,
     readout="avg",
     layernorm: bool = False,
     set2set_lstm_layer: int = 3,
     set2set_iter: int = 6,
 ):
     super(UnsupervisedGCN, self).__init__()
     self.layers = nn.ModuleList([
         GCNLayer(
             in_feats=hidden_size,
             out_feats=hidden_size,
             activation=F.relu if i + 1 < num_layer else None,
             residual=False,
             batchnorm=False,
             dropout=0.0,
         ) for i in range(num_layer)
     ])
     if readout == "avg":
         self.readout = AvgPooling()
     elif readout == "set2set":
         self.readout = Set2Set(hidden_size,
                                n_iters=set2set_iter,
                                n_layers=set2set_lstm_layer)
         self.linear = nn.Linear(2 * hidden_size, hidden_size)
     elif readout == "root":
         # HACK: process outside the model part
         self.readout = lambda _, x: x
     else:
         raise NotImplementedError
     self.layernorm = layernorm
     if layernorm:
         self.ln = nn.LayerNorm(hidden_size, elementwise_affine=False)
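A forward pass consistent with this constructor (a sketch assuming GCNLayer and the readout modules take the graph plus node features, as in DGL-style layers):

 def forward(self, g, feats):
     for layer in self.layers:
         feats = layer(g, feats)          # stacked GCN layers, ReLU on all but the last
     feats = self.readout(g, feats)       # avg / set2set / root readout
     if isinstance(self.readout, Set2Set):
         feats = self.linear(feats)       # project 2 * hidden_size back to hidden_size
     if self.layernorm:
         feats = self.ln(feats)
     return feats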
Example #6
    def __init__(self,
                 num_classes=1000,
                 aux_logits=True,
                 transform_input=False,
                 inception_blocks=None):
        super(Inception3, self).__init__()
        if inception_blocks is None:
            inception_blocks = [
                BasicConv2d, InceptionA, InceptionB, InceptionC, InceptionD,
                InceptionE, InceptionAux
            ]
        assert len(inception_blocks) == 7
        conv_block = inception_blocks[0]
        inception_a = inception_blocks[1]
        inception_b = inception_blocks[2]
        inception_c = inception_blocks[3]
        inception_d = inception_blocks[4]
        inception_e = inception_blocks[5]
        inception_aux = inception_blocks[6]

        self.aux_logits = aux_logits
        self.transform_input = transform_input
        self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2)
        self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3)
        self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1)
        self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1)
        self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3)
        self.Mixed_5b = inception_a(192, pool_features=32)
        self.Mixed_5c = inception_a(256, pool_features=64)
        self.Mixed_5d = inception_a(288, pool_features=64)
        self.Mixed_6a = inception_b(288)
        self.Mixed_6b = inception_c(768, channels_7x7=128)
        self.Mixed_6c = inception_c(768, channels_7x7=160)
        self.Mixed_6d = inception_c(768, channels_7x7=160)
        self.Mixed_6e = inception_c(768, channels_7x7=192)
        if aux_logits:
            self.AuxLogits = inception_aux(768, num_classes)
        self.Mixed_7a = inception_d(768)
        self.Mixed_7b = inception_e(1280)
        self.Mixed_7c = inception_e(2048)
        self.fc = nn.Linear(2048, num_classes)

        # for m in self.modules():
        for name, m in self._sub_layers.items():
            if isinstance(m, dygraph.Conv2D) or isinstance(m, dygraph.Linear):
                import scipy.stats as stats
                stddev = m.stddev if hasattr(m, 'stddev') else 0.1
                X = stats.truncnorm(-2, 2, scale=stddev)
                values = torch.as_tensor(
                    X.rvs(np.prod(m.weight.shape)).astype("float32"))
                values = values.view(*m.weight.shape)
                with torch.no_grad():
                    fluid.layers.assign(values, m.weight)

            elif isinstance(m, dygraph.BatchNorm):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
Example #7
 def __init__(self, in_channels, num_classes, conv_block=None):
     super(InceptionAux, self).__init__()
     if conv_block is None:
         conv_block = BasicConv2d
     self.conv0 = conv_block(in_channels, 128, kernel_size=1)
     self.conv1 = conv_block(128, 768, kernel_size=5)
     self.conv1.stddev = 0.01
     self.fc = nn.Linear(768, num_classes)
     self.fc.stddev = 0.001
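For context, the usual forward pass for this auxiliary head (a sketch following the torchvision Inception v3 layout, assumed rather than taken from this port):

 def forward(self, x):
     x = F.avg_pool2d(x, kernel_size=5, stride=3)   # N x 768 x 17 x 17 -> N x 768 x 5 x 5
     x = self.conv0(x)                              # N x 128 x 5 x 5
     x = self.conv1(x)                              # N x 768 x 1 x 1
     x = F.adaptive_avg_pool2d(x, (1, 1))
     x = torch.flatten(x, 1)
     return self.fc(x)                              # N x num_classes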
Example #8
    def __init__(self, num_layers, input_dim, hidden_dim, output_dim,
                 use_selayer):
        """MLP layers construction

        Parameters
        ----------
        num_layers: int
            The number of linear layers
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The number of classes for prediction
        use_selayer: bool
            Whether to use an SE layer instead of batch normalization

        """
        super(MLP, self).__init__()
        self.linear_or_not = True  # default is linear model
        self.num_layers = num_layers
        self.output_dim = output_dim

        if num_layers < 1:
            raise ValueError("number of layers should be positive!")
        elif num_layers == 1:
            # Linear model
            self.linear = nn.Linear(input_dim, output_dim)
        else:
            # Multi-layer model
            self.linear_or_not = False
            self.linears = torch.nn.ModuleList()
            self.batch_norms = torch.nn.ModuleList()

            self.linears.append(nn.Linear(input_dim, hidden_dim))
            for layer in range(num_layers - 2):
                self.linears.append(nn.Linear(hidden_dim, hidden_dim))
            self.linears.append(nn.Linear(hidden_dim, output_dim))

            for layer in range(num_layers - 1):
                self.batch_norms.append(
                    SELayer(hidden_dim, int(np.sqrt(hidden_dim))
                            ) if use_selayer else nn.BatchNorm1d(hidden_dim))
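The forward pass implied by this constructor (a sketch of the usual GIN-style MLP, assumed rather than copied) applies the batch-norm/SE block and ReLU to every hidden layer and leaves the last layer linear:

    def forward(self, x):
        if self.linear_or_not:
            return self.linear(x)          # single linear layer
        h = x
        for i in range(self.num_layers - 1):
            # hidden layers: linear -> batch norm (or SE layer) -> ReLU
            h = F.relu(self.batch_norms[i](self.linears[i](h)))
        return self.linears[-1](h)         # final layer, no activation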
Example #9
    def __init__(self, latent_dim=16, style_dim=64, num_domains=2):
        super().__init__()
        layers = []
        layers += [nn.Linear(latent_dim, 512)]
        layers += [nn.ReLU()]
        for _ in range(3):
            layers += [nn.Linear(512, 512)]
            layers += [nn.ReLU()]
        self.shared = nn.Sequential(*layers)

        self.unshared = nn.ModuleList()
        for _ in range(num_domains):
            self.unshared += [
                nn.Sequential(nn.Linear(512, 512), nn.ReLU(),
                              nn.Linear(512, 512), nn.ReLU(),
                              nn.Linear(512, 512), nn.ReLU(),
                              nn.Linear(512, style_dim))
            ]
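The shared trunk and the per-domain heads above are typically combined by running every head and indexing the result with the target domain label y. A sketch (assumed, following the common StarGAN v2 mapping-network pattern):

    def forward(self, z, y):
        h = self.shared(z)
        out = torch.stack([head(h) for head in self.unshared], dim=1)  # (batch, num_domains, style_dim)
        idx = torch.arange(y.size(0))
        return out[idx, y]                                             # style code for each sample's target domain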
Example #10
    def __init__(self,
                 img_size=256,
                 style_dim=64,
                 num_domains=2,
                 max_conv_dim=512):
        super().__init__()
        dim_in = 2**14 // img_size
        blocks = []
        blocks += [nn.Conv2d(3, dim_in, 3, 1, 1)]

        repeat_num = int(np.log2(img_size)) - 2
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            blocks += [ResBlk(dim_in, dim_out, downsample=True)]
            dim_in = dim_out

        blocks += [nn.LeakyReLU(0.2)]
        blocks += [nn.Conv2d(dim_out, dim_out, 4, 1, 0)]
        blocks += [nn.LeakyReLU(0.2)]
        self.shared = nn.Sequential(*blocks)

        self.unshared = nn.ModuleList()
        for _ in range(num_domains):
            self.unshared += [nn.Linear(dim_out, style_dim)]
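The same per-domain selection trick applies after the shared convolutional trunk; a brief sketch (an assumption, not the repo's exact code):

    def forward(self, x, y):
        h = self.shared(x).view(x.size(0), -1)                         # (batch, dim_out) after the 4x4 conv
        out = torch.stack([head(h) for head in self.unshared], dim=1)  # (batch, num_domains, style_dim)
        return out[torch.arange(y.size(0)), y]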
Example #11
    def __init__(
        self,
        num_layers,
        num_mlp_layers,
        input_dim,
        hidden_dim,
        output_dim,
        final_dropout,
        learn_eps,
        graph_pooling_type,
        neighbor_pooling_type,
        use_selayer,
    ):
        """model parameters setting

        Parameters
        ----------
        num_layers: int
            The number of linear layers in the neural network
        num_mlp_layers: int
            The number of linear layers in mlps
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The number of classes for prediction
        final_dropout: float
            dropout ratio on the final linear layer
        learn_eps: boolean
            If True, learn epsilon to distinguish center nodes from neighbors
            If False, aggregate neighbors and center nodes altogether.
        neighbor_pooling_type: str
            how to aggregate neighbors (sum, mean, or max)
        graph_pooling_type: str
            how to aggregate all the nodes in a graph (sum, mean or max)
        use_selayer: bool
            Whether to use an SE layer instead of batch normalization

        """
        super(UnsupervisedGIN, self).__init__()
        self.num_layers = num_layers
        self.learn_eps = learn_eps

        # List of MLPs
        self.ginlayers = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        for layer in range(self.num_layers - 1):
            if layer == 0:
                mlp = MLP(num_mlp_layers, input_dim, hidden_dim, hidden_dim,
                          use_selayer)
            else:
                mlp = MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim,
                          use_selayer)

            self.ginlayers.append(
                GINConv(
                    ApplyNodeFunc(mlp, use_selayer),
                    neighbor_pooling_type,
                    0,
                    self.learn_eps,
                ))
            self.batch_norms.append(
                SELayer(hidden_dim, int(np.sqrt(hidden_dim))
                        ) if use_selayer else nn.BatchNorm1d(hidden_dim))

        # Linear function for graph poolings of output of each layer
        # which maps the output of different layers into a prediction score
        self.linears_prediction = torch.nn.ModuleList()

        for layer in range(num_layers):
            if layer == 0:
                self.linears_prediction.append(nn.Linear(
                    input_dim, output_dim))
            else:
                self.linears_prediction.append(
                    nn.Linear(hidden_dim, output_dim))

        self.drop = nn.Dropout(final_dropout)

        if graph_pooling_type == "sum":
            self.pool = SumPooling()
        elif graph_pooling_type == "mean":
            self.pool = AvgPooling()
        elif graph_pooling_type == "max":
            self.pool = MaxPooling()
        else:
            raise NotImplementedError
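A forward pass consistent with this setup (a sketch of the usual jumping-knowledge-style GIN readout, assumed rather than taken from the repo) pools the node features of every layer and sums the per-layer predictions:

    def forward(self, g, h):
        hidden_rep = [h]
        for i in range(self.num_layers - 1):
            h = F.relu(self.batch_norms[i](self.ginlayers[i](g, h)))
            hidden_rep.append(h)
        score_over_layer = 0
        for i, h in enumerate(hidden_rep):
            pooled = self.pool(g, h)          # graph-level readout of layer i's node features
            score_over_layer = score_over_layer + self.drop(self.linears_prediction[i](pooled))
        return score_over_layer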
Example #12
    def __init__(self, input_nc, output_nc, ngf=64, n_blocks=6, img_size=256, light=False):
        super(ResnetGenerator, self).__init__()
        self.n_res = n_blocks
        self.light = light
        down_layer = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(3, ngf, 7, 1, 0, bias=False),
            nn.InstanceNorm2d(ngf, affine=True),
            nn.ReLU(inplace=True),

            # Down-Sampling
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf, ngf*2, 3, 2, 0, bias=False),
            nn.InstanceNorm2d(ngf*2, affine=True),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf*2, ngf*4, 3, 2, 0, bias=False),
            nn.InstanceNorm2d(ngf*4, affine=True),
            nn.ReLU(inplace=True),

            # Down-Sampling Bottleneck
            ResNetBlock(ngf*4),
            ResNetBlock(ngf*4),
            ResNetBlock(ngf*4),
            ResNetBlock(ngf*4),
        ]

        # Class Activation Map
        self.gap_fc = nn.Linear(ngf*4, 1, bias=False)
        self.gmp_fc = nn.Linear(ngf*4, 1, bias=False)
        self.conv1x1 = nn.Conv2d(ngf*8, ngf*4, 1, 1, bias=True)
        self.relu = nn.ReLU(inplace=True)

        # # Gamma, Beta block
        # fc = [
        #     nn.Linear(image_size * image_size * 16, 256, bias=False),
        #     nn.ReLU(inplace=True),
        #     nn.Linear(256, 256, bias=False),
        #     nn.ReLU(inplace=True)
        # ]
        # Gamma, Beta block
        if self.light:
            fc = [nn.Linear(ngf*4, ngf*4, bias=False),
                  nn.ReLU(True),
                  nn.Linear(ngf*4, ngf*4, bias=False),
                  nn.ReLU(True)]
        else:
            fc = [nn.Linear(img_size * img_size * ngf//4, ngf*4, bias=False),
                  nn.ReLU(True),
                  nn.Linear(ngf*4, ngf*4, bias=False),
                  nn.ReLU(True)]


        self.gamma = nn.Linear(ngf*4, ngf*4, bias=False)
        self.beta = nn.Linear(ngf*4, ngf*4, bias=False)

        # Up-Sampling Bottleneck
        for i in range(self.n_res):
            setattr(self, "ResNetAdaILNBlock_" + str(i + 1), ResNetAdaILNBlock(ngf*4))

        up_layer = [
            nn.Upsample(scale_factor=2, mode="nearest"),
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf*4, ngf*2, 3, 1, 0, bias=False),
            ILN(ngf*2),
            nn.ReLU(inplace=True),

            nn.Upsample(scale_factor=2, mode="nearest"),
            nn.ReflectionPad2d(1),
            nn.Conv2d(ngf*2, ngf, 3, 1, 0, bias=False),
            ILN(ngf),
            nn.ReLU(inplace=True),

            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf, 3, 7, 1, 0, bias=False),
            nn.Tanh()
        ]

        self.down_layer = nn.Sequential(*down_layer)
        self.fc = nn.Sequential(*fc)
        self.up_layer = nn.Sequential(*up_layer)
Example #13
def main(args):
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 0
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)
    if args.finetune:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()

        skf = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx
                and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)

    elif args.dataset == "dgl":
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.DataLoader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None
    import gcc.models.graph_encoder
    gcc.models.graph_encoder.final_dropout = 0  # disable dropout
    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size,
                          n_data,
                          args.nce_k,
                          args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast.cuda(args.gpu)

    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion.cuda(args.gpu)
    if args.gpu >= 0:
        model = model.cuda(args.gpu)
        model_ema = model_ema.cuda(args.gpu)

    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer.cuda(args.gpu)
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )

        def clear_bn(m):
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)

    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if True:
        # print("=> loading checkpoint '{}'".format(args.resume))
        # checkpoint = torch.load(args.resume, map_location="cpu")
        import torch as th
        checkpoint = th.load("torch_models/ckpt_epoch_100.pth",
                             map_location=th.device('cpu'))
        torch_input_output_grad = th.load(
            "torch_models/torch_input_output_grad.pt",
            map_location=th.device('cpu'))
        from paddorch.convert_pretrain_model import load_pytorch_pretrain_model
        print("loading.............. model")
        paddle_state_dict = load_pytorch_pretrain_model(
            model, checkpoint["model"])
        model.load_state_dict(paddle_state_dict)
        print("loading.............. contrast")
        paddle_state_dict2 = load_pytorch_pretrain_model(
            contrast, checkpoint["contrast"])
        contrast.load_state_dict(paddle_state_dict2)
        print("loading.............. model_ema")
        paddle_state_dict3 = load_pytorch_pretrain_model(
            model_ema, checkpoint["model_ema"])
        if args.moco:
            model_ema.load_state_dict(paddle_state_dict3)

        print("=> loaded successfully '{}' (epoch {})".format(
            args.resume, checkpoint["epoch"]))
        del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=args.learning_rate * 0.1,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
        )
        for _ in range(1):
            graph_q, graph_k = train_dataset[0]
            graph_q2, graph_k2 = train_dataset[1]
            graph_q, graph_k = dgl.batch([graph_q, graph_q2
                                          ]), dgl.batch([graph_k, graph_k2])

            input_output_grad = []
            input_output_grad.append([graph_q, graph_k])
            model.train()
            model_ema.eval()

            feat_q = model(graph_q)
            with torch.no_grad():
                feat_k = model_ema(graph_k)

            out = contrast(feat_q, feat_k)
            loss = criterion(out)
            optimizer.zero_grad()
            loss.backward()
            input_output_grad.append([feat_q, out, loss])
            print("loss:", loss.numpy())
            optimizer.step()
            moment_update(model, model_ema, args.alpha)
        print(
            "max diff feat_q:",
            np.max(
                np.abs(torch_input_output_grad[1][0].detach().numpy() -
                       feat_q.numpy())))
        print(
            "max diff out:",
            np.max(
                np.abs(torch_input_output_grad[1][1].detach().numpy() -
                       out.numpy())))
        print(
            "max diff loss:",
            np.max(
                np.abs(torch_input_output_grad[1][2].detach().numpy() -
                       loss.numpy())))

        name2grad = dict()
        for name, p in dict(model.named_parameters()).items():
            if p.grad is not None:
                name2grad[name] = p.grad
                torch_grad = torch_input_output_grad[2][name].numpy()

                if "linear" in name and "weight" in name:
                    torch_grad = torch_grad.T
                max_grad_diff = np.max(np.abs(p.grad - torch_grad))
                print("max grad diff:", name, max_grad_diff)
        input_output_grad.append(name2grad)
Example #14
    def __init__(
        self,
        positional_embedding_size=32,
        max_node_freq=8,
        max_edge_freq=8,
        max_degree=128,
        freq_embedding_size=32,
        degree_embedding_size=32,
        output_dim=32,
        node_hidden_dim=32,
        edge_hidden_dim=32,
        num_layers=6,
        num_heads=4,
        num_step_set2set=6,
        num_layer_set2set=3,
        norm=False,
        gnn_model="mpnn",
        degree_input=False,
        lstm_as_gate=False,
    ):
        super(GraphEncoder, self).__init__()

        if degree_input:
            node_input_dim = positional_embedding_size + degree_embedding_size + 1
        else:
            node_input_dim = positional_embedding_size + 1
        # node_input_dim = (
        #     positional_embedding_size + freq_embedding_size + degree_embedding_size + 3
        # )
        edge_input_dim = freq_embedding_size + 1
        if gnn_model == "mpnn":
            self.gnn = UnsupervisedMPNN(
                output_dim=output_dim,
                node_input_dim=node_input_dim,
                node_hidden_dim=node_hidden_dim,
                edge_input_dim=edge_input_dim,
                edge_hidden_dim=edge_hidden_dim,
                num_step_message_passing=num_layers,
                lstm_as_gate=lstm_as_gate,
            )
        elif gnn_model == "gat":
            self.gnn = UnsupervisedGAT(
                node_input_dim=node_input_dim,
                node_hidden_dim=node_hidden_dim,
                edge_input_dim=edge_input_dim,
                num_layers=num_layers,
                num_heads=num_heads,
            )
        elif gnn_model == "gin":
            self.gnn = UnsupervisedGIN(
                num_layers=num_layers,
                num_mlp_layers=2,
                input_dim=node_input_dim,
                hidden_dim=node_hidden_dim,
                output_dim=output_dim,
                final_dropout=final_dropout,
                learn_eps=False,
                graph_pooling_type="sum",
                neighbor_pooling_type="sum",
                use_selayer=False,
            )
        self.gnn_model = gnn_model

        self.max_node_freq = max_node_freq
        self.max_edge_freq = max_edge_freq
        self.max_degree = max_degree
        self.degree_input = degree_input

        # self.node_freq_embedding = nn.Embedding(
        #     num_embeddings=max_node_freq + 1, embedding_dim=freq_embedding_size
        # )
        if degree_input:
            self.degree_embedding = nn.Embedding(
                num_embeddings=max_degree + 1, embedding_dim=degree_embedding_size
            )

        # self.edge_freq_embedding = nn.Embedding(
        #     num_embeddings=max_edge_freq + 1, embedding_dim=freq_embedding_size
        # )

        self.set2set = Set2Set(node_hidden_dim, num_step_set2set, num_layer_set2set)
        self.lin_readout = nn.Sequential(
            nn.Linear(2 * node_hidden_dim, node_hidden_dim),
            nn.ReLU(),
            nn.Linear(node_hidden_dim, output_dim),
        )
        self.norm = norm
Example #15
def main(args):
    dgl.random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu >= 0:
        torch.cuda.manual_seed(args.seed)
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location="cpu")
            pretrain_args = checkpoint["opt"]
            pretrain_args.fold_idx = args.fold_idx
            pretrain_args.gpu = args.gpu
            pretrain_args.finetune = args.finetune
            pretrain_args.resume = args.resume
            pretrain_args.cv = args.cv
            pretrain_args.dataset = args.dataset
            pretrain_args.epochs = args.epochs
            pretrain_args.num_workers = args.num_workers
            if args.dataset in GRAPH_CLASSIFICATION_DSETS:
                # HACK for speeding up finetuning on graph classification tasks
                pretrain_args.num_workers = 1
            pretrain_args.batch_size = args.batch_size
            args = pretrain_args
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    args = option_update(args)
    learning_rate = float(args.learning_rate)
    print(args)
    if args.gpu >= 0:
        assert args.gpu is not None and torch.cuda.is_available()
        print("Use GPU: {} for training".format(args.gpu))
    assert args.positional_embedding_size % 2 == 0
    print("setting random seeds")

    mem = psutil.virtual_memory()
    print("before construct dataset", mem.used / 1024**3)
    if args.finetune:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            dataset = GraphClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.dataset.data.y.tolist()
        else:
            dataset = NodeClassificationDatasetLabeled(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
            labels = dataset.data.y.argmax(dim=1).tolist()

        skf = StratifiedKFold(n_splits=10,
                              shuffle=True,
                              random_state=args.seed)
        idx_list = []
        for idx in skf.split(np.zeros(len(labels)), labels):
            idx_list.append(idx)
        assert (0 <= args.fold_idx
                and args.fold_idx < 10), "fold_idx must be from 0 to 9."
        train_idx, test_idx = idx_list[args.fold_idx]
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        valid_dataset = torch.utils.data.Subset(dataset, test_idx)

    elif args.dataset == "dgl":
        train_dataset = LoadBalanceGraphDataset(
            rw_hops=args.rw_hops,
            restart_prob=args.restart_prob,
            positional_embedding_size=args.positional_embedding_size,
            num_workers=args.num_workers,
            num_samples=args.num_samples,
            dgl_graphs_file="./data/small.bin",
            num_copies=args.num_copies,
        )
    else:
        if args.dataset in GRAPH_CLASSIFICATION_DSETS:
            train_dataset = GraphClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )
        else:
            train_dataset = NodeClassificationDataset(
                dataset=args.dataset,
                rw_hops=args.rw_hops,
                subgraph_size=args.subgraph_size,
                restart_prob=args.restart_prob,
                positional_embedding_size=args.positional_embedding_size,
            )

    mem = psutil.virtual_memory()
    print("before construct dataloader", mem.used / 1024**3)
    train_loader = torch.utils.data.graph.Dataloader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        collate_fn=labeled_batcher() if args.finetune else batcher(),
        shuffle=True if args.finetune else False,
        num_workers=args.num_workers,
        worker_init_fn=None
        if args.finetune or args.dataset != "dgl" else worker_init_fn,
    )
    if args.finetune:
        valid_loader = torch.utils.data.graph.Dataloader(
            dataset=valid_dataset,
            batch_size=args.batch_size,
            collate_fn=labeled_batcher(),
            num_workers=args.num_workers,
        )
    mem = psutil.virtual_memory()
    print("before training", mem.used / 1024**3)

    # create model and optimizer
    # n_data = train_dataset.total
    n_data = None

    model, model_ema = [
        GraphEncoder(
            positional_embedding_size=args.positional_embedding_size,
            max_node_freq=args.max_node_freq,
            max_edge_freq=args.max_edge_freq,
            max_degree=args.max_degree,
            freq_embedding_size=args.freq_embedding_size,
            degree_embedding_size=args.degree_embedding_size,
            output_dim=args.hidden_size,
            node_hidden_dim=args.hidden_size,
            edge_hidden_dim=args.hidden_size,
            num_layers=args.num_layer,
            num_step_set2set=args.set2set_iter,
            num_layer_set2set=args.set2set_lstm_layer,
            norm=args.norm,
            gnn_model=args.model,
            degree_input=True,
        ) for _ in range(2)
    ]

    # copy weights from `model' to `model_ema'
    if args.moco:
        # model_ema.load_state_dict(model.state_dict()) ##complete copy of model
        moment_update(model, model_ema, 0)

    # set the contrast memory and criterion
    contrast = MemoryMoCo(args.hidden_size,
                          n_data,
                          args.nce_k,
                          args.nce_t,
                          use_softmax=True)
    if args.gpu >= 0:
        contrast = contrast

    if args.finetune:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = NCESoftmaxLoss() if args.moco else NCESoftmaxLossNS()
        if args.gpu >= 0:
            criterion = criterion
    if args.gpu >= 0:
        model = model
        model_ema = model_ema

    import paddle
    if args.finetune:
        output_layer = nn.Linear(in_features=args.hidden_size,
                                 out_features=dataset.num_classes)
        if args.gpu >= 0:
            output_layer = output_layer
        output_layer_optimizer = torch.optim.Adam(
            output_layer.parameters(),
            lr=args.learning_rate,
            betas=(args.beta1, args.beta2),
            weight_decay=args.weight_decay,
            grad_clip=paddle.nn.clip.ClipGradByValue(max=1))

        def clear_bn(m):
            classname = m.__class__.__name__
            if classname.find("BatchNorm") != -1:
                m.reset_running_stats()

        model.apply(clear_bn)

    if args.optimizer == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optimizer == "adam":
        if args.finetune:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByValue(max=1),
            )
        else:
            optimizer = torch.optim.Adam(
                model.parameters(),
                lr=learning_rate,
                betas=(args.beta1, args.beta2),
                weight_decay=args.weight_decay,
                grad_clip=paddle.nn.clip.ClipGradByNorm(args.clip_norm))
    elif args.optimizer == "adagrad":
        optimizer = torch.optim.Adagrad(
            model.parameters(),
            lr=args.learning_rate,
            lr_decay=args.lr_decay_rate,
            weight_decay=args.weight_decay,
        )
    else:
        raise NotImplementedError

    # optionally resume from a checkpoint
    args.start_epoch = 1
    if args.resume:
        if args.finetune:  # if a finetuned checkpoint exists, resume from it instead
            if os.path.isdir(args.model_folder + "/current.pth"):
                args.resume = args.model_folder + "/current.pth"
                print("change resume model to finetune model path:",
                      args.resume)
                # find the last finished epoch
                import glob
                ckpt_epoches = glob.glob(args.model_folder +
                                         "/ckpt_epoch*.pth")
                if len(ckpt_epoches) > 0:
                    args.start_epoch = sorted([
                        int(
                            os.path.basename(x).replace(".pth", "").replace(
                                "ckpt_epoch_", "")) for x in ckpt_epoches
                    ])[-1] + 1
                    print("starting epoch:", args.start_epoch)
                    args.epochs = args.epochs + args.start_epoch - 1
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume, map_location="cpu")
        # checkpoint = torch.load(args.resume)
        # args.start_epoch = checkpoint["epoch"] + 1
        model.load_state_dict(checkpoint["model"])
        # optimizer.load_state_dict(checkpoint["optimizer"])
        contrast.load_state_dict(checkpoint["contrast"])
        if args.moco:
            model_ema.load_state_dict(checkpoint["model_ema"])

        print("=> loaded successfully '{}' ".format(args.resume))
        if args.finetune:
            if "output_layer" in checkpoint:
                output_layer.load_state_dict(checkpoint["output_layer"])
                print("loaded output layer")
        # del checkpoint
        if args.gpu >= 0:
            torch.cuda.empty_cache()

    # tensorboard
    #  logger = tb_logger.Logger(logdir=args.tb_folder, flush_secs=2)
    sw = LogWriter(logdir=args.tb_folder)

    import gc
    gc.enable()
    for epoch in range(args.start_epoch, args.epochs + 1):

        adjust_learning_rate(epoch, args, optimizer)
        print("==> training...")

        time1 = time.time()
        try:
            if args.finetune:
                loss, _ = train_finetune(
                    epoch,
                    train_loader,
                    model,
                    output_layer,
                    criterion,
                    optimizer,
                    output_layer_optimizer,
                    sw,
                    args,
                )
            else:

                loss = train_moco(
                    epoch,
                    train_loader,
                    model,
                    model_ema,
                    contrast,
                    criterion,
                    optimizer,
                    sw,
                    args,
                )
        except Exception as e:
            print("Error in Epoch", epoch, e)
            continue
        time2 = time.time()
        print("epoch {}, total time {:.2f}".format(epoch, time2 - time1))

        # save model
        if epoch % args.save_freq == 0:
            print("==> Saving...")
            state = {
                "opt": vars(args).copy(),
                "model": model.state_dict(),
                "contrast": contrast.state_dict(),
                "optimizer": optimizer.state_dict()
            }
            if args.moco:
                state["model_ema"] = model_ema.state_dict()
            if args.finetune:
                state['output_layer'] = output_layer.state_dict()
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
            # help release GPU memory
            # del state

        # saving the model
        print("==> Saving...")
        state = {
            "opt": vars(args).copy(),
            "model": model.state_dict(),
            "contrast": contrast.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        if args.moco:
            state["model_ema"] = model_ema.state_dict()
        if args.finetune:
            state['output_layer'] = output_layer.state_dict()
        save_file = os.path.join(args.model_folder, "current.pth")
        torch.save(state, save_file)
        if epoch % args.save_freq == 0:
            save_file = os.path.join(
                args.model_folder,
                "ckpt_epoch_{epoch}.pth".format(epoch=epoch))
            torch.save(state, save_file)
        # help release GPU memory
        # del state
        if args.gpu >= 0:
            torch.cuda.empty_cache()

        if args.finetune:
            valid_loss, valid_f1 = test_finetune(epoch, valid_loader, model,
                                                 output_layer, criterion, sw,
                                                 args)
            print("epoch %d| valid f1: %.3f" % (epoch, valid_f1))

    # del model,model_ema,train_loader
    gc.collect()
    return valid_f1
Example #16
    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 n_blocks=6,
                 img_size=256,
                 light=False):
        assert (n_blocks >= 0)
        super(ResnetGenerator, self).__init__()
        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf
        self.n_blocks = n_blocks
        self.img_size = img_size
        self.light = light

        DownBlock = []
        DownBlock += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc,
                      ngf,
                      kernel_size=7,
                      stride=1,
                      padding=0,
                      bias=False),
            nn.InstanceNorm2d(ngf, affine=True),
            nn.ReLU(True)
        ]

        # Down-Sampling
        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            DownBlock += [
                nn.ReflectionPad2d(1),
                nn.Conv2d(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=0,
                          bias=False),
                nn.InstanceNorm2d(ngf * mult * 2, affine=True),
                nn.ReLU(True)
            ]

        # Down-Sampling Bottleneck
        mult = 2**n_downsampling
        for i in range(n_blocks):
            DownBlock += [ResnetBlock(ngf * mult, use_bias=False)]

        # Class Activation Map
        self.gap_fc = nn.Linear(ngf * mult, 1, bias=False)
        self.gmp_fc = nn.Linear(ngf * mult, 1, bias=False)
        self.conv1x1 = nn.Conv2d(ngf * mult * 2,
                                 ngf * mult,
                                 kernel_size=1,
                                 stride=1,
                                 bias=True)
        self.relu = nn.ReLU(True)

        # Gamma, Beta block
        if self.light:
            FC = [
                nn.Linear(ngf * mult, ngf * mult, bias=False),
                nn.ReLU(True),
                nn.Linear(ngf * mult, ngf * mult, bias=False),
                nn.ReLU(True)
            ]
        else:
            FC = [
                nn.Linear(img_size // mult * img_size // mult * ngf * mult,
                          ngf * mult,
                          bias=False),
                nn.ReLU(True),
                nn.Linear(ngf * mult, ngf * mult, bias=False),
                nn.ReLU(True)
            ]
        self.gamma = nn.Linear(ngf * mult, ngf * mult, bias=False)
        self.beta = nn.Linear(ngf * mult, ngf * mult, bias=False)

        # Up-Sampling Bottleneck
        for i in range(n_blocks):
            setattr(self, 'UpBlock1_' + str(i + 1),
                    ResnetAdaILNBlock(ngf * mult, use_bias=False))

        # Up-Sampling
        UpBlock2 = []
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            UpBlock2 += [
                nn.Upsample(scale_factor=2, mode='nearest'),
                nn.ReflectionPad2d(1),
                nn.Conv2d(ngf * mult,
                          int(ngf * mult / 2),
                          kernel_size=3,
                          stride=1,
                          padding=0,
                          bias=False),
                ILN(int(ngf * mult / 2)),
                nn.ReLU(True)
            ]

        UpBlock2 += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(ngf,
                      output_nc,
                      kernel_size=7,
                      stride=1,
                      padding=0,
                      bias=False),
            nn.Tanh()
        ]

        self.DownBlock = nn.Sequential(*DownBlock)
        self.FC = nn.Sequential(*FC)
        self.UpBlock2 = nn.Sequential(*UpBlock2)
Example #17
 def __init__(self, style_dim, num_features):
     super().__init__()
     self.norm = nn.InstanceNorm2d(num_features, affine=False)
     self.fc = nn.Linear(style_dim, num_features * 2)
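The constructor only creates the affine projection from the style code. A minimal sketch of the usual adaptive instance normalization forward (assumed, following the common StarGAN v2-style formulation):

 def forward(self, x, s):
     h = self.fc(s)                                 # (N, 2 * num_features)
     h = h.view(h.size(0), h.size(1), 1, 1)
     gamma, beta = torch.chunk(h, chunks=2, dim=1)  # per-channel scale and shift from the style code
     return (1 + gamma) * self.norm(x) + beta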