def learn(learning_rate, iterations, x, y, validation=None, stop_early=False, run_comment=''):
    # Define a neural network using high-level modules.
    writer = SummaryWriter(comment=run_comment)
    model = Sequential(
        Linear(len(x[0]), len(y[0]), bias=True)  # n inputs -> 1 output
    )
    loss_fn = BCEWithLogitsLoss(reduction='sum')  # reduction=mean converges slower.
    # TODO: Add an option to twiddle pos_weight, which lets us trade off precision
    # and recall. Maybe also graph using add_pr_curve(), which can show how that
    # tradeoff is going.
    optimizer = Adam(model.parameters(), lr=learning_rate)

    if validation:
        validation_ins, validation_outs = validation
        previous_validation_loss = None

    with progressbar(range(iterations)) as bar:
        for t in bar:
            y_pred = model(x)  # Make predictions.
            loss = loss_fn(y_pred, y)
            writer.add_scalar('loss', loss, t)
            if validation:
                validation_loss = loss_fn(model(validation_ins), validation_outs)
                if stop_early:
                    if previous_validation_loss is not None and previous_validation_loss < validation_loss:
                        print('Stopping early at iteration {t} because validation error rose.'.format(t=t))
                        model.load_state_dict(previous_model)
                        break
                    else:
                        previous_validation_loss = validation_loss
                        previous_model = model.state_dict()
                writer.add_scalar('validation_loss', validation_loss, t)
            writer.add_scalar('training_accuracy_per_tag', accuracy_per_tag(model, x, y), t)
            optimizer.zero_grad()  # Zero the gradients.
            loss.backward()  # Compute gradients.
            optimizer.step()

            # Horizontal axis is confidence; vertical is how many samples had that confidence.
            writer.add_histogram('confidence', confidences(model, x), t)

    writer.close()
    return model
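# Usage sketch for learn() above (a minimal example, not from the original code:
# it assumes the surrounding module provides SummaryWriter, progressbar,
# accuracy_per_tag and confidences, and the toy tensors here are made up).
import torch

x = torch.rand(100, 5)                  # 100 samples, 5 features
y = (torch.rand(100, 1) > 0.5).float()  # 1 binary target per sample

train_x, val_x = x[:80], x[80:]         # hold out 20 samples for validation
train_y, val_y = y[:80], y[80:]

model = learn(
    learning_rate=0.1,
    iterations=200,
    x=train_x,
    y=train_y,
    validation=(val_x, val_y),
    stop_early=True,
    run_comment='toy-run',
)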
def __init__(self, num_classes: int, is_test=False, config=None, device=None): """ Create default SSD model. """ super(SSD, self).__init__() self.num_classes = num_classes self.base_net = MobileNetV1(self.num_classes).model self.source_layer_indexes = [ 12, 14, ] self.extras = ModuleList([ Sequential( Conv2d(in_channels=1024, out_channels=256, kernel_size=1), ReLU(), Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=512, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()) ]) self.regression_headers = ModuleList([ Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? ]) self.classification_headers = ModuleList([ Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? ]) self.is_test = is_test self.config = config # register layers in source_layer_indexes by adding them to a module list self.source_layer_add_ons = nn.ModuleList( [t[1] for t in self.source_layer_indexes if isinstance(t, tuple)]) if device: self.device = device else: self.device = torch.device( "cuda:0" if torch.cuda.is_available() else "cpu") if is_test: self.config = config self.priors = config.priors.to(self.device)
def __init__(self, useIntraGCN=True, useInterGCN=True, useRandomMatrix=False, useAllOneMatrix=False, useCov=False, useCluster=False, inverted_residual_setting=None, block=None): super(Backbone_MobileNetV2, self).__init__() if block is None: block = InvertedResidual if inverted_residual_setting is None: inverted_residual_setting = [ # t, c, n, s [1, 64, 1, 1], [6, 64, 2, 2], [6, 128, 3, 2], [6, 256, 4, 2], [6, 256, 3, 1], [6, 512, 3, 2], [6, 512, 1, 1], [6, 256, 4, 2], [6, 512, 3, 2] ] # only check the first element, assuming user knows t,c,n,s are required if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: raise ValueError("inverted_residual_setting should be non-empty " "or a 4-element list, got {}".format(inverted_residual_setting)) features = [] input_channel = 3 for index, (t, c, n, s) in enumerate(inverted_residual_setting): feature, input_channel, output_channel = [], input_channel if index != 7 else 128, c for i in range(n): stride = s if i == 0 else 1 feature.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel features.append(feature) self.layer1 = Sequential(*(features[0]+features[1])) self.layer2 = Sequential(*(features[2])) self.layer3 = Sequential(*(features[3]+features[4])) self.layer4 = Sequential(*(features[5]+features[6])) self.output_layer = Sequential(nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)), nn.ReLU(), nn.AdaptiveAvgPool2d((1,1))) self.Crop_Net = nn.ModuleList([ Sequential( *features[7], *features[8], nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)), nn.ReLU() ) for i in range(5) ]) self.fc = nn.Linear(64 + 320, 7) self.loc_fc = nn.Linear(320, 7) self.GAP = nn.AdaptiveAvgPool2d((1,1)) #self.GCN = GCN(64, 128, 64) self.GCN = GCNwithIntraAndInterMatrix(64, 128, 64, useIntraGCN=useIntraGCN, useInterGCN=useInterGCN, useRandomMatrix=useRandomMatrix, useAllOneMatrix=useAllOneMatrix) self.SourceMean = (CountMeanAndCovOfFeature(64+320) if useCov else CountMeanOfFeature(64+320)) if not useCluster else CountMeanOfFeatureInCluster(64+320) self.TargetMean = (CountMeanAndCovOfFeature(64+320) if useCov else CountMeanOfFeature(64+320)) if not useCluster else CountMeanOfFeatureInCluster(64+320) self.SourceBN = BatchNorm1d(64+320) self.TargetBN = BatchNorm1d(64+320)
class Backbone_MobileNetV2_onlyGlobal(nn.Module): def __init__(self, inverted_residual_setting=None, block=None): super(Backbone_MobileNetV2_onlyGlobal, self).__init__() if block is None: block = InvertedResidual if inverted_residual_setting is None: inverted_residual_setting = [ # t, c, n, s [1, 64, 1, 1], [6, 64, 2, 2], [6, 128, 3, 2], [6, 256, 4, 2], [6, 256, 3, 1], [6, 512, 3, 2], [6, 512, 1, 1], ] # only check the first element, assuming user knows t,c,n,s are required if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: raise ValueError("inverted_residual_setting should be non-empty " "or a 4-element list, got {}".format(inverted_residual_setting)) features = [] input_channel = 3 for t, c, n, s in inverted_residual_setting: feature = [] output_channel = c for i in range(n): stride = s if i == 0 else 1 feature.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel features.append(feature) self.layer1 = Sequential(*(features[0]+features[1])) self.layer2 = Sequential(*(features[2])) self.layer3 = Sequential(*(features[3]+features[4])) self.layer4 = Sequential(*(features[5]+features[6])) self.GAP = nn.AdaptiveAvgPool2d((1,1)) self.output_layer = Sequential(nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)), nn.ReLU(), nn.AdaptiveAvgPool2d((1,1))) self.fc = nn.Linear(64, 7) def classify(self, imgs, locations): featureMap1 = self.layer1(imgs) # Batch * 64 * 56 * 56 featureMap2 = self.layer2(featureMap1) # Batch * 128 * 28 * 28 featureMap3 = self.layer3(featureMap2) # Batch * 256 * 14 * 14 featureMap4 = self.layer4(featureMap3) # Batch * 512 * 7 * 7 feature = self.output_layer(featureMap4).view(imgs.size(0), -1) # Batch * 64 pred = self.fc(feature) # Batch * 7 loc_pred = None return feature, pred, loc_pred def transfer(self, imgs, locations, domain='Target'): assert domain in ['Source', 'Target'], 'Parameter domain should be Source or Target.' featureMap1 = self.layer1(imgs) # Batch * 64 * 56 * 56 featureMap2 = self.layer2(featureMap1) # Batch * 128 * 28 * 28 featureMap3 = self.layer3(featureMap2) # Batch * 256 * 14 * 14 featureMap4 = self.layer4(featureMap3) # Batch * 512 * 7 * 7 feature = self.output_layer(featureMap4).view(imgs.size(0), -1) # Batch * 64 pred = self.fc(feature) # Batch * 7 loc_pred = None return feature, pred, loc_pred def forward(self, imgs, locations, flag=True, domain='Target'): if flag: return self.classify(imgs, locations) return self.transfer(imgs, locations, domain) def output_num(self): return 64 def get_parameters(self): parameter_list = [ {"params":self.layer1.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer2.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer3.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer4.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.output_layer.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.fc.parameters(), "lr_mult":10, 'decay_mult':2}, \ ] return parameter_list
def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
    super(Residual, self).__init__()
    modules = []
    for _ in range(num_block):
        modules.append(
            Depth_Wise(c, c, residual=True, kernel=kernel,
                       padding=padding, stride=stride, groups=groups))
    self.model = Sequential(*modules)
class ShallowFBCSPNet(PytorchModelBase): """ From the ConvNet for BrainData Paper """ @staticmethod def add_arguments(parser): PytorchModelBase.add_arguments(parser) parser.add_argument("n_filters_time", type=int, default=40, help="TODO") parser.add_argument("filter_time_length", type=int, default=25, help="TODO") parser.add_argument("n_filters_spat", type=int, default=40, help="TODO") parser.add_argument("pool_time_length", type=int, default=75, help="TODO") parser.add_argument("pool_time_stride", type=int, default=15, help="TODO") parser.add_argument("conv_nonlin", type=str, default="square", choices=nonlin_dict.keys(), help="TODO") parser.add_argument("pool_mode", type=str, default="mean", help="TODO") parser.add_argument("pool_nonlin", type=str, default="safe_log", help="TODO") parser.add_argument("batch_norm", type=int, default=1, help="TODO") parser.add_argument("batch_norm_alpha", type=float, default=0.1, help="TODO") parser.add_argument("drop_prob", type=float, default=0.5, help="TODO") parser.add_argument("final_conv_length", type=int, default=30, help="TODO") return parser def __init__(self, n_filters_time, filter_time_length, n_filters_spat, pool_time_length, pool_time_stride, conv_nonlin, pool_mode, pool_nonlin, batch_norm, batch_norm_alpha, drop_prob, final_conv_length, **kwargs): super().__init__(**kwargs) self.sequential = Sequential() split_cnn = SplitConv(in_size=self.input_size, middle_size=n_filters_time, out_size=n_filters_spat, time_kernel_size=filter_time_length, input_in_rnn_format=False) self.sequential.add_module('split_cnn', split_cnn) if batch_norm: bn = BatchNorm1d(n_filters_spat) self.sequential.add_module('batch_norm', bn) non_lin = Expression(square) self.sequential.add_module('non_lin_0', non_lin) pool = AvgPool1d(kernel_size=pool_time_length, stride=pool_time_stride) self.sequential.add_module('pool_1', pool) non_lin = Expression(safe_log) self.sequential.add_module('non_lin_1', non_lin) dropout = Dropout(p=drop_prob) self.sequential.add_module('dropout', dropout) conv = nn.Conv1d(in_channels=n_filters_spat, out_channels=self.output_size, kernel_size=final_conv_length, bias=True) self.sequential.add_module('conv', conv) def forward(self, x, hidden, context): # Input is given as N x L x C # ConvNets expect N x C x L x = transpose(x, 1, 2) x = self.sequential(x) # Convert back to N x L x C x = transpose(x, 1, 2) # x = x[:, i:10, :] x = x.contiguous() return x, hidden def offset_size(self, sequence_size): # Forward dummy vector and find out what is the output shape v = np.zeros((1, sequence_size, self.input_size), np.float32) v = Variable(from_numpy(v)) if next(self.parameters()).is_cuda: v = v.cuda() o, h = self.forward(v, None, None) o_size = o.size(1) return sequence_size - o_size
def __init__(self, numOfLayer): super(Backbone, self).__init__() unit_module = bottleneck_IR self.input_layer = Sequential( Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False), BatchNorm2d(64), PReLU(64)) blocks = get_blocks(numOfLayer) self.layer1 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[0] ]) #get_block(in_channel=64, depth=64, num_units=3)]) self.layer2 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[1] ]) #get_block(in_channel=64, depth=128, num_units=4)]) self.layer3 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[2] ]) #get_block(in_channel=128, depth=256, num_units=14)]) self.layer4 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[3] ]) #get_block(in_channel=256, depth=512, num_units=3)]) self.output_layer = Sequential( nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1))) cropNet_modules = [] cropNet_blocks = [ get_block(in_channel=128, depth=256, num_units=2), get_block(in_channel=256, depth=512, num_units=2) ] for block in cropNet_blocks: for bottleneck in block: cropNet_modules.append( unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)) cropNet_modules += [ nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), nn.ReLU() ] self.Crop_Net = nn.ModuleList( [copy.deepcopy(nn.Sequential(*cropNet_modules)) for i in range(5)]) self.fc = nn.Linear(64 + 320, 7) self.fc.apply(init_weights) self.loc_fc = nn.Linear(320, 7) self.loc_fc.apply(init_weights) self.GAP = nn.AdaptiveAvgPool2d((1, 1))
def __init__(self): super(Net, self).__init__() #name self.name = "DirCNN3" #optimizer self.lr = 0.001 self.optimizer_name = 'Adam-Tri' #data self.data_name = "ModelNet10" #self.data_name = "Geometry" self.batch_size = 40 self.nr_points = 1024 self.nr_classes = 10 if self.data_name == 'ModelNet10' else 40 #train_info self.max_epochs = 120 self.save_every = 3 #model self.k = 20 self.l = 7 # DD1 self.in_size = 3 self.out_size = 64 layers = [] layers.append(Linear(self.in_size, 64)) layers.append(ReLU()) layers.append(torch.nn.BatchNorm1d(64)) layers.append(Linear(64 , 64)) layers.append(ReLU()) layers.append(torch.nn.BatchNorm1d(64)) layers.append(Linear(64, self.out_size)) layers.append(ReLU()) layers.append(torch.nn.BatchNorm1d(self.out_size)) dense3dnet = Sequential(*layers) self.dd = DD(l = self.l, k = self.k, mlp = dense3dnet, conv_p = True, conv_fc = False, conv_fn = False, out_3d = True) # DD2 self.in_size_2 = 64 * 3 self.out_size_2 = 128 layers2 = [] layers2.append(Linear(self.in_size_2, self.out_size_2)) layers2.append(ReLU()) layers2.append(torch.nn.BatchNorm1d(self.out_size_2)) dense3dnet2 = Sequential(*layers2) self.dd2 = DD(l = self.l, k = self.k, mlp = dense3dnet2, conv_p = False, conv_fc = False, conv_fn = True, out_3d = False) self.nn1 = torch.nn.Linear(self.out_size_2, 1024) self.bn1 = torch.nn.BatchNorm1d(1024) self.nn2 = torch.nn.Linear(1024, 512) self.bn2 = torch.nn.BatchNorm1d(512) self.nn3 = torch.nn.Linear(512, 265) self.bn3 = torch.nn.BatchNorm1d(265) self.nn4 = torch.nn.Linear(265, self.nr_classes) self.sm = torch.nn.LogSoftmax(dim=1)
def create_vgg_ssd(num_classes, is_test=False, device_id=None):
    # VOC: 21 classes.
    # Numbers are output channel counts; 'M' is max pooling (2x downsampling);
    # 'C' is max pooling with ceil mode (round up).
    vgg_config = [
        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
        512, 512, 512
    ]

    base_net = ModuleList(vgg(vgg_config))  # Build the basic VGG network structure.

    source_layer_indexes = [
        (23, BatchNorm2d(512)),  # Layer 23 is the Conv4_3 output feature map, 38*38*512.
        len(base_net),           # Number of VGG base layers: 35 (including pooling and ReLU).
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
            ReLU()),
        Sequential(
            Conv2d(in_channels=512, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
            ReLU()),
        Sequential(
            Conv2d(in_channels=256, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3),
            ReLU()),
        Sequential(
            Conv2d(in_channels=256, out_channels=128, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=128, out_channels=256, kernel_size=3),
            ReLU())
    ])

    regression_headers = ModuleList([
        Conv2d(in_channels=512, out_channels=4 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=4 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=4 * 4, kernel_size=3, padding=1),
        # TODO: change to kernel_size=1, padding=0?
    ])

    classification_headers = ModuleList([
        Conv2d(in_channels=512, out_channels=4 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=4 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=256, out_channels=4 * num_classes, kernel_size=3, padding=1),
        # TODO: change to kernel_size=1, padding=0?
    ])

    return SSD(num_classes, base_net, source_layer_indexes, extras,
               classification_headers, regression_headers,
               is_test=is_test, config=config, device_id=device_id)
def __init__(self, input_dim, output_dim, activate, bn_decay):
    super(ResidualFC, self).__init__()
    self.seq = Sequential(
        Linear(input_dim, output_dim),
        BatchNorm1d(output_dim, momentum=bn_decay),
        activate())
def create_vgg_ssd(num_classes, is_test=False): vgg_config = [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512 ] base_net = ModuleList(vgg(vgg_config)) source_layer_indexes = [ (23, BatchNorm2d(512)), len(base_net), ] extras = ModuleList([ Sequential( Conv2d(in_channels=1024, out_channels=256, kernel_size=1), ReLU(), Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=512, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential(Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3), ReLU()), Sequential(Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), Conv2d(in_channels=128, out_channels=256, kernel_size=3), ReLU()) ]) regression_headers = ModuleList([ Conv2d(in_channels=512, out_channels=4 * 4, kernel_size=3, padding=1), Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=4 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=4 * 4, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? ]) classification_headers = ModuleList([ Conv2d(in_channels=512, out_channels=4 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=4 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=4 * num_classes, kernel_size=3, padding=1), # TODO: change to kernel_size=1, padding=0? ]) return SSD(num_classes, base_net, source_layer_indexes, extras, classification_headers, regression_headers, is_test=is_test, config=config)
def test_add_param_group(debias_ewma):
    """Test AdaScale supports add_param_group() API."""
    model1 = Linear(2, 2, bias=True)
    with torch.no_grad():
        # make weights and bias deterministic, which is needed for
        # multi-layer models. For them, adascale gain is affected by
        # parameters from other layers.
        model1.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0]).reshape(2, 2))
        model1.bias.fill_(0.1)
    optim = AdaScale(SGD(model1.parameters(), lr=0.1),
                     num_gradients_to_accumulate=2,
                     debias_ewma=debias_ewma)
    assert len(optim._hook_handles) == 2

    model2 = Linear(2, 3, bias=True)
    with torch.no_grad():
        # make weights and bias deterministic
        model2.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(3, 2))
        model2.bias.fill_(0.2)
    optim.add_param_group({"params": model2.parameters()})
    assert len(optim._hook_handles) == 4

    # make sure we can run the model.
    model = Sequential(model1, model2).cuda()
    in_data_0 = Tensor([1.0, 2.0]).cuda()
    out = model(in_data_0)
    out.sum().backward()
    in_data_1 = Tensor([3.0, 4.0]).cuda()
    out = model(in_data_1)
    out.sum().backward()

    # make sure the gains are right and we can step.
    # since this is the first step, debias_ewma doesn't affect the value.
    assert np.allclose(optim.gain(), 1.1440223454935758), optim.gain()
    assert np.allclose(optim.gain(0), 1.1428571428571428), optim.gain(0)
    assert np.allclose(optim.gain(1), 1.1471258476157762), optim.gain(1)
    optim.step()
    optim.zero_grad()

    # make sure we can add a PG again after stepping.
    model3 = Linear(3, 4, bias=True)
    with torch.no_grad():
        # make weights and bias deterministic
        model3.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0] * 2).reshape(4, 3))
        model3.bias.fill_(0.2)
    optim.add_param_group({"params": model3.parameters()})
    assert len(optim._hook_handles) == 6

    # make sure we can run the model.
    model = Sequential(model1, model2, model3).cuda()
    in_data_0 = Tensor([1.0, 2.0]).cuda()
    out = model(in_data_0)
    out.sum().backward()
    in_data_1 = Tensor([3.0, 4.0]).cuda()
    out = model(in_data_1)
    out.sum().backward()

    # make sure gains are right and we can step.
    # the last PG's gain is not affected by debias_ewma since it is the first step for that PG.
    assert np.allclose(optim.gain(),
                       1.1191193589460822 if debias_ewma else 1.1192783954732368), optim.gain()
    assert np.allclose(optim.gain(0),
                       1.1428571880897151 if debias_ewma else 1.142857188085096), optim.gain(0)
    assert np.allclose(optim.gain(1),
                       1.1167103578364508 if debias_ewma else 1.1167104954034948), optim.gain(1)
    assert np.allclose(optim.gain(2), 1.117381091722702), optim.gain(2)
    optim.step()
    optim.zero_grad()
""" Compute the gradient with PyTorch and the gradient variance with BackPACK. """ from torch.nn import CrossEntropyLoss, Flatten, Linear, Sequential from backpack import backpack, extend, extensions from backpack.utils.examples import load_mnist_data B = 4 X, y = load_mnist_data(B) print("# Gradient with PyTorch, gradient variance with BackPACK | B =", B) model = Sequential( Flatten(), Linear(784, 10), ) lossfunc = CrossEntropyLoss() model = extend(model) lossfunc = extend(lossfunc) loss = lossfunc(model(X), y) with backpack(extensions.Variance()): loss.backward() for name, param in model.named_parameters(): print(name) print(".grad.shape: ", param.grad.shape) print(".variance.shape: ", param.variance.shape)
class Discriminator(torch.nn.Module): def __init__(self,config): super(Discriminator,self).__init__() self.parse_config(config) self.discriminator = Sequential() self.final_layer = Sequential() #first image layer c_layer = self.g_feature_size self.discriminator.add_module('Conv1',Conv2d(self.img_c, c_layer,self.kernel_size, self.stride, self.g_input_pad,bias=False)) #no batch norm in input payer acc to paper self.discriminator.add_module('LeakyReLU',LeakyReLU(self.leaky_slope,inplace=True)) layer_number = 2 for i in range(1,self.g_layers): c_input = copy(c_layer) c_layer = int(self.g_feature_size *(2**i)) self.discriminator.add_module('Conv'+str(layer_number),Conv2d(c_input,c_layer,self.kernel_size, self.stride,self.g_input_pad,bias=False)) self.discriminator.add_module('BN'+str(layer_number),BatchNorm2d(c_layer)) self.discriminator.add_module('LeakyReLU'+str(layer_number),LeakyReLU(self.leaky_slope,inplace=True)) layer_number+=1 #flatten and sigmoid height = int(self.img_h/2**self.g_layers) self.final_layer.add_module('MapTo1', Conv2d(c_layer,1,height,bias=False)) self.final_layer.add_module('Sigmoid', Sigmoid()) def parse_config(self, config): self.g_feature_size=config['g_feature_size'] self.g_layers = config['g_layers'] self.len_z=config['len_z'] self.img_h=config['img_h'] self.img_w=config['img_w'] self.img_c=config['img_c'] self.c_input = config['len_z'] self.stride = config['g_stride'] self.kernel_size = config['g_kernel_size'] self.g_input_pad = config['g_input_pad'] self.g_output_pad = config['g_output_pad'] self.leaky_slope = config['leaky_ReLU_slope'] def forward(self,images): logging.info("Input Shape = " + str(images.shape)) logging.info(self.discriminator) feature_cube = self.discriminator(images) #decide if data image or generated image decision = self.final_layer(feature_cube) decision = decision.reshape(decision.shape[0],-1) #shape[0]=batch size return decision
def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
    super().__init__()
    self.options = kwargs.get("options", {})
    self.layers = Sequential()
    self.skip_connection = False
    self.skip_layers = Identity()
def __init__(self, args): super(GIN, self).__init__() self.args = args self.num_layer = int(self.args["num_layers"]) assert self.num_layer > 2, "Number of layers in GIN should not less than 3" missing_keys = list( set([ "features_num", "num_class", "num_graph_features", "num_layers", "hidden", "dropout", "act", "mlp_layers", "eps", ]) - set(self.args.keys())) if len(missing_keys) > 0: raise Exception("Missing keys: %s." % ",".join(missing_keys)) if not self.num_layer == len(self.args["hidden"]) + 1: LOGGER.warn( "Warning: layer size does not match the length of hidden units" ) self.num_graph_features = self.args["num_graph_features"] if self.args["act"] == "leaky_relu": act = LeakyReLU() elif self.args["act"] == "relu": act = ReLU() elif self.args["act"] == "elu": act = ELU() elif self.args["act"] == "tanh": act = Tanh() else: act = ReLU() train_eps = True if self.args["eps"] == "True" else False self.convs = torch.nn.ModuleList() self.bns = torch.nn.ModuleList() nn = [Linear(self.args["features_num"], self.args["hidden"][0])] for _ in range(self.args["mlp_layers"] - 1): nn.append(act) nn.append(Linear(self.args["hidden"][0], self.args["hidden"][0])) # nn.append(BatchNorm1d(self.args['hidden'][0])) self.convs.append(GINConv(Sequential(*nn), train_eps=train_eps)) self.bns.append(BatchNorm1d(self.args["hidden"][0])) for i in range(self.num_layer - 3): nn = [Linear(self.args["hidden"][i], self.args["hidden"][i + 1])] for _ in range(self.args["mlp_layers"] - 1): nn.append(act) nn.append( Linear(self.args["hidden"][i + 1], self.args["hidden"][i + 1])) # nn.append(BatchNorm1d(self.args['hidden'][i+1])) self.convs.append(GINConv(Sequential(*nn), train_eps=train_eps)) self.bns.append(BatchNorm1d(self.args["hidden"][i + 1])) self.fc1 = Linear( self.args["hidden"][self.num_layer - 3] + self.num_graph_features, self.args["hidden"][self.num_layer - 2], ) self.fc2 = Linear(self.args["hidden"][self.num_layer - 2], self.args["num_class"])
if model_name == "siren": model = ImageSiren( hidden_features, hidden_layers=hidden_layers, hidden_omega=30, ) elif model_name == "mlp_relu": layers = [Linear(2, hidden_features), ReLU()] for _ in range(hidden_layers): layers.append(Linear(hidden_features, hidden_features)) layers.append(ReLU()) layers.append(Linear(hidden_features, 1)) model = Sequential(*layers) for module in model.modules(): if not isinstance(module, Linear): continue torch.nn.init.xavier_normal_(module.weight) else: raise ValueError("Unsupported model") dataloader = DataLoader(dataset, batch_size=batch_size) optim = torch.optim.Adam(lr=1e-4, params=model.parameters()) # Training loop for e in range(n_epochs): losses = [] for d_batch in tqdm.tqdm(dataloader):
class Backbone(nn.Module): def __init__(self, numOfLayer): super(Backbone, self).__init__() unit_module = bottleneck_IR self.input_layer = Sequential( Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False), BatchNorm2d(64), PReLU(64)) blocks = get_blocks(numOfLayer) self.layer1 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[0] ]) #get_block(in_channel=64, depth=64, num_units=3)]) self.layer2 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[1] ]) #get_block(in_channel=64, depth=128, num_units=4)]) self.layer3 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[2] ]) #get_block(in_channel=128, depth=256, num_units=14)]) self.layer4 = Sequential(*[ unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride) for bottleneck in blocks[3] ]) #get_block(in_channel=256, depth=512, num_units=3)]) self.output_layer = Sequential( nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), nn.ReLU(), nn.AdaptiveAvgPool2d((1, 1))) cropNet_modules = [] cropNet_blocks = [ get_block(in_channel=128, depth=256, num_units=2), get_block(in_channel=256, depth=512, num_units=2) ] for block in cropNet_blocks: for bottleneck in block: cropNet_modules.append( unit_module(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)) cropNet_modules += [ nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), nn.ReLU() ] self.Crop_Net = nn.ModuleList( [copy.deepcopy(nn.Sequential(*cropNet_modules)) for i in range(5)]) self.fc = nn.Linear(64 + 320, 7) self.fc.apply(init_weights) self.loc_fc = nn.Linear(320, 7) self.loc_fc.apply(init_weights) self.GAP = nn.AdaptiveAvgPool2d((1, 1)) def forward(self, imgs, locations): featureMap = self.input_layer(imgs) featureMap1 = self.layer1(featureMap) # Batch * 64 * 56 * 56 featureMap2 = self.layer2(featureMap1) # Batch * 128 * 28 * 28 featureMap3 = self.layer3(featureMap2) # Batch * 256 * 14 * 14 featureMap4 = self.layer4(featureMap3) # Batch * 512 * 7 * 7 global_feature = self.output_layer(featureMap4).view( featureMap.size(0), -1) # Batch * 64 loc_feature = self.crop_featureMap(featureMap2, locations) # Batch * 320 feature = torch.cat((global_feature, loc_feature), 1) # Batch * (64+320) pred = self.fc(feature) # Batch * 7 loc_pred = self.loc_fc(loc_feature) # Batch * 7 return feature, pred, loc_pred def output_num(self): return 64 * 6 def get_parameters(self): parameter_list = [ {"params":self.input_layer.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer1.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer2.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer3.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer4.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.output_layer.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.Crop_Net.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.fc.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.loc_fc.parameters(), "lr_mult":10, 'decay_mult':2}, \ ] return parameter_list def crop_featureMap(self, featureMap, locations): batch_size = featureMap.size(0) map_ch = featureMap.size(1) map_len = featureMap.size(2) grid_ch = map_ch grid_len = 7 # 14, 6, 4 feature_list = [] for i in range(5): grid_list = [] for j in 
range(batch_size): w_min = locations[j, i, 0] - int(grid_len / 2) w_max = locations[j, i, 0] + int(grid_len / 2) h_min = locations[j, i, 1] - int(grid_len / 2) h_max = locations[j, i, 1] + int(grid_len / 2) map_w_min = max(0, w_min) map_w_max = min(map_len - 1, w_max) map_h_min = max(0, h_min) map_h_max = min(map_len - 1, h_max) grid_w_min = max(0, 0 - w_min) grid_w_max = grid_len + min(0, map_len - 1 - w_max) grid_h_min = max(0, 0 - h_min) grid_h_max = grid_len + min(0, map_len - 1 - h_max) grid = torch.zeros(grid_ch, grid_len, grid_len) if featureMap.is_cuda: grid = grid.cuda() grid[:, grid_h_min:grid_h_max + 1, grid_w_min:grid_w_max + 1] = featureMap[j, :, map_h_min:map_h_max + 1, map_w_min:map_w_max + 1] grid_list.append(grid) feature = torch.stack(grid_list, dim=0) feature_list.append(feature) # feature list: 5 * [ batch_size * channel * 3 * 3 ] output_list = [] for i in range(5): output = self.Crop_Net[i](feature_list[i]) output = self.GAP(output) output_list.append(output) loc_feature = torch.stack(output_list, dim=1) # batch_size * 5 * 64 * 1 * 1 loc_feature = loc_feature.view(batch_size, -1) # batch_size * 320 return loc_feature
class SIVILayer(torch.nn.Module): def init_weights(self, _module): if type(_module) == torch.nn.Linear: torch.nn.init.orthogonal_(_module.weight, gain=1.) if type(_module.bias) != None: _module.bias.data.normal_(0., 0.01) def __init__(self, in_features, out_features, _dim_noise_input): super().__init__() self.dim_input = in_features self.dim_output = out_features self.dim_noise_input = _dim_noise_input self.dim_output_params = in_features * out_features * 2 + out_features * 2 self.num_hidden = np.min([ self.dim_output_params, int((_dim_noise_input + self.dim_output_params) / 2) ]) self.prior_sigma = torch.scalar_tensor(1.0) self.sivi_net = Sequential( Linear(self.dim_noise_input, self.num_hidden), Tanh(), Linear(self.num_hidden, self.num_hidden), Tanh(), Linear(self.num_hidden, self.dim_output_params) ) # weight matrix x mu x logsigma + bias x mu x logsigma self.noise_dist = Normal(loc=torch.zeros((self.dim_noise_input, )), scale=torch.ones((self.dim_noise_input, ))) self.sivi_net.apply(self.init_weights) def forward(self, x): ''' :param x: 3 dimensional tensor of shape [N_MC, M_BatchSize, d_Input] :return: ''' assert x.dim( ) == 3, 'Input tensor not of shape [N_MC, BatchSize, Features]' # assert x.shape[0]==_base_noise.shape[0], 'Input and base_noise should have the same number of num_MC samples' # assert _base_noise.shape[1]==self.dim_noise_input num_MC = x.shape[0] batch_size = x.shape[1] noise = self.noise_dist.sample( (num_MC, )).to(next(self.sivi_net.parameters()).device) sivi_out = self.sivi_net(noise) sivi_w = sivi_out[:, :self.dim_input * self.dim_output * 2] sivi_b = sivi_out[:, self.dim_input * self.dim_output * 2:] w_mu, w_logsigma = torch.chunk(sivi_w, chunks=2, dim=-1) self.w_mu = w_mu.reshape((num_MC, self.dim_input, self.dim_output)) self.w_std = F.softplus( w_logsigma.reshape((num_MC, self.dim_input, self.dim_output))) self.b_mu, b_logsigma = torch.chunk(sivi_b, chunks=2, dim=-1) self.b_std = F.softplus(b_logsigma) dist_w = Normal(self.w_mu, self.w_std) dist_b = Normal(self.b_mu, self.b_std) # print(dist_w) self.sampled_w = dist_w.rsample() self.sampled_b = dist_b.rsample() # print(f"{x.shape=}") # print(f"{self.sampled_w.shape=}") # print(f"{self.sampled_b.shape=}") # out = torch.bmm(x, dist_w.rsample()) + dist_b.rsample().unsqueeze(1) out = torch.baddbmm(self.sampled_b.unsqueeze(-1), x, self.sampled_w) # out = torch.bmm(self.sampled_b, x, self.sampled_w) # exit() prior_w = torch.distributions.kl_divergence(dist_w, Normal(0, 1.)) prior_b = torch.distributions.kl_divergence(dist_b, Normal(0, 1.)) self.kl_div = prior_w.mean(dim=0).sum() #+ prior_b.mean(dim=0).sum() return out def sample_posterior_dist(self, _samples=2000, _plot=True, _str=''): with torch.no_grad(): sivi_noise = self.noise_dist.sample( sample_shape=(_samples, )).float() sivi_out = self.sivi_net(sivi_noise) sivi_w = sivi_out[:, :self.dim_input * self.dim_output * 2] sivi_b = sivi_out[:, self.dim_input * self.dim_output * 2:] w_mu, w_logsigma = torch.chunk(sivi_w, chunks=2, dim=-1) self.w_mu = w_mu.reshape( (_samples, self.dim_input, self.dim_output)) self.w_std = F.softplus( w_logsigma.reshape( (_samples, self.dim_input, self.dim_output))) self.b_mu, b_logsigma = torch.chunk(sivi_b, chunks=2, dim=-1) self.b_std = F.softplus(b_logsigma) dist_w = Normal(self.w_mu, self.w_std) dist_b = Normal(self.b_mu, self.b_std) w = dist_w.sample() b = dist_b.sample() if _plot: w = w.flatten(start_dim=1, end_dim=2).T if w.shape[0] > 3: ncols_nrows = int(w.shape[0]**0.5) # odd_offset = w.shape[0]%2 fig, axs = 
plt.subplots(nrows=ncols_nrows, ncols=ncols_nrows, figsize=(10, 10), sharex=True, sharey=True) else: fig, axs = plt.subplots(nrows=w.shape[0], ncols=1, figsize=(10, 10), sharex=True, sharey=True) fig.suptitle(_str) axs = axs.flat for i in range(w.shape[0] - 1): axs[i].hist(w[i].numpy(), bins=150, density=True, color='r', alpha=0.5) axs[i].set_ylim(0, 5) plt.show()
        X = torch.randn(N, C, H, W)
    elif input_type == "LINEAR":
        X = torch.randn(N, D)
    else:
        raise NotImplementedError

    if output_type == "CE":
        Y = torch.randint(high=2, size=(N,))
    else:
        raise NotImplementedError

    return (X, Y)


models = [
    Sequential(xtd(Linear(D, 2))),
    Sequential(xtd(Linear(D, 2)), xtd(ReLU())),
    Sequential(xtd(Linear(D, 2)), xtd(Sigmoid())),
    Sequential(xtd(Linear(D, 2)), xtd(Tanh())),
    Sequential(xtd(Linear(D, 2)), xtd(Dropout())),
]

img_models = [
    Sequential(xtd(Conv2d(3, 2, 2)), Flatten(), xtd(Linear(18, 2))),
    Sequential(xtd(MaxPool2d(3)), Flatten(), xtd(Linear(3, 2))),
    Sequential(xtd(AvgPool2d(3)), Flatten(), xtd(Linear(3, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(MaxPool2d(3)), Flatten(), xtd(Linear(2, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(AvgPool2d(3)), Flatten(), xtd(Linear(2, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(ReLU()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Sigmoid()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Tanh()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Dropout()), Flatten(), xtd(Linear(18, 2))),
class ConvSIVILayer(torch.nn.Module): def __init__(self, _dim_in=-1, _dim_out=-1, _dim_noise_input=10, _kernel_size=1, _stride=1): super().__init__() self.dim_in = _dim_in self.dim_out = _dim_out self.dim_noise_in = _dim_noise_input self.kernel_size = _kernel_size self.stride = _stride self.dim_params = _dim_in * _dim_out * _kernel_size * _kernel_size * 2 + _dim_out * 2 self.num_hidden = np.min( [self.dim_params, int((_dim_noise_input + self.dim_params) / 2)]) self.prior_sigma = torch.scalar_tensor(1.0) self.sivi_net = Sequential( Linear(self.dim_noise_in, self.num_hidden), ReLU(), Linear(self.num_hidden, self.num_hidden), ReLU(), Linear(self.num_hidden, self.dim_params )) # weight matrix x mu x logsigma + bias x mu x logsigma self.noise_dist = Normal(loc=torch.zeros((self.dim_noise_in, )), scale=torch.ones((self.dim_noise_in, ))) # self.sivi_net.apply(self.init_weights) def forward(self, x: torch.Tensor, _prior): assert x.dim( ) == 5, 'Input tensor not of shape [Num_MC, BatchSize, Features, height, width]' num_MC = x.shape[0] batch_size = x.shape[1] out = x.permute(1, 0, 2, 3, 4) out = out.flatten(1, 2).contiguous() noise = self.noise_dist.sample( (num_MC, )).to(next(self.sivi_net.parameters()).device) sivi_out = self.sivi_net(noise) w, b = sivi_out.split(self.dim_in * self.dim_out * self.kernel_size**2 * 2, dim=1) w_mu, w_logsigma = torch.chunk(w, chunks=2, dim=-1) b_mu, b_logsigma = torch.chunk(b, chunks=2, dim=-1) w_mu = w_mu.reshape((num_MC * self.dim_out, self.dim_in, self.kernel_size, self.kernel_size)) w_std = F.softplus( w_logsigma.reshape((num_MC * self.dim_out, self.dim_in, self.kernel_size, self.kernel_size))) b_mu = b_mu.flatten() b_std = F.softplus(b_logsigma).flatten() # print(b_mu.shape, b_logsigma.shape) # exit() dist_w = Normal(w_mu, w_std) dist_b = Normal(b_mu, b_std) w = dist_w.rsample() b = dist_b.rsample() out = F.conv2d( input=out, weight=w, bias=b, groups=num_MC, stride=1) # shape=[batch_size, num_MC*dim_out, height, width] print(out.shape) out = out.reshape(batch_size, num_MC, self.dim_out, out.shape[-2], out.shape[-1]) out = out.permute(1, 0, 2, 3, 4) # out = out.chunk(num_MC,1) # num_MC tuple of [ batch_size, dim_out, height, width] # out = torch.stack(out,dim=0) # shape = [num_MC, batch_size, dim_out, height, width] # print(out.shape) # exit() prior = _prior + torch.distributions.kl_divergence( Normal(torch.zeros_like(w_mu), self.prior_sigma * torch.ones_like(w_mu)), dist_w).mean(dim=0).sum() return out, None, prior
def __init__(self):
    super(ROOTNET, self).__init__()
    self.shape_encoder = ShapeEncoder()
    self.joint_encoder = JointEncoder()
    self.back_layers = Sequential(MLP([128 + 128, 200, 64]), Linear(64, 1))
def __init__(self, dim): super(NetGIN, self).__init__() num_features = 492 nn1_1_l = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim)) nn1_2_l = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim)) nn1_1_g = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim)) nn1_2_g = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim)) self.conv1_1_l = GINConv(nn1_1_l, train_eps=True) self.conv1_2_l = GINConv(nn1_2_l, train_eps=True) self.conv1_1_g = GINConv(nn1_1_g, train_eps=True) self.conv1_2_g = GINConv(nn1_2_g, train_eps=True) self.bn1 = torch.nn.BatchNorm1d(dim) self.mlp_1 = Sequential(Linear(4 * dim, dim), ReLU(), Linear(dim, dim)) nn2_1_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn2_2_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn2_1_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn2_2_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) self.conv2_1_l = GINConv(nn2_1_l, train_eps=True) self.conv2_2_l = GINConv(nn2_2_l, train_eps=True) self.conv2_1_g = GINConv(nn2_1_g, train_eps=True) self.conv2_2_g = GINConv(nn2_2_g, train_eps=True) self.bn2 = torch.nn.BatchNorm1d(dim) self.mlp_2 = Sequential(Linear(4 * dim, dim), ReLU(), Linear(dim, dim)) nn3_1_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn3_2_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn3_1_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn3_2_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) self.conv3_1_l = GINConv(nn3_1_l, train_eps=True) self.conv3_2_l = GINConv(nn3_2_l, train_eps=True) self.conv3_1_g = GINConv(nn3_1_g, train_eps=True) self.conv3_2_g = GINConv(nn3_2_g, train_eps=True) self.bn3 = torch.nn.BatchNorm1d(dim) self.mlp_3 = Sequential(Linear(4 * dim, dim), ReLU(), Linear(dim, dim)) nn4_1_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn4_2_l = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn4_1_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) nn4_2_g = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim)) self.conv4_1_l = GINConv(nn4_1_l, train_eps=True) self.conv4_2_l = GINConv(nn4_2_l, train_eps=True) self.conv4_1_g = GINConv(nn4_1_g, train_eps=True) self.conv4_2_g = GINConv(nn4_2_g, train_eps=True) self.bn4 = torch.nn.BatchNorm1d(dim) self.mlp_4 = Sequential(Linear(4 * dim, dim), ReLU(), Linear(dim, dim)) self.fc1 = Linear(4 * dim, dim) self.fc2 = Linear(dim, dim) self.fc3 = Linear(dim, dim) self.fc4 = Linear(dim, 1)
def load_model(name, size=4, hidden=32, mult=0.0001):
    activation = None
    if name == 'relu':
        activation = Relu
    elif name == 'linear':
        activation = Nne
    elif name == 'linear-lambda':
        activation = Nne
    elif name == 'linear-bn':
        activation = BN
    elif name == 'sigmoid':
        activation = Sigmoid
    elif name == 'relu-lambda':
        activation = Relu
    elif name == 'sigmoid-lambda':
        activation = Sigmoid
    elif name == 'relu-sigloss':
        activation = Relu
    elif name == 'sigmoid-sigloss':
        activation = Sigmoid
    elif name == 'bn-relu':
        activation = BNRelu
    elif name == 'relu-bn':
        activation = ReluBN
    elif name == 'bn-sigmoid':
        activation = BNSigmoid
    elif name == 'sigmoid-bn':
        activation = SigmoidBN
    else:
        raise Exception('Model "{}" not recognized.'.format(name))

    model = Sequential(
        Linear(size, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, hidden), activation(hidden),
        Linear(hidden, size),
    )

    # Shrink the weights to induce vanishing gradients.
    for layer in list(model.modules())[:10]:
        if isinstance(layer, Linear):
            size = layer.weight.size()
            torch.rand(size, out=layer.weight.data)
            layer.weight.data *= -mult

    return model
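# Usage sketch for load_model() above (illustrative only: it assumes the module's
# own activation wrappers such as Relu/Sigmoid plus torch.nn's Linear and MSELoss,
# and the toy data is made up). With the shrunken early weights, the layers closest
# to the input should show the smallest gradient norms.
import torch
from torch.nn import Linear, MSELoss

model = load_model('sigmoid')
x, y = torch.randn(16, 4), torch.randn(16, 4)
loss = MSELoss()(model(x), y)
loss.backward()

for i, layer in enumerate(m for m in model.modules() if isinstance(m, Linear)):
    print("linear {}: grad norm = {:.2e}".format(i, layer.weight.grad.norm().item()))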
def create_efficientnet(num_classes, is_test=False): # base_net = EfficientNet.from_pretrained('efficientnet-b5').extract_features # disable dropout layer base_net = efficientnet_b5(pretrained=True).features # version2 # print("*********************************************************************") # print(len(base_net)) source_layer_indexes = [ (40, Conv2d(in_channels=512, out_channels=256, kernel_size=1)), (len(base_net), Conv2d(in_channels=2048, out_channels=256, kernel_size=1)), ] extras = ModuleList([ Sequential( Conv2d(in_channels=2048, out_channels=256, kernel_size=1), ReLU(), DUAttn(256), SeperableConv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), DUAttn(128), SeperableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), DUAttn(128), SeperableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()), Sequential( Conv2d(in_channels=256, out_channels=128, kernel_size=1), ReLU(), DUAttn(128), SeperableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1), ReLU()) ]) regression_headers = ModuleList([ SeperableConv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=1), ]) classification_headers = ModuleList([ SeperableConv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), SeperableConv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1), Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=1), ]) return SSD(num_classes, base_net, source_layer_indexes, extras, classification_headers, regression_headers, is_test=is_test, config=config)
class Backbone_MobileNetV2(nn.Module): def __init__(self, useIntraGCN=True, useInterGCN=True, useRandomMatrix=False, useAllOneMatrix=False, useCov=False, useCluster=False, inverted_residual_setting=None, block=None): super(Backbone_MobileNetV2, self).__init__() if block is None: block = InvertedResidual if inverted_residual_setting is None: inverted_residual_setting = [ # t, c, n, s [1, 64, 1, 1], [6, 64, 2, 2], [6, 128, 3, 2], [6, 256, 4, 2], [6, 256, 3, 1], [6, 512, 3, 2], [6, 512, 1, 1], [6, 256, 4, 2], [6, 512, 3, 2] ] # only check the first element, assuming user knows t,c,n,s are required if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: raise ValueError("inverted_residual_setting should be non-empty " "or a 4-element list, got {}".format(inverted_residual_setting)) features = [] input_channel = 3 for index, (t, c, n, s) in enumerate(inverted_residual_setting): feature, input_channel, output_channel = [], input_channel if index != 7 else 128, c for i in range(n): stride = s if i == 0 else 1 feature.append(block(input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel features.append(feature) self.layer1 = Sequential(*(features[0]+features[1])) self.layer2 = Sequential(*(features[2])) self.layer3 = Sequential(*(features[3]+features[4])) self.layer4 = Sequential(*(features[5]+features[6])) self.output_layer = Sequential(nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)), nn.ReLU(), nn.AdaptiveAvgPool2d((1,1))) self.Crop_Net = nn.ModuleList([ Sequential( *features[7], *features[8], nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1)), nn.ReLU() ) for i in range(5) ]) self.fc = nn.Linear(64 + 320, 7) self.loc_fc = nn.Linear(320, 7) self.GAP = nn.AdaptiveAvgPool2d((1,1)) #self.GCN = GCN(64, 128, 64) self.GCN = GCNwithIntraAndInterMatrix(64, 128, 64, useIntraGCN=useIntraGCN, useInterGCN=useInterGCN, useRandomMatrix=useRandomMatrix, useAllOneMatrix=useAllOneMatrix) self.SourceMean = (CountMeanAndCovOfFeature(64+320) if useCov else CountMeanOfFeature(64+320)) if not useCluster else CountMeanOfFeatureInCluster(64+320) self.TargetMean = (CountMeanAndCovOfFeature(64+320) if useCov else CountMeanOfFeature(64+320)) if not useCluster else CountMeanOfFeatureInCluster(64+320) self.SourceBN = BatchNorm1d(64+320) self.TargetBN = BatchNorm1d(64+320) def classify(self, imgs, locations): featureMap1 = self.layer1(imgs) # Batch * 64 * 56 * 56 featureMap2 = self.layer2(featureMap1) # Batch * 128 * 28 * 28 featureMap3 = self.layer3(featureMap2) # Batch * 256 * 14 * 14 featureMap4 = self.layer4(featureMap3) # Batch * 512 * 7 * 7 global_feature = self.output_layer(featureMap4).view(imgs.size(0), -1) # Batch * 64 loc_feature = self.crop_featureMap(featureMap2, locations) # Batch * 320 feature = torch.cat((global_feature, loc_feature), 1) # Batch * (64+320) # GCN if self.training: feature = self.SourceMean(feature) feature = torch.cat( ( self.SourceBN(feature), self.TargetBN(self.TargetMean.getSample(feature.detach())) ), 1) # Batch * (64+320 + 64+320) feature = self.GCN(feature.view(feature.size(0), 12, -1)) # Batch * 12 * 64 feature = feature.view(feature.size(0), -1).narrow(1, 0, 64+320) # Batch * (64+320) loc_feature = feature.narrow(1, 64, 320) # Batch * 320 pred = self.fc(feature) # Batch * 7 loc_pred = self.loc_fc(loc_feature) # Batch * 7 return feature, pred, loc_pred def transfer(self, imgs, locations, domain='Target'): assert domain in ['Source', 'Target'], 
'Parameter domain should be Source or Target.' featureMap1 = self.layer1(imgs) # Batch * 64 * 56 * 56 featureMap2 = self.layer2(featureMap1) # Batch * 128 * 28 * 28 featureMap3 = self.layer3(featureMap2) # Batch * 256 * 14 * 14 featureMap4 = self.layer4(featureMap3) # Batch * 512 * 7 * 7 global_feature = self.output_layer(featureMap4).view(imgs.size(0), -1) # Batch * 64 loc_feature = self.crop_featureMap(featureMap2, locations) # Batch * 320 feature = torch.cat((global_feature, loc_feature), 1) # Batch * (64+320) if self.training: # Compute Feature SourceFeature = feature.narrow(0, 0, feature.size(0)//2) # Batch/2 * (64+320) TargetFeature = feature.narrow(0, feature.size(0)//2, feature.size(0)//2) # Batch/2 * (64+320) SourceFeature = self.SourceMean(SourceFeature) # Batch/2 * (64+320) TargetFeature = self.TargetMean(TargetFeature) # Batch/2 * (64+320) SourceFeature = self.SourceBN(SourceFeature) # Batch/2 * (64+320) TargetFeature = self.TargetBN(TargetFeature) # Batch/2 * (64+320) # Compute Mean SourceMean = self.SourceMean.getSample(TargetFeature.detach()) # Batch/2 * (64+320) TargetMean = self.TargetMean.getSample(SourceFeature.detach()) # Batch/2 * (64+320) SourceMean = self.SourceBN(SourceMean) # Batch/2 * (64+320) TargetMean = self.TargetBN(TargetMean) # Batch/2 * (64+320) # GCN feature = torch.cat( ( torch.cat((SourceFeature,TargetMean), 1), torch.cat((SourceMean,TargetFeature), 1) ), 0) # Batch * (64+320 + 64+320) feature = self.GCN(feature.view(feature.size(0), 12, -1)) # Batch * 12 * 64 feature = feature.view(feature.size(0), -1) # Batch * (64+320 + 64+320) feature = torch.cat( (feature.narrow(0, 0, feature.size(0)//2).narrow(1, 0, 64+320), \ feature.narrow(0, feature.size(0)//2, feature.size(0)//2).narrow(1, 64+320, 64+320) ), 0) # Batch * (64+320) loc_feature = feature.narrow(1, 64, 320) # Batch * 320 pred = self.fc(feature) # Batch * 7 loc_pred = self.loc_fc(loc_feature) # Batch * 7 return feature, pred, loc_pred # Inference if domain=='Source': SourceFeature = feature # Batch * (64+320) TargetMean = self.TargetMean.getSample(SourceFeature.detach()) # Batch * (64+320) SourceFeature = self.SourceBN(SourceFeature) # Batch * (64+320) TargetMean = self.TargetBN(TargetMean) # Batch * (64+320) feature = torch.cat((SourceFeature,TargetMean), 1) # Batch * (64+320 + 64+320) feature = self.GCN(feature.view(feature.size(0), 12, -1)) # Batch * 12 * 64 elif domain=='Target': TargetFeature = feature # Batch * (64+320) SourceMean = self.SourceMean.getSample(TargetFeature.detach()) # Batch * (64+320) SourceMean = self.SourceBN(SourceMean) # Batch * (64+320) TargetFeature = self.TargetBN(TargetFeature) # Batch * (64+320) feature = torch.cat((SourceMean,TargetFeature), 1) # Batch * (64+320 + 64+320) feature = self.GCN(feature.view(feature.size(0), 12, -1)) # Batch * 12 * 64 feature = feature.view(feature.size(0), -1) # Batch * (64+320 + 64+320) if domain=='Source': feature = feature.narrow(1, 0, 64+320) # Batch * (64+320) elif domain=='Target': feature = feature.narrow(1, 64+320, 64+320) # Batch * (64+320) loc_feature = feature.narrow(1, 64, 320) # Batch * 320 pred = self.fc(feature) # Batch * 7 loc_pred = self.loc_fc(loc_feature) # Batch * 7 return feature, pred, loc_pred def forward(self, imgs, locations, flag=True, domain='Target'): if flag: return self.classify(imgs, locations) return self.transfer(imgs, locations, domain) def output_num(self): return 64*6 def get_parameters(self): parameter_list = [ {"params":self.layer1.parameters(), "lr_mult":1, 'decay_mult':2}, \ 
{"params":self.layer2.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer3.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.layer4.parameters(), "lr_mult":1, 'decay_mult':2}, \ {"params":self.output_layer.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.fc.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.loc_fc.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.Crop_Net.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.GCN.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.SourceBN.parameters(), "lr_mult":10, 'decay_mult':2}, \ {"params":self.TargetBN.parameters(), "lr_mult":10, 'decay_mult':2}, \ ] return parameter_list def crop_featureMap(self, featureMap, locations): batch_size = featureMap.size(0) map_ch = featureMap.size(1) map_len = featureMap.size(2) grid_ch = map_ch grid_len = 7 # 14, 6, 4 feature_list = [] for i in range(5): grid_list = [] for j in range(batch_size): w_min = locations[j,i,0]-int(grid_len/2) w_max = locations[j,i,0]+int(grid_len/2) h_min = locations[j,i,1]-int(grid_len/2) h_max = locations[j,i,1]+int(grid_len/2) map_w_min = max(0, w_min) map_w_max = min(map_len-1, w_max) map_h_min = max(0, h_min) map_h_max = min(map_len-1, h_max) grid_w_min = max(0, 0-w_min) grid_w_max = grid_len + min(0, map_len-1-w_max) grid_h_min = max(0, 0-h_min) grid_h_max = grid_len + min(0, map_len-1-h_max) grid = torch.zeros(grid_ch, grid_len, grid_len) if featureMap.is_cuda: grid = grid.cuda() grid[:, grid_h_min:grid_h_max+1, grid_w_min:grid_w_max+1] = featureMap[j, :, map_h_min:map_h_max+1, map_w_min:map_w_max+1] grid_list.append(grid) feature = torch.stack(grid_list, dim=0) feature_list.append(feature) # feature list: 5 * [ batch_size * channel * 3 * 3 ] output_list = [] for i in range(5): output = self.Crop_Net[i](feature_list[i]) output = self.GAP(output) output_list.append(output) loc_feature = torch.stack(output_list, dim=1) # batch_size * 5 * 64 * 1 * 1 loc_feature = loc_feature.view(batch_size, -1) # batch_size * 320 return loc_feature
class PyTorch:
    def __init__(self, in_features, out_features, n_epochs, patience):
        self.in_features = in_features
        self.out_features = out_features
        self.n_epochs = n_epochs
        self.patience = patience

    def init_model(self):
        # Define the model.
        self.model = Sequential(
            weight_norm(Linear(self.in_features, 128)), ReLU(),
            weight_norm(Linear(128, 128)), ReLU(),
            weight_norm(Linear(128, self.out_features)))

        # Initialize the model.
        for t in self.model:
            if isinstance(t, Linear):
                nn.init.kaiming_normal_(t.weight_v)
                nn.init.kaiming_normal_(t.weight_g)
                nn.init.constant_(t.bias, 0)

        # Define the loss function.
        self.loss_func = MSELoss()

        # Define the optimizer.
        self.optimizer = Adam(self.model.parameters(), lr=1e-3)

    def fit(self, x_train, y_train, x_valid, y_valid):
        self.init_model()

        x_train_tensor = torch.as_tensor(x_train, dtype=torch.float32)
        y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32)
        x_valid_tensor = torch.as_tensor(x_valid, dtype=torch.float32)
        y_valid_tensor = torch.as_tensor(y_valid, dtype=torch.float32)

        min_loss = np.inf
        counter = 0
        for epoch in range(self.n_epochs):
            self.model.train()
            y_pred = self.model(x_train_tensor)
            loss = self.loss_func(y_pred, y_train_tensor)
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            epoch_loss = loss.item()
            # print('Epoch %5d / %5d. Loss = %.5f' % (epoch + 1, self.n_epochs, epoch_loss))

            # Calculate loss for the validation set.
            self.model.eval()
            with torch.no_grad():
                valid_loss = self.loss_func(self.model(x_valid_tensor), y_valid_tensor).item()
            # print('Epoch %5d / %5d. Validation loss = %.5f' % (epoch + 1, self.n_epochs, valid_loss))

            # Early stopping.
            if valid_loss < min_loss:
                min_loss = valid_loss
                counter = 0
            else:
                counter += 1
                # print('Early stopping: %i / %i' % (counter, self.patience))
            if counter >= self.patience:
                # print('Early stopping at epoch', epoch + 1)
                break

    def predict(self, x):
        x_tensor = torch.as_tensor(x, dtype=torch.float32)
        self.model.eval()
        with torch.no_grad():
            return self.model(x_tensor).numpy()
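# Usage sketch for the PyTorch wrapper above (shapes, hyperparameters and the
# synthetic regression target are illustrative only).
import numpy as np

rng = np.random.RandomState(0)
x_train = rng.rand(256, 8).astype(np.float32)
y_train = x_train.sum(axis=1, keepdims=True) + 0.01 * rng.randn(256, 1).astype(np.float32)
x_valid = rng.rand(64, 8).astype(np.float32)
y_valid = x_valid.sum(axis=1, keepdims=True)

reg = PyTorch(in_features=8, out_features=1, n_epochs=500, patience=20)
reg.fit(x_train, y_train, x_valid, y_valid)
preds = reg.predict(x_valid)
print("validation MSE:", float(((preds - y_valid) ** 2).mean()))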
def __init__(self, num_classes=10, weight_bit_width=None, act_bit_width=None, in_bit_width=None, in_ch=3, device="cpu"): super(CNV, self).__init__() self.device = device weight_quant_type = commons.get_quant_type(weight_bit_width) act_quant_type = commons.get_quant_type(act_bit_width) in_quant_type = commons.get_quant_type(in_bit_width) stats_op = commons.get_stats_op(weight_quant_type) self.conv_features = ModuleList() self.linear_features = ModuleList() self.conv_features.append( commons.get_act_quant(in_bit_width, in_quant_type)) # convolution layers for i, out_ch, is_pool_enabled in CNV_OUT_CH_POOL: self.conv_features.append( commons.get_quant_conv1d(in_ch=in_ch, out_ch=out_ch, bit_width=weight_bit_width, quant_type=weight_quant_type, stats_op=stats_op)) in_ch = out_ch self.conv_features.append(BatchNorm1d(in_ch)) if i == (NUM_CONV_LAYERS - 1): self.conv_features.append(Sequential()) else: self.conv_features.append( commons.get_act_quant(act_bit_width, act_quant_type)) if is_pool_enabled: self.conv_features.append(MaxPool1d(kernel_size=MAXPOOL_SIZE)) # fully connected layers self.linear_features.append( commons.get_act_quant(in_bit_width, in_quant_type)) for in_features, out_features in INTERMEDIATE_FC_FEATURES: self.linear_features.append( commons.get_quant_linear( in_features=in_features, out_features=out_features, per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING, bit_width=weight_bit_width, quant_type=weight_quant_type, stats_op=stats_op)) self.linear_features.append(BatchNorm1d(out_features)) self.linear_features.append( commons.get_act_quant(act_bit_width, act_quant_type)) # last layer self.fc = commons.get_quant_linear( in_features=LAST_FC_IN_FEATURES, out_features=num_classes, per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING, bit_width=weight_bit_width, quant_type=weight_quant_type, stats_op=stats_op)
import numpy as np
import torch
from torch.nn import Sequential
from torch.optim import SGD

# Upsample, RRC, Downsample, evm and _qpsk_constellation are provided by the surrounding project.
n_symbols = 32
indices = np.random.randint(low=0, high=4, size=n_symbols)
target_symbols = np.array([_qpsk_constellation[i] for i in indices])
target_symbols = np.stack((target_symbols.real, target_symbols.imag))
_target_symbols = torch.from_numpy(
    target_symbols[np.newaxis, np.newaxis, ::].astype(np.float32))

mean = torch.zeros((1, 1, 2, _target_symbols.shape[3]))
std = torch.ones((1, 1, 2, _target_symbols.shape[3]))
tx_symbols = torch.nn.Parameter(torch.normal(mean, std))

optimizer = SGD((tx_symbols, ), lr=10e-2, momentum=0.9)

tx_chain = Sequential(Upsample(i=8),
                      RRC(alpha=0.35, sps=8, filter_span=8, add_pad=True))
rx_chain = Sequential(RRC(alpha=0.35, sps=8, filter_span=8, add_pad=False),
                      Downsample(offset=8 * 8, d=8))

n_epochs = 151
for i in range(n_epochs):
    tx_signal = tx_chain(tx_symbols)
    rx_symbols = rx_chain(tx_signal)
    loss = torch.mean(evm(rx_symbols, _target_symbols))
    if i % 15 == 0:
        print("Loss @ epoch {}: {:.3f}".format(i, loss.item()))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
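# The evm() used in the loop above is defined elsewhere in the project. As a rough,
# hedged sketch of what an error-vector-magnitude loss over (batch, 1, 2, N) tensors
# (dim 2 = real/imag) could look like, one common definition is the per-symbol error
# magnitude normalised by the reference magnitude. evm_sketch is illustrative only and
# is not the project's implementation.
import torch


def evm_sketch(rx: torch.Tensor, ref: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    """Return a (batch, 1, N) tensor of per-symbol EVM values (illustrative only)."""
    err_power = (rx - ref).pow(2).sum(dim=2)          # |rx - ref|^2 over the I/Q axis
    ref_power = ref.pow(2).sum(dim=2).clamp_min(eps)  # |ref|^2, guarded against division by zero
    return torch.sqrt(err_power / ref_power)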
class KPConvPaper(UnwrappedUnetBasedModel): def __init__(self, option, model_type, dataset, modules): # Extract parameters from the dataset self._num_classes = dataset.num_classes self._weight_classes = dataset.weight_classes self._use_category = getattr(option, "use_category", False) if self._use_category: if not dataset.class_to_segments: raise ValueError( "The dataset needs to specify a class_to_segments property when using category information for segmentation" ) self._class_to_seg = dataset.class_to_segments self._num_categories = len(self._class_to_seg) log.info( "Using category information for the predictions with %i categories", self._num_categories) else: self._num_categories = 0 # Assemble encoder / decoder UnwrappedUnetBasedModel.__init__(self, option, model_type, dataset, modules) # Build final MLP last_mlp_opt = option.mlp_cls if self._use_category: self.FC_layer = MultiHeadClassifier( last_mlp_opt.nn[0], self._class_to_seg, dropout_proba=last_mlp_opt.dropout, bn_momentum=last_mlp_opt.bn_momentum, ) else: in_feat = last_mlp_opt.nn[0] + self._num_categories self.FC_layer = Sequential() for i in range(1, len(last_mlp_opt.nn)): self.FC_layer.add_module( str(i), Sequential(*[ Linear(in_feat, last_mlp_opt.nn[i], bias=False), FastBatchNorm1d(last_mlp_opt.nn[i], momentum=last_mlp_opt.bn_momentum), LeakyReLU(0.2), ]), ) in_feat = last_mlp_opt.nn[i] if last_mlp_opt.dropout: self.FC_layer.add_module("Dropout", Dropout(p=last_mlp_opt.dropout)) self.FC_layer.add_module( "Class", Lin(in_feat, self._num_classes, bias=False)) self.FC_layer.add_module("Softmax", nn.LogSoftmax(-1)) self.loss_names = ["loss_seg"] self.lambda_reg = self.get_from_opt(option, ["loss_weights", "lambda_reg"]) if self.lambda_reg: self.loss_names += ["loss_reg"] self.lambda_internal_losses = self.get_from_opt( option, ["loss_weights", "lambda_internal_losses"]) self.visual_names = ["data_visual"] def set_input(self, data, device): """Unpack input data from the dataloader and perform necessary pre-processing steps. Parameters: input: a dictionary that contains the data itself and its metadata information. """ data = data.to(device) data.x = add_ones(data.pos, data.x, True) if isinstance(data, MultiScaleBatch): self.pre_computed = data.multiscale self.upsample = data.upsample del data.upsample del data.multiscale else: self.upsample = None self.pre_computed = None self.input = data self.labels = data.y self.batch_idx = data.batch if self._use_category: self.category = data.category def forward(self, *args, **kwargs) -> Any: """Run forward pass. 
This will be called by both functions <optimize_parameters> and <test>."""
        stack_down = []

        data = self.input
        for i in range(len(self.down_modules) - 1):
            data = self.down_modules[i](data, precomputed=self.pre_computed)
            stack_down.append(data)
        data = self.down_modules[-1](data, precomputed=self.pre_computed)
        innermost = False

        if not isinstance(self.inner_modules[0], Identity):
            stack_down.append(data)
            data = self.inner_modules[0](data)
            innermost = True

        for i in range(len(self.up_modules)):
            if i == 0 and innermost:
                data = self.up_modules[i]((data, stack_down.pop()))
            else:
                data = self.up_modules[i]((data, stack_down.pop()), precomputed=self.upsample)

        last_feature = data.x
        if self._use_category:
            self.output = self.FC_layer(last_feature, self.category)
        else:
            self.output = self.FC_layer(last_feature)

        if self.labels is not None:
            self.compute_loss()

        self.data_visual = self.input
        self.data_visual.pred = torch.max(self.output, -1)[1]

        return self.output

    def compute_loss(self):
        if self._weight_classes is not None:
            self._weight_classes = self._weight_classes.to(self.output.device)

        self.loss = 0

        # Get regularization on weights
        if self.lambda_reg:
            self.loss_reg = self.get_regularization_loss(regularizer_type="l2", lambda_reg=self.lambda_reg)
            self.loss += self.loss_reg

        # Collect internal losses and register them on self for later tracking
        if self.lambda_internal_losses:
            self.loss += self.collect_internal_losses(lambda_weight=self.lambda_internal_losses)

        # Final cross-entropy (negative log-likelihood) loss
        self.loss_seg = F.nll_loss(self.output, self.labels, weight=self._weight_classes, ignore_index=IGNORE_LABEL)
        self.loss += self.loss_seg

    def backward(self):
        """Calculate gradients; called in every training iteration."""
        # self.loss has already been computed during <forward>, so only the gradients
        # of the network parameters w.r.t. that loss need to be derived here.
        self.loss.backward()
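# A small, self-contained illustration of the loss pattern compute_loss() relies on:
# the FC head ends in nn.LogSoftmax, so the segmentation loss is F.nll_loss over
# per-point log-probabilities, optionally with per-class weights and an ignored label.
# Shapes, the weight vector and the ignore index below are illustrative, not values
# taken from the original configuration.
import torch
import torch.nn.functional as F
from torch import nn

num_points, in_feat, num_classes = 8, 16, 4
head = nn.Sequential(nn.Linear(in_feat, num_classes, bias=False), nn.LogSoftmax(dim=-1))

features = torch.randn(num_points, in_feat)
labels = torch.tensor([0, 1, 2, 3, 0, 1, -1, 2])     # -1 marks points to ignore
class_weights = torch.tensor([1.0, 2.0, 1.0, 0.5])   # e.g. to counter class imbalance

log_probs = head(features)                            # (num_points, num_classes)
loss_seg = F.nll_loss(log_probs, labels, weight=class_weights, ignore_index=-1)
loss_seg.backward()                                   # gradients flow back into the head
print(float(loss_seg))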
def __init__(self): super(Net, self).__init__() #name self.name = "2layer" #optimizer self.lr = 0.001 self.optimizer_name = 'Adam' #data self.data_name = "Geometry" self.batch_size = 20 self.nr_points = 1000 self.nr_classes = 10 if self.data_name == 'ModelNet10' else 40 #train_info self.max_epochs = 1000 self.save_every = 100 #model self.k = 20 self.l = 7 self.filter_nr = 32 self.kernel_size = 7 self.dsc3d = DirectionalSplineConv3D(filter_nr=self.filter_nr, kernel_size=self.kernel_size, l=self.l, k=self.k) # self.in_size = self.filter_nr * 3 + 3 self.out_size = 64 layers = [] layers.append(Linear(self.in_size, 128)) layers.append(ReLU()) layers.append(Linear(128, self.out_size)) layers.append(ReLU()) dense3dnet = Sequential(*layers) self.dd = DirectionalDense3D(l=self.l, k=self.k, in_size=self.in_size, mlp=dense3dnet, out_size=self.out_size, with_pos=True) # self.in_size_2 = self.out_size * 3 self.out_size_2 = 256 layers2 = [] layers2.append(Linear(self.in_size_2, 128)) layers2.append(ReLU()) layers2.append(Linear(128, self.out_size_2)) layers2.append(ReLU()) dense3dnet2 = Sequential(*layers2) self.dd2 = DirectionalDense(l=self.l, k=self.k, in_size=self.in_size_2, mlp=dense3dnet2, out_size=self.out_size_2, with_pos=False) self.nn1 = torch.nn.Linear(self.out_size_2, 1024) self.nn2 = torch.nn.Linear(1024, self.nr_classes) self.sm = torch.nn.LogSoftmax(dim=1)