def __init__(self, width_mult=1.0, classifier=True, classifier_out_features=1000):
    super(MobileNetV3, self).__init__()
    conv_0_0 = nn.Conv2d(in_channels=3, out_channels=int(16 * width_mult),
                         kernel_size=(3, 3), stride=2, padding=3 // 2, bias=False)
    conv_0_1 = nn.BatchNorm2d(num_features=int(16 * width_mult))
    conv_0_2 = nn.Hardswish(inplace=True)
    conv_1_0 = Bottleneck(in_channels=int(16 * width_mult), out_channels=int(16 * width_mult),
                          dw_kernel_size=(3, 3), expand_size=16, squeeze_excite=True,
                          nonlinearity='relu', stride=2)
    conv_2_0 = Bottleneck(in_channels=int(16 * width_mult), out_channels=int(24 * width_mult),
                          dw_kernel_size=(3, 3), expand_size=72, squeeze_excite=False,
                          nonlinearity='relu', stride=2)
    conv_3_0 = Bottleneck(in_channels=int(24 * width_mult), out_channels=int(24 * width_mult),
                          dw_kernel_size=(3, 3), expand_size=88, squeeze_excite=False,
                          nonlinearity='relu', stride=1)
    conv_4_0 = Bottleneck(in_channels=int(24 * width_mult), out_channels=int(40 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=96, squeeze_excite=True,
                          nonlinearity='hardswish', stride=2)
    conv_5_0 = Bottleneck(in_channels=int(40 * width_mult), out_channels=int(40 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=240, squeeze_excite=True,
                          nonlinearity='hardswish', stride=1)
    conv_6_0 = Bottleneck(in_channels=int(40 * width_mult), out_channels=int(40 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=240, squeeze_excite=True,
                          nonlinearity='hardswish', stride=1)
    conv_7_0 = Bottleneck(in_channels=int(40 * width_mult), out_channels=int(48 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=120, squeeze_excite=True,
                          nonlinearity='hardswish', stride=1)
    conv_8_0 = Bottleneck(in_channels=int(48 * width_mult), out_channels=int(48 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=144, squeeze_excite=True,
                          nonlinearity='hardswish', stride=1)
    conv_9_0 = Bottleneck(in_channels=int(48 * width_mult), out_channels=int(96 * width_mult),
                          dw_kernel_size=(5, 5), expand_size=288, squeeze_excite=True,
                          nonlinearity='hardswish', stride=2)
    conv_10_0 = Bottleneck(in_channels=int(96 * width_mult), out_channels=int(96 * width_mult),
                           dw_kernel_size=(5, 5), expand_size=576, squeeze_excite=True,
                           nonlinearity='hardswish', stride=1)
    conv_11_0 = Bottleneck(in_channels=int(96 * width_mult), out_channels=int(96 * width_mult),
                           dw_kernel_size=(5, 5), expand_size=576, squeeze_excite=True,
                           nonlinearity='hardswish', stride=1)
    conv_12_0 = nn.Conv2d(in_channels=int(96 * width_mult), out_channels=int(576 * width_mult),
                          kernel_size=(1, 1), bias=False)
    # BatchNorm before the activation (conv -> BN -> Hardswish), matching the stem;
    # the original listed Hardswish before BatchNorm.
    conv_12_1 = nn.BatchNorm2d(num_features=int(576 * width_mult))
    conv_12_2 = nn.Hardswish(inplace=True)
    self.features = nn.Sequential(conv_0_0, conv_0_1, conv_0_2, conv_1_0, conv_2_0, conv_3_0,
                                  conv_4_0, conv_5_0, conv_6_0, conv_7_0, conv_8_0, conv_9_0,
                                  conv_10_0, conv_11_0, conv_12_0, conv_12_1, conv_12_2)
    if classifier:
        self.classifiers = nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=1),
            nn.Flatten(start_dim=1),
            nn.Linear(int(576 * width_mult), int(1024 * width_mult)),
            # nonlinearity between the two linears, per the MobileNetV3 paper;
            # the original stacked the linears with only dropout in between
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2),
            nn.Linear(int(1024 * width_mult), classifier_out_features))
    else:
        self.classifiers = nn.Identity()
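# A minimal shape sanity check (a sketch; assumes `torch` is imported and that the
# Bottleneck block used above is defined elsewhere with the usual MobileNetV3-Small strides):
model = MobileNetV3(width_mult=1.0, classifier=True, classifier_out_features=1000)
out = model.classifiers(model.features(torch.randn(1, 3, 224, 224)))
assert out.shape == (1, 1000)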
ACTIONS = 4               # number of valid actions
GAMMA = 0.99              # decay rate of past observations
OBSERVE = 3000.           # timesteps to observe before training
EXPLORE = 2000000.        # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001    # final value of epsilon
INITIAL_EPSILON = 0.1     # starting value of epsilon
REPLAY_MEMORY = 50000     # number of previous transitions to remember
BATCH = 128               # size of minibatch
LEARNING_RATE = 1e-6      # learning rate
weightfile = 'weight2048.pt'

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = nn.Sequential(
    nn.Conv2d(1, 1024, 2), nn.ReLU(),
    nn.Conv2d(1024, 1024, 2), nn.ReLU(),
    nn.Flatten(),
    nn.Linear(4096, 1024), nn.ReLU(),
    nn.Linear(1024, 256), nn.ReLU(),
    nn.Linear(256, 4),
    nn.ReLU()  # note: clamps Q-values to be non-negative (2048's rewards are non-negative)
).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE)

try:
    net.load_state_dict(torch.load(weightfile))
    net.eval()
    print("Successfully loaded:", weightfile)
except (FileNotFoundError, RuntimeError):  # narrowed from a bare `except:`
    print("Could not find old network weights")


def trainNetwork(model):
    game_state = game.GameState()
    # store the previous observations in replay memory
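# A minimal sketch (not from the source) of how INITIAL_EPSILON is typically
# annealed linearly to FINAL_EPSILON over EXPLORE frames after OBSERVE warm-up steps:
def epsilon_at(t):
    if t < OBSERVE:
        return INITIAL_EPSILON
    frac = min(1.0, (t - OBSERVE) / EXPLORE)
    return INITIAL_EPSILON + frac * (FINAL_EPSILON - INITIAL_EPSILON)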
def __init__(self, n_base_channels=16):
    super(PoseNet, self).__init__()
    self.vgg_part = nn.ModuleList([
        VggBlock(in_channels=3, out_channels=n_base_channels, kernel_size=7, padding=3, maxpool=False),                      # 16
        VggBlock(in_channels=n_base_channels, out_channels=n_base_channels, kernel_size=7, padding=3, maxpool=True),         # 16
        VggBlock(in_channels=n_base_channels, out_channels=n_base_channels * 2, kernel_size=5, padding=2, maxpool=False),    # 32
        VggBlock(in_channels=n_base_channels * 2, out_channels=n_base_channels * 2, kernel_size=5, padding=2, maxpool=True),     # 32
        VggBlock(in_channels=n_base_channels * 2, out_channels=n_base_channels * 4, kernel_size=3, padding=1, maxpool=False),    # 64
        VggBlock(in_channels=n_base_channels * 4, out_channels=n_base_channels * 4, kernel_size=3, padding=1, maxpool=True),     # 64
        VggBlock(in_channels=n_base_channels * 4, out_channels=n_base_channels * 8, kernel_size=3, padding=1, maxpool=False),    # 128
        VggBlock(in_channels=n_base_channels * 8, out_channels=n_base_channels * 8, kernel_size=3, padding=1, maxpool=True),     # 128
        VggBlock(in_channels=n_base_channels * 8, out_channels=n_base_channels * 16, kernel_size=3, padding=1, maxpool=False),   # 256
        VggBlock(in_channels=n_base_channels * 16, out_channels=n_base_channels * 16, kernel_size=3, padding=1, maxpool=True),   # 256
        VggBlock(in_channels=n_base_channels * 16, out_channels=n_base_channels * 16, kernel_size=3, padding=1, maxpool=False),  # 256
        VggBlock(in_channels=n_base_channels * 16, out_channels=n_base_channels * 16, kernel_size=3, padding=1, maxpool=True),   # 256
        VggBlock(in_channels=n_base_channels * 16, out_channels=n_base_channels * 32, kernel_size=3, padding=1, maxpool=False),  # 512
        VggBlock(in_channels=n_base_channels * 32, out_channels=n_base_channels * 32, kernel_size=3, padding=1, maxpool=True),   # 512
    ])
    self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
    self.flatten = nn.Flatten()
    self.rot1 = nn.Linear(n_base_channels * 32 * 7 * 7, 512)
    self.rot2 = nn.Linear(512, 512)
    self.rot3 = nn.Linear(512, 3)
    self.transl1 = nn.Linear(n_base_channels * 32 * 7 * 7, 512)
    self.transl2 = nn.Linear(512, 512)
    self.transl3 = nn.Linear(512, 3)
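# A minimal forward sketch consistent with the layers above (an assumption, not
# necessarily the original method; assumes `import torch.nn.functional as F`):
def forward(self, x):
    for block in self.vgg_part:
        x = block(x)
    x = self.flatten(self.avgpool(x))
    rot = self.rot3(F.relu(self.rot2(F.relu(self.rot1(x)))))
    transl = self.transl3(F.relu(self.transl2(F.relu(self.transl1(x)))))
    return rot, transl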
def graft_net(args):
    global logger_net
    logger_net = Logger('log/graft_net_{}_{}_{}perclass.txt'.format(
        args.dataset, time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()),
        args.num_per_class))

    # ---------------------- Datasets ----------------------
    if args.dataset == 'CIFAR10':
        train_loader = DataLoader(
            CIFAR10Few(args.data_path, args.num_per_class,
                       transform=get_transformer(args.dataset, cropsize=32,
                                                 crop_padding=4, hflip=True)),
            batch_size=args.batch_size, num_workers=4, shuffle=True)
    elif args.dataset == 'CIFAR100':
        train_loader = DataLoader(
            CIFAR100Few(args.data_path, args.num_per_class,
                        transform=get_transformer(args.dataset, cropsize=32,
                                                  crop_padding=4, hflip=True)),
            batch_size=args.batch_size, num_workers=4, shuffle=True)
    test_loader = DataLoader(get_dataset(args, train_flag=False),
                             batch_size=args.batch_size, num_workers=4,
                             shuffle=False)

    cfg_t = cfgs['vgg16']
    cfg_s = cfgs['vgg16-graft']
    cfg_blocks_t = split_block(cfg_t)
    cfg_blocks_s = split_block(cfg_s)
    num_block = len(block_graft_ids)

    # ---------------------- Adaption ----------------------
    adaptions_t2s = [
        nn.Conv2d(cfg_blocks_t[block_graft_ids[i]][-2],
                  cfg_blocks_s[block_graft_ids[i]][-2], kernel_size=1).cuda()
        for i in range(0, num_block - 1)
    ]
    adaptions_s2t = [
        nn.Conv2d(cfg_blocks_s[block_graft_ids[i]][-2],
                  cfg_blocks_t[block_graft_ids[i]][-2], kernel_size=1).cuda()
        for i in range(0, num_block - 1)
    ]

    # ---------------------- Teacher ----------------------
    teacher = vgg_stock(cfg_t, args.dataset, args.num_class)
    params_t = torch.load(args.ckpt)
    teacher.cuda().eval()
    teacher.load_state_dict(params_t)

    # ---------------------- Blocks ----------------------
    params_s = {}
    for key in params_t.keys():
        key_split = key.split('.')
        if key_split[0] == 'features' and key_split[1] in ['0', '1', '2']:
            params_s[key] = params_t[key]

    student = vgg_bw(cfg_s, True, args.dataset, args.num_class)
    student.cuda().train()
    student.load_state_dict(params_s, strict=False)

    blocks_s = [student.features[i] for i in block_graft_ids[:-1]]
    blocks_s += [nn.Sequential(nn.Flatten().cuda(), student.classifier)]

    blocks = []
    for block_id in range(num_block):
        blocks.append(
            warp_block(blocks_s, block_id, adaptions_t2s, adaptions_s2t).cuda())

    params = torch.load('ckpt/student/vgg16-student-graft-block-{}-{}perclass.pth'.format(
        args.dataset, args.num_per_class))
    for block_id in range(num_block):
        blocks[block_id].load_state_dict(params['block-{}'.format(block_id)])

    for i in range(num_block - 1):
        block = nn.Sequential(*blocks[:(i + 2)])
        optimizer = optim.Adam(block.parameters(), lr=0.0001)
        scion_len = sum(blocks_s_len[:(i + 2)])

        accuracy_best_block = 0.0
        params_best_save = None
        for epoch in range(args.num_epoch[i]):
            if logger_net:
                logger_net.write('Epoch', epoch)
            loss_value = train_epoch(args, teacher, block, scion_len,
                                     train_loader, optimizer)
            accuracy = test(teacher, test_loader)
            if accuracy > accuracy_best_block:
                accuracy_best_block = accuracy
                params_tmp = block.cpu().state_dict()
                params_best_save = params_tmp.copy()
                block.cuda()
            if epoch == (args.num_epoch[i] - 1) and i == (num_block - 2):
                block.load_state_dict(params_best_save)
            if logger_net:
                logger_net.write('Accuracy-length-{}'.format(scion_len), accuracy)
        if logger_net:
            logger_net.write('Student Best Accuracy', accuracy_best_block)

    with open('ckpt/student/vgg16-student-graft-net-{}-{}perclass.pth'.format(
            args.dataset, args.num_per_class), 'wb') as f:
        torch.save(block.state_dict(), f)

    if logger_net:
        logger_net.close()

    return accuracy_best_block
def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
    super(Classify, self).__init__()
    self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
    self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
    self.flat = nn.Flatten()
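# A minimal forward sketch for this head (an assumption; the original may also
# accept a list of feature maps and concatenate them before the conv):
def forward(self, x):
    return self.flat(self.conv(self.aap(x)))  # x(b,c1,h,w) -> x(b,c2)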
def get_classifier(self):
    return nn.Sequential(self.avgpool, nn.Flatten(start_dim=1), self.fc)
def forward(self, x):
    x = x.flatten(1)  # was `nn.Flatten(x)`, which constructs a module instead of flattening
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    return x
#!/usr/bin/env python
# coding: utf-8

# In[1]:

# 4-3 Concise Implementation of Multilayer Perceptrons
import torch
from torch import nn
from pytorch_d2l.d2l import torch as d2l

# Model: a hidden layer with 256 units and a ReLU activation, then the output layer.
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))


def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)


net.apply(init_weights)

# Parameters, hyperparameters
batch_size, lr, num_epochs = 256, 0.1, 10
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
def __init__(self, lr, inputDim, outputDim, outputDimS, outputDimH,
             decayRate=0.99, momentum=0.0):
    # outputDimS, outputDimH, decayRate and momentum were referenced below but
    # missing from the original signature; the defaults here are assumptions.
    super(forwardNet, self).__init__()
    modelStr = [
        nn.Linear(inputDim, 300), nn.BatchNorm1d(300), nn.LeakyReLU(),
        nn.Linear(300, 300), nn.BatchNorm1d(300), nn.LeakyReLU(), nn.Dropout(p=0.5),
        nn.Linear(300, 300), nn.BatchNorm1d(300), nn.LeakyReLU(), nn.Dropout(p=0.5),
        nn.Linear(300, 300), nn.BatchNorm1d(300), nn.LeakyReLU(), nn.Dropout(p=0.5),
        nn.Linear(300, 300), nn.BatchNorm1d(300), nn.LeakyReLU(), nn.Dropout(p=0.5),
        nn.Linear(300, outputDimS), nn.Sigmoid()
    ]
    modelHole = [
        nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(32, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.MaxPool2d(1, 2),  # output: 128x5x10
        nn.Conv2d(128, 200, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.Conv2d(200, 200, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.MaxPool2d(1, 2),  # output: 200x5x5
        nn.Conv2d(200, 250, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        nn.Conv2d(250, 250, kernel_size=3, stride=1, padding=1), nn.ReLU(),
        # nn.MaxPool2d(2, 2),  # output: 250 x 4 x 4
        nn.Flatten(),
        nn.Linear(2500, 1000), nn.ReLU(),
        nn.Linear(1000, 500), nn.ReLU(), nn.Dropout(0.2),
        nn.Linear(500, outputDimH)
    ]
    outputDecoder = [
        nn.Linear(outputDimH + outputDimS, 100), nn.BatchNorm1d(100), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(100, 200), nn.BatchNorm1d(200), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(200, 300), nn.BatchNorm1d(300), nn.LeakyReLU(), nn.Dropout(p=0.5),
        nn.Linear(300, outputDim), nn.ReLU()
    ]
    self.modelStr = nn.Sequential(*modelStr)
    self.modelHole = nn.Sequential(*modelHole)
    self.outputDecoder = nn.Sequential(*outputDecoder)
    self.lr = lr
    self.decayRate = decayRate
    self.momentum = momentum
    # note: RMSprop's third positional argument is `alpha` (the smoothing
    # constant), so decayRate is consumed as alpha here
    self.optim = torch.optim.RMSprop(
        list(self.modelStr.parameters()) + list(self.modelHole.parameters()) +
        list(self.outputDecoder.parameters()), self.lr, self.decayRate)
    self.criterion = nn.MSELoss()
    self.accuracy = nn.L1Loss()
def __init__(self, input_size, num_classes, l0_strength=7e-6, l2_strength=0,
             droprate_init=0.5, learn_weight=True, random_weight=True,
             decay_mean=False, deterministic=False, use_batch_norm=True,
             cnn_out_channels=(64, 64), kernel_size=5, linear_units=1000,
             maxpool_stride=2, bn_track_running_stats=True):
    feature_map_sidelength = (
        ((input_size[1] - kernel_size + 1) / maxpool_stride)
        - kernel_size + 1) / maxpool_stride
    assert (feature_map_sidelength == int(feature_map_sidelength))
    feature_map_sidelength = int(feature_map_sidelength)

    modules = [
        # -------------
        # Conv Block
        # -------------
        ("cnn1", VDropConv2d(input_size[0], cnn_out_channels[0], kernel_size)),
        ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
    ]
    if use_batch_norm:
        modules.append(
            ("cnn1_bn", nn.BatchNorm2d(cnn_out_channels[0], affine=False,
                                       track_running_stats=bn_track_running_stats)))
    modules += [
        ("cnn1_relu", nn.ReLU(inplace=True)),
        # -------------
        # Conv Block
        # -------------
        ("cnn2", VDropConv2d(cnn_out_channels[0], cnn_out_channels[1], kernel_size)),
        ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
    ]
    if use_batch_norm:
        modules.append(
            ("cnn2_bn", nn.BatchNorm2d(cnn_out_channels[1], affine=False,
                                       track_running_stats=bn_track_running_stats)))
    modules += [
        ("cnn2_relu", nn.ReLU(inplace=True)),
        ("flatten", nn.Flatten()),
        # -------------
        # Linear Block
        # -------------
        ("fc1", VDropLinear((feature_map_sidelength**2) * cnn_out_channels[1],
                            linear_units)),
    ]
    if use_batch_norm:
        modules.append(
            ("fc1_bn", nn.BatchNorm1d(linear_units, affine=False,
                                      track_running_stats=bn_track_running_stats)))
    modules += [
        ("fc1_relu", nn.ReLU(inplace=True)),
        # -------------
        # Output Layer
        # -------------
        ("fc2", VDropLinear(linear_units, num_classes)),
    ]
    super().__init__(OrderedDict(modules))
def __init__(self, embedding_dim, dropout_rate, num_class, vocab_size=0,
             seq_length=0, num_blocks=3, num_filters=250, kernel_sizes=3,
             embedding_matrix=None, requires_grads=False):
    '''
    Initialization.
    ⚠️ By default, the embedding is initialized by loading a pretrained embedding look-up table!
    :param embedding_dim: embedding dim
    :param num_class: the number of labels
    :param dropout_rate: dropout rate
    :param vocab_size: vocabulary size
    :param seq_length: max length of sequence after padding
    :param num_blocks: the number of blocks in the DPCNN model
    :param num_filters: the number of filters of the conv kernel
    :param kernel_sizes: conv kernel size
    :param embedding_matrix: pretrained embedding look-up table
    :param requires_grads: whether to update the embedding gradients in the training stage
    '''
    super(DPCNN, self).__init__()
    self.vocab_size = vocab_size
    self.seq_length = seq_length
    self.embedding_dim = embedding_dim
    self.num_filters = num_filters
    self.dropout_rate = dropout_rate
    self.num_blocks = num_blocks
    self.num_class = num_class
    self.kernel_sizes = kernel_sizes
    self.embedding_matrix = embedding_matrix
    self.requires_grads = requires_grads

    # embedding
    if self.embedding_matrix is None:
        self.embedding = nn.Embedding(num_embeddings=self.vocab_size,
                                      embedding_dim=self.embedding_dim,
                                      padding_idx=0)
    else:
        # freeze is the inverse of requires_grads; the original passed
        # requires_grads directly, which froze the embedding exactly when
        # updates were requested
        self.embedding = nn.Embedding.from_pretrained(self.embedding_matrix,
                                                      freeze=not self.requires_grads)
        self.vocab_size = self.embedding_matrix.shape[0]

    # text region embedding
    self.region_embedding = nn.Conv2d(in_channels=1, out_channels=self.num_filters,
                                      stride=1,
                                      kernel_size=(self.kernel_sizes, self.embedding_dim))

    # two conv
    self.conv2d1 = nn.Conv2d(in_channels=self.num_filters, out_channels=self.num_filters,
                             stride=2, kernel_size=(self.kernel_sizes, 1), padding=0)
    self.conv2d2 = nn.Conv2d(in_channels=self.num_filters, out_channels=self.num_filters,
                             stride=2, kernel_size=(self.kernel_sizes, 1), padding=0)
    self.padding1 = nn.ZeroPad2d((0, 0, (self.kernel_sizes - 1) // 2,
                                  (self.kernel_sizes - 1) - ((self.kernel_sizes - 1) // 2)))  # top bottom
    self.padding2 = nn.ZeroPad2d((0, 0, 0, self.kernel_sizes - 2))  # bottom

    # one block
    self.block_max_pool = nn.MaxPool2d(kernel_size=(self.kernel_sizes, 1), stride=2)
    self.conv2d3 = nn.Conv2d(in_channels=self.num_filters, out_channels=self.num_filters,
                             stride=1, kernel_size=(self.kernel_sizes, 1), padding=0)
    self.conv2d4 = nn.Conv2d(in_channels=self.num_filters, out_channels=self.num_filters,
                             stride=1, kernel_size=(self.kernel_sizes, 1), padding=0)

    # final pool and softmax
    self.flatten = nn.Flatten()
    self.dropout = nn.Dropout(p=self.dropout_rate)
    self.classifier = nn.Linear(in_features=self.num_filters, out_features=self.num_class)
def __init__(
    self,
    input_shape=(1, 32, 32),
    cnn_out_channels=(64, 64),
    num_classes=12,
    use_batch_norm=True,
    z_logvar_init=-10,
    vdrop_data_class=VDropCentralData,
    kernel_size=5,
    linear_units=1000,
    maxpool_stride=2,
    bn_track_running_stats=True,
    conv_target_density=(1.0, 1.0),
    linear_target_density=(1.0, 1.0),
):
    feature_map_sidelength = (
        ((input_shape[1] - kernel_size + 1) / maxpool_stride)
        - kernel_size + 1) / maxpool_stride
    vdrop_data = vdrop_data_class(z_logvar_init=z_logvar_init)
    assert feature_map_sidelength == int(feature_map_sidelength)
    feature_map_sidelength = int(feature_map_sidelength)

    modules = [
        # -------------
        # Conv Block
        # -------------
        (
            "vdrop_cnn1",
            prunable_vdrop_conv2d(
                input_shape[0],
                cnn_out_channels[0],
                kernel_size,
                vdrop_data,
                target_density=conv_target_density[0],
            ),
        ),
        ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
    ]
    if use_batch_norm:
        modules.append((
            "cnn1_bn",
            nn.BatchNorm2d(
                cnn_out_channels[0],
                affine=False,
                track_running_stats=bn_track_running_stats,
            ),
        ))
    modules += [
        ("cnn1_relu", nn.ReLU(inplace=True)),
        # -------------
        # Conv Block
        # -------------
        (
            "vdrop_cnn2",
            prunable_vdrop_conv2d(
                cnn_out_channels[0],
                cnn_out_channels[1],
                kernel_size,
                vdrop_data,
                target_density=conv_target_density[1],
            ),
        ),
        ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
    ]
    if use_batch_norm:
        modules.append((
            "cnn2_bn",
            nn.BatchNorm2d(
                cnn_out_channels[1],
                affine=False,
                track_running_stats=bn_track_running_stats,
            ),
        ))
    modules += [
        ("cnn2_relu", nn.ReLU(inplace=True)),
        ("flatten", nn.Flatten()),
        # -------------
        # Linear Block
        # -------------
        (
            "vdrop_fc1",
            prunable_vdrop_linear(
                (feature_map_sidelength**2) * cnn_out_channels[1],
                linear_units,
                vdrop_data,
                target_density=linear_target_density[0],
            ),
        ),
    ]
    if use_batch_norm:
        modules.append((
            "fc1_bn",
            nn.BatchNorm1d(
                linear_units,
                affine=False,
                track_running_stats=bn_track_running_stats,
            ),
        ))
    modules += [
        ("fc1_relu", nn.ReLU(inplace=True)),
        # -------------
        # Output Layer
        # -------------
        (
            "vdrop_fc2",
            prunable_vdrop_linear(
                linear_units,
                num_classes,
                vdrop_data,
                target_density=linear_target_density[1],
            ),
        ),
    ]
    super().__init__(OrderedDict(modules))
    vdrop_data.finalize()
    self.vdrop_data = vdrop_data
def __init__(self):
    super(Convolution3D, self).__init__()
    self.Convolution1 = nn.Conv3d(in_channels=3, out_channels=64, kernel_size=(3, 3, 3),
                                  stride=1, padding=(1, 0, 0), dilation=1, groups=1,
                                  bias=True, padding_mode='zeros')
    self.BatchN1 = nn.BatchNorm3d(num_features=64, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.MaxPooling1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=(0, 0, 0),
                                    dilation=1, return_indices=False, ceil_mode=False)
    self.MaxPooling2 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=(0, 0, 0),
                                    dilation=1, return_indices=False, ceil_mode=False)
    self.Convolution2 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3,
                                  stride=1, padding=(1, 0, 0))
    self.BatchN2 = nn.BatchNorm3d(num_features=64, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.ResConvolution1 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3,
                                     stride=1, padding=(1, 1, 1))
    self.averagePool1 = nn.AvgPool3d(kernel_size=1, stride=1, padding=(0, 0, 0))
    self.ResBatchN1 = nn.BatchNorm3d(num_features=64, eps=1e-05, momentum=0.1,
                                     affine=True, track_running_stats=True)
    self.Convolution3 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3,
                                  stride=1, padding=(1, 0, 0))
    self.BatchN3 = nn.BatchNorm3d(num_features=64, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.ResConvolution2 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3,
                                     stride=1, padding=(1, 1, 1))
    self.averagePool2 = nn.AvgPool3d(kernel_size=1, stride=1, padding=(0, 0, 0))
    self.ResBatchN2 = nn.BatchNorm3d(num_features=64, eps=1e-05, momentum=0.1,
                                     affine=True, track_running_stats=True)
    self.Convolution4 = nn.Conv3d(in_channels=64, out_channels=8, kernel_size=(3, 3, 3),
                                  stride=1, padding=(1, 0, 0), dilation=1, groups=1,
                                  bias=True, padding_mode='zeros')
    self.BatchN4 = nn.BatchNorm3d(num_features=8, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.Convolution5 = nn.Conv3d(in_channels=8, out_channels=8, kernel_size=(3, 3, 3),
                                  stride=1, padding=(1, 0, 0), dilation=1, groups=1,
                                  bias=True, padding_mode='zeros')
    self.BatchN5 = nn.BatchNorm3d(num_features=8, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.Convolution6 = nn.Conv3d(in_channels=8, out_channels=8, kernel_size=(3, 3, 3),
                                  stride=1, padding=(1, 0, 0), dilation=1, groups=1,
                                  bias=True, padding_mode='zeros')
    self.BatchN6 = nn.BatchNorm3d(num_features=8, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)
    self.Flatten1 = nn.Flatten(start_dim=2)
    self.LSTM1 = nn.LSTM(input_size=10488, hidden_size=64, num_layers=1, batch_first=True)
    self.LSTM2 = nn.LSTM(input_size=64, hidden_size=16, num_layers=1, batch_first=True)
    self.fc1 = nn.Linear(in_features=16, out_features=512, bias=True)
    self.fc2 = nn.Linear(in_features=512, out_features=128, bias=True)
    self.fc3 = nn.Linear(in_features=128, out_features=64, bias=True)
    self.fc4 = nn.Linear(in_features=64, out_features=16, bias=True)
    self.fc5 = nn.Linear(in_features=16, out_features=1, bias=True)
                        nn.MaxPool2d(kernel_size=2, stride=2),
                        nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
                        nn.MaxPool2d(kernel_size=2, stride=2), nn.Flatten(),
                        nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
                        nn.Linear(120, 84), nn.Sigmoid(),
                        nn.Linear(84, 10))

loss 0.203, train acc 0.923, test acc 0.897
50960.2 examples/sec on cuda:0
"""
net = torch.nn.Sequential(Reshape(),
                          nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.ReLU(),
                          nn.MaxPool2d(kernel_size=2, stride=2),
                          nn.Conv2d(6, 16, kernel_size=5), nn.ReLU(),
                          nn.MaxPool2d(kernel_size=2, stride=2), nn.Flatten(),
                          nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
                          nn.Linear(120, 84), nn.Sigmoid(),
                          nn.Linear(84, 10))
"""
Original LeNet
net = torch.nn.Sequential(
    Reshape(),
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))
"""
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
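# A minimal sketch, in the d2l style, that walks X through each layer and
# prints the output shape, which is the usual purpose of this dummy input:
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)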
def __init__(self, device, in_nc=3, kernel_size=4, nf=48, im_size=256):
    """
    :param in_nc: number of input channels
    :param kernel_size: kernel size
    :param nf: number of convolution filters after the first layer
    """
    super(InpaintingDiscriminator, self).__init__()
    self.patch_dis = nn.ModuleList([
        SNBlock(in_channels=in_nc, out_channels=nf, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf, out_channels=nf * 2, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 2, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 2, out_channels=nf * 2, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 4, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 2, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 8, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 4, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 16, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 4, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 32, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        nn.Flatten(),
        nn.Linear(nf * 4 * (im_size // 64) * (im_size // 64), 512)
    ])
    self.flat = nn.Flatten()
    self.edge_dis = nn.Sequential(
        SobelFilter(device, in_nc=3, filter_c=1, padding=get_pad(256, 3, 1), stride=1),
        SNBlock(in_channels=2, out_channels=nf // 2, kernel_size=kernel_size, stride=4,
                padding=get_pad(im_size, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf // 2, out_channels=nf, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 4, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf, out_channels=nf * 2, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 8, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 2, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 16, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 4, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 32, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        SNBlock(in_channels=nf * 4, out_channels=nf * 4, kernel_size=kernel_size, stride=2,
                padding=get_pad(im_size // 64, kernel_size, 2), norm='in', activation='relu', pad_type='zero'),
        nn.Flatten(),
        nn.Linear(nf * 4 * (im_size // 128) * (im_size // 128), 512))
    self.out = nn.Sequential(_activation('relu'), nn.Linear(1024, 1))
data, labels = next(train_it)
# imshow(torchvision.utils.make_grid(data))

# Resnet
# resnet = models.resnet152(pretrained=True)
# num_ftrs_resnet = resnet.fc.in_features  # Number of features before FC
# modules = list(resnet.children())[:-1]
# resnet = nn.Sequential(*modules)
# for p in resnet.parameters():
#     p.requires_grad = False
resnet = models.resnet152(pretrained=True)
num_ftrs_resnet = resnet.fc.in_features
for param in resnet.parameters():
    param.requires_grad = False
resnet.fc = nn.Flatten()

# Vgg16
vgg16 = models.vgg16(pretrained=True)
vgg16 = vgg16.features
for p in vgg16.parameters():
    p.requires_grad = False
num_ftrs_vgg16 = 512 * 7 * 7

# Choose extractor
feature_extractor = resnet
num_ftrs = num_ftrs_resnet
feature_extractor = feature_extractor.to(device)
# summary(feature_extractor, input_size=(TRAIN_BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE))
import torch
import torch.nn as nn
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from poutyne import Model

# Instantiate the MNIST dataset
train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor())
test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor())
train_dataset, valid_dataset = random_split(
    train_valid_dataset, [50_000, 10_000],
    generator=torch.Generator().manual_seed(42))

# Select CUDA device if available
cuda_device = 0
device = torch.device('cuda:%d' % cuda_device if torch.cuda.is_available() else 'cpu')

# Define the network
network = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

epochs = 5

# Define the Model and train
model = Model(network, 'sgd', 'cross_entropy',
              batch_metrics=['accuracy'], epoch_metrics=['f1'],
              device=device)
model.fit_dataset(train_dataset, valid_dataset, epochs=epochs)
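# test_dataset is created above but never used; a natural follow-up (a sketch,
# using Poutyne's dataset evaluation) would be:
results = model.evaluate_dataset(test_dataset)  # returns the test loss and metrics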
def __init__(self, channels_prev: int, num_classes: int):
    super().__init__()
    self.pool = nn.AvgPool2d(7)
    self.flat = nn.Flatten()
    self.fc = nn.Linear(channels_prev, num_classes)
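# A minimal forward sketch consistent with these layers (an assumption, not
# necessarily the original method):
def forward(self, x):
    return self.fc(self.flat(self.pool(x)))  # (b, c, 7, 7) -> (b, num_classes)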
def build_resnet(layers: List[int],
                 num_classes: int = 1000,
                 inplace: bool = False
                 ) -> nn.Sequential:
    """Builds a ResNet as a simple sequential model.

    Note:
        The implementation is copied from :mod:`torchvision.models.resnet`.

    """
    inplanes = 64

    def make_layer(planes: int,
                   blocks: int,
                   stride: int = 1,
                   inplace: bool = False,
                   ) -> nn.Sequential:
        nonlocal inplanes

        downsample = None
        if stride != 1 or inplanes != planes * 4:
            downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes * 4,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * 4),
            )

        layers = []
        layers.append(bottleneck(inplanes, planes, stride, downsample, inplace))
        inplanes = planes * 4
        for _ in range(1, blocks):
            layers.append(bottleneck(inplanes, planes, inplace=inplace))

        return nn.Sequential(*layers)

    # Build ResNet as a sequential model.
    model = nn.Sequential(OrderedDict([
        ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
        ('bn1', nn.BatchNorm2d(64)),
        ('relu', nn.ReLU()),
        ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ('layer1', make_layer(64, layers[0], inplace=inplace)),
        ('layer2', make_layer(128, layers[1], stride=2, inplace=inplace)),
        ('layer3', make_layer(256, layers[2], stride=2, inplace=inplace)),
        ('layer4', make_layer(512, layers[3], stride=2, inplace=inplace)),
        ('avgpool', nn.AdaptiveAvgPool2d((1, 1))),
        ('flat', nn.Flatten()),
        ('fc', nn.Linear(512 * 4, num_classes)),
    ]))

    # Flatten nested sequentials.
    model = flatten_sequential(model)

    # Initialize weights for Conv2d and BatchNorm2d layers.
    # Stolen from torchvision-0.4.0.
    def init_weight(m: nn.Module) -> None:
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            return
        if isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
            return

    model.apply(init_weight)

    return model
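# Example usage (a sketch): these block counts give the standard bottleneck
# ResNet layouts, e.g. [3, 4, 6, 3] for ResNet-50 and [3, 4, 23, 3] for ResNet-101.
model = build_resnet([3, 4, 6, 3], num_classes=1000)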
model = NeuralNetwork()
print(model)

X = torch.rand(1, 28, 28)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

input_image = torch.rand(3, 28, 28)
print(input_image.size())

flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

layer1 = nn.Linear(in_features=28 * 28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", type=str, default="BreakoutNoFrameskip-v4")
    parser.add_argument(
        "--outdir",
        type=str,
        default="results",
        help=("Directory path to save output files."
              " If it does not exist, it will be created."),
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed [0, 2 ** 31)")
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--demo", action="store_true", default=False)
    parser.add_argument("--load-pretrained", action="store_true", default=False)
    parser.add_argument("--pretrained-type", type=str, default="best",
                        choices=["best", "final"])
    parser.add_argument("--load", type=str, default=None)
    parser.add_argument("--final-exploration-frames", type=int, default=10**6)
    parser.add_argument("--final-epsilon", type=float, default=0.01)
    parser.add_argument("--eval-epsilon", type=float, default=0.001)
    parser.add_argument("--steps", type=int, default=5 * 10**7)
    parser.add_argument(
        "--max-frames",
        type=int,
        default=30 * 60 * 60,  # 30 minutes with 60 fps
        help="Maximum number of frames for each episode.",
    )
    parser.add_argument("--replay-start-size", type=int, default=5 * 10**4)
    parser.add_argument("--target-update-interval", type=int, default=10**4)
    parser.add_argument("--eval-interval", type=int, default=250000)
    parser.add_argument("--eval-n-steps", type=int, default=125000)
    parser.add_argument("--update-interval", type=int, default=4)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument(
        "--log-level",
        type=int,
        default=20,
        help="Logging level. 10:DEBUG, 20:INFO etc.",
    )
    parser.add_argument(
        "--render",
        action="store_true",
        default=False,
        help="Render env states in a GUI window.",
    )
    parser.add_argument(
        "--monitor",
        action="store_true",
        default=False,
        help=("Monitor env. Videos and additional information are saved"
              " as output files."),
    )
    parser.add_argument("--batch-accumulator", type=str, default="mean",
                        choices=["mean", "sum"])
    parser.add_argument("--quantile-thresholds-N", type=int, default=64)
    parser.add_argument("--quantile-thresholds-N-prime", type=int, default=64)
    parser.add_argument("--quantile-thresholds-K", type=int, default=32)
    parser.add_argument("--n-best-episodes", type=int, default=200)
    args = parser.parse_args()

    import logging

    logging.basicConfig(level=args.log_level)

    # Set a random seed used in PFRL.
    utils.set_random_seed(args.seed)

    # Set different random seeds for train and test envs.
    train_seed = args.seed
    test_seed = 2**31 - 1 - args.seed

    args.outdir = experiments.prepare_output_dir(args, args.outdir)
    print("Output files are saved in {}".format(args.outdir))

    def make_env(test):
        # Use different random seeds for train and test envs
        env_seed = test_seed if test else train_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test,
        )
        env.seed(int(env_seed))
        if test:
            # Randomize actions like epsilon-greedy in evaluation as well
            env = pfrl.wrappers.RandomizeAction(env, args.eval_epsilon)
        if args.monitor:
            env = pfrl.wrappers.Monitor(
                env, args.outdir, mode="evaluation" if test else "training")
        if args.render:
            env = pfrl.wrappers.Render(env)
        return env

    env = make_env(test=False)
    eval_env = make_env(test=True)
    n_actions = env.action_space.n

    q_func = pfrl.agents.iqn.ImplicitQuantileQFunction(
        psi=nn.Sequential(
            nn.Conv2d(4, 32, 8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
        ),
        phi=nn.Sequential(
            pfrl.agents.iqn.CosineBasisLinear(64, 3136),
            nn.ReLU(),
        ),
        f=nn.Sequential(
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions),
        ),
    )

    # Use the same hyper parameters as https://arxiv.org/abs/1710.10044
    opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / args.batch_size)

    rbuf = replay_buffers.ReplayBuffer(10**6)

    explorer = explorers.LinearDecayEpsilonGreedy(
        1.0,
        args.final_epsilon,
        args.final_exploration_frames,
        lambda: np.random.randint(n_actions),
    )

    def phi(x):
        # Feature extractor
        return np.asarray(x, dtype=np.float32) / 255

    agent = pfrl.agents.IQN(
        q_func,
        opt,
        rbuf,
        gpu=args.gpu,
        gamma=0.99,
        explorer=explorer,
        replay_start_size=args.replay_start_size,
        target_update_interval=args.target_update_interval,
        update_interval=args.update_interval,
        batch_accumulator=args.batch_accumulator,
        phi=phi,
        quantile_thresholds_N=args.quantile_thresholds_N,
        quantile_thresholds_N_prime=args.quantile_thresholds_N_prime,
        quantile_thresholds_K=args.quantile_thresholds_K,
    )

    if args.load or args.load_pretrained:
        # either load or load_pretrained must be false
        assert not args.load or not args.load_pretrained
        if args.load:
            agent.load(args.load)
        else:
            agent.load(
                utils.download_model("IQN", args.env,
                                     model_type=args.pretrained_type)[0])

    if args.demo:
        eval_stats = experiments.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=args.eval_n_steps,
            n_episodes=None,
        )
        print("n_steps: {} mean: {} median: {} stdev {}".format(
            args.eval_n_steps,
            eval_stats["mean"],
            eval_stats["median"],
            eval_stats["stdev"],
        ))
    else:
        experiments.train_agent_with_evaluation(
            agent=agent,
            env=env,
            steps=args.steps,
            eval_n_steps=args.eval_n_steps,
            eval_n_episodes=None,
            eval_interval=args.eval_interval,
            outdir=args.outdir,
            save_best_so_far_agent=True,
            eval_env=eval_env,
        )

        dir_of_best_network = os.path.join(args.outdir, "best")
        agent.load(dir_of_best_network)

        # run 200 evaluation episodes, each capped at 30 mins of play
        stats = experiments.evaluator.eval_performance(
            env=eval_env,
            agent=agent,
            n_steps=None,
            n_episodes=args.n_best_episodes,
            max_episode_len=args.max_frames / 4,
            logger=None,
        )
        with open(os.path.join(args.outdir, "bestscores.json"), "w") as f:
            json.dump(stats, f)
        print("The results of the best scoring network:")
        for stat in stats:
            print(str(stat) + ":" + str(stats[stat]))
def train_experiment(device):
    with TemporaryDirectory() as logdir:
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.02)
        # note: both loaders use the 10k MNIST test split (train=False),
        # presumably to keep this smoke-test experiment small and fast
        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32),
        }
        runner = dl.SupervisedRunner(input_key="features", output_key="logits",
                                     target_key="targets", loss_key="loss")
        # model training
        runner.train(
            engine=dl.DeviceEngine(device),
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            loaders=loaders,
            num_epochs=1,
            callbacks=[
                dl.AccuracyCallback(input_key="logits", target_key="targets",
                                    topk_args=(1, 3, 5)),
                dl.PrecisionRecallF1SupportCallback(input_key="logits",
                                                    target_key="targets",
                                                    num_classes=10),
                dl.AUCCallback(input_key="logits", target_key="targets"),
                dl.ConfusionMatrixCallback(input_key="logits", target_key="targets",
                                           num_classes=10),
            ],
            logdir=logdir,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
            verbose=False,
            load_best_on_end=True,
            timeit=False,
            check=False,
            overfit=False,
            fp16=False,
            ddp=False,
        )
        # model inference
        for prediction in runner.predict_loader(loader=loaders["valid"]):
            assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10
        # model post-processing
        features_batch = next(iter(loaders["valid"]))[0]
        # model stochastic weight averaging
        model.load_state_dict(
            utils.get_averaged_weights_by_path_mask(logdir=logdir, path_mask="*.pth"))
        # model tracing
        utils.trace_model(model=runner.model, batch=features_batch)
        # model to onnx
        # utils.onnx_export(
        #     model=runner.model, batch=features_batch,
        #     file=f"./{logdir}/mnist.onnx", verbose=False
        # )
        if SETTINGS.quantization_required:
            # model quantization
            utils.quantize_model(model=runner.model)
        if SETTINGS.pruning_required:
            # model pruning
            utils.prune_model(model=runner.model, pruning_fn="l1_unstructured",
                              amount=0.8)
def _atari_arch(self, input_shape, action_dim, config):
    channels = input_shape[0]

    layers_encoder = [
        nn.Conv2d(channels, 32, kernel_size=7, stride=3, padding=2),
        nn.LeakyReLU(),
        nn.Conv2d(32, 32, kernel_size=5, stride=3, padding=0),
        nn.LeakyReLU(),
        nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=0),
        nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
        nn.LeakyReLU(),
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0),
        nn.LeakyReLU(),
        nn.Conv2d(128, 128, kernel_size=2, stride=1, padding=0),
        nn.Flatten()
    ]

    nn.init.xavier_uniform_(layers_encoder[0].weight)
    nn.init.xavier_uniform_(layers_encoder[2].weight)
    nn.init.xavier_uniform_(layers_encoder[4].weight)
    nn.init.xavier_uniform_(layers_encoder[6].weight)
    nn.init.xavier_uniform_(layers_encoder[8].weight)
    nn.init.xavier_uniform_(layers_encoder[10].weight)

    layers_model = [
        Linear(in_features=256, out_features=config.forward_model_h1, bias=True),
        LeakyReLU(),
        Linear(in_features=config.forward_model_h1,
               out_features=config.forward_model_h1, bias=True),
        LeakyReLU(),
        Linear(in_features=config.forward_model_h1,
               out_features=config.forward_model_h2, bias=True),
        LeakyReLU(),
        Linear(in_features=config.forward_model_h2,
               out_features=config.forward_model_h2, bias=True),
        LeakyReLU(),
        Linear(in_features=config.forward_model_h2, out_features=128, bias=True)
    ]

    nn.init.xavier_uniform_(layers_model[0].weight)
    nn.init.xavier_uniform_(layers_model[2].weight)
    nn.init.xavier_uniform_(layers_model[4].weight)
    nn.init.xavier_uniform_(layers_model[6].weight)
    nn.init.xavier_uniform_(layers_model[8].weight)

    return layers_encoder, layers_model

# AE arch
# def _atari_arch(self, input_shape, action_dim, config):
#     channels = input_shape[0]
#
#     layers_encoder = [
#         nn.Conv2d(channels, 64, kernel_size=7, stride=3, padding=2),
#         nn.LeakyReLU(),
#         nn.Conv2d(64, 64, kernel_size=5, stride=3, padding=0),
#         nn.LeakyReLU(),
#         nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.Conv2d(256, 256, kernel_size=2, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.Flatten()
#     ]
#
#     nn.init.xavier_uniform_(layers_encoder[0].weight)
#     nn.init.xavier_uniform_(layers_encoder[2].weight)
#     nn.init.xavier_uniform_(layers_encoder[4].weight)
#     nn.init.xavier_uniform_(layers_encoder[6].weight)
#     nn.init.xavier_uniform_(layers_encoder[8].weight)
#     nn.init.xavier_uniform_(layers_encoder[10].weight)
#
#     layers_model = [
#         nn.Linear(512, 256),
#         nn.LeakyReLU(),
#         nn.Linear(256, 256),
#         nn.LeakyReLU(),
#     ]
#
#     nn.init.xavier_uniform_(layers_model[0].weight)
#     nn.init.xavier_uniform_(layers_model[2].weight)
#
#     layers_decoder = [
#         nn.ConvTranspose2d(256, 256, kernel_size=2, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.ConvTranspose2d(256, 128, kernel_size=3, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.ConvTranspose2d(128, 128, kernel_size=3, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.ConvTranspose2d(128, 64, kernel_size=5, stride=1, padding=0),
#         nn.LeakyReLU(),
#         nn.ConvTranspose2d(64, 64, kernel_size=5, stride=3, padding=0),
#         nn.LeakyReLU(),
#         nn.ConvTranspose2d(64, channels, kernel_size=7, stride=3, padding=2),
#     ]
#
#     nn.init.xavier_uniform_(layers_decoder[0].weight)
#     nn.init.xavier_uniform_(layers_decoder[2].weight)
#     nn.init.xavier_uniform_(layers_decoder[4].weight)
#     nn.init.xavier_uniform_(layers_decoder[6].weight)
#     nn.init.xavier_uniform_(layers_decoder[8].weight)
#     nn.init.xavier_uniform_(layers_decoder[10].weight)
#
#     return layers_encoder, layers_model, layers_decoder
def __init__(self, in_channels, device, len_sequence, n_conv_layers=3,
             feed_layers=[256, 128], output_size=None, out_activation=None):
    """
    :param in_channels: number of input channels
    :param n_conv_layers: how many convolutional layers (output layer excluded)
    :param feed_layers: list of feedforward layer sizes
    :param output_size: None if the output does not have to be computed,
        an integer otherwise. Default None.
    :param out_activation: if None the last layer is linear, else
        out_activation is used as the output function. out_activation must be
        passed in the form torch.nn.Function(args). If output_size is None
        this option has no effect. Default None.
    """
    super(CNN1DSpeechWords, self).__init__()
    self.output_type = OUTPUT_TYPE.ALL_OUTS
    self.is_recurrent = False
    self.in_channels = in_channels
    self.n_conv_layers = n_conv_layers
    self.feed_layers = feed_layers
    self.len_sequence = len_sequence
    self.output_size = output_size
    self.device = device

    out_chs = [self.in_channels] + [40 * (i + 1) for i in range(n_conv_layers)]
    ks = [3**(i + 1) for i in range(n_conv_layers)]
    self.out_activation = out_activation
    self.layers = nn.ModuleDict()

    output_size_conv = self.len_sequence
    for i in range(self.n_conv_layers):
        output_size_conv = compute_conv_out_shape_1d(
            output_size_conv, 0, 1, ks[i], 1)  # convolution
        output_size_conv = compute_conv_out_shape_1d(
            output_size_conv, 0, 1, 2, 1)  # pooling
        self.layers.update([
            [f'conv{i}', nn.Conv1d(out_chs[i], out_chs[i + 1],
                                   kernel_size=ks[i], stride=1)],
            [f'relu{i}', nn.ReLU()],
            [f'pool{i}', nn.MaxPool1d(kernel_size=2, stride=1)]
        ])

    self.layers.update([['flatten', nn.Flatten()]])

    for i, el in enumerate(feed_layers):
        if i == 0:
            input_size = output_size_conv * out_chs[-1]
        else:
            input_size = feed_layers[i - 1]
        self.layers.update([
            [f'l{i}', nn.Linear(input_size, el, bias=True)],
            [f'relu_l{i}', nn.ReLU()],
        ])

    if self.output_size is not None:
        self.layers.update({
            'out': nn.Linear(feed_layers[-1], self.output_size, bias=True)
        })

    self.layers = self.layers.to(self.device)
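# This class relies on a helper `compute_conv_out_shape_1d` defined elsewhere;
# a minimal sketch matching its call sites above (size, padding, dilation,
# kernel_size, stride), using the standard Conv1d/MaxPool1d output formula:
def compute_conv_out_shape_1d(size, padding, dilation, kernel_size, stride):
    return (size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1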
def _init_model(self):
    '''
    input:
        flatten_att_nbhd_inputs: shape=[att_lstm_num * att_lstm_seq_len, (batch_size, nbhd_size, nbhd_size, nbhd_type)]
            -> att_nbhd_inputs: shape=[att_lstm_num, att_lstm_seq_len, (batch_size, nbhd_size, nbhd_size, nbhd_type)]
        flatten_att_flow_inputs: shape=[att_lstm_num * att_lstm_seq_len, (batch_size, nbhd_size, nbhd_size, flow_type)]
            -> att_flow_inputs: shape=[att_lstm_num, att_lstm_seq_len, (batch_size, nbhd_size, nbhd_size, flow_type)]
        att_lstm_inputs: shape=[att_lstm_num, (batch_size, att_lstm_seq_len, feature_vec_len)]
        nbhd_inputs: shape=[lstm_seq_len, (batch_size, nbhd_size, nbhd_size, nbhd_type)]
        flow_inputs: shape=[lstm_seq_len, (batch_size, nbhd_size, nbhd_size, flow_type)]
        lstm_inputs: shape=(batch_size, lstm_seq_len, feature_vec_len)
    remark: tensor part should have shape of (batch_size, input_channel, H, W), use permute
    '''
    # 1st level gate
    self.nbhd_cnns_1st = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=self.nbhd_type, out_channels=64,
                      kernel_size=(3, 3), padding=1),
            nn.ReLU()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    self.flow_cnns_1st = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=self.flow_type, out_channels=64,
                      kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.Sigmoid()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    # [nbhd * flow] shape=[lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    # 2nd level gate
    self.nbhd_cnns_2nd = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
            nn.ReLU()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    self.flow_cnns_2nd = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=self.flow_type, out_channels=64,
                      kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.Sigmoid()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    # [nbhd * flow] shape=[lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    # 3rd level gate
    self.nbhd_cnns_3rd = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
            nn.ReLU()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    self.flow_cnns_3rd = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(in_channels=self.flow_type, out_channels=64,
                      kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.Sigmoid()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    # [nbhd * flow] shape=[lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    # dense part
    self.nbhd_vecs = nn.ModuleList([
        nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * self.nbhd_size * self.nbhd_size, self.cnn_flat_size),
            nn.ReLU()).to(self.device) for i in range(self.lstm_seq_len)
    ])
    # shape=[lstm_seq_len, (batch_size, cnn_flat_size)]

    # feature concatenate
    # torch.cat(list, dim=-1), shape=(batch_size, cnn_flat_size * lstm_seq_len)
    # torch.reshape(tensor, (tensor.shape[0], lstm_seq_len, cnn_flat_size))
    # torch.cat(list, dim=-1), shape=(batch_size, lstm_seq_len, feature_vec_len + cnn_flat_size)

    # lstm
    self.lstm = nn.LSTM(input_size=self.feature_vec_len + self.cnn_flat_size,
                        hidden_size=self.lstm_out_size, batch_first=True,
                        dropout=0.1).to(self.device)
    # result shape=(batch_size, lstm_seq_len, lstm_out_size)
    # result, (hn, cn) = lstm -> hn[-1] shape=(batch, lstm_out_size)

    # attention part
    self.att_nbhd_cnns_1st = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=self.nbhd_type, out_channels=64,
                          kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_cnns_1st = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=self.flow_type, out_channels=64,
                          kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_gate_1st = nn.ModuleList([
        nn.ModuleList([
            nn.Sigmoid().to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    # [[nbhd * flow]] shape=[att_lstm_num, att_lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    self.att_nbhd_cnns_2nd = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_cnns_2nd = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_gate_2nd = nn.ModuleList([
        nn.ModuleList([
            nn.Sigmoid().to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    # [[nbhd * flow]] shape=[att_lstm_num, att_lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    self.att_nbhd_cnns_3rd = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_cnns_3rd = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    self.att_flow_gate_3rd = nn.ModuleList([
        nn.ModuleList([
            nn.Sigmoid().to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    # [[nbhd * flow]] shape=[att_lstm_num, att_lstm_seq_len, (batch_size, 64, nbhd_size, nbhd_size)]

    self.att_nbhd_vecs = nn.ModuleList([
        nn.ModuleList([
            nn.Sequential(
                nn.Flatten(),
                nn.Linear(64 * self.nbhd_size * self.nbhd_size, self.cnn_flat_size),
                nn.ReLU()).to(self.device) for j in range(self.att_lstm_seq_len)
        ]) for i in range(self.att_lstm_num)
    ])
    # shape=[att_lstm_num, att_lstm_seq_len, (batch_size, cnn_flat_size)]

    # [torch.cat(list, dim=-1)], shape=[att_lstm_num, (batch_size, cnn_flat_size * att_lstm_seq_len)]
    # [torch.reshape(tensor, (tensor.shape[0], att_lstm_seq_len, cnn_flat_size))]
    # [torch.cat(list, dim=-1)], shape=[att_lstm_num, (batch_size, att_lstm_seq_len, feature_vec_len + cnn_flat_size)]
    self.att_lstms = nn.ModuleList([
        nn.LSTM(input_size=self.feature_vec_len + self.cnn_flat_size,
                hidden_size=self.lstm_out_size, batch_first=True,
                dropout=0.1).to(self.device) for i in range(self.att_lstm_num)
    ])
    # [result] shape=[att_lstm_num, (batch_size, lstm_seq_len, lstm_out_size)]
    # [result, (hn, cn) = lstm -> result] [att_lstm_num, (batch_size, lstm_seq_len, lstm_out_size)]

    # compare
    self.att_low_level = nn.ModuleList([
        CBAAttention(self.device, self.lstm_out_size, self.lstm_out_size)
        for i in range(self.att_lstm_num)
    ])
    # shape=[att_lstm_num, (batch_size, lstm_out_size)]
    # torch.cat(list, dim=-1), shape=(batch_size, lstm_out_size * att_lstm_num)
    # torch.reshape(tensor, (tensor.shape[0], att_lstm_num, lstm_out_size))
    # shape=(batch_size, att_lstm_num, lstm_out_size)

    self.att_high_level = nn.LSTM(input_size=self.lstm_out_size,
                                  hidden_size=self.lstm_out_size,
                                  batch_first=True, dropout=0.1).to(self.device)
    # result shape=(batch_size, att_lstm_num, lstm_out_size)
    # result, (hn, cn) = lstm -> hn[-1] shape=(batch_size, lstm_out_size)

    self.lstm_all = nn.Linear(self.lstm_out_size + self.lstm_out_size,
                              self.output_dim).to(self.device)
    self.pred_volume = nn.Tanh().to(self.device)
def __init__(self):
    super().__init__()
    self.classifier = nn.Sequential(nn.Flatten(), nn.Linear(12544, 10))
def __init__(self, hidden_size=128, feature_size=40):
    super(GeneratorModel, self).__init__()
    self.hidden_size = hidden_size
    self.feature_size = feature_size
    self.total_size = self.hidden_size + self.feature_size
    self.ones = torch.ones([1, 1, 64, 64]).to('cuda')
    self.encoder = nn.Sequential(
        nn.Conv2d(in_channels=3 + self.feature_size, out_channels=32,
                  kernel_size=4, stride=2, padding=1),  # 32 * 32 * 32
        nn.BatchNorm2d(32),
        nn.GELU(),
        nn.Conv2d(in_channels=32, out_channels=64,
                  kernel_size=4, stride=2, padding=1),  # 64 * 16 * 16
        nn.BatchNorm2d(64),
        nn.GELU(),
        nn.Conv2d(in_channels=64, out_channels=128,
                  kernel_size=4, stride=2, padding=1),  # 128 * 8 * 8
        nn.BatchNorm2d(128),
        nn.GELU(),
        nn.Conv2d(in_channels=128, out_channels=256,
                  kernel_size=4, stride=2, padding=1),  # 256 * 4 * 4
        nn.BatchNorm2d(256),
        nn.GELU(),
        nn.Conv2d(in_channels=256, out_channels=512,
                  kernel_size=4, stride=2, padding=1),  # 512 * 2 * 2
        nn.BatchNorm2d(512),
        nn.GELU(),
        nn.Conv2d(in_channels=512, out_channels=1024,
                  kernel_size=4, stride=2, padding=1),  # 1024 * 1 * 1
        nn.BatchNorm2d(1024),
        nn.GELU(),
        nn.Flatten(),
        nn.Linear(in_features=1024, out_features=self.hidden_size * 2))
    self.decoder = nn.Sequential(
        nn.Linear(in_features=self.total_size, out_features=1024),
        nn.Unflatten(dim=1, unflattened_size=(1024, 1, 1)),  # batch x 1024 x 1 x 1
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=1024, out_channels=512, stride=2,
                           kernel_size=4, padding=1),  # batch x 512 x 2 x 2
        nn.BatchNorm2d(512),
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=512, out_channels=256, stride=2,
                           kernel_size=4, padding=1),  # batch x 256 x 4 x 4
        nn.BatchNorm2d(256),
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=256, out_channels=128, stride=2,
                           kernel_size=4, padding=1),  # batch x 128 x 8 x 8
        nn.BatchNorm2d(128),
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=128, out_channels=64, stride=2,
                           kernel_size=4, padding=1),  # batch x 64 x 16 x 16
        nn.BatchNorm2d(64),
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=64, out_channels=32, stride=2,
                           kernel_size=4, padding=1),  # batch x 32 x 32 x 32
        nn.BatchNorm2d(32),
        nn.GELU(),
        nn.ConvTranspose2d(in_channels=32, out_channels=3, stride=2,
                           kernel_size=4, padding=1),  # batch x 3 x 64 x 64
        nn.Sigmoid())
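# The encoder ends in hidden_size * 2 features, the usual mean/log-variance
# pair of a VAE encoder; a minimal reparameterization sketch under that
# assumption (`enc_in` and `cond` are hypothetical names, not from the source):
def reparameterized_forward(self, enc_in, cond):
    mu, logvar = self.encoder(enc_in).chunk(2, dim=1)
    z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()  # reparameterization trick
    return self.decoder(torch.cat([z, cond], dim=1)), mu, logvar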
def lin_1(input_dim=3072, num_classes=10):
    model = nn.Sequential(nn.Flatten(), nn.Linear(input_dim, num_classes))
    return model
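# Example usage (a sketch; assumes `torch` is imported): a single affine layer
# over flattened 32x32x3 images, e.g. CIFAR-10.
model = lin_1()
logits = model(torch.randn(8, 3, 32, 32))  # -> shape (8, 10)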
def __init__(self, image_channels, num_classes, batch_normalization: bool = False,
             drop_out: bool = False, conv_stride: bool = False,
             init_weights: bool = False):
    """
    Is called when the model is initialized.
    Args:
        image_channels: Number of color channels in image (3)
        num_classes: Number of classes we want to predict (10)
    """
    super().__init__()
    num_filters = 32  # Set number of filters in first conv layer
    self.num_classes = num_classes

    # Define the convolutional layers
    self.feature_extractor = nn.Sequential(
        # Layer 1
        nn.Conv2d(in_channels=image_channels, out_channels=num_filters,
                  kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.BatchNorm2d(num_features=num_filters) if batch_normalization else nn.Identity(),
        nn.Conv2d(num_filters, num_filters, kernel_size=2, stride=2, padding=0)
        if conv_stride else nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout() if drop_out else nn.Identity(),
        # Layer 2
        nn.Conv2d(num_filters, num_filters * 2, 5, 1, 2),
        nn.ReLU(),
        nn.BatchNorm2d(num_filters * 2) if batch_normalization else nn.Identity(),
        nn.Conv2d(num_filters * 2, num_filters * 2, kernel_size=2, stride=2, padding=0)
        if conv_stride else nn.MaxPool2d(2, 2),
        nn.Dropout() if drop_out else nn.Identity(),
        # Layer 3
        nn.Conv2d(num_filters * 2, num_filters * 4, 5, 1, 2),
        nn.ReLU(),
        nn.BatchNorm2d(num_filters * 4) if batch_normalization else nn.Identity(),
        nn.Conv2d(num_filters * 4, num_filters * 4, kernel_size=2, stride=2, padding=0)
        if conv_stride else nn.MaxPool2d(2, 2),
        nn.Dropout() if drop_out else nn.Identity(),
        # Flatten
        nn.Flatten())

    self.num_output_features = num_filters * 4 * 4 * 4
    self.classifier = nn.Sequential(
        nn.Linear(self.num_output_features, 64),
        nn.ReLU(),
        nn.BatchNorm1d(64) if batch_normalization else nn.Identity(),
        nn.Linear(64, num_classes),
    )
    if init_weights:
        self.feature_extractor.apply(self.initialize_weights)
        self.classifier.apply(self.initialize_weights)
import torch.nn as nn

from . import LATENT_SIZE

discriminator = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(128),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(512),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    nn.Flatten(),
    nn.Sigmoid())

generator = nn.Sequential(
    nn.ConvTranspose2d(LATENT_SIZE, 512, kernel_size=4, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(True),
    nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(True),
    nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False),
    # the source was truncated at the layer above; the tail below is the
    # standard DCGAN completion mirroring the discriminator (64x64 RGB output)
    # and is an assumption
    nn.BatchNorm2d(128),
    nn.ReLU(True),
    nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(True),
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh())