def regular_loss(self):
    loss = 0.0
    # 1 conv 1,2,3
    for (name, conv_layer) in zip(['conv1', 'conv2', 'conv3'],
                                  [self.conv1, self.conv2, self.conv3]):
        c = self.length[name]
        # softmax over the architecture parameters a[name]
        g_conv = torch.exp(self.a[name]) / torch.sum(
            torch.exp(self.a[name]))
        for i in range(c):
            loss += g_conv[c + i] * utils.model_size(conv_layer[i][-1])
        loss += g_conv[2 * c] * utils.model_size(conv_layer[c][-1])
    # 2 fc 1,2
    for (name, fc_layer) in zip(['fc1', 'fc2'], [self.fc1, self.fc2]):
        c = self.length[name]
        g_fc = torch.exp(self.a[name]) / torch.sum(
            torch.exp(self.a[name]))
        loss += g_fc[c] * utils.model_size(fc_layer[c])
    return loss
def regular_loss(self):
    loss = 0.0
    # loss of stem
    c = self.length['stem']
    g_stem = torch.exp(self.a['stem']) / torch.sum(
        torch.exp(self.a['stem']))
    loss += g_stem[c] * utils.model_size(self.stem[c])
    # loss of cells layer
    for i in range(self.cell_nums):
        # loss of cells layer i
        c_1 = self.length['cell' + str(i)]
        c_2 = len(self.genotype[i])
        g_cell = torch.exp(self.a['cell' + str(i)]) / torch.sum(
            torch.exp(self.a['cell' + str(i)]))
        num_l = c_1 + (1 + self.mu_s) * c_2
        for k in range(c_1, num_l):
            loss += g_cell[k] * utils.model_size(self.cells[i][k])
    return loss
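# --- Hedged sketch (not part of the snippets above) ---
# Both regular_loss variants weight utils.model_size(sub_module) by a
# softmax g over architecture parameters, so model_size acts as a constant
# per-candidate cost; gradients flow through g, not through model_size.
# A minimal sketch, assuming the helper just counts trainable parameters:
import torch.nn as nn

def model_size(module: nn.Module) -> int:
    """Number of trainable parameters in `module` (assumed semantics)."""
    return sum(p.numel() for p in module.parameters() if p.requires_grad)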
def _save_test_result(self, ckpt='best'):
    with open(self.result_path, 'w') as f:
        json.dump(self.r_test.detail, f, indent=2)
    result = OrderedDict()
    result['version'] = self.config.version
    result['ckpt'] = ckpt
    result['model_size'] = utils.model_size(self.model)
    result['time'] = utils.get_time()
    for key, val in self.r_test.state.items():
        result[key] = val
    self.config['result'] = dict(result)
    self.config.save()
targets = tf.placeholder(tf.int64, [batch_size, input_seq_length],
                         name='targets')
keep_prob = tf.placeholder(tf.float32)

# Model
embedder = WordEmbeddingBackend(vocab_size, embedding_dim)
network = Network(input_, targets, keep_prob, batch_size, vocab_size,
                  num_layers, hidden_dim, input_seq_length, embedder)

# Create session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

# Run initializers
session.run(tf.global_variables_initializer())
session.run(embedder.reset_op)

print('Model size:', model_size())
sys.stdout.flush()

best_valid_loss = None
rnn_state = session.run(network.initial_state)
saver = tf.train.Saver(max_to_keep=5000)
best_filename = None

# Ensure the checkpoint directory exists (os.makedirs raises if it does)
try:
    os.makedirs("saves/words")
except OSError:
    pass

for epoch in range(25):
    epoch_start_time = time.time()
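# --- Hedged sketch (not part of the snippet above) ---
# This script calls a zero-argument model_size(), which in TF1-era code
# usually means summing over the default graph's trainable variables.
# A plausible sketch under that assumption:
import numpy as np
import tensorflow as tf

def model_size():
    """Total trainable-parameter count of the default TF1 graph."""
    return sum(int(np.prod(v.get_shape().as_list()))
               for v in tf.trainable_variables())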
            nn.LeakyReLU(0.2, inplace=True)
        ])
        self.block4 = nn.Sequential(*[
            nn.Conv2d(256, 512, 4, padding=1),
            nn.InstanceNorm2d(512, affine=True),
            nn.LeakyReLU(0.2, inplace=True)
        ])
        # FCN classification layer
        self.fc = nn.Conv2d(512, 1, 4, padding=1)

    def forward(self, x):
        h1 = self.block1(x)
        h2 = self.block2(h1)
        h3 = self.block3(h2)
        h4 = self.block4(h3)
        out = self.fc(h4)
        # Average pooling and flatten
        out = F.avg_pool2d(out, out.size()[2:]).view(out.size()[0], -1)
        D_outputs = namedtuple("DOutputs", ['h1', 'h2', 'h3', 'h4', 'out'])
        return D_outputs(h1, h2, h3, h4, out)


if __name__ == "__main__":
    from utils import measure_model, model_size

    g = Generator(3, 3, transconv=False)
    measure_model(g, 256, 256)
    print(model_size(g))
                                        transform=train_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                           shuffle=True, num_workers=1)

# Build the network
n = (4, 'down')  # unused here; the layer spec is written out below
net = HyperNet(3, nclasses,
               [(4, 'down'), (4, 'down'), (4, 'up'), (4, 'up'), (4, None)],
               h=1e-3, verbose=True, clear_grad=False,
               classifier_type='conv').to(device)
print('Model Size: %6.2f' % model_size(net))

get_optim = lambda net: torch.optim.SGD(
    net.parameters(), lr=1e-1, momentum=0.0)
# misfit = nn.CrossEntropyLoss()
misfit = nn.MSELoss()

# Move to GPU and build fake label
images, _ = next(iter(train_loader))
images = images.to(device)
YN, _ = net(images)
N, _, H, W = YN.shape
labels = torch.rand(N, nclasses, H, W).to(device)

###################################################################
### First pass a batch thru and perform manual grad calculation ###
def init_weights(self, pretrained=None):
    self.backbone.init_weights(pretrained=pretrained)
    self.neck.init_weights()
    self.head.init_weights()
    model_size(self)
num_channels = 4
num_classes = 2
layers = [
    (10, None),
]

net = HyperNet(
    num_channels,
    num_classes,
    layers,
    h=1e-1,
    verbose=False,
    clear_grad=True,
    classifier_type='conv3',
).to(device)

print('\n### Model Statistics')
print('Model Size: %8.1f mb' % utils.model_size(net))
print('Number of Parameters: %9d' % utils.num_params(net))
print(' ')

nex = 4  # number of examples in the timing batch
images = torch.randn((nex, num_channels, 16, 16, 16)).to(device)

fwd_start = timer()
Y_N, Y_Nm1 = net(images)
fwd_time = timer() - fwd_start

dYN = torch.randn_like(Y_N)
get_optim = lambda net: torch.optim.Adam(net.parameters(), lr=1e-2)

bwd_start = timer()
Y0, Y1 = net.backward(Y_N, Y_Nm1, dYN, get_optim, False)
bwd_time = timer() - bwd_start
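# --- Hedged sketch (not part of the snippets above) ---
# The HyperNet scripts format utils.model_size(net) with '%8.1f mb', so
# that helper presumably reports megabytes rather than a raw count, next
# to utils.num_params. A sketch assuming float32 (4-byte) parameters:
import torch.nn as nn

def num_params(net: nn.Module) -> int:
    """Raw parameter count."""
    return sum(p.numel() for p in net.parameters())

def model_size(net: nn.Module) -> float:
    """Approximate model size in MB, assuming 4 bytes per parameter."""
    return num_params(net) * 4 / 1024 ** 2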
                                      download=False, transform=val_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                           shuffle=True, num_workers=1)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                         shuffle=True, num_workers=1)

# Build the network
n = 4
net = HyperNet(3, 10, [n, n, n, n], h=1, verbose=False,
               clear_grad=False).to(device)
print('Model Size: %6.2f' % model_size(net))

get_optim = lambda net: torch.optim.SGD(
    net.parameters(), lr=1e-2, momentum=0.0, weight_decay=5e-4)
optimizer = get_optim(net) if autograd else None
misfit = nn.CrossEntropyLoss()

eps_fwd = []
eps_back = []
for epoch in range(epochs):
    print('Epoch %d' % epoch)
    acc = []
    start_time_back = time.time()
    for i, (images, labels) in enumerate(train_loader):
def init_weights(self, pretrained=None):
    self.backbone.init_weights(pretrained=pretrained)
    model_size(self)
             ref_lang)
src_vocab = pickle.load(open(sys.argv[5], 'rb'))
ref_vocab = pickle.load(open(sys.argv[6], 'rb'))
tst_dataset = NMTDataset(load_data(conf.dev_src_path),
                         load_data(conf.dev_ref_path),
                         src_vocab, ref_vocab)
tst_dataloader = NMTDataLoader(tst_dataset,
                               batch_size=conf.batch_size,
                               num_workers=0)
print('%d test examples loaded.' % len(tst_dataset))

encoder = torch.load(sys.argv[7])
decoder = torch.load(sys.argv[8])
encoder_size = utils.model_size(encoder)
decoder_size = utils.model_size(decoder)
total_size = encoder_size + decoder_size
print('[*] Number of model parameters:\n'
      'encoder -- {:,}, decoder -- {:,}\n'
      'total size -- {:,}'.format(encoder_size, decoder_size, total_size))

if conf.cuda:
    encoder.cuda()
    decoder.cuda()

eval_fn = locals()['evaluate_%s' % model_type]
start_time = time.time()
print('Evaluating ...')
bleus = 0
for _, (src, ref, cand, bleu_) in enumerate(
        eval_fn(encoder, decoder, tst_dataloader, conf.beam)):