def __init__(self, opt, size, continious=0, binary=0):
    """Build the dense block and the per-study polynomial parameters.

    Parameters
    ---------------
    opt:
        options object; ``latent_dim``, ``code_dim``, ``degree`` and
        ``n_classes`` are read from it
    size: int
        width of the last linear layer
    continious: int
        must be falsy for this variant (spelling kept for callers)
    binary: int
        must be falsy for this variant
    """
    super().__init__()
    # This variant rejects binary and continuous outputs outright.
    assert not binary
    assert not continious

    hidden = 512
    in_features = opt.latent_dim + opt.code_dim
    stack = [
        nn.Linear(in_features, hidden),
        nn.BatchNorm1d(hidden),
        nn.LeakyReLU(inplace=True),
        nn.Linear(hidden, hidden),
        nn.BatchNorm1d(hidden),
        nn.LeakyReLU(inplace=True),
        nn.Linear(hidden, size),
    ]
    self.dense_block = nn.Sequential(*stack)

    self.binary = binary
    self.continious = continious
    self.degree = opt.degree
    self.n_studies = opt.n_classes

    # One coefficient per (polynomial power, study, output), drawn uniformly
    # from [0, 1) and stored as a float32 trainable parameter.
    poly_shape = (self.degree + 1, self.n_studies,
                  size - binary - continious)
    poly_init = np.random.random(poly_shape).astype(np.float32)
    self.poly_param = nn.Parameter(
        torch.tensor(poly_init, requires_grad=True))

    init_weights(self)
def inference(X_test, X_mean, init_file, outdir):
    """Run the network over ``X_test`` and write selected images to disk.

    The model is built with a batch size of 128 and a hard-coded second
    argument of 10 (presumably the class count -- confirm against
    ``model.build``), its weights are loaded from ``init_file``, and every
    test sample whose largest predicted value exceeds 0.9 is written to
    ``outdir`` as a PNG named after its prediction vector.

    NOTE(review): the original comment described these as images
    "misclassified with low certainty", but the filter keeps predictions
    whose maximum is above 0.9 and never looks at labels -- confirm intent.
    """
    batch_size = 128
    use_fixed_batches = True

    print('building model...')
    l_out = model.build(batch_size, 10)

    print('initializing weights from %s...' % (init_file))
    network.init_weights(l_out, init_file)

    test_iter = iter_funcs.create_iter_funcs_test(l_out, batch_size, N=50)

    batch_indices = network.get_batch_idx(
        X_test.shape[0], batch_size, fixed=use_fixed_batches, shuffle=False)
    for test_idx in batch_indices:
        X_test_batch = X_test[test_idx]
        y_hat = test_iter(X_test_batch)

        for X, y in zip(X_test_batch, y_hat):
            # Skip anything that does not clear the 0.9 threshold.
            if not (y.max() > 0.9):
                continue

            # Undo the initial transformations (shift, scale) and move the
            # first axis last (presumably channels-first -> channels-last).
            restored = (X + X_mean) * 255.
            img = restored.transpose(1, 2, 0)

            fname = '-'.join('%.5f' % p for p in y)
            cv2.imwrite(join(outdir, '%s.png' % fname), img)
def build_model(self):
    """Build our deep learning model.

    Instantiates the network named by ``self.model_type`` as ``self.unet``,
    applies ``init_weights`` unless ``self.initialization`` is ``'NA'``, and
    moves the network to ``self.device``.

    Raises:
        ValueError: if ``self.model_type`` is not one of ``'U_Net'``,
            ``'R2U_Net'``, ``'AttU_Net'``, ``'R2AttU_Net'`` or
            ``'ResAttU_Net'``. Previously an unknown name fell through the
            chain and surfaced later as an unrelated AttributeError on
            ``self.unet``.
    """
    if self.model_type == 'U_Net':
        self.unet = U_Net(
            img_ch=1,
            output_ch=1,
            first_layer_numKernel=self.first_layer_numKernel)
    elif self.model_type == 'R2U_Net':
        self.unet = R2U_Net(
            img_ch=1,
            output_ch=1,
            t=self.t,
            first_layer_numKernel=self.first_layer_numKernel)
    elif self.model_type == 'AttU_Net':
        self.unet = AttU_Net(
            img_ch=1,
            output_ch=1,
            first_layer_numKernel=self.first_layer_numKernel)
    elif self.model_type == 'R2AttU_Net':
        self.unet = R2AttU_Net(
            img_ch=1,
            output_ch=1,
            t=self.t,
            first_layer_numKernel=self.first_layer_numKernel)
    elif self.model_type == 'ResAttU_Net':
        self.unet = ResAttU_Net(
            UnetLayer=self.UnetLayer,
            img_ch=1,
            output_ch=1,
            first_layer_numKernel=self.first_layer_numKernel)
    else:
        # Fail fast with the offending value instead of crashing later on
        # a missing (or stale) ``self.unet``.
        raise ValueError('Unsupported model_type: %r' % (self.model_type,))

    # 'NA' means "keep the constructor's default initialization".
    if self.initialization != 'NA':
        init_weights(self.unet, init_type=self.initialization)

    self.unet.to(self.device)
def __init__(self, opt, size, continious=0, binary=0):
    """
    Construct generator

    Parameters
    ---------------
    opt: argparse.Namespace
        ``latent_dim``, ``n_classes`` and ``code_dim`` are summed to get
        the input width of the first linear layer
    size: int
        number of outputs
    continious: int
        number of continuous outputs among the ``size`` output neurons
        (stored as ``self.continious``; spelling kept for callers)
    binary: int
        number of binary outputs among the ``size`` output neurons
        (stored as ``self.binary``)
    """
    super(Generator, self).__init__()

    hidden = 512
    in_features = opt.latent_dim + opt.n_classes + opt.code_dim

    # Two hidden Linear -> BatchNorm -> LeakyReLU stages, then a plain
    # linear projection to ``size`` outputs.
    stack = [
        nn.Linear(in_features, hidden),
        nn.BatchNorm1d(hidden),
        nn.LeakyReLU(inplace=True),
        nn.Linear(hidden, hidden),
        nn.BatchNorm1d(hidden),
        nn.LeakyReLU(inplace=True),
        nn.Linear(hidden, size),
    ]
    self.dense_block = nn.Sequential(*stack)

    self.binary = binary
    self.continious = continious

    init_weights(self)
def __init__(self, opt, size, continious=0, binary=0):
    """Build the spectral-normalised trunk and its three output heads.

    Parameters
    ---------------
    opt:
        options object; ``n_classes``, ``code_dim`` and ``distribution``
        are read from it
    size: int
        input width is ``size - binary``
    continious: int
        accepted for signature compatibility; not used here
    binary: int
        asserted to be falsy
    """
    super().__init__()
    assert not binary

    width = 512
    sn = nn.utils.spectral_norm

    # Shared trunk: three spectral-normalised linear layers.
    self.dense_blocks = nn.Sequential(
        sn(nn.Linear(size - binary, width)),
        nn.LeakyReLU(inplace=True),
        sn(nn.Linear(width, width)),
        nn.LeakyReLU(inplace=True),
        sn(nn.Linear(width, width)),
        nn.LeakyReLU(inplace=True),
    )

    # Output layers
    self.adv_layer = nn.Sequential(nn.Linear(width, 1), nn.Sigmoid())
    self.category_layer = nn.Sequential(nn.Linear(width, opt.n_classes))

    latent_modules = [
        nn.Linear(width, width),
        nn.LeakyReLU(),
        nn.Linear(width, opt.code_dim),
    ]
    # A Tanh is appended to the latent head only for the 'uniform' setting.
    if opt.distribution == 'uniform':
        latent_modules.append(nn.Tanh())
    self.latent_layer = nn.Sequential(*latent_modules)

    self.binary = binary
    # Only reachable when assertions are disabled, because of the assert
    # at the top of this constructor.
    if binary:
        self.adv_layer = nn.Sequential(
            nn.Linear(opt.code_dim + binary, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 256),
            nn.LeakyReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid())

    init_weights(self)
def retrieveWeights(sess, filter_shape, layer_name):
    """Evaluate and return the weights obtained for ``layer_name``.

    Enters the ``layer_name`` name scope and the ``'weights'`` variable
    scope with ``reuse=True``, asks ``init_weights`` for the weights
    tensor, and returns the result of running it in ``sess``.
    """
    with tf.name_scope(layer_name), \
            tf.variable_scope('weights', reuse=True):
        weight_tensor = init_weights(filter_shape, layer_name)
        fetched = sess.run(weight_tensor)
    return fetched
def build_model(self, mode='normal'):
    """Create the encoder/decoder networks, their optimizer and device.

    Builds the five sub-networks, creates an Adam optimizer with learning
    rate ``self.parameter['lr']``, moves every network to the selected
    device, runs ``init_weights(net, mode)`` on each, and finally loads
    saved weights when ``self.load_model`` is truthy.
    """
    self.Encoder = DQNEncoder(input_size=self.num_slot + self.num_test)
    self.SymptomDecoder = DQNSymptomDecoder(output_size=self.num_slot + 2)
    self.TestDecoder = DQNTestDecoder(output_size=self.num_test)
    self.DiseaseDecoder = DQNDiseaseDecoder(output_size=self.num_disease)
    self.AuxiliaryDecoder = DQNAuxiliaryDecoder(
        output_size=self.num_test + self.num_slot)

    # NOTE(review): DiseaseDecoder is not handed to the optimizer here --
    # presumably it is trained elsewhere; confirm this is intentional.
    optimized_nets = (self.Encoder, self.SymptomDecoder,
                      self.TestDecoder, self.AuxiliaryDecoder)
    self.optimizer_para = [
        weight for net in optimized_nets for weight in net.parameters()
    ]
    self.optimizer = optim.Adam(self.optimizer_para, self.parameter['lr'])

    # Use the configured CUDA device when CUDA is available, else the CPU.
    if torch.cuda.is_available():
        device_name = 'cuda:' + str(self.parameter['cuda_idx'])
    else:
        device_name = 'cpu'
    self.device = torch.device(device_name)

    all_nets = (self.Encoder, self.SymptomDecoder, self.TestDecoder,
                self.DiseaseDecoder, self.AuxiliaryDecoder)
    for net in all_nets:
        net.to(self.device)
    for net in all_nets:
        init_weights(net, mode)

    if self.load_model:
        self.load(self.parameter['model_savepath'] + '/newest/')
def train(X_train, X_valid, y_train, y_valid, weights_file=None, init_file=None):
    """Train the network, track the best validation loss and save weights.

    Builds the model with one output per unique value in ``y_train``,
    optionally loads initial weights from ``init_file``, then alternates a
    training pass and a validation pass per epoch until ``max_epochs`` is
    reached or the user presses Ctrl-C. The parameters of the epoch with
    the lowest mean validation loss are written to ``weights_file`` every
    ``snapshot_every`` epochs and once more at the end. Finally the
    per-epoch train/validation losses are plotted into ``logs/``.

    Args:
        X_train, y_train: training inputs and labels; indexed with the
            batch indices yielded by ``network.get_batch_idx``.
        X_valid, y_valid: validation inputs and labels, indexed likewise.
        weights_file: destination passed to ``network.save_weights``.
        init_file: optional file of pre-trained weights; when ``None`` the
            model keeps whatever initialization ``model.build`` gave it.
    """
    # model parameters
    wd = 0.0005
    bs = 128
    base_lr = 0.01
    gamma = 0.0001
    p = 0.75
    mntm = 0.9
    fixed_bs = True
    mc_dropout = True
    snapshot_every = 5
    max_epochs = 100000

    def lr_update(itr):
        """Return the decayed rate base_lr * (1 + gamma * itr) ** (-p)."""
        return base_lr * (1 + gamma * itr) ** (-p)

    print('building model...')
    l_out = model.build(bs, np.unique(y_train).shape[0])
    network.print_layers(l_out)

    # check if we need to load pre-trained weights
    if init_file is not None:
        print('initializing weights from %s...' % (init_file))
        network.init_weights(l_out, init_file)
    else:
        print('initializing weights randomly...')

    # do theano stuff
    print('creating shared variables...')
    lr_shared = theano.shared(floatX(base_lr))

    print('compiling theano functions...')
    # NOTE(review): the training function is built from ``base_lr`` rather
    # than ``lr_shared``, so whether the per-update decay below reaches the
    # optimizer depends on ``create_iter_funcs_train`` -- confirm.
    train_iter = iter_funcs.create_iter_funcs_train(l_out, base_lr, mntm, wd)
    valid_iter = iter_funcs.create_iter_funcs_valid(
        l_out, bs, N=50, mc_dropout=mc_dropout)

    # prepare to start training
    best_epoch = -1
    best_train_losses_mean, best_valid_losses_mean = np.inf, np.inf
    # Bound up-front so the snapshot and the final save cannot hit an
    # unbound local when no epoch has improved yet (or on an early Ctrl-C).
    best_weights = None
    # Bound up-front so the progress line works with zero training batches.
    lr = base_lr
    print('starting training at %s' % (network.get_current_time()))
    epoch_train_losses, epoch_valid_losses = [], []
    gradient_updates = 0
    epochs = []

    # start training
    try:
        for epoch in range(1, max_epochs + 1):
            t_epoch_start = time()

            # run training for each batch
            train_losses, train_accs = [], []
            for train_idx in network.get_batch_idx(
                    X_train.shape[0], bs, fixed=fixed_bs, shuffle=True):
                X_train_batch = X_train[train_idx]
                y_train_batch = y_train[train_idx]
                train_loss, train_acc = train_iter(
                    X_train_batch, y_train_batch)

                # learning rate policy
                gradient_updates += 1
                lr = lr_update(gradient_updates)
                lr_shared.set_value(floatX(lr))

                train_losses.append(train_loss)
                train_accs.append(train_acc)

            # run validation for each batch
            valid_losses, valid_accs = [], []
            for valid_idx in network.get_batch_idx(
                    X_valid.shape[0], bs, fixed=fixed_bs, shuffle=False):
                X_valid_batch = X_valid[valid_idx]
                y_valid_batch = y_valid[valid_idx]
                valid_loss, valid_acc = valid_iter(
                    X_valid_batch, y_valid_batch)

                valid_losses.append(valid_loss)
                valid_accs.append(valid_acc)

            # average over the batches
            train_losses_mean = np.mean(train_losses)
            train_accs_mean = np.mean(train_accs)
            valid_losses_mean = np.mean(valid_losses)
            valid_accs_mean = np.mean(valid_accs)

            epochs.append(epoch)
            epoch_train_losses.append(train_losses_mean)
            epoch_valid_losses.append(valid_losses_mean)

            # display useful info; improved epochs are printed in green
            epoch_color = ('', '')
            if valid_losses_mean < best_valid_losses_mean:
                best_epoch = epoch
                best_train_losses_mean = train_losses_mean
                best_valid_losses_mean = valid_losses_mean
                best_weights = layers.get_all_param_values(l_out)
                epoch_color = ('\033[32m', '\033[0m')

            t_epoch_end = time()
            duration = t_epoch_end - t_epoch_start
            print('{}{:>4}{} | {:>10.6f} | {:>10.6f} | '
                  '{:>3.2f}% | {:>3.2f}% | '
                  '{:>1.8} | {:>4.2f}s | '.format(
                      epoch_color[0], epoch, epoch_color[1],
                      train_losses_mean, valid_losses_mean,
                      100 * train_accs_mean, 100 * valid_accs_mean,
                      lr, duration))

            # periodic snapshot of the best weights seen so far
            if (epoch % snapshot_every) == 0 and best_weights is not None:
                network.save_weights(best_weights, weights_file)
    except KeyboardInterrupt:
        print('caught ctrl-c... stopped training.')

    # display final results and save weights
    print('training finished at %s\n' % (network.get_current_time()))
    print('best local minimum for validation data at epoch %d' % (best_epoch))
    print(' train loss = %.6f' % (best_train_losses_mean))
    print(' valid loss = %.6f' % (best_valid_losses_mean))

    if best_weights is not None:
        print('saving best weights to %s' % (weights_file))
        network.save_weights(best_weights, weights_file)

    # plot the train/val loss over epochs
    print('plotting training/validation loss...')
    plt.plot(epochs, epoch_train_losses, 'b')
    plt.plot(epochs, epoch_valid_losses, 'g')
    plt.legend(('training', 'validation'))
    plt.ylabel('loss')
    plt.xlabel('epochs')
    # ``epochs`` is empty when training was interrupted in the first epoch.
    if epochs:
        plt.xlim((1, epochs[-1]))
    train_val_log = join('logs', '%s.png' % network.get_current_time())
    plt.savefig(train_val_log, bbox_inches='tight')
def train(self, model, train_loader, val_loader, num_epochs=10, log_nth=0):
    """
    Train a given model with the provided data.

    Inputs:
    - model: object initialized from a torch.nn.Module
    - train_loader: iterable of (inputs, targets) training batches
    - val_loader: iterable of (inputs, targets) validation batches
    - num_epochs: total number of epochs
    - log_nth: log the training loss every nth iteration; 0 disables all
      loss/accuracy log lines

    The model is re-initialised with ``init_weights(model, "xavier")``
    before training. Each iteration's loss is appended once to
    ``self.train_loss_history``; the training accuracy of the last batch
    of each epoch is appended to ``self.train_acc_history``. Batches are
    moved to the GPU with ``.cuda()``.
    """
    # Named ``optimizer`` so it cannot shadow a module-level ``optim``
    # import, if the file has one.
    optimizer = self.optim(model.parameters(), **self.optim_args)
    self._reset_histories()
    iter_per_epoch = len(train_loader)

    init_weights(model, "xavier")
    model.train()

    print("START TRAIN")
    start = time.time()

    for epoch in range(num_epochs):
        # Training
        for i, (inputs, targets) in enumerate(train_loader, 1):
            inputs = inputs.cuda().to(dtype=torch.float)
            targets = targets.cuda().to(dtype=torch.long)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = self.loss_func(outputs, targets)
            loss.backward()
            optimizer.step()

            # Record each loss exactly once; a duplicated append used to
            # halve the effective window of the running mean below.
            self.train_loss_history.append(loss.detach().cpu().numpy())

            if log_nth and i % log_nth == 0:
                last_log_nth_losses = self.train_loss_history[-log_nth:]
                train_loss = np.mean(last_log_nth_losses)
                print('[Iteration %d/%d] TRAIN loss: %.3f' %
                      (i + epoch * iter_per_epoch,
                       iter_per_epoch * num_epochs,
                       train_loss))

        # Accuracy of the last training batch of this epoch.
        _, preds = torch.max(outputs, 1)
        print("Preds: ", preds)
        print("Targets: ", targets)
        train_acc = np.mean((preds == targets).detach().cpu().numpy())
        self.train_acc_history.append(train_acc)
        if log_nth:
            # Recomputed here so the summary also works when log_nth is
            # larger than the number of iterations per epoch (the
            # per-iteration branch above never ran in that case).
            train_loss = np.mean(self.train_loss_history[-log_nth:])
            print('[Epoch %d/%d] TRAIN time/acc/loss/: %.3f/%.3f/%.3f' %
                  (epoch + 1, num_epochs, time.time() - start,
                   train_acc, train_loss))

        # Validation
        val_losses = []
        val_scores = []
        model.eval()
        # No gradients are needed for evaluation; skip graph construction.
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.cuda().to(dtype=torch.float)
                targets = targets.cuda().to(dtype=torch.long)

                outputs = model(inputs)
                loss = self.loss_func(outputs, targets)
                val_losses.append(loss.detach().cpu().numpy())

                _, preds = torch.max(outputs, 1)
                scores = np.mean((preds == targets).detach().cpu().numpy())
                val_scores.append(scores)

        val_acc, val_loss = np.mean(val_scores), np.mean(val_losses)
        if log_nth:
            print('[Epoch %d/%d] VAL acc/loss: %.3f/%.3f' %
                  (epoch + 1, num_epochs, val_acc, val_loss))

        # Back to training mode for the next epoch.
        model.train()

    end = time.time()
    print("FINISH")
    print("TIME ELAPSED: {0}".format(end - start))
batch_size=1, pin_memory=True, num_workers=4, shuffle=False, collate_fn=JAADCollateClassification) if args.rgb: i3d_rgb = I3D(num_classes=400, modality='rgb') i3d_rgb.load_state_dict(torch.load(args.rgb_weights_path)) i3d_rgb.conv3d_0c_1x1 = Unit3Dpy(in_channels=1024 * 3, out_channels=2, kernel_size=(1, 1, 1), activation=None, use_bias=True, use_bn=False) init_weights(i3d_rgb.conv3d_0c_1x1) i3d_rgb.cuda() if args.flow: i3d_flow = I3D(num_classes=400, modality='flow') i3d_flow.load_state_dict(torch.load(args.flow_weights_path)) i3d_flow.conv3d_0c_1x1 = Unit3Dpy(in_channels=1024, out_channels=2, kernel_size=(1, 1, 1), activation=None, use_bias=True, use_bn=False) init_weights(i3d_flow.conv3d_0c_1x1) i3d_flow.cuda() optimizer = optim.Adam([{"params": i3d_rgb.parameters()}], lr=0.001) for epoch in range(args.epoch):
def train(
        X_train, X_valid, y_train, y_valid,
        weights_file=None, init_file=None):
    """Train the network, track the best validation loss and save weights.

    Builds the model with one output per unique value in ``y_train``,
    optionally loads initial weights from ``init_file``, then alternates a
    training pass and a validation pass per epoch until ``max_epochs`` is
    reached or the user presses Ctrl-C. The parameters of the epoch with
    the lowest mean validation loss are written to ``weights_file`` every
    ``snapshot_every`` epochs and once more at the end. Finally the
    per-epoch train/validation losses are plotted into ``logs/``.

    Args:
        X_train, y_train: training inputs and labels; indexed with the
            batch indices yielded by ``network.get_batch_idx``.
        X_valid, y_valid: validation inputs and labels, indexed likewise.
        weights_file: destination passed to ``network.save_weights``.
        init_file: optional file of pre-trained weights; when ``None`` the
            model keeps whatever initialization ``model.build`` gave it.
    """
    # model parameters
    wd = 0.0005
    bs = 128
    base_lr = 0.01
    gamma = 0.0001
    p = 0.75
    mntm = 0.9
    fixed_bs = True
    mc_dropout = True
    snapshot_every = 5
    max_epochs = 100000

    def lr_update(itr):
        """Return the decayed rate base_lr * (1 + gamma * itr) ** (-p)."""
        return base_lr * (1 + gamma * itr) ** (-p)

    print('building model...')
    l_out = model.build(bs, np.unique(y_train).shape[0])
    network.print_layers(l_out)

    # check if we need to load pre-trained weights
    if init_file is not None:
        print('initializing weights from %s...' % (init_file))
        network.init_weights(l_out, init_file)
    else:
        print('initializing weights randomly...')

    # do theano stuff
    print('creating shared variables...')
    lr_shared = theano.shared(floatX(base_lr))

    print('compiling theano functions...')
    # NOTE(review): the training function is built from ``base_lr`` rather
    # than ``lr_shared``, so whether the per-update decay below reaches the
    # optimizer depends on ``create_iter_funcs_train`` -- confirm.
    train_iter = iter_funcs.create_iter_funcs_train(l_out, base_lr, mntm, wd)
    valid_iter = iter_funcs.create_iter_funcs_valid(
        l_out, bs, N=50, mc_dropout=mc_dropout)

    # prepare to start training
    best_epoch = -1
    best_train_losses_mean, best_valid_losses_mean = np.inf, np.inf
    # Bound up-front so the snapshot and the final save cannot hit an
    # unbound local when no epoch has improved yet (or on an early Ctrl-C).
    best_weights = None
    # Bound up-front so the progress line works with zero training batches.
    lr = base_lr
    print('starting training at %s' % (
        network.get_current_time()))
    epoch_train_losses, epoch_valid_losses = [], []
    gradient_updates = 0
    epochs = []

    # start training
    try:
        for epoch in range(1, max_epochs + 1):
            t_epoch_start = time()

            # run training for each batch
            train_losses, train_accs = [], []
            for train_idx in network.get_batch_idx(
                    X_train.shape[0], bs, fixed=fixed_bs, shuffle=True):
                X_train_batch = X_train[train_idx]
                y_train_batch = y_train[train_idx]
                train_loss, train_acc = train_iter(
                    X_train_batch, y_train_batch)

                # learning rate policy
                gradient_updates += 1
                lr = lr_update(gradient_updates)
                lr_shared.set_value(floatX(lr))

                train_losses.append(train_loss)
                train_accs.append(train_acc)

            # run validation for each batch
            valid_losses, valid_accs = [], []
            for valid_idx in network.get_batch_idx(
                    X_valid.shape[0], bs, fixed=fixed_bs, shuffle=False):
                X_valid_batch = X_valid[valid_idx]
                y_valid_batch = y_valid[valid_idx]
                valid_loss, valid_acc = valid_iter(
                    X_valid_batch, y_valid_batch)

                valid_losses.append(valid_loss)
                valid_accs.append(valid_acc)

            # average over the batches
            train_losses_mean = np.mean(train_losses)
            train_accs_mean = np.mean(train_accs)
            valid_losses_mean = np.mean(valid_losses)
            valid_accs_mean = np.mean(valid_accs)

            epochs.append(epoch)
            epoch_train_losses.append(train_losses_mean)
            epoch_valid_losses.append(valid_losses_mean)

            # display useful info; improved epochs are printed in green
            epoch_color = ('', '')
            if valid_losses_mean < best_valid_losses_mean:
                best_epoch = epoch
                best_train_losses_mean = train_losses_mean
                best_valid_losses_mean = valid_losses_mean
                best_weights = layers.get_all_param_values(l_out)
                epoch_color = ('\033[32m', '\033[0m')

            t_epoch_end = time()
            duration = t_epoch_end - t_epoch_start
            print('{}{:>4}{} | {:>10.6f} | {:>10.6f} | '
                  '{:>3.2f}% | {:>3.2f}% | '
                  '{:>1.8} | {:>4.2f}s | '.format(
                      epoch_color[0], epoch, epoch_color[1],
                      train_losses_mean, valid_losses_mean,
                      100 * train_accs_mean, 100 * valid_accs_mean,
                      lr, duration))

            # periodic snapshot of the best weights seen so far
            if (epoch % snapshot_every) == 0 and best_weights is not None:
                network.save_weights(best_weights, weights_file)
    except KeyboardInterrupt:
        print('caught ctrl-c... stopped training.')

    # display final results and save weights
    print('training finished at %s\n' % (
        network.get_current_time()))
    print('best local minimum for validation data at epoch %d' % (
        best_epoch))
    print(' train loss = %.6f' % (
        best_train_losses_mean))
    print(' valid loss = %.6f' % (
        best_valid_losses_mean))

    if best_weights is not None:
        print('saving best weights to %s' % (weights_file))
        network.save_weights(best_weights, weights_file)

    # plot the train/val loss over epochs
    print('plotting training/validation loss...')
    plt.plot(epochs, epoch_train_losses, 'b')
    plt.plot(epochs, epoch_valid_losses, 'g')
    plt.legend(('training', 'validation'))
    plt.ylabel('loss')
    plt.xlabel('epochs')
    # ``epochs`` is empty when training was interrupted in the first epoch.
    if epochs:
        plt.xlim((1, epochs[-1]))
    train_val_log = join('logs', '%s.png' % network.get_current_time())
    plt.savefig(train_val_log, bbox_inches='tight')