def train(self, batch, labels=None, loss="quadratic", learning_rate=0.01, epochs=1, mini_batch_size=1):
    if labels is not None:
        batch = np.c_[batch, labels]
    amount_of_labels = len(set(batch[:, -1]))
    for epoch in range(epochs):
        print("Epoch: ", epoch, end=", ")
        np.random.shuffle(batch)  # avoids correlated mini batches or memorization of order
        avg_loss_epoch = []  # average loss over all samples in batch for this epoch
        sample_i = 0
        while sample_i < (len(batch) - mini_batch_size):
            mini_batch = batch[sample_i:sample_i + mini_batch_size]
            input_values, labels = mini_batch[:, :-1], mini_batch[:, -1]
            # one-hot-encoding of numerical labels:
            labels = np.eye(amount_of_labels)[labels.astype(int)]
            raw_outputs, activations, activated_outputs = \
                self.inference(input_values, save_outputs=True)
            # Get loss function and its derivatives:
            # ("dx_y" means partial derivative of y to x)
            minibatch_loss = get_loss(loss)(activated_outputs[-1], labels)
            avg_loss_epoch.append(minibatch_loss)
            try:
                da_loss = get_loss(loss, d="da_")(activated_outputs[-1], labels)
                dz_a = get_activation(activations[-1], d="dz_")(raw_outputs[-1])
                dz_loss = np.multiply(da_loss, dz_a)  # Hadamard product
            except AttributeError:
                dz_loss = get_loss(loss, d="dz_")(activated_outputs[-1], labels)
            for l in range(1, len(self.weights)):
                m, n = activated_outputs[-l - 1].shape
                # faster than stacking ones to our activated outputs:
                activated_outputs_with_ones = np.ones((m, n + 1))
                activated_outputs_with_ones[:, :-1] = activated_outputs[-l - 1]
                dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
                self.weights[-l] = self.weights[-l] - learning_rate * dw_loss / len(batch)
                dz_a = get_activation(activations[-l - 1], d="dz_")(raw_outputs[-l - 1])
                dz_loss = np.multiply(
                    np.matmul(dz_loss, self.weights[-l][:-1, :].T),  # removed biases
                    dz_a
                )
            m, n = activated_outputs[0].shape
            activated_outputs_with_ones = np.ones((m, n + 1))
            activated_outputs_with_ones[:, :-1] = activated_outputs[0]
            dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
            self.weights[0] = self.weights[0] - learning_rate * dw_loss / len(batch)
            sample_i += mini_batch_size
        avg_loss_epoch = np.sum(np.array(avg_loss_epoch)) / np.array(avg_loss_epoch).size
        print("Loss: ", avg_loss_epoch)
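# --- Editor's sketch (assumption, not from the original repo) ---
# The trainer above looks up losses and activations by name, with "da_" / "dz_"
# prefixes for partial derivatives, and falls back via AttributeError when a
# direct dz_ derivative is unavailable. A minimal registry matching that calling
# convention, assuming a quadratic loss and a sigmoid activation:
import sys
import numpy as np

def quadratic(a, y):
    return 0.5 * np.sum((a - y) ** 2)

def da_quadratic(a, y):
    return a - y  # dL/da

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def dz_sigmoid(z):
    s = sigmoid(z)
    return s * (1.0 - s)  # da/dz

def get_loss(name, d=""):
    # getattr raises AttributeError for missing "dz_" variants,
    # which is what the try/except in the trainer relies on
    return getattr(sys.modules[__name__], d + name)

def get_activation(name, d=""):
    return getattr(sys.modules[__name__], d + name)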
def train(_config, resume: bool = False, test: bool = False):
    print(json.dumps(_config, indent=4))
    device = torch.device(_config['device'])
    os.environ["CUDA_VISIBLE_DEVICES"] = str(device.index)
    device = torch.device(0)

    dataset = _config['data']['dataset']
    model_name = _config['model']['name']
    optimizer_name = _config['optimizer']['name']
    scheduler_name = _config['scheduler']['name']

    loss = utils.get_loss(_config['loss']['name'])
    loss.to(device)

    model = create_model(dataset, _config['model'][model_name], _config['model']['stadaptor'], device)
    optimizer = utils.get_optimizer(optimizer_name, model.parameters(), **_config['optimizer'][optimizer_name])
    scheduler = None
    if scheduler_name is not None:
        scheduler = utils.get_scheduler(scheduler_name, optimizer, **_config['scheduler'][scheduler_name])

    save_folder = os.path.join('saves', dataset, _config['name'])
    if not resume and not test:
        shutil.rmtree(save_folder, ignore_errors=True)
        os.makedirs(save_folder)
        with open(os.path.join(save_folder, 'config.yaml'), 'w+') as _f:
            yaml.safe_dump(_config, _f)

    datasets = utils.get_datasets(dataset, _config['data']['input_dim'], _config['data']['output_dim'])
    scaler = utils.ZScoreScaler(datasets['train'].mean, datasets['train'].std)
    trainer = utils.OursTrainer(model, loss, scaler, device, optimizer, **_config['trainer'])

    if not test:
        utils.train_model(datasets=datasets,
                          batch_size=_config['data']['batch-size'],
                          folder=save_folder,
                          trainer=trainer,
                          scheduler=scheduler,
                          epochs=_config['epochs'],
                          early_stop_steps=_config['early_stop_steps'])
    utils.test_model(datasets=datasets,
                     batch_size=_config['data']['batch-size'],
                     trainer=trainer,
                     folder=save_folder)
def build_model(inputs, labels):
    x = batch_norm_3d(inputs=inputs, name="input/batch_norm")
    net = model(x)
    loss = get_loss(labels=labels,
                    predictions=net["output"],
                    loss_type=FLAGS.loss_type,
                    scope=FLAGS.loss_type,
                    huber_delta=FLAGS.huber_delta)
    dsc = get_dsc(labels=labels, predictions=net["output"])
    net["loss"] = loss
    net["dsc"] = dsc
    return net
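# --- Editor's sketch (assumption) ---
# A plausible shape for the get_loss dispatcher used in build_model above,
# written against the TF1 tf.losses API; the set of FLAGS.loss_type values in
# the original code is not known beyond "huber" being supported via huber_delta.
import tensorflow as tf

def get_loss(labels, predictions, loss_type, scope, huber_delta=1.0):
    with tf.name_scope(scope):
        if loss_type == "huber":
            return tf.losses.huber_loss(labels, predictions, delta=huber_delta)
        if loss_type == "mse":
            return tf.losses.mean_squared_error(labels, predictions)
        raise ValueError("unknown loss_type: %s" % loss_type)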
def auto_train(self, cur_idx):
    tot_iter = Config.min_iters
    while True:
        utils.save_as_pkl("data/train_iter.pkl", tot_iter)
        seg_train()
        loss = utils.get_loss()
        if loss < Config.max_allowed_loss:
            Config.last_ckpt = cur_idx
            with open("best_ckpt.txt", "a", encoding="utf-8") as file:
                file.write("best checkpoint: stage_" + str(Config.last_ckpt) + "_ckpt\n")
            break
        else:
            print("Restart training ...",
                  utils.read_from_pkl("data/selected_sents.pkl"),
                  "examples with train iters of", tot_iter,
                  "failed ... Abnormal loss:", loss)
            tot_iter += 5
            if tot_iter > Config.max_iters:
                break
            utils.del_checkpoint()
    utils.save_ckpt()
def train(dset, model, optim, epoch, logger):
    dset.set_mode('train')
    train_loader = DataLoader(dset, batch_size=args.batch_size, shuffle=True, collate_fn=pad_collate)

    model.train()
    losses = AverageMeter()
    accs = AverageMeter()

    for i, data in enumerate(train_loader):
        optim.zero_grad()
        images, questions, answers = data
        images = Variable(images.float().cuda())
        questions = Variable(questions.long().cuda())
        answers = Variable(answers.long().cuda())
        mask = Variable(get_mask(answers).cuda())

        loss, acc = get_loss(model, images, questions, answers, mask)
        loss.sum().backward()

        # Clip gradients: gradients are modified in place
        _ = torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        # Keep track of metrics
        losses.update(loss.sum().item())
        accs.update(acc)

        if i % print_freq == 0:
            logger.info('[Epoch {}][{}/{}] [Training]\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                        'Accuracy {accs.val:.3f} ({accs.avg:.3f})'.format(
                            epoch, i, len(train_loader), loss=losses, accs=accs))

        optim.step()
def valid(dset, model, epoch, logger):
    dset.set_mode('val')
    valid_loader = DataLoader(dset, batch_size=args.batch_size, shuffle=False, collate_fn=pad_collate)

    model.eval()
    losses = AverageMeter()
    accs = AverageMeter()

    for batch_idx, data in enumerate(valid_loader):
        images, questions, answers = data
        images = Variable(images.float().cuda())
        questions = Variable(questions.long().cuda())
        answers = Variable(answers.long().cuda())
        mask = Variable(get_mask(answers).cuda())

        loss, acc = get_loss(model, images, questions, answers, mask)

        # Keep track of metrics
        losses.update(loss.sum().item())
        accs.update(acc)

    logger.info('[Epoch {}] [Validate] Accuracy : {:.4f}'.format(epoch, accs.avg))
    return accs.avg
def main(): # data data = 'flickr30k' batch_size = 100 setting = Setting(data) if data == 'flickr30k': txt_length = 80 data_set = Flickr30k(root=setting.root, ann_file=setting.ann_file, transform=setting.transforms) data_size = len(data_set) test_data, eval_data, train_data = random_split(data_set, [1000, 1000, data_size - 2000]) elif data == 'coco': txt_length = 60 train_data = Coco(root=setting.train_root, annFile=setting.train_ann_file, transform=setting.transforms) val_data = Coco( root=setting.val_root, annFile=setting.val_ann_file, transform=setting.transforms) data_set = train_data + val_data data_size = len(data_set) test_data, eval_data, train_data = random_split(data_set, [1000, 1000, data_size - 2000]) train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=10) eval_loader = DataLoader(dataset=eval_data, batch_size=batch_size, shuffle=True, num_workers=10) test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True, num_workers=10) if USE_W2V: print('--------- loading the word2vec model ....... ---------') wv_model = KeyedVectors.load('/data1/yangdejie/data/glove.42B.300d.wv.bin', mmap='r') print('--------- loaded the word2vec model ........ ---------') lambdas = { 'tt': 1.0, 'vv': 1.0, 'tv': 0.9, 'vt': 0.5 } model = ObjectOrientedAttentionNetwork(lambdas=lambdas).cuda() optimizer = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=1e-6) lr_scheduler = MultiStepLR(optimizer, milestones=[15], gamma=0.1) epochs = 24 for epoch in range(epochs): model.train() # model.img_net.FasterRCNN.eval() for i, (imgs, txts) in enumerate(train_loader): features = imgs.cuda() txts_vec = transfer_vec(txts, wv_model, padding=txt_length, dim=TXT_DIM) if USE_W2V: txts_tensor = torch.from_numpy(np.array(txts_vec)).cuda() else: pass optimizer.zero_grad() txts_tensor.requires_grad = True print('-' * 30) out = model(features, txts_tensor) print('*' * 30) print(out['Vvt'].size(), out['Evt'].size(), '\n', out['Vtv'].size(), out['Etv'].size(), '\n', out['Vvv'].size(), out['Ett'].size()) loss = 0.7 * (get_loss(out['Vvt'], out['Evt']) + get_loss(out['Vtv'], out['Etv'])) + \ 0.3 * get_loss(out['Vvv'], out['Ett']) print('epoch [%d/%d] , batch:%d' % (epochs, epoch, i), loss.data) loss.backward() optimizer.step() lr_scheduler.step(epoch)
data2.x = new_features2  # new_block2.verts_packed().reshape(-1, 3)
out_features2, offset2 = gcn2(data2)
out_features2, offset2 = out_features2.squeeze(), offset2.squeeze()  # TODO
new_block2 = new_block2.offset_verts(offset2)  # TODO + new_features2?

new_block3, new_features3 = uppooling2(new_block2, torch.cat((new_block2.verts_packed(), out_features2), 1))
data3.x = new_features3  # new_block3.verts_packed().reshape(-1, 3)
out_features3, offset3 = gcn3(data3)
out_features3, offset3 = out_features3.squeeze(), offset3.squeeze()
new_block3 = new_block3.offset_verts(offset3)

loss = get_loss(new_block1, trg_mesh, w_chamfer, w_edge, w_normal, w_laplacian) + \
       get_loss(new_block2, trg_mesh, w_chamfer, w_edge, w_normal, w_laplacian) + \
       get_loss(new_block3, trg_mesh, w_chamfer, w_edge, w_normal, w_laplacian)

t.set_description("loss = {}".format(loss))
if WANDB:
    wandb.log({"Train Loss": loss})
losses.append(loss.detach())

# Plot mesh
if i % plot_period == 0 and i != 0:
    plot_pointcloud(new_block1, title="iter: %d" % i)
    plot_pointcloud(new_block2, title="iter: %d" % i)
    plot_pointcloud(new_block3, title="iter: %d" % i)
    plt.imshow(image.squeeze().permute(1, 2, 0).detach().cpu().numpy())
def main(self): dataset_class = self.datasets[self.args.dataset]( root=self.args.root, add_labeled=self.args.add_labeled, advanced_transforms=True, merged=self.args.merged, remove_classes=self.args.remove_classes, oversampling=self.args.oversampling, unlabeled_subset_ratio=self.args.unlabeled_subset, seed=self.args.seed, start_labeled=self.args.start_labeled) _, labeled_dataset, unlabeled_dataset, labeled_indices, unlabeled_indices, test_dataset = \ dataset_class.get_dataset() labeled_loader, unlabeled_loader, val_loader = create_loaders( self.args, labeled_dataset, unlabeled_dataset, test_dataset, labeled_indices, unlabeled_indices, self.kwargs, dataset_class.unlabeled_subset_num) base_dataset = dataset_class.get_base_dataset_autoencoder() base_loader = create_base_loader(base_dataset, self.kwargs, self.args.batch_size) reconstruction_loss_log = [] bce_loss = nn.BCELoss().cuda() l1_loss = nn.L1Loss() l2_loss = nn.MSELoss() ssim_loss = SSIM(size_average=True, data_range=1.0, nonnegative_ssim=True) criterions_reconstruction = { 'bce': bce_loss, 'l1': l1_loss, 'l2': l2_loss, 'ssim': ssim_loss } criterion_cl = get_loss(self.args, dataset_class.labeled_class_samples, reduction='none') model, optimizer, self.args = create_model_optimizer_autoencoder( self.args, dataset_class) best_loss = np.inf metrics_per_cycle = pd.DataFrame([]) metrics_per_epoch = pd.DataFrame([]) num_class_per_cycle = pd.DataFrame([]) best_recall, best_report, last_best_epochs = 0, None, 0 best_model = deepcopy(model) self.args.start_epoch = 0 self.args.weak_supervision_strategy = "random_sampling" current_labeled = dataset_class.start_labeled for epoch in range(self.args.start_epoch, self.args.epochs): cl_train_loss, losses_avg_reconstruction, losses_reconstruction = \ self.train(labeled_loader, model, criterion_cl, optimizer, last_best_epochs, epoch, criterions_reconstruction, base_loader) val_loss, val_report = self.validate(val_loader, model, last_best_epochs, criterion_cl) reconstruction_loss_log.append(losses_avg_reconstruction.tolist()) best_loss = min(best_loss, losses_reconstruction.avg) is_best = val_report['macro avg']['recall'] > best_recall last_best_epochs = 0 if is_best else last_best_epochs + 1 val_report = pd.concat([val_report, cl_train_loss, val_loss], axis=1) metrics_per_epoch = pd.concat([metrics_per_epoch, val_report]) if epoch > self.args.labeled_warmup_epochs and last_best_epochs > self.args.add_labeled_epochs: metrics_per_cycle = pd.concat([metrics_per_cycle, best_report]) labeled_loader, unlabeled_loader, val_loader, labeled_indices, unlabeled_indices = \ perform_sampling(self.args, None, None, epoch, model, labeled_loader, unlabeled_loader, dataset_class, labeled_indices, unlabeled_indices, labeled_dataset, unlabeled_dataset, test_dataset, self.kwargs, current_labeled, model) current_labeled += self.args.add_labeled last_best_epochs = 0 if self.args.reset_model: model, optimizer, self.args = create_model_optimizer_autoencoder( self.args, dataset_class) if self.args.novel_class_detection: num_classes = [ np.sum( np.array(base_dataset.targets)[labeled_indices] == i) for i in range(len(base_dataset.classes)) ] num_class_per_cycle = pd.concat([ num_class_per_cycle, pd.DataFrame.from_dict( { cls: num_classes[i] for i, cls in enumerate(base_dataset.classes) }, orient='index').T ]) criterion_cl = get_loss(self.args, dataset_class.labeled_class_samples, reduction='none') else: best_recall = val_report['macro avg'][ 'recall'] if is_best else best_recall best_report = val_report if is_best else best_report 
best_model = deepcopy(model) if is_best else best_model if current_labeled > self.args.stop_labeled: break save_checkpoint( self.args, { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': best_recall, }, is_best) if self.args.store_logs: store_logs(self.args, pd.DataFrame(reconstruction_loss_log, columns=['bce', 'l1', 'l2', 'ssim']), log_type='ae_loss') store_logs(self.args, metrics_per_cycle) store_logs(self.args, metrics_per_epoch, log_type='epoch_wise') store_logs(self.args, num_class_per_cycle, log_type='novel_class') self.model = model return model
def main(): opts = get_argparser().parse_args() opts = modify_command_options(opts) # Set up visualization vis = Visualizer(port=opts.vis_port, env=opts.vis_env) if opts.enable_vis else None if vis is not None: # display options vis.vis_table("Options", vars(opts)) os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_id device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("Device: %s" % device) # Set up random seed torch.manual_seed(opts.random_seed) torch.cuda.manual_seed(opts.random_seed) np.random.seed(opts.random_seed) random.seed(opts.random_seed) # Set up dataloader train_dst, val_dst = get_dataset(opts) train_loader = data.DataLoader(train_dst, batch_size=opts.batch_size, shuffle=True, num_workers=opts.num_workers) val_loader = data.DataLoader( val_dst, batch_size=opts.batch_size if opts.crop_val else 1, shuffle=False, num_workers=opts.num_workers) print("Dataset: %s, Train set: %d, Val set: %d" % (opts.dataset, len(train_dst), len(val_dst))) # Set up model print("Backbone: %s" % opts.backbone) model = DeepLabv3(num_classes=opts.num_classes, backbone=opts.backbone, pretrained=True, momentum=opts.bn_mom, output_stride=opts.output_stride, use_separable_conv=opts.use_separable_conv) if opts.use_gn == True: print("[!] Replace BatchNorm with GroupNorm!") model = utils.convert_bn2gn(model) if opts.fix_bn == True: model.fix_bn() if torch.cuda.device_count() > 1: # Parallel print("%d GPU parallel" % (torch.cuda.device_count())) model = torch.nn.DataParallel(model) model_ref = model.module # for ckpt else: model_ref = model model = model.to(device) # Set up metrics metrics = StreamSegMetrics(opts.num_classes) # Set up optimizer decay_1x, no_decay_1x = model_ref.group_params_1x() decay_10x, no_decay_10x = model_ref.group_params_10x() optimizer = torch.optim.SGD(params=[ { "params": decay_1x, 'lr': opts.lr, 'weight_decay': opts.weight_decay }, { "params": no_decay_1x, 'lr': opts.lr }, { "params": decay_10x, 'lr': opts.lr * 10, 'weight_decay': opts.weight_decay }, { "params": no_decay_10x, 'lr': opts.lr * 10 }, ], lr=opts.lr, momentum=opts.momentum, nesterov=not opts.no_nesterov) del decay_1x, no_decay_1x, decay_10x, no_decay_10x if opts.lr_policy == 'poly': scheduler = utils.PolyLR(optimizer, max_iters=opts.epochs * len(train_loader), power=opts.lr_power) elif opts.lr_policy == 'step': scheduler = torch.optim.lr_scheduler.StepLR( optimizer, step_size=opts.lr_decay_step, gamma=opts.lr_decay_factor) print("Optimizer:\n%s" % (optimizer)) utils.mkdir('checkpoints') # Restore best_score = 0.0 cur_epoch = 0 if opts.ckpt is not None and os.path.isfile(opts.ckpt): checkpoint = torch.load(opts.ckpt) model_ref.load_state_dict(checkpoint["model_state"]) optimizer.load_state_dict(checkpoint["optimizer_state"]) scheduler.load_state_dict(checkpoint["scheduler_state"]) cur_epoch = checkpoint["epoch"] + 1 best_score = checkpoint['best_score'] print("Model restored from %s" % opts.ckpt) del checkpoint # free memory else: print("[!] 
Retrain") def save_ckpt(path): """ save current model """ state = { "epoch": cur_epoch, "model_state": model_ref.state_dict(), "optimizer_state": optimizer.state_dict(), "scheduler_state": scheduler.state_dict(), "best_score": best_score, } torch.save(state, path) print("Model saved as %s" % path) # Set up criterion criterion = utils.get_loss(opts.loss_type) #========== Train Loop ==========# vis_sample_id = np.random.randint( 0, len(val_loader), opts.vis_sample_num, np.int32) if opts.enable_vis else None # sample idxs for visualization label2color = utils.Label2Color(cmap=utils.color_map( opts.dataset)) # convert labels to images denorm = utils.Denormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # denormalization for ori images while cur_epoch < opts.epochs: # ===== Train ===== model.train() if opts.fix_bn == True: model_ref.fix_bn() epoch_loss = train(cur_epoch=cur_epoch, criterion=criterion, model=model, optim=optimizer, train_loader=train_loader, device=device, scheduler=scheduler, vis=vis) print("End of Epoch %d/%d, Average Loss=%f" % (cur_epoch, opts.epochs, epoch_loss)) if opts.enable_vis: vis.vis_scalar("Epoch Loss", cur_epoch, epoch_loss) # ===== Save Latest Model ===== if (cur_epoch + 1) % opts.ckpt_interval == 0: save_ckpt('checkpoints/latest_%s_%s.pkl' % (opts.backbone, opts.dataset)) # ===== Validation ===== if (cur_epoch + 1) % opts.val_interval == 0: print("validate on val set...") model.eval() val_score, ret_samples = validate(model=model, loader=val_loader, device=device, metrics=metrics, ret_samples_ids=vis_sample_id) print(metrics.to_str(val_score)) # ===== Save Best Model ===== if val_score['Mean IoU'] > best_score: # save best model best_score = val_score['Mean IoU'] save_ckpt('checkpoints/best_%s_%s.pkl' % (opts.backbone, opts.dataset)) if vis is not None: # visualize validation score and samples vis.vis_scalar("[Val] Overall Acc", cur_epoch, val_score['Overall Acc']) vis.vis_scalar("[Val] Mean IoU", cur_epoch, val_score['Mean IoU']) vis.vis_table("[Val] Class IoU", val_score['Class IoU']) for k, (img, target, lbl) in enumerate(ret_samples): img = (denorm(img) * 255).astype(np.uint8) target = label2color(target).transpose(2, 0, 1).astype(np.uint8) lbl = label2color(lbl).transpose(2, 0, 1).astype(np.uint8) concat_img = np.concatenate((img, target, lbl), axis=2) # concat along width vis.vis_image('Sample %d' % k, concat_img) if opts.val_on_trainset == True: # validate on train set print("validate on train set...") model.eval() train_score, _ = validate(model=model, loader=train_loader, device=device, metrics=metrics) print(metrics.to_str(train_score)) if vis is not None: vis.vis_scalar("[Train] Overall Acc", cur_epoch, train_score['Overall Acc']) vis.vis_scalar("[Train] Mean IoU", cur_epoch, train_score['Mean IoU']) cur_epoch += 1
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),
    hvd.callbacks.MetricAverageCallback(),
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
]

if hvd.rank() == 0:
    callbacks.append(keras.callbacks.ModelCheckpoint('logdir/checkpoint-{}.h5'.format(ind)))

model = Deeplabv3(input_shape=(img_rows, img_cols, num_channels), classes=N_CLASSES)

# weights, w = get_weights(None, N_CLASSES, perPixel=0, enable=False)
weights = np.ones(5)
weights = K.variable(weights)
model.compile(optimizer='adam',
              loss=get_loss(weights),
              metrics=['accuracy', get_mean_iou(N_CLASSES)])

print('steps_per_epoch:', steps_per_epoch // hvd.size())
results = model.fit_generator(train_generator,
                              steps_per_epoch=steps_per_epoch // hvd.size(),
                              validation_data=val_generator,
                              validation_steps=validation_steps // hvd.size(),
                              epochs=50,
                              verbose=1,
                              callbacks=callbacks,
                              shuffle=True)
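# --- Editor's sketch (assumption) ---
# get_loss(weights) above returns a Keras loss closed over the per-class weight
# vector; one plausible implementation is a class-weighted categorical
# cross-entropy, which is common for segmentation with imbalanced classes.
from keras import backend as K

def get_loss(class_weights):
    def weighted_categorical_crossentropy(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # cross-entropy per pixel, scaled by the weight of the true class
        ce = -K.sum(y_true * K.log(y_pred) * class_weights, axis=-1)
        return K.mean(ce)
    return weighted_categorical_crossentropy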
def loss(model, x, ds, training):
    y_ = model(x, training=training)
    return get_loss(y_, ds[1:4], ds[4], ds[5][0], ds[6][0])
val_data = SteelDataset(train_df.iloc[valid_idx],
                        mode='valid',
                        fine_size=args.fine_size,
                        pad_left=args.pad_left,
                        pad_right=args.pad_right,
                        transforms=get_transforms())
val_loader = DataLoader(val_data,
                        shuffle=False,
                        batch_size=args.batch_size,
                        num_workers=0,  # cpu_count(),
                        pin_memory=True)

num_snapshot = 0
best_acc = 0
criterion = get_loss(args.loss)

for epoch in tqdm(range(args.epoch)):
    train_loss = train(train_loader, steel, criterion)
    val_loss, accuracy = test(val_loader, steel, criterion)
    lr_scheduler.step()

    if accuracy > best_acc:
        best_acc = accuracy
        best_param = steel.state_dict()

    if (epoch + 1) % scheduler_step == 0:
        torch.save(best_param,
                   os.path.join(args.save_weight, args.weight_name + str(num_snapshot) + '.pth'))
        optimizer = torch.optim.SGD(steel.parameters(),
neural_network = EncDecNetLite()
# Initialize weights
neural_network.init()
losses = []

# Main cycle
for i in range(UPDATES_NUM):
    # Get random batch for Stochastic Gradient Descent
    X_batch_train = get_random_batch(batches_train, BATCH_SIZE)
    # Forward pass, calculate network's outputs
    Y_batch = neural_network.forward(X_batch_train)
    # Calculate sum squared loss
    loss = get_loss(Y_batch, X_batch_train)
    # Backward pass, calculate derivatives of loss w.r.t. weights
    dw = neural_network.backprop(Y_batch, X_batch_train)
    # Correct neural network's weights
    neural_network.apply_dw(dw)
    # Print the loss every 10 iterations
    if i % 10 == 0:
        print("Cost after iteration {}: {}".format(i, loss))
        losses.append(loss)

# --------------------------------------------------------------------------------------
# plot the loss
plt.plot(losses)
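# --- Editor's sketch (assumption) ---
# The "sum squared loss" used by the loop above, assuming get_loss compares the
# autoencoder's reconstruction Y_batch against its input X_batch; the exact
# normalization in the original EncDecNetLite helpers may differ.
import numpy as np

def get_loss(Y_batch, X_batch):
    return 0.5 * np.sum((Y_batch - X_batch) ** 2) / X_batch.shape[0]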
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

# create the models
Generator, Discriminator = get_model(args)

# optimizers for the generator and discriminator
optimizer_G = torch.optim.Adam(params=Generator.parameters(), lr=args.lr, betas=(args.beta1, args.beta2))
optimizer_D = torch.optim.Adam(params=Discriminator.parameters(), lr=args.lr, betas=(args.beta1, args.beta2))

# get loss functions according to the given loss type
gen_loss, disc_loss = get_loss(args.loss_type)

# booleans for gradient penalty, relativistic loss and relativistic average loss;
# used for determining the loss parameters
gradient_pen = args.loss_type in ["wgan-gp", "rsgan-gp", "rasgan-gp"]
relativistic = args.loss_type in ["rsgan", "rasgan", "ralsgan", "rahingegan", "rsgan-gp", "rasgan-gp"]
average = args.loss_type in ["rasgan", "ralsgan", "rahingegan", "rasgan-gp"]

# get dataset (cifar10 or cat)
dataset = get_dataset(args.dataset)

if args.fid_iter:
    # set the number of samples to generate to the number of images in the training dataset
    args.fid_sample = len(dataset)
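# --- Editor's sketch (assumption) ---
# get_loss(args.loss_type) above returns a (generator loss, discriminator loss)
# pair. A sketch for a plain non-saturating "sgan" case; the relativistic and
# gradient-penalty variants listed above would need their own pairs.
import torch
import torch.nn.functional as F

def get_loss(loss_type):
    if loss_type != "sgan":
        raise NotImplementedError(loss_type)

    def gen_loss(d_fake):
        # the generator wants D(fake) to be classified as real
        return F.binary_cross_entropy_with_logits(d_fake, torch.ones_like(d_fake))

    def disc_loss(d_real, d_fake):
        real = F.binary_cross_entropy_with_logits(d_real, torch.ones_like(d_real))
        fake = F.binary_cross_entropy_with_logits(d_fake, torch.zeros_like(d_fake))
        return real + fake

    return gen_loss, disc_loss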
                     query_target) in enumerate(zip(support_inputs, support_targets,
                                                    query_inputs, query_targets)):
    # meta inner loop
    support_logit = model(support_input)
    train_inner_loss = F.cross_entropy(support_logit, support_target)
    model.zero_grad()
    params = gradient_update_parameters(model,
                                        train_inner_loss,
                                        step_size=args.step_size,
                                        first_order=args.first_order)

    # meta outer loop
    if train_batch_i == int(args.train_tasks / args.batch_tasks) - 1:
        teacher_model.eval()
        teacher_query_logit = teacher_model(query_input)
        query_logit = model(query_input, params=params)
        train_loss += get_loss(args, query_logit, query_target, teacher_query_logit)
    else:
        query_logit = model(query_input, params=params)
        train_loss += F.cross_entropy(query_logit, query_target)

    with torch.no_grad():
        train_acc += count_acc(query_logit, query_target)

# average acc and loss over the tasks in each batch
train_loss.div_(args.batch_tasks)
train_acc.div_(args.batch_tasks)
train_loss.backward()
optimizer.step()
def train_validate_classifier(self): if self.uncertainty_sampling_method == 'mc_dropout': uncertainty_sampler = UncertaintySamplingMCDropout() self.args.weak_supervision_strategy = 'semi_supervised_active_learning' elif self.uncertainty_sampling_method == 'augmentations_based': uncertainty_sampler = UncertaintySamplingAugmentationBased() self.args.weak_supervision_strategy = 'semi_supervised_active_learning' elif self.uncertainty_sampling_method == 'entropy_based': uncertainty_sampler = UncertaintySamplingEntropyBased( verbose=True, uncertainty_sampling_method='entropy_based') self.args.weak_supervision_strategy = 'semi_supervised_active_learning' else: uncertainty_sampler = None self.args.weak_supervision_strategy = "random_sampling" dataset_class = self.datasets[self.args.dataset]( root=self.args.root, add_labeled=self.args.add_labeled, advanced_transforms=True, merged=self.args.merged, remove_classes=self.args.remove_classes, oversampling=self.args.oversampling, unlabeled_subset_ratio=self.args.unlabeled_subset, unlabeled_augmentations=True if self.uncertainty_sampling_method == 'augmentations_based' else False, seed=self.args.seed, k_medoids=self.args.k_medoids, k_medoids_model=self.model, k_medoids_n_clusters=self.args.k_medoids_n_clusters, start_labeled=self.args.start_labeled) base_dataset, labeled_dataset, unlabeled_dataset, labeled_indices, unlabeled_indices, test_dataset = \ dataset_class.get_dataset() train_loader, unlabeled_loader, val_loader = create_loaders( self.args, labeled_dataset, unlabeled_dataset, test_dataset, labeled_indices, unlabeled_indices, self.kwargs, dataset_class.unlabeled_subset_num) model = self.model criterion = get_loss(self.args, dataset_class.labeled_class_samples, reduction='none') optimizer = torch.optim.Adam(model.parameters()) metrics_per_cycle = pd.DataFrame([]) metrics_per_epoch = pd.DataFrame([]) num_class_per_cycle = pd.DataFrame([]) best_recall, best_report, last_best_epochs = 0, None, 0 best_model = deepcopy(model) self.args.start_epoch = 0 current_labeled = dataset_class.start_labeled for epoch in range(self.args.start_epoch, self.args.epochs): train_loss = self.train_classifier(train_loader, model, criterion, optimizer, last_best_epochs, epoch) val_loss, val_report = self.validate_classifier( val_loader, model, last_best_epochs, criterion) is_best = val_report['macro avg']['recall'] > best_recall last_best_epochs = 0 if is_best else last_best_epochs + 1 val_report = pd.concat([val_report, train_loss, val_loss], axis=1) metrics_per_epoch = pd.concat([metrics_per_epoch, val_report]) if epoch > self.args.labeled_warmup_epochs and last_best_epochs > self.args.add_labeled_epochs: metrics_per_cycle = pd.concat([metrics_per_cycle, best_report]) train_loader, unlabeled_loader, val_loader, labeled_indices, unlabeled_indices = \ perform_sampling(self.args, uncertainty_sampler, None, epoch, model, train_loader, unlabeled_loader, dataset_class, labeled_indices, unlabeled_indices, labeled_dataset, unlabeled_dataset, test_dataset, self.kwargs, current_labeled, model) current_labeled += self.args.add_labeled last_best_epochs = 0 if self.args.reset_model: model, optimizer, _, self.args = create_model_optimizer_simclr( self.args, dataset_class) optimizer = torch.optim.Adam(model.parameters()) if self.args.novel_class_detection: num_classes = [ np.sum( np.array(base_dataset.targets)[labeled_indices] == i) for i in range(len(base_dataset.classes)) ] num_class_per_cycle = pd.concat([ num_class_per_cycle, pd.DataFrame.from_dict( { cls: num_classes[i] for i, cls 
in enumerate(base_dataset.classes) }, orient='index').T ]) criterion = get_loss(self.args, dataset_class.labeled_class_samples, reduction='none') else: best_recall = val_report['macro avg'][ 'recall'] if is_best else best_recall best_report = val_report if is_best else best_report best_model = deepcopy(model) if is_best else best_model if current_labeled > self.args.stop_labeled: break if self.args.store_logs: store_logs(self.args, metrics_per_cycle) store_logs(self.args, metrics_per_epoch, log_type='epoch_wise') store_logs(self.args, num_class_per_cycle, log_type='novel_class') return best_recall
def train(train_loader, model, optimizer, epoch, writer, selflabels): global sk_schedule global sk_counter # Put model in train mode model.train() # Init Logger meters batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() world_size = args.world_size dataset_bs = train_loader.batch_size end = time.time() batches_thusfar = epoch * len(train_loader) for it, inputs in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) # ============ Get inputs ... ============ video, audio, _, selected, _ = inputs video, audio = video.cuda(), audio.cuda() # ============ Occasional clustering via Sinkhorn-Knopp ... =========== if batches_thusfar + it >= sk_schedule[-1]: # optimize labels with torch.no_grad(): _ = sk_schedule.pop() selflabels = cluster( args, selflabels, train_loader.dataset, model, sk_counter, logger, writer, group, (batches_thusfar + it) * dataset_bs * world_size ) # ============ forward passes ... ============ feat_v, feat_a = model(video, audio) # ============ SeLaVi loss ... ============ if args.headcount == 1: labels = selflabels[selected, 0] else: labels = selflabels[selected, :] loss_vid = get_loss(feat_v, labels, headcount=args.headcount) loss_aud = get_loss(feat_a, labels, headcount=args.headcount) loss = 0.5 * loss_vid + 0.5 * loss_aud # ============ backward and optim step ... ============ optimizer.zero_grad() if args.use_fp16: with apex.amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() optimizer.step() # ============ misc ... ============ losses.update(loss.item(), inputs[0].size(0)) batch_time.update(time.time() - end) end = time.time() iteration = epoch * len(train_loader) + it if args.rank == 0 and it % 50 == 0: logger.info( "Epoch: [{0}][{1}]\t" "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t" "Data {data_time.val:.3f} ({data_time.avg:.3f})\t" "Loss {loss.val:.4f} ({loss.avg:.4f})\t" "Lr: {lr:.4f}".format( epoch, it, batch_time=batch_time, data_time=data_time, loss=losses, lr=optimizer.param_groups[0]["lr"], ) ) # Log onto tensorboard if writer: writer.add_scalar( f'loss/iter', loss.item(), iteration) writer.add_scalar( f'lr/iter', optimizer.param_groups[0]["lr"], iteration) writer.add_scalar( f'batch_time/iter', batch_time.avg, iteration) writer.add_scalar( f'data_time/iter', data_time.avg, iteration) # ============ signal handling ... ============ if os.environ['SIGNAL_RECEIVED'] == 'True': if args.rank == 0: logger.info("Beginning reqeue") trigger_job_requeue( os.path.join(args.dump_path, "checkpoint.pth.tar")) dist.barrier() torch.cuda.empty_cache() return (epoch, losses.avg), selflabels
def get_content_loss(self, base_content, target):
    return utils.get_loss(base_content, target)
def get_style_loss(self, base_style, gram_target):
    gram_style = utils.gram_matrix(base_style)
    return utils.get_loss(gram_style, gram_target)
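# --- Editor's sketch (assumption) ---
# The two methods above read as Gatys-style neural style transfer: utils.get_loss
# as a mean squared error and utils.gram_matrix as the channel-wise feature
# correlation. A minimal version of both helpers:
import tensorflow as tf

def get_loss(x, target):
    return tf.reduce_mean(tf.square(x - target))

def gram_matrix(features):
    # features: [height, width, channels] activations of one layer
    channels = int(features.shape[-1])
    a = tf.reshape(features, [-1, channels])
    n = tf.cast(tf.shape(a)[0], tf.float32)
    return tf.matmul(a, a, transpose_a=True) / n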
model.load_state_dict(saved['model_state_dict'], strict=False)
if frozen_predictor:
    for param in model.predictor.parameters():
        param.requires_grad_(False)

datasets = utils.get_datasets(datasets, 9, 1)
scaler = utils.ZScoreScaler(datasets['train'].mean, datasets['train'].std)

optimizer = optim.Adam([{'params': model.adaptor.parameters()},
                        {'params': model.predictor.parameters(), 'lr': 1e-5}],
                       lr=learning_rate)

loss = utils.get_loss('MaskedMAELoss')
trainer = utils.OursTrainer(model, loss, scaler, device, optimizer, weight_decay, 2, 5)

utils.train_model(datasets=datasets,
                  batch_size=64,
                  folder=saved_folder,
                  trainer=trainer,
                  scheduler=None,
                  epochs=100,
                  early_stop_steps=10)
utils.test_model(datasets=datasets,
                 batch_size=64,
                 trainer=trainer,
                 folder=saved_folder)
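# --- Editor's sketch (assumption) ---
# utils.get_loss('MaskedMAELoss') above presumably returns a mean absolute error
# that ignores missing targets (zeros are a common missing-value encoding in
# traffic-forecasting data); a minimal PyTorch version:
import torch
from torch import nn

class MaskedMAELoss(nn.Module):
    def forward(self, preds, targets):
        mask = (targets != 0).float()
        mask = mask / (mask.mean() + 1e-8)  # rescale so the loss keeps its magnitude
        return torch.mean(torch.abs(preds - targets) * mask)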
def floor_train_predict(config, analysis_site, floor, floor_id, ref_score, data_folder, model_folder, df, mode, holdout_df, test_floors, recompute_grouped_data, overwrite_models, test_type_mapping, only_public_test_preds, test_waypoint_times, store_all_wifi_predictions, store_full_wifi_predictions, debug_fn, verbose): test_preds = {} valid_preds = [] all_wifi_predictions = [] full_pos_preds = [] floor_key = (analysis_site, floor) combined_full_pos_preds = {floor_key: None} site_model_folder = model_folder / analysis_site Path(site_model_folder).mkdir(parents=True, exist_ok=True) site_df = df[(df.site_id == analysis_site) & (df.num_wifi > 0)] if mode != 'test': site_df = site_df[site_df['mode'] != 'test'] valid_paths = holdout_df.ext_path[(holdout_df['mode'] == 'valid') & ( holdout_df.site_id == analysis_site)].tolist() with pd.option_context('mode.chained_assignment', None): site_df['mode'] = site_df['ext_path'].apply( lambda x: 'valid' if (x in valid_paths) else 'train') else: test_df = site_df[(site_df['mode'] == 'test')] floor_df = site_df[site_df.text_level == floor] numeric_floor = utils.TEST_FLOOR_MAPPING[floor] if mode == 'test': target_floors = np.array( [test_floors[fn] for fn in test_df['fn'].values]) correct_test_floor = target_floors == numeric_floor if not np.any(correct_test_floor) and only_public_test_preds: return (test_preds, valid_preds, all_wifi_predictions, combined_full_pos_preds) test_df_floor = test_df[correct_test_floor] # Load the combined floor train data if mode == 'test': with pd.option_context("mode.chained_assignment", None): floor_df.loc[:, 'mode'] = 'all_train' train = utils.load_site_floor(floor_df, recompute_grouped_data, test_floor=floor) if not np.any(correct_test_floor): # Create the all train file, but don't continue since there is nothing to # predict return (test_preds, valid_preds, all_wifi_predictions, combined_full_pos_preds) valid = utils.load_site_floor(test_df_floor, recompute_grouped_data, test_floor=floor) else: train = utils.load_site_floor(floor_df[floor_df['mode'] == 'train'], recompute_grouped_data) valid = utils.load_site_floor(floor_df[floor_df['mode'] == 'valid'], recompute_grouped_data) # Train the wifi models utils.aggregate_wifi_near_time(train, config['time_range_max_strength']) utils.aggregate_wifi_near_time(valid, config['time_range_max_strength']) utils.interpolate_wifi_waypoints(train, recompute=True, batch_interpolated=True) bssid_grouped = utils.group_waypoints_bssid(train) model_type_prefix = 'test-' if mode == 'test' else '' model_path = site_model_folder / (model_type_prefix + floor + '.pickle') if model_path.exists() and not overwrite_models: with open(model_path, 'rb') as f: model = pickle.load(f) else: model = fit_model(train, bssid_grouped, config, data_folder) with open(model_path, 'wb') as handle: pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL) # Generate predictions with the wifi model make_predict_efforts = [ v['file_meta']['mode'] != 'test' or (test_type_mapping[v['file_meta'].fn] == 'public') or (not only_public_test_preds) for v in valid ] wifi_pos_preds = [] for j, v in enumerate(valid): # Locate all unique wifi time observations if debug_fn is not None: if v['file_meta'].fn == debug_fn: import pdb pdb.set_trace() x = 1 else: continue pos_pred, full_pos_pred = predict_trajectory( v, make_predict_efforts[j], model, store_full_wifi_predictions, config) wifi_pos_preds.append(pos_pred) if store_full_wifi_predictions and isinstance(full_pos_pred, pd.DataFrame): if len(full_pos_preds): 
full_pos_pred.drop(['x', 'y'], axis=1, inplace=True) full_pos_preds.append(full_pos_pred) if store_full_wifi_predictions and full_pos_preds: combined_full_pos_preds[floor_key] = pd.concat(full_pos_preds, axis=1) all_preds_floor = [] all_actual_floor = [] for i, v in enumerate(valid): # Interpolate the locations of the unique wifi time observations waypoint_times = test_waypoint_times[ v['file_meta'].ext_path[5:-4]] if ( mode == 'test') else v['waypoint'].time.values v_preds = utils.interpolate_predictions(wifi_pos_preds[i], waypoint_times) if mode == 'test': for waypoint_id in range(waypoint_times.shape[0]): test_preds[analysis_site, v['file_meta'].fn, waypoint_times[waypoint_id]] = (numeric_floor, v_preds[waypoint_id, 0], v_preds[waypoint_id, 1]) else: if store_all_wifi_predictions: pos_preds = wifi_pos_preds[i] for k in pos_preds: all_wifi_predictions.append({ 'site': analysis_site, 'fn': v['file_meta'].fn, 'time': k, 'floor': floor, 'numeric_floor': numeric_floor, 'x_pred': pos_preds[k][0], 'y_pred': pos_preds[k][1], }) all_preds_floor.append(v_preds) actuals = v['waypoint'].iloc[:, 2:].values all_actual_floor.append(actuals) for waypoint_id in range(waypoint_times.shape[0]): squared_error = ((v_preds[waypoint_id] - actuals[waypoint_id])**2).sum() valid_preds.append({ 'site': analysis_site, 'fn': v['file_meta'].fn, 'waypoint_time': waypoint_times[waypoint_id], 'floor': floor, 'numeric_floor': numeric_floor, 'x_pred': v_preds[waypoint_id, 0], 'y_pred': v_preds[waypoint_id, 1], 'x_actual': v['waypoint'].x_waypoint[waypoint_id], 'y_actual': v['waypoint'].y_waypoint[waypoint_id], 'squared_error': squared_error, 'error': np.sqrt(squared_error), }) if mode != 'test' and verbose and ref_score is not None: all_preds_floor = np.concatenate(all_preds_floor) all_actual_floor = np.concatenate(all_actual_floor) floor_loss = utils.get_loss(all_preds_floor, all_actual_floor) ref_score_change = floor_loss - ref_score print(f"{floor_id} {analysis_site} {floor} loss: {floor_loss:.2f}\ ({ref_score_change:.2f}) - {all_preds_floor.size}") elif verbose: print(f"{floor_id} {analysis_site} {floor}") return test_preds, valid_preds, all_wifi_predictions, combined_full_pos_preds
        config, os.path.join(config['train_dir'], config['train_hyp_file']))
    num_labels = 3
else:
    train_samples, dev_samples = get_train_dev_data(
        config, os.path.join(config['train_dir'], config['train_flat_file']))
logging.info("Done Processing Data ...")

model = CrossEncoder(config['crossencoder_base_model'], num_labels=num_labels)

batch_size = config['batch_size']
num_epochs = config['num_epochs']
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=batch_size)

train_loss = get_loss(config['loss_type'], model)
evaluator = CEBinaryAccuracyEvaluator.from_input_examples(dev_samples)

warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)
logging.info("Warmup-steps: {}".format(warmup_steps))

model_dir = os.path.join(config['saved_model_dir'], config['checkpoint_path'])
logging.info("Starting training ...")
model.fit(train_dataloader=train_dataloader,
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=int(config['eval_steps']),
          warmup_steps=warmup_steps,
          output_path=model_dir)
def run_mlp_experiment(args, device): """ Runs the experiment with a 3-layers fully connected network. :param args: Namespace from utils.parse_arguments :param device: torch.device :return: np.array of float, np.array of float, np.array of float; train_acc, val_acc, test_acc (in percentage) """ validation_ratio, record_train_acc, record_val_acc, record_test_acc = utils.configure_training_mode( args) train_loader, validation_loader, test_loader = datasets.build_loaders_by_dataset( args.dataset, args.batch_size, validation_ratio=validation_ratio, train_validation_split_seed=0) local_loss_list = utils.get_loss(args) nonlinearity = utils.get_nonlinearity(args) optimizer_local, local_opt_arguments_dict, local_scheduler_arguments_dict, \ optimizer_final, final_opt_arguments_dict, final_scheduler_arguments_dict = \ utils.choose_optimizers_and_parameters(args) conv_sizes = [] do_pooling = [] kernel_sizes = [] fc_layers = [args.mlp_layer_size, args.mlp_layer_size, args.mlp_layer_size] if args.divisive_norm_fc: divisive_norm_list = [ networks.DivisiveNorm(args.divnorm_power, args.grouping_dim, args.grouped_var_delta) for i in range(len(fc_layers)) ] else: divisive_norm_list = None alt_feedback_type = None if args.feedback_alignment: alt_feedback_type = 'feedback_alignment' elif args.sign_symmetry: alt_feedback_type = 'sign_symmetry' net = networks.Network( nonlinearity, local_loss_list, optimizer_local, torch.optim.lr_scheduler.MultiStepLR, conv_sizes, kernel_sizes, do_pooling, fc_layers, 'max', args.dataset, bias=False, local_opt_arguments_dict=local_opt_arguments_dict, local_scheduler_arguments_dict=local_scheduler_arguments_dict, dropout_p=args.dropout_p, batch_norm=args.batch_norm, divisive_norm_list_conv=None, divisive_norm_list_fc=divisive_norm_list, spatial_dropout=args.spatial_dropout, alt_feedback_type=alt_feedback_type) net = net.to(device) print(net) final_loss = nn.CrossEntropyLoss() if args.backprop: final_opt = optimizer_final(net.parameters(), **final_opt_arguments_dict) compute_local_loss = False update_local_loss = False else: final_opt = optimizer_final(net.softmax_layer.parameters(), **final_opt_arguments_dict) compute_local_loss = True update_local_loss = True final_scheduler = torch.optim.lr_scheduler.MultiStepLR( final_opt, **final_scheduler_arguments_dict) train_acc, val_acc, test_acc = utils.train_network( net, device, final_loss, final_opt, final_scheduler, args.n_epochs, train_loader, validation_loader, test_loader, compute_local_loss=compute_local_loss, update_local_loss=update_local_loss, record_train_acc=record_train_acc, record_val_acc=record_val_acc, record_test_acc=record_test_acc, print_results=True, backprop_batch_manhattan=args.backprop_batch_manhattan) return train_acc, val_acc, test_acc
fine_tune_epochs = 13
fine_tune_layers = 120
fine_tune_learning_rate = learning_rate / 10
#########################
use_fine_tune = True
print_model_summary = False

IMG_SIZE = utils.img_size()
TRAIN_DIR = utils.train_dir(TYPE)
VALIDATION_DIR = utils.validation_dir(TYPE)

loss = utils.get_loss(TYPE)
optimizer = tf.keras.optimizers.RMSprop(lr=learning_rate)
fine_tune_optimizer = tf.keras.optimizers.RMSprop(lr=fine_tune_learning_rate)
total_epochs = epochs + fine_tune_epochs

# START
train_generator, validation_generator = utils.get_generators(TYPE, batch_size)

base_model = utils.get_base_model(MODELNAME)
base_model.trainable = False
if print_model_summary:
    base_model.summary()
def main(): # read the train file from first arugment train_file = sys.argv[1] #train_file='../data/covtype.scale.trn.libsvm' # read the test file from second argument test_file = sys.argv[2] #test_file = '../data/covtype.scale.tst.libsvm' # You can use load_svmlight_file to load data from train_file and test_file X_train, y_train = load_svmlight_file(train_file) X_test, y_test = load_svmlight_file(test_file) # You can use cg.ConjugateGradient(X, I, grad, lambda_) # Main entry point to the program X_train = sparse.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_test = sparse.hstack([X_test, np.ones((X_test.shape[0], 1))]) X = sparse.csr_matrix(X_train) X_test = sparse.csr_matrix(X_test) y = sparse.csr_matrix(y_train).transpose() y_test = sparse.csr_matrix(y_test).transpose() #set global hyper parameter if sys.argv[1] == "covtype.scale.trn.libsvm": lambda_ = 3631.3203125 optimal_loss = 2541.664519 five_fold_CV = 75.6661 optimal_function_value = 2541.664519 else: lambda_ = 7230.875 optimal_loss = 669.664812 five_fold_CV = 97.3655 optimal_function_value = 669.664812 #SGD #set local sgd hyper parameter print('starting SGD...') n_batch = 1000 beta = 0 lr = 0.001 w = np.zeros((X_train.shape[1])) n = X_train.shape[0] sgd_grad = [] sgd_time = [] sgd_rel = [] sgd_test_acc = [] epoch = 180 start = time.time() #redefine learaning rate for i in range(epoch): gamma_t = lr / (1 + beta * i) batch_ = np.random.permutation(n) #shuffle for j in range(n // n_batch): #make batch idx = batch_[j * n_batch:(j + 1) * n_batch] X_bc = X[idx] y_bc = y[idx] grad = get_grad(w, lambda_, n, X_bc, y_bc, n_batch) #comput gradient w = w - gamma_t * grad #update gradient t = time.time() - start sgd_time.append(t) # append to time list grad_ = np.linalg.norm(grad) # get gradient value sgd_grad.append(grad_) rel = (get_loss(w, lambda_, X_test, y_test, n_batch) - optimal_loss) / optimal_loss # get relative func value sgd_rel.append(rel) test_acc = get_acc(w, lambda_, X_test, y_test, n_batch) # get test accuracy sgd_test_acc.append(test_acc) print("SGD : final_time: {}, fina_test_acc: {}".format( time.time() - start, sgd_test_acc[-1])) #plot SGD ''' plt.plot(sgd_time, sgd_grad) plt.xlabel("time") plt.ylabel("grad") plt.title("SGD") plt.show() plt.plot(sgd_time, sgd_rel) plt.xlabel("time") plt.ylabel("relative function") plt.title("SGD") plt.show() plt.plot(sgd_time, sgd_test_acc) plt.xlabel("time") plt.ylabel("test_acc") plt.title("SGD") plt.show() ''' print('starting Newton...') #Newton #set local newton hyper parameter epoch = 50 n_batch = 1000 beta = 0.0001 lr = 0.001 w = np.zeros((X_train.shape[1])) n = X_train.shape[0] nt_grad = [] nt_time = [] nt_rel = [] newton_time = time.time() nt_test_acc = [] w = np.zeros((X_train.shape[1])) n = X_train.shape[0] for i in range(epoch): gamma_t = lr / (1 + beta * i) hessian_total = np.zeros(w.shape) I_ = [] #init I list to compute conjgate gradient for j in range(n // n_batch): X_bc = X[j * n_batch:(j + 1) * n_batch] #make X_batch y_bc = y[j * n_batch:(j + 1) * n_batch] #make y_batch hessian, I = get_hessian(w, lambda_, n, X_bc, y_bc) # get hessian hessian_total += hessian I_.append(I) I_ = np.concatenate(I_) hessian_total += w delta, _ = cg.conjugateGradient( X, I_, hessian_total, lambda_) #get update value from conjugateGradient w = w + delta #update w t = time.time() - newton_time nt_time.append(t) # append to time list grad_ = np.linalg.norm(hessian_total) # get gradient value nt_grad.append(grad_) rel = (get_loss(w, lambda_, X_test, y_test, n_batch) - optimal_loss) / 
optimal_loss # get relative func value nt_rel.append(rel) test_acc = get_acc(w, lambda_, X_test, y_test, n_batch) # get test accuracy nt_test_acc.append(test_acc) final_time = time.time() - newton_time print("final_time: {}, fina_test_acc: {}".format(final_time, nt_test_acc[-1])) #plot '''
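# --- Editor's sketch (assumption) ---
# The SGD/Newton experiment above evaluates get_loss(w, lambda_, X, y, n_batch);
# for L2-regularized logistic regression this is plausibly
#   f(w) = (lambda_ / 2) * ||w||^2 + sum_i log(1 + exp(-y_i * x_i . w)),
# computed batch-wise to keep the sparse intermediate results small in memory.
import numpy as np

def get_loss(w, lambda_, X, y, n_batch):
    loss = 0.5 * lambda_ * np.dot(w, w)
    for j in range(0, X.shape[0], n_batch):
        Xb = X[j:j + n_batch]
        yb = np.asarray(y[j:j + n_batch].todense()).ravel()
        margins = -yb * (Xb @ w)
        loss += np.sum(np.logaddexp(0.0, margins))
    return loss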
                               min_value=1.1, max_value=2.0, value=1.4)
iterations = st.sidebar.slider("Iterations", min_value=10, max_value=30, value=20)
max_loss = st.sidebar.slider("Maximum loss", min_value=5, max_value=20, value=10)

# Setting up model and loss
K.set_learning_phase(0)
model = inception_v3.InceptionV3(weights='imagenet', include_top=False)
loss = get_loss(layers_coeff, model)


def get_image_download_link(img):
    """Generates a link allowing the PIL image to be downloaded
    in: PIL image
    out: href string
    """
    buffered = BytesIO()
    img.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    href = f'<a href="data:file/jpg;base64,{img_str}">Download result</a>'
    return href


if st.button('Start to dream'):
    v_preds = utils.interpolate_predictions(wifi_pos_preds, waypoint_times)
    if mode == 'test':
        for waypoint_id in range(waypoint_times.shape[0]):
            test_preds[analysis_site, v['file_meta'].fn, waypoint_times[waypoint_id]] = (
                numeric_floor, v_preds[waypoint_id, 0], v_preds[waypoint_id, 1])
    else:
        all_preds_floor.append(v_preds)
        all_actual_floor.append(v['waypoint'].iloc[:, 2:].values)

if mode != 'test':
    all_preds_floor = np.concatenate(all_preds_floor)
    all_actual_floor = np.concatenate(all_actual_floor)
    floor_loss = utils.get_loss(all_preds_floor, all_actual_floor)
    all_losses[floor_id] = (floor_loss, all_actual_floor.shape[0])
    print(f"{floor} loss: {floor_loss:.2f}")

if mode != 'test':
    site_num_obs = all_losses[:, 1].sum()
    weighted_loss = (all_losses[:, 0] * all_losses[:, 1]).sum()
    site_loss = weighted_loss / site_num_obs
    print(f"Site {analysis_site} ({analysis_site_id+1}) loss: {site_loss:.2f}")
    aggregate_scores[analysis_site_id] = (site_loss, site_num_obs)

if mode == 'test':
    submission = utils.convert_to_submission(data_folder, test_preds)
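# --- Editor's sketch (assumption) ---
# In the indoor-positioning snippets above, utils.get_loss(preds, actual) reads
# as the mean Euclidean distance between predicted and true (x, y) waypoints:
import numpy as np

def get_loss(preds, actual):
    return np.sqrt(((preds - actual) ** 2).sum(axis=1)).mean()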
def main(self): if self.uncertainty_sampling_method == 'mc_dropout': uncertainty_sampler = UncertaintySamplingMCDropout() self.args.weak_supervision_strategy = 'semi_supervised_active_learning' elif self.uncertainty_sampling_method == 'augmentations_based': uncertainty_sampler = UncertaintySamplingAugmentationBased() self.args.weak_supervision_strategy = 'semi_supervised_active_learning' elif self.uncertainty_sampling_method == 'entropy_based': uncertainty_sampler = UncertaintySamplingEntropyBased( verbose=True, uncertainty_sampling_method='entropy_based') self.args.weak_supervision_strategy = 'semi_supervised_active_learning' else: uncertainty_sampler = None self.args.weak_supervision_strategy = "random_sampling" dataset_cls = self.datasets[self.args.dataset]( root=self.args.root, add_labeled=self.args.add_labeled, advanced_transforms=True, merged=self.args.merged, remove_classes=self.args.remove_classes, oversampling=self.args.oversampling, unlabeled_subset_ratio=self.args.unlabeled_subset, expand_labeled=self.args.fixmatch_k_img, expand_unlabeled=self.args.fixmatch_k_img * self.args.fixmatch_mu, unlabeled_augmentations=True if self.uncertainty_sampling_method == 'augmentations_based' else False, seed=self.args.seed, start_labeled=self.args.start_labeled) base_dataset, labeled_dataset, unlabeled_dataset, labeled_indices, unlabeled_indices, test_dataset = \ dataset_cls.get_dataset() train_loader, unlabeled_loader, val_loader = create_loaders( self.args, labeled_dataset, unlabeled_dataset, test_dataset, labeled_indices, unlabeled_indices, self.kwargs, dataset_cls.unlabeled_subset_num) labeled_dataset_fix, unlabeled_dataset_fix = dataset_cls.get_datasets_fixmatch( base_dataset, labeled_indices, unlabeled_indices) self.args.lr = 0.0003 model, optimizer, _ = create_model_optimizer_scheduler( self.args, dataset_cls) if self.init == 'pretrained': model = load_pretrained(model) elif self.init == 'autoencoder': model, optimizer, _ = create_model_optimizer_autoencoder( self.args, dataset_cls) elif self.init == 'simclr': model, optimizer, _, _ = create_model_optimizer_simclr( self.args, dataset_cls) labeled_loader_fix = DataLoader(dataset=labeled_dataset_fix, batch_size=self.args.batch_size, shuffle=True, **self.kwargs) unlabeled_loader_fix = DataLoader(dataset=unlabeled_dataset_fix, batch_size=self.args.batch_size, shuffle=True, **self.kwargs) criterion_labeled = get_loss(self.args, dataset_cls.labeled_class_samples, reduction='none') criterion_unlabeled = get_loss(self.args, dataset_cls.labeled_class_samples, reduction='none') criterions = { 'labeled': criterion_labeled, 'unlabeled': criterion_unlabeled } model.zero_grad() best_recall, best_report, last_best_epochs = 0, None, 0 best_model = deepcopy(model) metrics_per_cycle = pd.DataFrame([]) metrics_per_epoch = pd.DataFrame([]) num_class_per_cycle = pd.DataFrame([]) self.args.start_epoch = 0 current_labeled = dataset_cls.start_labeled for epoch in range(self.args.start_epoch, self.args.fixmatch_epochs): train_loader_fix = zip(labeled_loader_fix, unlabeled_loader_fix) train_loss = self.train(train_loader_fix, model, optimizer, epoch, len(labeled_loader_fix), criterions, base_dataset.classes, last_best_epochs) val_loss, val_report = self.validate(val_loader, model, last_best_epochs, criterions) is_best = val_report['macro avg']['recall'] > best_recall last_best_epochs = 0 if is_best else last_best_epochs + 1 val_report = pd.concat([val_report, train_loss, val_loss], axis=1) metrics_per_epoch = pd.concat([metrics_per_epoch, val_report]) if epoch > 
self.args.labeled_warmup_epochs and last_best_epochs > self.args.add_labeled_epochs: metrics_per_cycle = pd.concat([metrics_per_cycle, best_report]) train_loader, unlabeled_loader, val_loader, labeled_indices, unlabeled_indices = \ perform_sampling(self.args, uncertainty_sampler, None, epoch, model, train_loader, unlabeled_loader, dataset_cls, labeled_indices, unlabeled_indices, labeled_dataset, unlabeled_dataset, test_dataset, self.kwargs, current_labeled, model) labeled_dataset_fix, unlabeled_dataset_fix = dataset_cls.get_datasets_fixmatch( base_dataset, labeled_indices, unlabeled_indices) labeled_loader_fix = DataLoader( dataset=labeled_dataset_fix, batch_size=self.args.batch_size, shuffle=True, **self.kwargs) unlabeled_loader_fix = DataLoader( dataset=unlabeled_dataset_fix, batch_size=self.args.batch_size, shuffle=True, **self.kwargs) current_labeled += self.args.add_labeled last_best_epochs = 0 if self.args.reset_model: if self.init == 'pretrained': model = load_pretrained(model) elif self.init == 'autoencoder': model, optimizer, _ = create_model_optimizer_autoencoder( self.args, dataset_cls) elif self.init == 'simclr': model, optimizer, _, self.args = create_model_optimizer_simclr( self.args, dataset_cls) if self.args.novel_class_detection: num_classes = [ np.sum( np.array(base_dataset.targets)[labeled_indices] == i) for i in range(len(base_dataset.classes)) ] num_class_per_cycle = pd.concat([ num_class_per_cycle, pd.DataFrame.from_dict( { cls: num_classes[i] for i, cls in enumerate(base_dataset.classes) }, orient='index').T ]) criterion_labeled = get_loss(self.args, dataset_cls.labeled_class_samples, reduction='none') criterion_unlabeled = get_loss( self.args, dataset_cls.labeled_class_samples, reduction='none') criterions = { 'labeled': criterion_labeled, 'unlabeled': criterion_unlabeled } else: best_recall = val_report['macro avg'][ 'recall'] if is_best else best_recall best_report = val_report if is_best else best_report best_model = deepcopy(model) if is_best else best_model save_checkpoint( self.args, { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'best_prec1': best_recall, }, is_best) if current_labeled > self.args.stop_labeled: break if self.args.store_logs: store_logs(self.args, metrics_per_cycle) store_logs(self.args, metrics_per_epoch, log_type='epoch_wise') store_logs(self.args, num_class_per_cycle, log_type='novel_class') return best_recall
def main(self): dataset_cl = self.datasets[self.args.dataset](root=self.args.root, add_labeled=self.args.add_labeled, advanced_transforms=True, merged=self.args.merged, remove_classes=self.args.remove_classes, oversampling=self.args.oversampling, unlabeled_subset_ratio=self.args.unlabeled_subset, seed=self.args.seed, start_labeled=self.args.start_labeled) base_dataset, labeled_dataset, unlabeled_dataset, labeled_indices, unlabeled_indices, test_dataset = \ dataset_cl.get_dataset() train_loader, unlabeled_loader, val_loader = create_loaders(self.args, labeled_dataset, unlabeled_dataset, test_dataset, labeled_indices, unlabeled_indices, self.kwargs, dataset_cl.unlabeled_subset_num) model_backbone, optimizer_backbone, _ = create_model_optimizer_scheduler(self.args, dataset_cl) model_module = LossNet().cuda() optimizer_module = torch.optim.Adam(model_module.parameters()) models = {'backbone': model_backbone, 'module': model_module} optimizers = {'backbone': optimizer_backbone, 'module': optimizer_module} criterion_backbone = get_loss(self.args, dataset_cl.labeled_class_samples, reduction='none') criterions = {'backbone': criterion_backbone, 'module': loss_module_objective_func} uncertainty_sampler = UncertaintySamplingEntropyBased(verbose=True, uncertainty_sampling_method=self.args. uncertainty_sampling_method) current_labeled = dataset_cl.start_labeled metrics_per_cycle = pd.DataFrame([]) metrics_per_epoch = pd.DataFrame([]) num_class_per_cycle = pd.DataFrame([]) print_args(self.args) best_recall, best_report, last_best_epochs = 0, None, 0 best_model = deepcopy(models['backbone']) for epoch in range(self.args.start_epoch, self.args.epochs): train_loss = self.train(train_loader, models, optimizers, criterions, epoch, last_best_epochs) val_loss, val_report = self.validate(val_loader, models, criterions, last_best_epochs) is_best = val_report['macro avg']['recall'] > best_recall last_best_epochs = 0 if is_best else last_best_epochs + 1 val_report = pd.concat([val_report, train_loss, val_loss], axis=1) metrics_per_epoch = pd.concat([metrics_per_epoch, val_report]) if epoch > self.args.labeled_warmup_epochs and last_best_epochs > self.args.add_labeled_epochs: metrics_per_cycle = pd.concat([metrics_per_cycle, best_report]) train_loader, unlabeled_loader, val_loader, labeled_indices, unlabeled_indices = \ perform_sampling(self.args, uncertainty_sampler, None, epoch, models, train_loader, unlabeled_loader, dataset_cl, labeled_indices, unlabeled_indices, labeled_dataset, unlabeled_dataset, test_dataset, self.kwargs, current_labeled, None) current_labeled += self.args.add_labeled last_best_epochs = 0 if self.args.reset_model: model_backbone, optimizer_backbone, scheduler_backbone = \ create_model_optimizer_scheduler(self.args, dataset_cl) model_module, optimizer_module = create_model_optimizer_loss_net() models = {'backbone': model_backbone, 'module': model_module} optimizers = {'backbone': optimizer_backbone, 'module': optimizer_module} if self.args.novel_class_detection: num_classes = [np.sum(np.array(base_dataset.targets)[labeled_indices] == i) for i in range(len(base_dataset.classes))] num_class_per_cycle = pd.concat([num_class_per_cycle, pd.DataFrame.from_dict({cls: num_classes[i] for i, cls in enumerate(base_dataset.classes)}, orient='index').T]) criterion_backbone = get_loss(self.args, dataset_cl.labeled_class_samples, reduction='none') criterions = {'backbone': criterion_backbone, 'module': loss_module_objective_func} else: best_recall = val_report['macro avg']['recall'] if is_best else 
best_recall best_report = val_report if is_best else best_report best_model = deepcopy(models['backbone']) if is_best else best_model if current_labeled > self.args.stop_labeled: break save_checkpoint(self.args, { 'epoch': epoch + 1, 'state_dict': model_backbone.state_dict(), 'best_prec1': best_recall, }, is_best) if self.args.store_logs: store_logs(self.args, metrics_per_cycle) store_logs(self.args, metrics_per_epoch, log_type='epoch_wise') store_logs(self.args, num_class_per_cycle, log_type='novel_class') return best_recall