def __init__(self, args):
    """Set up the simulated network: topology, data iterators, per-rank model
    replicas for gather/scatter, and a TensorBoard logger.

    :param args: parsed CLI namespace (device, bz, network_name, architecture,
        log_freq, experiment, ...)
    """
    self.device = args.device
    self.batch_size = args.bz
    self.network = get_network(args.network_name, args.architecture)
    self.world_size = self.network.number_of_nodes() + 1  # we add node representing the network manager
    self.log_freq = args.log_freq
    # create logger
    logger_path = os.path.join("loggs", args_to_string(args), args.architecture)
    self.logger = SummaryWriter(logger_path)
    self.round_idx = 0  # index of the current communication round
    # dataset file layout: data/<experiment>/{train,test}/<split><ext>
    self.train_dir = os.path.join("data", args.experiment, "train")
    self.test_dir = os.path.join("data", args.experiment, "test")
    self.train_path = os.path.join(self.train_dir, "train" + EXTENSIONS[args.experiment])
    self.test_path = os.path.join(self.test_dir, "test" + EXTENSIONS[args.experiment])
    self.train_iterator = get_iterator(args.experiment, self.train_path, self.device, self.batch_size)
    self.test_iterator = get_iterator(args.experiment, self.test_path, self.device, self.batch_size)
    # one model slot per rank, used as buffers for collective gather/scatter
    self.gather_list = [get_model(args.experiment, self.device, self.train_iterator) for _ in range(self.world_size)]
    self.scatter_list = [get_model(args.experiment, self.device, self.train_iterator) for _ in range(self.world_size)]
    # print initial logs
    self.write_logs()
def per_channel_normalization_norm(model, norm, normalization_lst_method_name="get_conv_indices_set"):
    """Build a WeightsNormalization over the model's conv-layer parameters.

    Returns None when `norm` is 0 (disabled) or when the model cannot report
    which of its parameter indices are conv layers.
    """
    model = get_model(model)
    # Normalization is disabled outright for a zero norm.
    if norm == 0:
        return None
    # The model must expose the conv-index listing method to participate.
    if not hasattr(model, normalization_lst_method_name):
        return None
    conv_indices = getattr(model, normalization_lst_method_name)()
    selected = []
    for idx, (param_name, param) in enumerate(model.named_parameters()):
        if idx in conv_indices:
            selected.append({'params': param.data, 'name': param_name, 'norm': norm})
    return WeightsNormalization(selected)
def main():
    """Train a Morph classifier from the JSON config and save it as an .h5 file."""
    # read train config
    conf = read_config_file("configs/train_config_copy.json")
    # timestamp used to tag checkpoints and the saved model file
    model_suffix = datetime.now().strftime("%Y%m%d-%H%M%S")
    # get datasets (fixed seed keeps the train/valid split reproducible)
    data_df = load_csv(csv_path=conf["csv_path"], shuffle=True, seed=42)
    train_data_df, valid_data_df = train_valid_split(data_df=data_df, train_percent=0.8, seed=42)
    train_data = MorphDataset(data_df=train_data_df, img_dir=conf["img_dir"],
                              img_size=(conf["img_size"], conf["img_size"]), channels=3, seed=42)
    valid_data = MorphDataset(data_df=valid_data_df, img_dir=conf["img_dir"],
                              img_size=(conf["img_size"], conf["img_size"]), channels=3, seed=42)
    # record sizes before batching (batching wraps the dataset object)
    train_size = train_data.size
    valid_size = valid_data.size
    label_array = train_data.label_array if conf["task_weighting"] else None
    n_classes = len(train_data.classes)
    train_data = train_data.batch(batch_size=conf["batch_size"])
    valid_data = valid_data.batch(batch_size=conf["batch_size"])
    # get model
    model = get_model(conf=conf, n_classes=n_classes, label_array=label_array)
    model.summary()
    # get train callback functions
    callbacks = get_callbacks(conf, model_suffix)
    # train model
    model.fit(train_data,
              epochs=conf["num_epochs"],
              steps_per_epoch=train_size//conf["batch_size"],
              validation_data=valid_data if valid_data is not None else None,
              validation_steps=valid_size//conf["batch_size"] if valid_data is not None else None,
              callbacks=callbacks)
    # save model; resnet variants also encode their subtype in the name
    model_name_prefix = str(conf["model_prefix"]) + str(conf["arch_type"])
    if conf["arch_type"] == "resnet":
        model_name_prefix += str(conf["arch_subtype"])
    model.save(model_name_prefix + "_" + model_suffix + '.h5')
def main():
    """Evaluate deep-ensemble predictions for every architecture of the chosen dataset."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # method name is derived from this script's filename: "<prefix>-<method>.py"
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    if args.aug_test:
        args.method = args.method + '_augment'
    torch.backends.cudnn.benchmark = True
    # architectures to evaluate, per dataset
    compute = {
        'CIFAR10': ['VGG16BN', 'PreResNet110', 'PreResNet164', 'WideResNet28x10'],
        'CIFAR100': ['VGG16BN', 'PreResNet110', 'PreResNet164', 'WideResNet28x10'],
        'ImageNet': ['ResNet50']
    }
    for model in compute[args.dataset]:
        args.model = model
        logger = Logger(base='./logs/')
        print('-' * 5, 'Computing results of', model, 'on', args.dataset + '.', '-' * 5)
        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        args.num_classes = num_classes
        model = get_model(args)  # NOTE: rebinds the loop variable to the nn.Module
        for run in range(1, 6):
            log_probs = []
            # ImageNet checkpoints are not split per-run (run=-1)
            fnames = read_models(args, base=os.path.expanduser(args.models_dir),
                                 run=run if args.dataset != 'ImageNet' else -1)
            # sort checkpoints by their numeric suffix
            fnames = sorted(fnames, key=lambda a: int(a.split('-')[-1].split('.')[0]))
            # ensemble at most 100 members (50 for ImageNet), bounded by what exists
            for ns in range(100)[:min(len(fnames), 100 if args.dataset != 'ImageNet' else 50)]:
                start = time.time()
                model.load_state_dict(get_sd(fnames[ns], args))
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)
            os.makedirs('.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns + 1, run)
            # average member probabilities in log space: logsumexp - log(count)
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns + 1)
            print('Save final logprobs to %s' % logits_pth, end='\n\n')
            np.save(logits_pth, log_prob)
def run(cfg):
    """Wire up data, model, loss, optimizer, scheduler and checkpointing, then train.

    `get_model`/`get_optimizer`/`load_scheduler` may each return a [voxnet, refnet]
    pair; every stage registers its pieces with the checkpoint manager accordingly.
    """
    '''Load save path'''
    cfg.log_string('Data save path: %s' % (cfg.save_path))
    checkpoint = CheckpointIO(cfg)
    '''Load device'''
    cfg.log_string('Loading device settings.')
    device = load_device(cfg)
    '''Load data'''
    cfg.log_string('Loading dataset.')
    train_loader = get_dataloader(cfg.config, mode='train')
    test_loader = get_dataloader(cfg.config, mode='test')
    '''Load net'''
    cfg.log_string('Loading model.')
    net = get_model(cfg.config, device=device)
    if isinstance(net, list):
        # two-stage pipeline: voxel net + refinement net
        checkpoint.register_modules(voxnet=net[0])
        checkpoint.register_modules(refnet=net[1])
    else:
        checkpoint.register_modules(voxnet=net)
    cfg.log_string('loading loss function')
    loss_func = get_loss(cfg.config, device)
    '''Load optimizer'''
    cfg.log_string('Loading optimizer.')
    optimizer = get_optimizer(config=cfg.config, net=net)
    if isinstance(net, list):
        checkpoint.register_modules(voxopt=optimizer[0])
        checkpoint.register_modules(refopt=optimizer[1])
    else:
        checkpoint.register_modules(voxopt=optimizer)
    '''Load scheduler'''
    cfg.log_string('Loading optimizer scheduler.')
    scheduler = load_scheduler(config=cfg.config, optimizer=optimizer)
    if isinstance(net, list):
        checkpoint.register_modules(voxsch=scheduler[0])
        checkpoint.register_modules(refsch=scheduler[1])
    else:
        checkpoint.register_modules(voxsch=scheduler)
    '''Load trainer'''
    cfg.log_string('Loading trainer.')
    trainer = get_trainer(cfg.config)
    '''Start to train'''
    cfg.log_string('Start to train.')
    #cfg.log_string('Total number of parameters in {0:s}: {1:d}.'.format(cfg.config['method'], sum(p.numel() for p in net.parameters())))
    trainer(cfg, net, loss_func, optimizer, scheduler,
            train_loader=train_loader, test_loader=test_loader,
            device=device, checkpoint=checkpoint)
    cfg.log_string('Training finished.')
def gencloned(self):
    """Interactively build a cloned SSDP device descriptor (ssdp.xml), then
    offer to start the server.

    Prompts for the advertised domain and landing-page path, pulls the cloned
    device's identity from `utils`, and writes the UPnP XML descriptor.
    """
    # DEVICETYPE SERVICE TYPE SERVICE ID
    os.system('clear')
    print('Which domain you want for the device cloned?\n')
    domain_rev = input("\nDomain '[NAME].local': ")
    # LOGIN ROUTE
    os.system('clear')
    print('Where the webpage is located?\n'
          'folder/./file.ext or file.ext')
    web = input("\nWebpage location: ")
    # identity of the device being impersonated
    FN = utils.get_friendlyname(None)
    MF = utils.get_manufacturer(None)
    MD = utils.get_model(None)
    UUID = utils.get_uuid(None)
    urn1 = utils.get_device(None)
    urn2 = utils.get_service(None)
    urn3 = utils.get_serviceid(None)
    xml = ('<?xml version="1.0"?>\r\n'
           '<root xmlns="urn:schemas-upnp-org:device-1-0">\r\n'
           '\t<specVersion>\r\n'
           '\t\t<major>1</major>\r\n'
           '\t\t<minor>0</minor>\r\n'
           '\t</specVersion>\r\n'
           '\t<URLBase>http://' + domain_rev + ':8008</URLBase>\r\n'
           '\t<device>\r\n'
           '\t\t<presentationURL>http://' + domain_rev + ':8008/' + web + '</presentationURL>\r\n'
           '\t\t<deviceType>' + urn1 + '</deviceType>\r\n'
           '\t\t<friendlyName>' + FN + '</friendlyName>\r\n'
           '\t\t<modelDescription>Connect to access</modelDescription>\r\n'
           '\t\t<manufacturer>' + MF + '</manufacturer>\r\n'
           '\t\t<modelName>' + MD + '</modelName>\r\n'
           '\t\t<UDN>' + UUID + '</UDN>\r\n'
           '\t\t<serviceList>\r\n'
           '\t\t\t<service>\r\n'
           '\t\t\t\t<serviceType>' + urn2 + '</serviceType>\r\n'
           '\t\t\t\t<serviceId>' + urn3 + '</serviceId>\r\n'
           '\t\t\t\t<controlURL>/ssdp/notfound</controlURL>\r\n'
           '\t\t\t\t<eventSubURL>/ssdp/notfound</eventSubURL>\r\n'
           '\t\t\t\t<SCPDURL>/ssdp/notfound</SCPDURL>\r\n'
           '\t\t\t</service>\r\n'
           '\t\t</serviceList>\r\n'
           '\t</device>\r\n'
           '</root>')
    # context manager guarantees the descriptor file is closed even on error
    # (original used open/write/close, leaking the handle on exceptions)
    with open('ssdp.xml', 'w+') as tfile:
        tfile.write(xml)
    os.system('clear')
    print('You want to start the server now or exit?')
    choose = input("\n1) Start the server or 2) Exit: ")
    os.system('clear')
    if choose == "1":
        server()
    elif choose == "2":
        BYE()
def main():
    """Evaluate MC-dropout ensembles: one loaded checkpoint, many stochastic passes."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # method name is derived from this script's filename: "<prefix>-<method>.py"
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    torch.backends.cudnn.benchmark = True
    if args.aug_test:
        args.method = args.method + '_augment'
    print('Computing for all datasets!')
    # dropout-capable architectures per dataset
    compute = {
        'CIFAR10': ['VGG16BN', 'WideResNet28x10do'],
        'CIFAR100': ['VGG16BN', 'WideResNet28x10do']
    }
    for model in compute[args.dataset]:
        args.model = model
        logger = Logger()
        print('-' * 5, 'Computing results of', model, 'on', args.dataset + '.', '-' * 5)
        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        fnames = read_models(args, base=os.path.expanduser(args.models_dir))
        args.num_classes = num_classes
        model = get_model(args)  # NOTE: rebinds the loop variable to the nn.Module
        for try_ in range(1, 6):
            # pick one random checkpoint per repetition; stochasticity comes from dropout
            fnames = np.random.permutation(fnames)
            model.load_state_dict(get_sd(fnames[0], args))
            log_probs = []
            for ns in range(100):
                start = time.time()
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)
            os.makedirs('./.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns + 1, try_)
            # average sample probabilities in log space: logsumexp - log(count)
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns + 1)
            print('Save final logprobs to %s' % logits_pth)
            np.save(logits_pth, log_prob)
            print('Used weights from %s' % fnames[0], end='\n\n')
def sgd_lastlayerwd0_0005_otherlayerswd0_lr0_1_norm_sched_instead(model, **kwargs):
    """Build an SGDWDMimicNormSchedInsteadLR optimizer with three param groups:

    - "lastlayer" params: weight decay 5e-4;
    - remaining non-conv params: weight decay 0;
    - conv params (per the model's conv index set): weight decay 0, plus
      `l_idx`/`wd_norms` metadata for the norm schedule.
    """
    conv_layers_indices = get_model(model).get_conv_indices_set()
    last_groups = []
    other_groups = []
    conv_groups = []
    # Classify every parameter into exactly one of the three groups.
    for idx, (pname, p) in enumerate(model.named_parameters()):
        if "lastlayer" in pname:
            last_groups.append({'params': p, 'name': pname, 'weight_decay': 5e-4})
        elif idx in conv_layers_indices:
            conv_groups.append({'params': p, 'name': pname, 'weight_decay': 0,
                                'l_idx': idx, 'wd_norms': None})
        else:
            other_groups.append({'params': p, 'name': pname, 'weight_decay': 0})
    # Group order matters to the optimizer: last layer, then other, then conv.
    return SGDWDMimicNormSchedInsteadLR(last_groups + other_groups + conv_groups,
                                        momentum=0.9, lr=0.1, weight_decay=0.0)
def main(config):
    """Restore a model from `config['resume_ckpt']` and run encoding evaluation
    over the train/test/val splits."""
    print(config)
    # build the network, then restore its checkpointed weights
    net = get_model(config)
    net.load_state_dict(load_weights(config['resume_ckpt']))
    # inference only: move to GPU and freeze batch-norm/dropout behaviour
    net.cuda()
    net.eval()
    train_loader, test_loader, val_loader = setup_dataloaders(config)
    eval_encoding(config, net, train_loader, test_loader, val_loader)
def main():
    """Evaluate Bayesian-network ensembles: one checkpoint, many weight samples."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # method name is derived from this script's filename: "<prefix>-<method>.py"
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    torch.backends.cudnn.benchmark = True
    if args.aug_test:
        args.method = args.method + '_augment'
    os.makedirs('./logs', exist_ok=True)
    # Bayesian architectures per dataset
    compute = {
        'CIFAR10': ['BayesVGG16BN', 'BayesPreResNet110', 'BayesPreResNet164', 'BayesWideResNet28x10'],
        'CIFAR100': ['BayesVGG16BN', 'BayesPreResNet110', 'BayesPreResNet164', 'BayesWideResNet28x10'],
        'ImageNet': ['BayesResNet50']
    }
    for model in compute[args.dataset]:
        args.model = model
        logger = Logger()
        print('-'*5, 'Computing results of', model, 'on', args.dataset + '.', '-'*5)
        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        fnames = read_models(args, base=os.path.expanduser(args.models_dir))
        args.num_classes = num_classes
        model = get_model(args)  # NOTE: rebinds the loop variable to the nn.Module
        for run in range(1, 6):
            print('Repeat num. %s' % run)
            log_probs = []
            # a single checkpoint; each forward pass samples new weights
            checkpoint = get_sd(fnames[0], args)
            model.load_state_dict(checkpoint)
            for ns in range(100 if args.dataset != 'ImageNet' else 50):
                start = time.time()
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)
            os.makedirs('.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns+1, run)
            # average sample probabilities in log space: logsumexp - log(count)
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns+1)
            np.save(logits_pth, log_prob)
def main(argv):
    """Run active-learning trials over every (confusion, mixture, seed) combination."""
    del argv  # unused; absl passes positional args here
    # both flags accept multiple space-separated float values
    confusions = [float(t) for t in FLAGS.confusions.split(" ")]
    mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
    # 0 means "no cap on dataset size"
    max_dataset_size = None if FLAGS.max_dataset_size == 0 else FLAGS.max_dataset_size
    starting_seed = 42
    for c in confusions:
        for m in mixtures:
            # one curve per trial, each with its own seed
            for seed in range(starting_seed, starting_seed + FLAGS.trials):
                sampler = get_AL_sampler(FLAGS.sampling_method)
                score_model = utils.get_model(seed)
                results, sampler_state = generate_one_curve(
                    sampler, score_model, seed, FLAGS.warmstart_size,
                    FLAGS.batch_size, c, m, FLAGS.train_horizon)
def per_channel_normalization_norm_as_wd(model, norm, normalization_lst_method_name="get_conv_indices_set", norms_dict=None):
    """Build a WeightsNormalization whose per-conv-layer target norms come from
    `norms_dict`, scaled by sqrt(10) every 20 epochs (mimicking a step LR drop).

    Returns None if the model cannot report its conv-layer indices.
    NOTE: the per-epoch norm lists in `norms_dict` are scaled *in place*.
    """
    import numpy as np

    model = get_model(model)
    if not hasattr(model, normalization_lst_method_name):
        return None
    layers_set = getattr(model, normalization_lst_method_name)()
    norms_dict_for_l = {}
    for l in layers_set:
        # hoist the repeated dict lookup; keep the alias so the in-place
        # scaling below matches the original behaviour
        per_epoch_norms = norms_dict["channels_norm_conv_w_norm_per_channel_layer" + str(l)]
        norms_dict_for_l[l] = per_epoch_norms
        for ep in range(len(per_epoch_norms)):
            # grow the target norm by sqrt(10) every 20 epochs
            per_epoch_norms[ep] *= float(np.sqrt(10)) ** (ep // 20)
    channels_params = [{'params': params.data, 'name': name, 'norm': norms_dict_for_l[l]}
                       for l, (name, params) in enumerate(model.named_parameters())
                       if l in layers_set]
    return WeightsNormalization(channels_params)
def __init__(self, args):
    """Set up the active-learning segmentation trainer: loaders for training,
    query selection and validation, the model, and bookkeeping state."""
    # common args
    self.args = args
    self.best_miou = -1.0
    self.dataset_name = args.dataset_name
    self.debug = args.debug
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu:0")
    self.dir_checkpoints = f"{args.dir_root}/checkpoints/{args.experim_name}"
    self.experim_name = args.experim_name
    self.ignore_index = args.ignore_index
    self.init_n_pixels = args.n_init_pixels
    self.max_budget = args.max_budget
    self.n_classes = args.n_classes
    self.n_epochs = args.n_epochs
    self.n_pixels_by_us = args.n_pixels_by_us
    self.network_name = args.network_name
    self.nth_query = -1  # incremented at each query round
    self.stride_total = args.stride_total
    # three loaders: shuffled batched training, per-image query, per-image val
    # (deepcopy so each loader gets its own args to mutate)
    self.dataloader = get_dataloader(deepcopy(args), val=False, query=False,
                                     shuffle=True, batch_size=args.batch_size,
                                     n_workers=args.n_workers)
    self.dataloader_query = get_dataloader(deepcopy(args), val=False, query=True,
                                           shuffle=False, batch_size=1,
                                           n_workers=args.n_workers)
    self.dataloader_val = get_dataloader(deepcopy(args), val=True, query=False,
                                         shuffle=False, batch_size=1,
                                         n_workers=args.n_workers)
    self.model = get_model(args).to(self.device)
    self.lr_scheduler_type = args.lr_scheduler_type
    self.query_selector = QuerySelector(args, self.dataloader_query)
    self.vis = Visualiser(args.dataset_name)
    # for tracking stats
    self.running_loss, self.running_score = AverageMeter(), RunningScore(
        args.n_classes)
def _train(self):
    """Train a freshly built model for `self.n_epochs` epochs, validating each
    epoch, and return the trained model. Resets `best_miou` afterwards."""
    print(f"\n({self.experim_name}) training...\n")
    # fresh model/optimizer/scheduler for this training round
    net = get_model(self.args).to(self.device)
    opt = get_optimizer(self.args, net)
    sched = get_lr_scheduler(self.args, optimizer=opt,
                             iters_per_epoch=len(self.dataloader))
    for epoch in range(1, 1 + self.n_epochs):
        net, opt, sched = self._train_epoch(epoch, net, opt, sched)
        self._val(epoch, net)
        # in debug mode a single epoch is enough
        if self.debug:
            break
    # reset the best-score tracker for the next query round
    self.best_miou = -1.0
    return net
torch.save(state, args.cv_dir + '/ckpt_E_%d_A_%.3f' % (epoch, accuracy)) #--------------------------------------------------------------------------------------------------------# trainset, testset = utils.get_dataset(args.model, args.data_dir) trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8) testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=8) # Load the Model rnet, _, _ = utils.get_model(args.model) rnet.cuda() # Load the pre-trained classifier if args.load: checkpoint = torch.load(args.load) rnet.load_state_dict(checkpoint['state_dict']) # Save the configuration to the output directory configure(args.cv_dir + '/log', flush_secs=5) # Define the optimizer if args.model.split('_')[1] == 'C10' or args.model.split('_')[1] == 'C100': optimizer = optim.SGD(rnet.parameters(), lr=args.lr, momentum=0.9,
from utils.bunch import bunch
import sys
import pprint

if __name__ == '__main__':
    # parse CLI args; excluded_args collects flags the parser did not recognise
    args, excluded_args, parser = get_args()
    args = bunch.bunchify(args)
    set_torch_seed(args.seed)
    device = set_gpu(args.gpu)
    # assemble the experiment: data, task definitions, backbone and strategy
    datasets = get_data(args)
    tasks = get_tasks(args)
    backbone = get_backbone(args, device)
    strategy = get_strategy(args, device)
    model = get_model(backbone, tasks, datasets, strategy, args, device)
    compressed_args = compress_args(bunch.unbunchify(args), parser)
    print(" ----------------- FULL ARGS (COMPACT) ----------------")
    pprint.pprint(compressed_args, indent=2)
    print(" ------------------------------------------------------")
    print(" ------------------ UNRECOGNISED ARGS -----------------")
    pprint.pprint(excluded_args, indent=2)
    print(" ------------------------------------------------------")
    # build the experiment driver, restore any pretrained weights, then train and evaluate
    system = ExperimentBuilder(model, tasks, datasets, device, args)
    system.load_pretrained()
    system.train_model()
    system.evaluate_model()
def main_per_worker(process_index, ngpus_per_node, args):
    """Per-process entry point for (optionally distributed) tracklet-pair training.

    :param process_index: local GPU/process index on this machine
    :param ngpus_per_node: processes per machine, used to derive the global rank
    :param args: launch args (rank, dist_url, world_size, distributed, ...)
    """
    update_config(cfg, args)
    # torch seed
    # NOTE(review): seeding with random.random() gives a different seed each
    # launch — presumably intentional (non-reproducible); confirm.
    torch.cuda.manual_seed(random.random())
    # cudnn
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
    # proc_rank: global rank of this worker
    proc_rank = args.rank * ngpus_per_node + process_index
    # create logger
    logger, output_dir = create_logger(cfg, proc_rank)
    # logger.info(pprint.pformat(args))
    # logger.info(cfg)
    model = get_model(cfg, cfg.MODEL.FILE, cfg.MODEL.NAME)
    # frozen appearance-embedding network (pretrained facenet)
    emb = InceptionResnetV1(pretrained='vggface2', classify=False)
    assert cfg.MODEL.APPEARANCE.WEIGHTS != ''
    load_eval_model(cfg.MODEL.APPEARANCE.WEIGHTS, emb)
    # TODO change based on the paper
    optimizer = get_optimizer(cfg, model)
    model, optimizer, last_iter = load_checkpoint(cfg, model, optimizer)
    lr_scheduler = get_lr_scheduler(cfg, optimizer, last_iter)
    transform = FacenetInferenceTransform(size=(cfg.TRAIN.INPUT_MIN, cfg.TRAIN.INPUT_MAX))
    train_dataset = TrackletpairDataset(cfg.DATASET.ROOT, transform=transform, is_train=True)
    eval_dataset = TrackletpairDataset(cfg.DATASET.ROOT, transform=transform, is_train=False)
    # distribution
    if args.distributed:
        logger.info(
            f'Init process group: dist_url: {args.dist_url}, '
            f'world_size: {args.world_size}, '
            f'machine: {args.rank}, '
            f'rank:{proc_rank}'
        )
        dist.init_process_group(
            backend=cfg.DIST_BACKEND,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=proc_rank
        )
        torch.cuda.set_device(process_index)
        model.cuda()
        emb.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[process_index]
        )
        emb = torch.nn.parallel.DistributedDataParallel(
            emb, device_ids=[process_index]
        )
        train_sampler = BalancedBatchSampler(
            train_dataset
        )
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU
    else:
        # single-process path: only rank 0 may run evaluation
        assert proc_rank == 0, ('proc_rank != 0, it will influence '
                                'the evaluation procedure')
        model = torch.nn.DataParallel(model).cuda()
        emb = torch.nn.DataParallel(emb).cuda()
        train_sampler = BalancedBatchSampler(
            train_dataset
        )
        # DataParallel sees all GPUs, so scale the batch accordingly
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU * ngpus_per_node
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=(train_sampler is None),
        drop_last=False,
        collate_fn=tracklet_pair_collect,
        num_workers=cfg.WORKERS,
        pin_memory=True,
        sampler=train_sampler
    )
    eval_loader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        collate_fn=tracklet_pair_collect,
        num_workers=cfg.WORKERS
    )
    criterion = nn.CrossEntropyLoss()
    Trainer = trackletpairConnectTrainer(
        cfg,
        model,
        optimizer,
        lr_scheduler,
        criterion,
        output_dir,
        'acc',
        last_iter,
        proc_rank,
        pre_ap_model=emb,
    )
    # alternate train and eval passes indefinitely
    while True:
        Trainer.train(train_loader, eval_loader)
        # eval
        Trainer.evaluate(eval_loader)
args = parser.parse_args() with open(args.config) as f: config = yaml.load(f) config['config_file'] = args.config.replace('/','.').split('.')[-2] seed = config['seed'] np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) n_epochs = config['optimization']['n_epochs'] if not args.disable_cuda and torch.cuda.is_available(): device = torch.device('cuda:{}'.format(args.gpu)) else: device = torch.device('cpu') logger = Logger(config) model = get_model(config['model']) optim = get_optimizer(model.parameters(),config['optimization']) train_loader, valid_loader, test_loader = get_data(config['data']) ## Train for i in range(n_epochs): for data, label in train_loader: break
'reward': reward, } torch.save(state, args.cv_dir + '/ckpt_E_%d_R_%.2E' % (epoch, reward)) #--------------------------------------------------------------------------------------------------------# trainset, testset = utils.get_dataset(args.img_size, args.data_dir) trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) agent = utils.get_model(num_actions) # ---- Load the pre-trained model ---------------------- start_epoch = 0 if args.load is not None: checkpoint = torch.load(args.load) agent.load_state_dict(checkpoint['agent']) start_epoch = checkpoint['epoch'] + 1 print('loaded agent from %s' % args.load) # Parallelize the models if multiple GPUs available - Important for Large Batch Size to Reduce Variance if args.parallel: agent = nn.DataParallel(agent) agent.cuda() # Update the parameters of the policy network
# {'frame_id': (obj_num, crop_min, crop_max, 3)} crop_im[label][frame_id] = crop_img[label_idx] # {'frame_id': (obj_num, emb_size+4+1+3) emb x y w h label(float) crop_index(array(1))} if obj_num: det_result[label][frame_id] = np.zeros( (obj_num, emb_size + 4 + 1 + 1)) - 1 det_result[label][frame_id][:, :emb_size] = img_embs[label_idx] det_result[label][frame_id][:, emb_size:emb_size + 4] = boxes[label_idx] det_result[label][frame_id][:, emb_size + 4] = labels[label_idx] det_result[label][frame_id][:, emb_size + 5:] = np.array( range(obj_num)).reshape(-1, 1) # init cluster tnt_model = get_model(cfg, cfg.MODEL.FILE, cfg.MODEL.NAME) if cfg.MODEL.RESUME_PATH != '': load_eval_model(cfg.MODEL.RESUME_PATH, tnt_model) tnt_model.cuda().eval() cluster_per_label = {} for label in range(1, cfg.DATASET.NUM_CLASSES): if len(det_result[label]) == 0: continue start = time.time() # use coarse constriant to get coarse track dict print("processing label: ", class_dict[label]) print("det num: ", len(det_result[label])) coarse_track_dict = merge_det(det_result[label], crop_im[label]) det_result[label].clear() crop_im[label].clear()
torch.save( state, args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward)) #--------------------------------------------------------------------------------------------------------# trainset, testset = utils.get_dataset(args.model, args.data_dir) trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8) testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=8) rnet, _, agent = utils.get_model(args.model) # Save the args to the checkpoint directory configure(args.cv_dir + '/log', flush_secs=5) # Agent Action Space mappings, _, patch_size = utils.action_space_model(args.model.split('_')[1]) # Load the classifier - has to exist checkpoint = torch.load(args.ckpt_hr_cl) rnet.load_state_dict(checkpoint['state_dict']) print('loaded the high resolution classifier') # Load the Policy Network start_epoch = 0 if args.load is not None:
myTrainer.update_lr(epoch, schedule) myTrainer.sparsification(epoch) train_1 = t_classifier.evaluate(myTrainer.model, evaluator_iterator, 0, train_end) print("*********CURRENT EPOCH********** : %d" % epoch) print("Train Classifier top-1 (Softmax): %0.2f" % train_1) if epoch % 5 == (4): if t == 0: test_1 = t_classifier.evaluate(myTrainer.model, test_iterator, test_start, test_end, mode='test', step_size=args.step_size) print("Test Classifier top-1 (Softmax): %0.2f" % test_1) if test_1 > best_acc: best_acc = test_1 best_model = utils.get_model(myModel) print("change best model") else: correct, stat = t_classifier.evaluate(myTrainer.model, test_iterator, test_start, test_end, mode='test', step_size=args.step_size) print("Test Classifier top-1 (Softmax, all): %0.2f" % correct['all']) print("Test Classifier top-1 (Softmax, pre): %0.2f" % correct['pre']) print("Test Classifier top-1 (Softmax, new): %0.2f" % correct['new']) print("Test Classifier top-1 (Softmax, intra_pre): %0.2f" % correct['intra_pre']) print("Test Classifier top-1 (Softmax, intra_new): %0.2f" % correct['intra_new']) if correct['intra_pre'] > best_acc: best_acc = correct['intra_pre'] best_model = utils.get_model(myModel)
use_cuda = torch.cuda.is_available() ## get logger logger = getLogger(self.config.model_name, self.config.task_name, self.config.path) logger.info("Task Name : {}".format(self.config.task_name)) logger.info("Backbone_name : {}".format(self.config.model_name)) logger.info("input_shape : (3,{}.{})".format(self.config.input_shape,self.config.input_shape)) logger.info("num_epochs : {}".format(self.config.num_epochs)) logger.info("resume_from : {}".format(self.config.resume_from)) logger.info("pretrained : {}".format(self.config.pretrained)) ## tensorboard writer log_dir = os.path.join(self.config.path,"{}".format("tensorboard_log")) if not os.path.isdir(log_dir): os.mkdir(log_dir) writer = SummaryWriter(log_dir) ## get model of train net = get_model(self.config.model_name) net = torch.nn.DataParallel(net, device_ids = self.config.device_ids) net = net.cuda(device = device_ids[0]) ## loss criterion = nn.CrossEntropyLoss() ## optimizer if self.config.optimizers == 'SGD': optimizer = optim.SGD(net.parameters(), lr=self.config.init_lr, momentum=0.9, weight_decay=self.config.weight_decay) elif self.config.optimizers == 'Adam': optimizer = optim.Adam(net.parameters(), lr=self.config.init_lr, weight_decay=self.config.weight_decay) milestones = [80,150,200,300] scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) logger.info(("============opti===========")) logger.info("Optimizer:{}".format(self.config.optimizers)) logger.info("lr:{}".format(self.config.init_lr)) logger.info("weight_decay:{}".format(self.config.weight_decay))
torch.save( state, args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward)) #--------------------------------------------------------------------------------------------------------# trainset, testset = utils.get_dataset(args.model, args.data_dir) trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8) testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=8) rnet_hr, rnet_lr, agent = utils.get_model(args.model) rnet_hr.cuda() rnet_lr.eval().cuda() agent.cuda() # Save the configurations into the output folder configure(args.cv_dir + '/log', flush_secs=5) # Action Space mappings, img_size, patch_size = utils.action_space_model( args.model.split('_')[1]) # Load the Low-res classifier if args.ckpt_lr_cl is not None: checkpoint = torch.load(args.ckpt_lr_cl) if args.model.split('_')[1] == 'C10' or args.model.split('_')[1] == 'C100':
def main(config):
    """Train the reconstruction model, optionally with VGG-perceptual, GAN and
    L2 losses, with optional DDP; checkpoints and TensorBoard logs go under
    `config['save_path']`.
    """
    save_path = config['save_path']
    epochs = config['epochs']
    os.environ['TORCH_HOME'] = config['torch_home']
    distributed = config['use_DDP']
    start_ep = 0
    start_cnt = 0
    # initialize model
    print("Initializing model...")
    if distributed:
        initialize_distributed(config)
        rank = config['rank']
    # NOTE(review): `rank` is only bound on the distributed path but is read
    # unconditionally below — presumably non-DDP configs set config['rank']=0
    # upstream; confirm.
    # map string name to class constructor
    model = get_model(config)
    model.apply(init_weights)
    if config['resume_ckpt'] is not None:
        # load weights from checkpoint
        state_dict = load_weights(config['resume_ckpt'])
        model.load_state_dict(state_dict)
    print("Moving model to GPU")
    model.cuda(torch.cuda.current_device())
    print("Setting up losses")
    if config['use_vgg']:
        criterionVGG = Vgg19PerceptualLoss(config['reduced_w'])
        criterionVGG.cuda()
        validationLoss = criterionVGG
    if config['use_gan']:
        use_sigmoid = config['no_lsgan']
        disc_input_channels = 3
        discriminator = MultiscaleDiscriminator(disc_input_channels, config['ndf'],
                                                config['n_layers_D'], 'instance',
                                                use_sigmoid, config['num_D'], False, False)
        discriminator.apply(init_weights)
        if config['resume_ckpt_D'] is not None:
            # load weights from checkpoint
            print("Resuming discriminator from %s" % (config['resume_ckpt_D']))
            state_dict = load_weights(config['resume_ckpt_D'])
            discriminator.load_state_dict(state_dict)
        discriminator.cuda(torch.cuda.current_device())
        criterionGAN = GANLoss(use_lsgan=not config['no_lsgan'])
        criterionGAN.cuda()
        criterionFeat = nn.L1Loss().cuda()
    if config['use_l2']:
        # L2 takes precedence over VGG as the validation loss when both are on
        criterionMSE = nn.MSELoss()
        criterionMSE.cuda()
        validationLoss = criterionMSE
    # initialize dataloader
    print("Setting up dataloaders...")
    train_dataloader, val_dataloader, train_sampler = setup_dataloaders(config)
    print("Done!")
    # run the training loop
    print("Initializing optimizers...")
    optimizer_G = optim.Adam(model.parameters(), lr=config['learning_rate'],
                             weight_decay=config['weight_decay'])
    if config['resume_ckpt_opt_G'] is not None:
        # map_location keeps the optimizer state on CPU until .step moves it
        optimizer_G_state_dict = torch.load(
            config['resume_ckpt_opt_G'], map_location=lambda storage, loc: storage)
        optimizer_G.load_state_dict(optimizer_G_state_dict)
    if config['use_gan']:
        optimizer_D = optim.Adam(discriminator.parameters(), lr=config['learning_rate'])
        if config['resume_ckpt_opt_D'] is not None:
            optimizer_D_state_dict = torch.load(
                config['resume_ckpt_opt_D'], map_location=lambda storage, loc: storage)
            optimizer_D.load_state_dict(optimizer_D_state_dict)
    print("Done!")
    if distributed:
        print("Moving model to DDP...")
        model = DDP(model)
        if config['use_gan']:
            discriminator = DDP(discriminator, delay_allreduce=True)
        print("Done!")
    tb_logger = None
    if rank == 0:
        # only rank 0 writes TensorBoard logs
        tb_logdir = os.path.join(save_path, 'tbdir')
        if not os.path.exists(tb_logdir):
            os.makedirs(tb_logdir)
        tb_logger = SummaryWriter(tb_logdir)
    # run training
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    log_name = os.path.join(save_path, 'loss_log.txt')
    opt_name = os.path.join(save_path, 'opt.yaml')
    print(config)
    save_options(opt_name, config)
    log_handle = open(log_name, 'a')
    print("Starting training")
    cnt = start_cnt
    # at least one of the two pairing modes must be enabled
    assert (config['use_warped'] or config['use_temporal'])
    for ep in range(start_ep, epochs):
        if train_sampler is not None:
            train_sampler.set_epoch(ep)
        for curr_batch in train_dataloader:
            optimizer_G.zero_grad()
            input_a = curr_batch['input_a'].cuda()
            target = curr_batch['target'].cuda()
            if config['use_warped'] and config['use_temporal']:
                # both modes: duplicate the batch, pairing input_a with the
                # warped and the temporal partner respectively
                input_a = torch.cat((input_a, input_a), 0)
                input_b = torch.cat((curr_batch['input_b'].cuda(),
                                     curr_batch['input_temporal'].cuda()), 0)
                target = torch.cat((target, target), 0)
            elif config['use_temporal']:
                input_b = curr_batch['input_temporal'].cuda()
            elif config['use_warped']:
                input_b = curr_batch['input_b'].cuda()
            output_dict = model(input_a, input_b)
            output_recon = output_dict['reconstruction']
            loss_vgg = loss_G_GAN = loss_G_feat = loss_l2 = 0
            if config['use_vgg']:
                loss_vgg = criterionVGG(output_recon, target) * config['vgg_lambda']
            if config['use_gan']:
                predicted_landmarks = output_dict['input_a_gauss_maps']
                # output_dict['reconstruction'] can be considered normalized
                loss_G_GAN, loss_D_real, loss_D_fake = apply_GAN_criterion(
                    output_recon, target, predicted_landmarks.detach(),
                    discriminator, criterionGAN)
                loss_D = (loss_D_fake + loss_D_real) * 0.5
            if config['use_l2']:
                loss_l2 = criterionMSE(output_recon, target) * config['l2_lambda']
            loss_G = loss_G_GAN + loss_G_feat + loss_vgg + loss_l2
            loss_G.backward()
            # grad_norm clipping
            if not config['no_grad_clip']:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer_G.step()
            if config['use_gan']:
                optimizer_D.zero_grad()
                loss_D.backward()
                # grad_norm clipping
                if not config['no_grad_clip']:
                    torch.nn.utils.clip_grad_norm_(discriminator.parameters(), 1.0)
                optimizer_D.step()
            if distributed:
                if config['use_vgg']:
                    loss_vgg = reduce_tensor(loss_vgg, config['world_size'])
            if rank == 0:
                if cnt % 10 == 0:
                    run_visualization(output_dict, output_recon, target, input_a,
                                      input_b, save_path, tb_logger, cnt)
                print_dict = {"learning_rate": get_learning_rate(optimizer_G)}
                if config['use_vgg']:
                    tb_logger.add_scalar('vgg.loss', loss_vgg, cnt)
                    print_dict['Loss_VGG'] = loss_vgg.data
                if config['use_gan']:
                    tb_logger.add_scalar('gan.loss', loss_G_GAN, cnt)
                    tb_logger.add_scalar('d_real.loss', loss_D_real, cnt)
                    tb_logger.add_scalar('d_fake.loss', loss_D_fake, cnt)
                    print_dict['Loss_G_GAN'] = loss_G_GAN
                    print_dict['Loss_real'] = loss_D_real.data
                    print_dict['Loss_fake'] = loss_D_fake.data
                if config['use_l2']:
                    tb_logger.add_scalar('l2.loss', loss_l2, cnt)
                    print_dict['Loss_L2'] = loss_l2.data
                log_iter(ep, cnt % len(train_dataloader), len(train_dataloader),
                         print_dict, log_handle=log_handle)
            # NaN check: a NaN loss is never equal to itself
            if loss_G != loss_G:
                print("NaN!!")
                exit(-2)
            cnt = cnt + 1
        # end of train iter loop
        if cnt % config['val_freq'] == 0 and config['val_freq'] > 0:
            val_loss = run_val(
                model, validationLoss, val_dataloader,
                os.path.join(save_path, 'val_%d_renders' % (ep)))
            if distributed:
                val_loss = reduce_tensor(val_loss, config['world_size'])
            if rank == 0:
                tb_logger.add_scalar('validation.loss', val_loss, cnt)
                log_iter(ep, cnt % len(train_dataloader), len(train_dataloader),
                         {"Loss_VGG": val_loss}, header="Validation loss: ",
                         log_handle=log_handle)
        if rank == 0:
            if (ep % config['save_freq'] == 0):
                fname = 'checkpoint_%d.ckpt' % (ep)
                fname = os.path.join(save_path, fname)
                print("Saving model...")
                save_weights(model, fname, distributed)
                optimizer_g_fname = os.path.join(
                    save_path, 'latest_optimizer_g_state.ckpt')
                torch.save(optimizer_G.state_dict(), optimizer_g_fname)
                if config['use_gan']:
                    fname = 'checkpoint_D_%d.ckpt' % (ep)
                    fname = os.path.join(save_path, fname)
                    save_weights(discriminator, fname, distributed)
                    optimizer_d_fname = os.path.join(
                        save_path, 'latest_optimizer_d_state.ckpt')
                    torch.save(optimizer_D.state_dict(), optimizer_d_fname)
device = torch.device("cpu" if cfg.CPU_MODE else "cuda") num_class_list, cat_list = get_category_list(annotations, num_classes, cfg) para_dict = { "num_classes": num_classes, "num_class_list": num_class_list, "cfg": cfg, "device": device, } criterion = eval(cfg.LOSS.LOSS_TYPE)(para_dict=para_dict) epoch_number = cfg.TRAIN.MAX_EPOCH # ----- BEGIN MODEL BUILDER ----- model = get_model(cfg, num_classes, device, logger) combiner = Combiner(cfg, device) optimizer = get_optimizer(cfg, model) scheduler = get_scheduler(cfg, optimizer) # ----- END MODEL BUILDER ----- trainLoader = DataLoader(train_set, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=cfg.TRAIN.SHUFFLE, num_workers=cfg.TRAIN.NUM_WORKERS, pin_memory=cfg.PIN_MEMORY, drop_last=True) validLoader = DataLoader( valid_set, batch_size=cfg.TEST.BATCH_SIZE,
def main(args):
  """Run the active-learning experiment grid described by ``args``.

  For every (confusion, mixture, trial-seed) combination, builds a sampler and
  score/select models, generates one learning curve via ``generate_one_curve``,
  and collects all results keyed by the full experiment configuration.
  If ``args.do_save`` is set, stdout is teed to a log file and the results
  dict is pickled into a per-method subdirectory of ``args.save_dir``.
  """
  # make the export folder structure
  # this is made here because the Logger uses the filename
  if args.do_save:
    # make a base save directory
    utils.make_dir(args.save_dir)
    # make a directory in the base save directory for the specific
    # dataset + sampling method combination.
    save_subdir = os.path.join(args.save_dir,
                               args.dataset + "_" + args.sampling_method)
    utils.make_dir(save_subdir)
    filename = os.path.join(
        save_subdir,
        "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  # confusion argument can have multiple values
  confusions = [float(t) for t in args.confusions.split(" ")]
  mixtures = [float(t) for t in args.active_sampling_percentage.split(" ")]
  # 0 means "no cap on dataset size"
  max_dataset_size = None if args.max_dataset_size == 0 else args.max_dataset_size
  starting_seed = args.seed

  # get the dataset from file based on the data directory and dataset name
  X, y = utils.get_mldata(args.data_dir, args.dataset)

  # object to store the results in
  all_results = {}

  # percentage of labels to randomize
  for c in confusions:
    # mixture weights on active sampling
    for m in mixtures:
      # the number of curves created during multiple trials
      for seed in range(starting_seed, starting_seed + args.trials):
        # get the sampler based on the name; also named: query strategy
        sampler = get_AL_sampler(args.sampling_method)
        # get the model
        score_model = utils.get_model(args.score_method, seed)
        if (args.select_method == "None" or
            args.select_method == args.score_method):
          select_model = None
        else:
          select_model = utils.get_model(args.select_method, seed)
        # create the learning curve
        results, sampler_state = generate_one_curve(
            X,
            y,
            sampler,
            score_model,
            seed,
            args.warmstart_size,
            args.batch_size,
            select_model,
            confusion=c,
            active_p=m,
            max_points=max_dataset_size,
            standardize_data=args.standardize_data,
            norm_data=args.normalize_data,
            train_horizon=args.train_horizon)
        key = (args.dataset, args.sampling_method, args.score_method,
               args.select_method, m, args.warmstart_size, args.batch_size,
               c, args.standardize_data, args.normalize_data, seed)
        sampler_output = sampler_state.to_dict()
        results["sampler_output"] = sampler_output
        all_results[key] = results

  # field names matching the positions in the result keys above
  fields = [
      "dataset", "sampler", "score_method", "select_method",
      "active percentage", "warmstart size", "batch size", "confusion",
      "standardize", "normalize", "seed"
  ]
  all_results["tuple_keys"] = fields

  # write the results to a file
  if args.do_save:
    # format the filename
    filename = "results_score_{}_select_{}_norm_{}_stand_{}".format(
        args.score_method, args.select_method, args.normalize_data,
        args.standardize_data)
    existing_files = gfile.Glob(
        os.path.join(save_subdir, "{}*.pkl".format(filename)))
    # BUGFIX: the [1:] zero-padding trick must apply to the counter string
    # only (1000 + n -> "0nn"), not to the whole formatted filename, which
    # previously chopped the first character of the results filename.
    filepath = os.path.join(
        save_subdir,
        "{}_{}.pkl".format(filename, str(1000 + len(existing_files))[1:]))
    # dump the dict to a pickle file
    pickle.dump(all_results, gfile.GFile(filepath, "w"))
    # flush stdout
    sys.stdout.flush_file()
"num_class_list": num_class_list, "cfg": cfg, "device": device, } criterion = eval(cfg.LOSS.LOSS_TYPE)(para_dict=para_dict) epoch_number = cfg.TRAIN.MAX_EPOCH # ----- BEGIN MODEL BUILDER ----- l1_cls_num = label_map[:, 0].max().item() + 1 l2_cls_num = label_map[:, 1].max().item() + 1 model_dir = os.path.join(cfg.OUTPUT_DIR, cfg.NAME, "models") code_dir = os.path.join(cfg.OUTPUT_DIR, cfg.NAME, "codes") model = get_model(cfg, [l1_cls_num, l2_cls_num], device, logger) if cfg.TRAIN_STAGE == 2: last_stage_weight_path = os.path.join(model_dir, 'best_model_stage1.pth') load_weight(model, last_stage_weight_path) model.module.freeze_backbone() model.module.freeze_classifer(0) elif cfg.TRAIN_STAGE == 1: last_stage_weight_path = os.path.join(args.pretrained_path) load_weight(model, last_stage_weight_path) model.module.freeze_backbone() model.module.freeze_classifer(1) # load_pretrained_weight(model, args.pretrained_path) combiner = Combiner(cfg, device) optimizer = get_optimizer(cfg, model) scheduler = get_scheduler(cfg, optimizer)
def main(argv):
  """FLAGS-driven entry point for the active-learning experiment grid.

  Mirrors the argparse-based driver: iterates over confusion values, mixture
  weights, and trial seeds, generates one learning curve per combination, and
  optionally pickles the aggregated results under ``FLAGS.save_dir``.
  """
  del argv  # unused; required by the app.run entry-point signature

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    # Narrowed from a bare `except:`: best-effort creation should not swallow
    # KeyboardInterrupt / SystemExit. The directory may already exist when
    # several jobs race to create it, so a failure here is only a warning.
    except Exception:
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(
      FLAGS.save_dir,
      FLAGS.dataset + "_" + FLAGS.sampling_method)
  do_save = FLAGS.do_save == "True"

  if do_save:
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except Exception:  # see note above: best-effort, warn only
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  confusions = [float(t) for t in FLAGS.confusions.split(" ")]
  mixtures = [float(t) for t in FLAGS.active_sampling_percentage.split(" ")]
  all_results = {}
  # flags arrive as strings; "0" means no cap on the dataset size
  max_dataset_size = None if FLAGS.max_dataset_size == "0" else int(
      FLAGS.max_dataset_size)
  normalize_data = FLAGS.normalize_data == "True"
  standardize_data = FLAGS.standardize_data == "True"
  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)
  starting_seed = FLAGS.seed

  for c in confusions:
    for m in mixtures:
      for seed in range(starting_seed, starting_seed + FLAGS.trials):
        sampler = get_AL_sampler(FLAGS.sampling_method)
        score_model = utils.get_model(FLAGS.score_method, seed)
        if (FLAGS.select_method == "None" or
            FLAGS.select_method == FLAGS.score_method):
          select_model = None
        else:
          select_model = utils.get_model(FLAGS.select_method, seed)
        results, sampler_state = generate_one_curve(
            X, y, sampler, score_model, seed, FLAGS.warmstart_size,
            FLAGS.batch_size, select_model, c, m, max_dataset_size,
            standardize_data, normalize_data, FLAGS.train_horizon)
        key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
               FLAGS.select_method, m, FLAGS.warmstart_size, FLAGS.batch_size,
               c, standardize_data, normalize_data, seed)
        sampler_output = sampler_state.to_dict()
        results["sampler_output"] = sampler_output
        all_results[key] = results

  # field names matching the positions in the result keys above
  fields = [
      "dataset", "sampler", "score_method", "select_method",
      "active percentage", "warmstart size", "batch size", "confusion",
      "standardize", "normalize", "seed"
  ]
  all_results["tuple_keys"] = fields

  if do_save:
    filename = ("results_score_" + FLAGS.score_method +
                "_select_" + FLAGS.select_method +
                "_norm_" + str(normalize_data) +
                "_stand_" + str(standardize_data))
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    # str(1000 + n)[1:] yields a zero-padded 3-digit run index ("000", "001"…)
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()
def main(argv):
  """Simplified FLAGS-driven driver: one learning curve per trial seed.

  Unlike the full grid driver, this variant iterates over seeds only (no
  confusion/mixture sweep) and records ``None`` for the sampler output.
  Results are optionally pickled under ``FLAGS.save_dir``.
  """
  del argv  # unused; required by the app.run entry-point signature

  if not gfile.Exists(FLAGS.save_dir):
    try:
      gfile.MkDir(FLAGS.save_dir)
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # swallowed; directory creation is best-effort (may race with other jobs).
    except Exception:
      # BUGFIX: message was truncated mid-sentence; completed to match the
      # identical warning used below.
      print(('WARNING: error creating save directory, '
             'directory most likely already created.'))

  save_dir = os.path.join(FLAGS.save_dir,
                          FLAGS.dataset + '_' + FLAGS.sampling_method)

  if FLAGS.do_save == "True":
    if not gfile.Exists(save_dir):
      try:
        gfile.MkDir(save_dir)
      except Exception:  # best-effort, warn only (see note above)
        print(('WARNING: error creating save directory, '
               'directory most likely already created.'))
    # Set up logging
    filename = os.path.join(
        save_dir, "log-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime()) + ".txt")
    sys.stdout = utils.Logger(filename)

  X, y = utils.get_mldata(FLAGS.data_dir, FLAGS.dataset)  # load dataset
  starting_seed = FLAGS.seed
  all_results = {}

  for seed in range(starting_seed, starting_seed + FLAGS.trials):
    sampler = get_AL_sampler(FLAGS.sampling_method)       # load sampler
    score_model = utils.get_model(FLAGS.score_method, seed)  # load score model
    # load select model (None when it would duplicate the score model)
    if (FLAGS.select_method == "None" or
        FLAGS.select_method == FLAGS.score_method):
      select_model = None
    else:
      select_model = utils.get_model(FLAGS.select_method, seed)
    results, sampler_state = generate_one_curve(
        X=X,
        y=y,
        sampler=sampler,
        score_model=score_model,
        seed=seed,
        warmstart_size=FLAGS.warmstart_size,
        batch_size=FLAGS.batch_size,
        select_model=select_model,
        max_points=FLAGS.max_dataset_size)
    key = (FLAGS.dataset, FLAGS.sampling_method, FLAGS.score_method,
           FLAGS.select_method, FLAGS.warmstart_size, FLAGS.batch_size, seed)
    # sampler state serialization is intentionally disabled in this variant
    results['sampler_output'] = None
    all_results[key] = results

  # field names matching the positions in the result keys above
  fields = [
      'dataset', 'sampling_methods', 'score_method', 'select_method',
      'warmstart size', 'batch size', 'seed'
  ]
  all_results['tuple_keys'] = fields

  if FLAGS.do_save == "True":
    filename = ("results_score_" + FLAGS.score_method +
                "_select_" + FLAGS.select_method)
    existing_files = gfile.Glob(os.path.join(save_dir, filename + "*.pkl"))
    # str(1000 + n)[1:] yields a zero-padded 3-digit run index ("000", "001"…)
    filename = os.path.join(
        save_dir,
        filename + "_" + str(1000 + len(existing_files))[1:] + ".pkl")
    pickle.dump(all_results, gfile.GFile(filename, "w"))
    sys.stdout.flush_file()