def test_helper_threads(self):
    """Test the OpenMP threads helper method."""
    rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
    rnn.set_num_threads(4)
    self.assertEqual(torch.get_num_threads(), 4)
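For reference, a minimal sketch of the round trip this test relies on: torch.get_num_threads() reports the size of the intra-op (OpenMP) thread pool and torch.set_num_threads() resizes it.

import torch

n = torch.get_num_threads()   # current intra-op (OpenMP) thread count
torch.set_num_threads(4)      # resize the intra-op pool
assert torch.get_num_threads() == 4
torch.set_num_threads(n)      # restore the previous setting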
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
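The save/pin/restore pattern above recurs throughout these snippets and could be factored into a context manager. A minimal sketch under that assumption (the helper name single_threaded is hypothetical, not part of any of these codebases):

import contextlib
import torch

@contextlib.contextmanager
def single_threaded():
    """Temporarily pin PyTorch to one intra-op thread, then restore."""
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    try:
        yield
    finally:
        torch.set_num_threads(n_threads)

# usage:
# with single_threaded():
#     coco_evaluator.update(res)

The finally clause guarantees the old thread count comes back even if evaluation raises, which the open-coded version does not.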
def main():
    # Things that need to be parsed:
    # device, batch_size, n_epoch, num_workers, n_neg_sample
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument("--device", type=str, default="cuda:0")
    parser.add_argument("--seed", type=int, default=0)
    # Batch size affects memory use: a larger batch size can cause
    # out-of-memory errors.
    parser.add_argument("--batch_size_train", type=int, default=1)
    parser.add_argument("--batch_size_eval", type=int, default=4)
    parser.add_argument("--n_epoch", type=int, default=4)
    parser.add_argument("--n_worker", type=int, default=3)
    parser.add_argument("--n_neg_sample", type=int, default=4)
    parser.add_argument("--num_dev", type=int, default=2000)
    # TODO: think about a way to pass this value to the collate function.
    parser.add_argument("--max_seq_len", type=int, default=256)
    parser.add_argument("--dataset", type=str, default="openbook")

    # parse the input arguments
    args = parser.parse_args()

    # set the random seeds
    torch.manual_seed(args.seed)  # set pytorch seed
    # Python's random library is used in two places: constructing the raw
    # dataset and constructing the training data.
    random.seed(args.seed)  # set python seed
    np.random.seed(args.seed)  # set numpy seed

    # this has nothing to do with the dataloader's num_workers
    torch.set_num_threads(1)

    print("=" * 20)
    print("args:", args)
    print("num threads:", torch.get_num_threads())
    print("=" * 20)

    train_and_eval_model(args)
    return 0
def evaluate(model, data_loader, device, device_ids, distributed, log_freq=1000,
             title=None, header='Test:'):
    model.to(device)
    if distributed:
        model = DistributedDataParallel(model, device_ids=device_ids)
    elif device.type.startswith('cuda'):
        model = DataParallel(model, device_ids=device_ids)

    if title is not None:
        logger.info(title)

    num_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    model.eval()
    metric_logger = MetricLogger(delimiter=' ')
    for image, target in metric_logger.log_every(data_loader, log_freq, header):
        image = image.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)
        output = model(image)
        acc1, acc5 = compute_accuracy(output, target, topk=(1, 5))
        # FIXME need to take into account that the datasets
        # could have been padded in distributed setup
        batch_size = image.shape[0]
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    top1_accuracy = metric_logger.acc1.global_avg
    top5_accuracy = metric_logger.acc5.global_avg
    logger.info(' * Acc@1 {:.4f}\tAcc@5 {:.4f}\n'.format(top1_accuracy, top5_accuracy))
    torch.set_num_threads(num_threads)
    return metric_logger.acc1.global_avg
def parse_args():
    parser = argparse.ArgumentParser(description="Eval the achromatic pixel detector")
    a = parser.add_argument
    a("model_file", help="Model to evaluate")
    a("test_list", help="File listing test images")
    a("--image-size", type=int, default=256, help="Size of input images")
    a("--remove-gamma", action="store_true", help="Remove sRGB gamma from training images")
    a("--apply-gamma", action="store_true", help="Apply sRGB gamma to output data")
    a("--mask-clipped", action="store_true", help="Exclude clipped pixels from the estimate")
    a("--mask-black", action="store_true", help="Exclude black pixels from the estimate")
    a("--batch-size", type=int, default=16, help="Size of the minibatch")
    a("--num-workers", type=int, default=torch.get_num_threads(),
      help="Number of parallel threads")
    a("--device", default="cuda", help="Processing device")
    a("--plot-estimates", action="store_true", help="Show the estimates on a plot")
    a("--filter-outliers", action="store_true",
      help="Exclude pixels outside the range of allowed illuminants (deprecated)")
    a("--filter", help="Classifier excluding unlikely RGB values")
    a("--cv", type=int, help="Number of cross-validation folds")
    a("--tex", action="store_true", help="LaTeX table format")
    a("--gw", action="store_true", help="Apply gray-world instead")
    a("--gt", action="store_true", help="Use the ground truth instead of the actual estimate")
    a("--output-dir", help="Directory where processed images are placed")
    return parser.parse_args()
def dataloader(cfg, split, bs, shuffle=False):
    """Create a data loader for the specified dataset."""
    if cfg.dataset == "Flickr30K":
        from ml.datasets.flickr import Flickr30kEntities
        ds = Flickr30kEntities(
            split,
            path=cfg.data / "Flickr30K",
            tokenization=cfg.tok,
            max_tokens=cfg.max_tokens,
            max_entities=cfg.max_entities,
            max_rois=cfg.max_rois,
        )
    else:
        raise ValueError(f"Unsupported dataset: {cfg.dataset}")

    num_workers = cfg.num_workers or max(th.get_num_threads() // 2, 2)
    return DataLoader(ds, batch_size=bs, shuffle=shuffle, num_workers=num_workers)
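A self-contained illustration of the same worker fallback with a toy dataset, so the heuristic is easy to test in isolation; the half-the-threads split with a floor of two is just this loader's convention, not a torch requirement:

import torch
from torch.utils.data import DataLoader, TensorDataset

ds = TensorDataset(torch.randn(128, 3), torch.randint(0, 2, (128,)))
num_workers = max(torch.get_num_threads() // 2, 2)  # at least two workers
loader = DataLoader(ds, batch_size=32, shuffle=True, num_workers=num_workers)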
def evaluate(model, data_loader, device='cuda'):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)  # Is this necessary? Who knows...
    cpu_device = torch.device("cpu")
    inference_res = []
    model.eval()
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        res = targets, outputs
        inference_res.append(res)
    torch.set_num_threads(n_threads)
    return inference_res
def set_hardware(args: argparse.Namespace) -> Optional[torch.device]:
    # set torch number of threads
    if args.torch_num_threads is None:
        LOGGER.info("Using default number of CPU threads: %s" % torch.get_num_threads())
    else:
        torch.set_num_threads(args.torch_num_threads)
        LOGGER.info("Using specified number of CPU threads: %s" % args.torch_num_threads)

    # specify gpu device if relevant
    gpu_device: Optional[torch.device]
    if args.gpu:
        gpu_device = torch.device(args.gpu_device)
        LOGGER.info("Using GPU device: %s" % args.gpu_device)
    else:
        gpu_device = None
        LOGGER.info("Using CPU device")

    # return device
    return gpu_device
def setup_dist_backend(args, set_threads=False, thread_choice=None):
    """Sets up backend/environment for distributed training.

    Params:
        args: Command line args for main.py.
        set_threads: Whether to set OMP_NUM_THREADS at all.
        thread_choice: How to choose the number of OMP threads used.
    """
    def setup_print(s, **kwargs):
        if args.setup_verbose is True:
            print(s, **kwargs)

    # assumes all data will have (roughly) the same dimensions
    cudnn.benchmark = True

    # choose environment variable OMP_NUM_THREADS
    # see: https://github.com/pytorch/pytorch/pull/22501
    if set_threads is True:
        if thread_choice is None:
            os.environ['OMP_NUM_THREADS'] = str(1)
        elif thread_choice == 'torch_threads':
            os.environ['OMP_NUM_THREADS'] = str(torch.get_num_threads())
        elif thread_choice == 'multiproc':
            # WORLD_SIZE is an environment string, so cast before dividing
            n_threads = int(multiprocessing.cpu_count() / int(os.environ['WORLD_SIZE']))
            os.environ['OMP_NUM_THREADS'] = str(n_threads)

    if args.distributed is True:
        if args.local_rank == 0:
            setup_print('Setting up distributed process group...')
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend=args.dist_backend,
                                             init_method=args.dist_url,
                                             world_size=env_world_size())
        # make sure there's no mismatch between world sizes
        assert env_world_size() == torch.distributed.get_world_size()
        setup_print(
            f"\tSuccess on process {args.local_rank}/{torch.distributed.get_world_size()}"
        )
def __init__(self, env, policy_comm):
    """
    env is an environment object that conforms to a gym interface

    policy_comm is an mpi4py comm object to communicate with the other
    processes that are working to train this policy. See docs for details.
    """
    self.env = env
    self.comm = policy_comm
    self.rank = self.comm.Get_rank()
    self.world_rank = MPI.COMM_WORLD.Get_rank()
    self.root = 0
    self.is_root = self.rank == self.root
    print(f'{self.world_rank} {self.rank} {self.comm.Get_size()}')

    # Avoid slowdowns caused by each separate process's PyTorch using
    # more than its fair share of CPU resources.
    torch.set_num_threads(max(int(torch.get_num_threads() / self.comm.Get_size()), 1))

    self.model: torch.nn.Module = Model(env.observation_space, env.action_space)
    self.optimizer = torch.optim.Adam(self.model.parameters())
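The division above generalizes to any process group: give each rank an equal share of the intra-op threads, with a floor of one so no rank ends up with zero. A sketch (the helper name threads_per_rank is hypothetical):

import torch

def threads_per_rank(world_size: int) -> int:
    # Equal share of the current intra-op threads per process, floor of one.
    return max(torch.get_num_threads() // world_size, 1)

# e.g. 16 threads shared by 4 ranks -> 4 threads each
torch.set_num_threads(threads_per_rank(4))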
def evaluate(model, data_loader, device, interval=1000, split_name='Test', title=None):
    if title is not None:
        print(title)

    num_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    model.eval()
    metric_logger = MetricLogger(delimiter=' ')
    header = '{}:'.format(split_name)
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, interval, header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            acc1, acc5 = main_util.compute_accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    top1_accuracy = metric_logger.acc1.global_avg
    top5_accuracy = metric_logger.acc5.global_avg
    print(' * Acc@1 {:.4f}\tAcc@5 {:.4f}\n'.format(top1_accuracy, top5_accuracy))
    torch.set_num_threads(num_threads)
    return metric_logger.acc1.global_avg
def _train(self: "Solver") -> float:
    """Perform training for one epoch.

    Returns:
        float -- train loss (averaged over batches)
    """
    # torch.set_num_threads(8)
    threads = torch.get_num_threads()
    # torch.set_num_threads(threads)
    print("Threads: ", threads)

    self.model.train()
    tr_loss = 0.0
    pbar = tqdm(self.train_loader, desc="Train Batch", position=0, leave=True)
    for b, batch in enumerate(pbar):
        mixture, source = batch
        print(len(batch))
        if self.cuda:
            mixture = mixture.cuda()
            source = source.cuda()
        estimate = self.model(mixture)
        loss = self.criterion(estimate, source)
        self.optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), self.train_config.max_norm)
        self.optim.step()
        tr_loss += loss.item()
        pbar.set_postfix(tr_loss=tr_loss / (b + 1))
    tr_loss /= len(self.train_loader)
    return tr_loss
def evaluate_cls(model, data_loader, device, use_amp=False):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    total_loss = 0.0
    preds = []
    trues = []
    for images, targets in metric_logger.log_every(data_loader, 100, header):
        # images = torch.stack(images, 0).to(device)
        # targets = [{k: v.to(device) for k, v in targ.items() if k not in ["path"]}
        #            for targ in targets]
        # targets = torch.stack([target["labels"] for target in targets], 0).to(device)
        images = images.to(device)
        targets = targets.to(device)
        # torch.cuda.synchronize()
        with torch.cuda.amp.autocast(use_amp):
            outputs = model(images, targets, False, True)
            total_loss += outputs["valid_loss"]
            preds.extend(outputs["preds"])
            trues.extend(targets)

    num_datas = len(data_loader.dataset)
    valid_loss = total_loss / num_datas
    valid_acc = (torch.eq(torch.tensor(preds), torch.tensor(trues)).sum().float()
                 / num_datas).item()
    torch.set_num_threads(n_threads)  # restore the saved thread count
    print("\nvalid_loss:%.5f valid_acc:%.5f\n" % (valid_loss, valid_acc))
    return valid_loss, valid_acc
def main():
    model = DCTTS(args).to(DEVICE)
    print('Model {} is working...'.format(args.model_name))
    print('{} threads are used...'.format(torch.get_num_threads()))
    ckpt_dir = os.path.join(args.logdir, args.model_name)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = None  # placeholder so the train() call below has a scheduler argument
    # scheduler = MultiStepLR(optimizer, milestones=[50000, 150000, 300000], gamma=0.5)
    # scheduler = LambdaLR(optimizer, lr_policy)

    if not os.path.exists(ckpt_dir):
        os.makedirs(os.path.join(ckpt_dir, 'A', 'train'))
        if args.pretrained_path is not None:
            print('Train with pretrained model {}'.format(args.pretrained_path))
            state = torch.load(args.pretrained_path)
            model.custom_load_state_dict(state['model'])
    else:
        print('Already exists. Retrain the model.')
        ckpt = sorted(glob.glob(os.path.join(ckpt_dir, '*k.pth.tar')))[-1]
        state = torch.load(ckpt)
        model.load_state_dict(state['model'])
        args.global_step = state['global_step']
        optimizer.load_state_dict(state['optimizer'])
        # scheduler.load_state_dict(state['scheduler'])

    # model = torch.nn.DataParallel(model, device_ids=list(range(args.no_gpu))).to(DEVICE)
    dataset = SpeechDataset(args.data_path, args.meta_train, mem_mode=args.mem_mode)
    validset = SpeechDataset(args.data_path, args.meta_eval, mem_mode=args.mem_mode)
    data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size,
                             shuffle=True, collate_fn=t2m_ga_collate_fn,
                             drop_last=True, pin_memory=True)
    valid_loader = DataLoader(dataset=validset, batch_size=args.test_batch,
                              shuffle=False, collate_fn=t2m_ga_collate_fn,
                              pin_memory=True)

    writer = SummaryWriter(ckpt_dir)
    train(model, data_loader, valid_loader, optimizer, scheduler,
          batch_size=args.batch_size, ckpt_dir=ckpt_dir, writer=writer)
    return None
def train_or_eval_model(i, args, raw_in_data, raw_out_data):
    # reduce number of threads as we're running FUTURE_CHUNKS parallel processes
    num_threads = int(torch.get_num_threads() / Model.FUTURE_CHUNKS)
    torch.set_num_threads(num_threads)

    # create or load a model
    model = Model()
    if args.load_model:
        model_path = path.join(args.load_model, 'py-{}.pt'.format(i))
        model.load(model_path)
        sys.stderr.write('[{}] Loaded model from {}\n'.format(i, model_path))
    else:
        sys.stderr.write('[{}] Created a new model\n'.format(i))

    # normalize input data
    if args.inference:
        input_data = model.normalize_input(raw_in_data, update_obs=False)
    else:
        input_data = model.normalize_input(raw_in_data, update_obs=True)

    # discretize output data
    output_data = model.discretize_output(raw_out_data)

    # print some stats
    print_stats(i, output_data)

    if args.inference:
        model.set_model_eval()
        sys.stderr.write('[{}] test set size: {}\n'.format(i, len(input_data)))
        sys.stderr.write('[{}] loss: {:.3f}, accuracy: {:.2f}%\n'
                         .format(i, model.compute_loss(input_data, output_data),
                                 100 * model.compute_accuracy(input_data, output_data)))
    else:  # training
        model.set_model_train()
        # train a neural network with data
        train(i, args, model, input_data, output_data)
def time_evaluate_cpu_all(models, number=1, stft_only=False):
    orig_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    with torch.no_grad():
        inp = torch.rand(8192)
        times = {}
        for name, model in models.items():
            if model is None:
                continue
            print(f'Running {name}')
            model.eval()
            if not stft_only:
                times[name] = timeit.timeit('_ = model(inp)', number=number,
                                            globals=locals()) / number
            else:
                times[name] = timeit.timeit('_ = model.decoder(model.encoder(inp))',
                                            number=number, globals=locals()) / number
    torch.set_num_threads(orig_threads)
    return times
def evaluate(model, loader, device):
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = detection_util.MetricLogger(delimiter=" ")
    header = 'Test:'
    tp_total, fp_total, fn_total = (torch.zeros([26, 10]), torch.zeros([26, 10]),
                                    torch.zeros([26, 10]))
    for images, targets in metric_logger.log_every(loader, 100, header):
        images = list(img.to(device) for img in images)
        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        evaluator_time = time.time()
        tp, fp, fn = detection_metrics.getnum_tp_fp_fn(targets, outputs)
        tp_total += tp
        fp_total += fp
        fn_total += fn
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    print("AP:", detection_metrics.get_mAP(tp_total, fp_total, fn_total))
    torch.set_num_threads(n_threads)
    return
def __init__(self, act_limit, obs_dim, act_dim, hidden_sizes, pi_lr=1e-3,
             q_lr=1e-3, gamma=None, alpha=None, polyak=None, load=False,
             noise_scale=0.1, target_noise=0.2, noise_clip=0.5, policy_delay=2,
             exp_name='Exp1', replay_buffer=None, path='saved_models/'):
    self.act_limit = act_limit
    self.gamma = gamma
    self.alpha = alpha
    self.polyak = polyak
    self.load = load
    self.exp_name = exp_name
    self.path = path
    self.pi_lr = pi_lr
    self.q_lr = q_lr
    self.noise_scale = noise_scale
    self.target_noise = target_noise
    self.noise_clip = noise_clip
    self.policy_delay = policy_delay
    self.replay_buffer = replay_buffer
    self.create_networks(obs_dim, act_dim, hidden_sizes)
    self.update_timer = 0
    # Note: setting the thread count to its current value is a no-op.
    torch.set_num_threads(torch.get_num_threads())
def main():
    G = SSRN().to(DEVICE)
    D = ConditionalDiscriminatorBlock().to(DEVICE)
    print('{} threads are used...'.format(torch.get_num_threads()))
    ckpt_dir = os.path.join(args.logdir, type(G).__name__)
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr)
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr)
    # scheduler = MultiStepLR(optimizer, milestones=[100000, 200000], gamma=0.5)

    if not os.path.exists(ckpt_dir):
        os.makedirs(os.path.join(ckpt_dir, 'A', 'train'))
    else:
        print('Already exists. Retrain the model.')
        # load the latest generator checkpoint
        ckpt = sorted(glob.glob(os.path.join(
            ckpt_dir, '{}-*k.pth'.format(type(G).__name__))))[-1]
        state = torch.load(ckpt)
        args.global_step = state['global_step']
        G.load_state_dict(state['G'])
        G_optim.load_state_dict(state['G_optim'])
        # load the latest discriminator checkpoint
        ckpt = sorted(glob.glob(os.path.join(
            ckpt_dir, '{}-*k.pth'.format(type(D).__name__))))[-1]
        state = torch.load(ckpt)
        D.load_state_dict(state['D'])
        D_optim.load_state_dict(state['D_optim'])

    dataset = SpeechDataset(args.data_path, args.meta_train, type(G).__name__,
                            mem_mode=args.mem_mode)
    validset = SpeechDataset(args.data_path, args.meta_eval, type(G).__name__,
                             mem_mode=args.mem_mode)
    data_loader = DataLoader(dataset=dataset, batch_size=args.batch_size, shuffle=True,
                             collate_fn=collate_fn, drop_last=True, pin_memory=True)
    valid_loader = DataLoader(dataset=validset, batch_size=args.test_batch,
                              shuffle=False, collate_fn=collate_fn)
    writer = SummaryWriter(ckpt_dir)
    train(G, D, data_loader, valid_loader, G_optim, D_optim,
          batch_size=args.batch_size, ckpt_dir=ckpt_dir, writer=writer)
    return None
def evaluate(model, data_loader, device, data_set=None, mAP_list=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "
    if data_set is None:
        data_set = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(data_set, iou_types)
    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = torch.stack(images, dim=0)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        images = images.to(device)
        # targets = {k: v.to(device) for k, v in targets.items()}

        if device != torch.device("cpu"):
            torch.cuda.synchronize(device)

        model_time = time.time()
        # list((bboxes_out, labels_out, scores_out), ...)
        results = model(images, targets)

        outputs = []
        for index, (bboxes_out, labels_out, scores_out) in enumerate(results):
            # convert relative box coordinates (0-1) into absolute coordinates
            # (xmin, ymin, xmax, ymax)
            height_width = targets[index]["height_width"]  # e.g. [300, 300]
            bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
            bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

            info = {"boxes": bboxes_out.to(cpu_device),
                    "labels": labels_out.to(cpu_device),
                    "scores": scores_out.to(cpu_device)}
            outputs.append(info)
        # outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = dict()
        for index in range(len(outputs)):
            info = {targets[index]["image_id"].item(): outputs[index]}
            res.update(info)
        # res = {target["image_id"].item(): output
        #        for target, output in zip(targets, outputs)}

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)

    print_txt = coco_evaluator.coco_eval[iou_types[0]].stats
    coco_mAP = print_txt[0]
    voc_mAP = print_txt[1]
    if isinstance(mAP_list, list):
        mAP_list.append(voc_mAP)
levels_and_models = [("phylum", ConvNet(3)),
                     ("class", ConvNet(5)),
                     ("order", ConvNet(10))]
lr_space = np.geomspace(1e-6, 1e3, num=10)
weight_decay = np.geomspace(1e-6, 1e3, num=10)

# populate parameter dicts
param_dicts = list()
for model_id, (level, m) in enumerate(levels_and_models):
    for l in lr_space:
        for w in weight_decay:
            param_dict = {
                "level": level,
                "model": copy.deepcopy(m),
                "eval_on": "val",
                "cnn_config": {
                    "model": model_id,
                    "lr": l,
                    "weight_decay": w
                }
            }
            param_dicts.append(param_dict)


def cnn_train_test_unpack(args):
    return cnn_train_eval(**args)


with Pool(int(cpu_count() / torch.get_num_threads()) - 1) as p:
    experiment_logs = p.map(cnn_train_test_unpack, param_dicts)

np.save("grid_search_best_cnn_logs.npy", np.array(experiment_logs))
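Note that the pool sizing above can reach zero (or go negative) when cpu_count() is close to torch.get_num_threads(), and Pool(0) raises. A guarded variant, assuming the same one-process-per-thread-slot intent:

import torch
from multiprocessing import Pool, cpu_count

# Leave one slot free, but never ask for fewer than one worker.
n_procs = max(int(cpu_count() / torch.get_num_threads()) - 1, 1)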
def main_function(experiment_directory, continue_from, batch_split):
    logging.debug("running " + experiment_directory)

    specs = ws.load_experiment_specifications(experiment_directory)
    logging.info("Experiment description: \n" + specs["Description"])

    data_source = specs["DataSource"]
    train_split_file = specs["TrainSplit"]

    arch = __import__("networks." + specs["NetworkArch"], fromlist=["Decoder"])
    logging.debug(specs["NetworkSpecs"])

    latent_size = specs["CodeLength"]

    checkpoints = list(
        range(
            specs["SnapshotFrequency"],
            specs["NumEpochs"] + 1,
            specs["SnapshotFrequency"],
        ))
    for checkpoint in specs["AdditionalSnapshots"]:
        checkpoints.append(checkpoint)
    checkpoints.sort()

    lr_schedules = get_learning_rate_schedules(specs)
    grad_clip = get_spec_with_default(specs, "GradientClipNorm", None)
    if grad_clip is not None:
        logging.debug("clipping gradients to max norm {}".format(grad_clip))

    def save_latest(epoch):
        save_model(experiment_directory, "latest.pth", decoder, epoch)
        save_optimizer(experiment_directory, "latest.pth", optimizer_all, epoch)
        save_latent_vectors(experiment_directory, "latest.pth", lat_vecs, epoch)

    def save_checkpoints(epoch):
        save_model(experiment_directory, str(epoch) + ".pth", decoder, epoch)
        save_optimizer(experiment_directory, str(epoch) + ".pth", optimizer_all, epoch)
        save_latent_vectors(experiment_directory, str(epoch) + ".pth", lat_vecs, epoch)

    def signal_handler(sig, frame):
        logging.info("Stopping early...")
        sys.exit(0)

    def adjust_learning_rate(lr_schedules, optimizer, epoch):
        for i, param_group in enumerate(optimizer.param_groups):
            param_group["lr"] = lr_schedules[i].get_learning_rate(epoch)

    def empirical_stat(latent_vecs, indices):
        lat_mat = torch.zeros(0).cuda()
        for ind in indices:
            lat_mat = torch.cat([lat_mat, latent_vecs[ind]], 0)
        mean = torch.mean(lat_mat, 0)
        var = torch.var(lat_mat, 0)
        return mean, var

    signal.signal(signal.SIGINT, signal_handler)

    num_samp_per_scene = specs["SamplesPerScene"]
    scene_per_batch = specs["ScenesPerBatch"]
    clamp_dist = specs["ClampingDistance"]
    minT = -clamp_dist
    maxT = clamp_dist
    enforce_minmax = True

    do_code_regularization = get_spec_with_default(specs, "CodeRegularization", True)
    code_reg_lambda = get_spec_with_default(specs, "CodeRegularizationLambda", 1e-4)
    code_bound = get_spec_with_default(specs, "CodeBound", None)

    decoder = arch.Decoder(latent_size, **specs["NetworkSpecs"]).cuda()

    logging.info("training with {} GPU(s)".format(torch.cuda.device_count()))
    # if torch.cuda.device_count() > 1:
    decoder = torch.nn.DataParallel(decoder)

    num_epochs = specs["NumEpochs"]
    log_frequency = get_spec_with_default(specs, "LogFrequency", 10)

    with open(train_split_file, "r") as f:
        train_split = json.load(f)

    sdf_dataset = deep_sdf.data.SDFSamples(data_source, train_split,
                                           num_samp_per_scene, load_ram=False)
    print('[HERE: In train_deep_sdf.main_function] sdf_dataset len =', len(sdf_dataset))

    num_data_loader_threads = get_spec_with_default(specs, "DataLoaderThreads", 1)
    logging.debug("loading data with {} threads".format(num_data_loader_threads))

    sdf_loader = data_utils.DataLoader(
        sdf_dataset,
        batch_size=scene_per_batch,
        shuffle=True,
        num_workers=num_data_loader_threads,
        drop_last=True,
    )
    print('[HERE: In train_deep_sdf.main_function] sdf_loader len =', len(sdf_loader))

    logging.debug("torch num_threads: {}".format(torch.get_num_threads()))

    num_scenes = len(sdf_dataset)
    logging.info("There are {} scenes".format(num_scenes))
    logging.debug(decoder)

    lat_vecs = torch.nn.Embedding(num_scenes, latent_size, max_norm=code_bound)
    torch.nn.init.normal_(
        lat_vecs.weight.data,
        0.0,
        get_spec_with_default(specs, "CodeInitStdDev", 1.0) / math.sqrt(latent_size),
    )
    logging.debug("initialized with mean magnitude {}".format(
        get_mean_latent_vector_magnitude(lat_vecs)))

    loss_l1 = torch.nn.L1Loss(reduction="sum")

    optimizer_all = torch.optim.Adam([
        {
            "params": decoder.parameters(),
            "lr": lr_schedules[0].get_learning_rate(0),
        },
        {
            "params": lat_vecs.parameters(),
            "lr": lr_schedules[1].get_learning_rate(0),
        },
    ])

    loss_log = []
    lr_log = []
    lat_mag_log = []
    timing_log = []
    param_mag_log = {}

    start_epoch = 1

    if continue_from is not None:
        logging.info('continuing from "{}"'.format(continue_from))

        lat_epoch = load_latent_vectors(experiment_directory,
                                        continue_from + ".pth", lat_vecs)
        model_epoch = ws.load_model_parameters(experiment_directory,
                                               continue_from, decoder)
        optimizer_epoch = load_optimizer(experiment_directory,
                                         continue_from + ".pth", optimizer_all)
        loss_log, lr_log, timing_log, lat_mag_log, param_mag_log, log_epoch = load_logs(
            experiment_directory)

        if not log_epoch == model_epoch:
            loss_log, lr_log, timing_log, lat_mag_log, param_mag_log = clip_logs(
                loss_log, lr_log, timing_log, lat_mag_log, param_mag_log, model_epoch)

        if not (model_epoch == optimizer_epoch and model_epoch == lat_epoch):
            raise RuntimeError("epoch mismatch: {} vs {} vs {} vs {}".format(
                model_epoch, optimizer_epoch, lat_epoch, log_epoch))

        start_epoch = model_epoch + 1
        logging.debug("loaded")

    logging.info("starting from epoch {}".format(start_epoch))
    logging.info("Number of decoder parameters: {}".format(
        sum(p.data.nelement() for p in decoder.parameters())))
    logging.info(
        "Number of shape code parameters: {} (# codes {}, code dim {})".format(
            lat_vecs.num_embeddings * lat_vecs.embedding_dim,
            lat_vecs.num_embeddings,
            lat_vecs.embedding_dim,
        ))

    for epoch in range(start_epoch, num_epochs + 1):
        start = time.time()
        logging.info("epoch {}...".format(epoch))

        decoder.train()
        adjust_learning_rate(lr_schedules, optimizer_all, epoch)

        for sdf_data, indices in sdf_loader:
            # print('[HERE: In train_deep_sdf.LOOPsdf_loader] indices =', indices)

            # Process the input data
            sdf_data = sdf_data.reshape(-1, 4)
            num_sdf_samples = sdf_data.shape[0]
            sdf_data.requires_grad = False

            xyz = sdf_data[:, 0:3]
            sdf_gt = sdf_data[:, 3].unsqueeze(1)
            if enforce_minmax:
                sdf_gt = torch.clamp(sdf_gt, minT, maxT)

            xyz = torch.chunk(xyz, batch_split)
            indices = torch.chunk(
                indices.unsqueeze(-1).repeat(1, num_samp_per_scene).view(-1),
                batch_split,
            )
            sdf_gt = torch.chunk(sdf_gt, batch_split)

            batch_loss = 0.0
            optimizer_all.zero_grad()

            for i in range(batch_split):
                # print('[HERE: In train_deep_sdf.LOOPbatch_split] i/batch_split = %d/%d'
                #       % (i, batch_split))
                batch_vecs = lat_vecs(indices[i])
                input = torch.cat([batch_vecs, xyz[i]], dim=1)

                # NN optimization
                pred_sdf = decoder(input)
                if enforce_minmax:
                    pred_sdf = torch.clamp(pred_sdf, minT, maxT)

                chunk_loss = loss_l1(pred_sdf, sdf_gt[i].cuda()) / num_sdf_samples

                if do_code_regularization:
                    l2_size_loss = torch.sum(torch.norm(batch_vecs, dim=1))
                    reg_loss = (code_reg_lambda * min(1, epoch / 100) *
                                l2_size_loss) / num_sdf_samples
                    chunk_loss = chunk_loss + reg_loss.cuda()

                chunk_loss.backward()
                batch_loss += chunk_loss.item()

            logging.debug("loss = {}".format(batch_loss))
            logging.info("loss = {}".format(batch_loss))
            loss_log.append(batch_loss)

            if grad_clip is not None:
                torch.nn.utils.clip_grad_norm_(decoder.parameters(), grad_clip)

            optimizer_all.step()

        end = time.time()
        seconds_elapsed = end - start
        timing_log.append(seconds_elapsed)

        lr_log.append([schedule.get_learning_rate(epoch) for schedule in lr_schedules])
        lat_mag_log.append(get_mean_latent_vector_magnitude(lat_vecs))
        append_parameter_magnitudes(param_mag_log, decoder)

        if epoch in checkpoints:
            save_checkpoints(epoch)

        if epoch % log_frequency == 0:
            save_latest(epoch)
            save_logs(
                experiment_directory,
                loss_log,
                lr_log,
                timing_log,
                lat_mag_log,
                param_mag_log,
                epoch,
            )
from joblib import Parallel, delayed
import util
import torch
import torch as T
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import OrderedDict
from config_reader import config_reader
from scipy.ndimage.filters import gaussian_filter

# parser = argparse.ArgumentParser()
# parser.add_argument('--t7_file', required=True)
# parser.add_argument('--pth_file', required=True)
# args = parser.parse_args()

torch.set_num_threads(torch.get_num_threads())
weight_name = './model/pose_model.pth'

blocks = {}

# find connection in the specified sequence, center 29 is in position 15
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
           [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
           [1, 16], [16, 18], [3, 17], [6, 18]]

# the middle joints heatmap correspondence
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
          [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
          [55, 56], [37, 38], [45, 46]]

# visualize
def evaluate(model, data_loader, device, metric_logger, print_freq, file_save=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger.renew(epoch_size=len(data_loader), delimiter=" ", train=False)
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    all_results = []
    for image, targets in metric_logger.log_every(data_loader, print_freq, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # TODO: add also mask to results
        if file_save is not None:
            for image_id in res.keys():
                for b in range(len(res[image_id]['labels'])):
                    # boxes xyxy -> xywh
                    current_box = res[image_id]['boxes'][b].numpy().tolist()
                    current_box[2] -= current_box[0]
                    current_box[3] -= current_box[1]
                    all_results.append({
                        "image_id": int(image_id),
                        "category_id": int(res[image_id]['labels'][b].numpy()),
                        "bbox": current_box,
                        "score": float(res[image_id]['scores'][b].numpy())
                    })

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate(make_print=True)
    coco_evaluator.summarize(make_print=True)
    for k in coco_evaluator.coco_eval.keys():
        acc_name = k + "_" + "mAP"
        # here I add only the main figure of merit; could be extended
        acc_val = coco_evaluator.coco_eval[k].stats[0]
        metric_logger.add_meter(
            acc_name, utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
        metric_logger.meters[acc_name].update(acc_val)
    metric_logger.end_epoch()
    metric_logger.print_out("Averaged stats: {}".format(str(metric_logger)))

    # save results
    if file_save is not None:
        with open(file_save, 'w') as outfile:
            json.dump(all_results, outfile)

    torch.set_num_threads(n_threads)
    return coco_evaluator
def evaluate(model, data_loader, device, epoch_num=None, check_num=200):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    idx = 0
    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        # output: a list with one entry per sample in the validation batch
        outputs_set = model(images)
        # each element t of outputs_list is a dict {'boxes', 'labels', 'scores',
        # 'masks'} whose values are tensors, one dict per output in the minibatch
        outputs_list = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs_set]
        model_time = time.time() - model_time

        # build a dict keyed by target["image_id"].item(), i.e. the image id;
        # the value is the model's output t for that sample, a dict
        # {'boxes', 'labels', 'scores', 'masks'} whose tensors have
        # length == number of predicted objects
        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs_list)
        }

        idx += 1
        if idx - idx // check_num * check_num == 0:  # record once every check_num batches
            if epoch_num is not None:
                coco_a_result_check(images, targets, res,
                                    'E' + str(epoch_num) + '_' + str(idx))
            else:
                coco_a_result_check(images, targets, res)
        '''
        for key in res:
            print(len(res[key]['boxes']))
            # Mask R-CNN initially outputs 100 boxes (DETR: 200); once the
            # model has learned, the number of boxes drops considerably.
        '''

        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
def main(args):
    torch.manual_seed(123)

    local_rank = int(os.environ[args.env_rank])
    world_size = int(os.environ[args.env_world_size])

    device = torch.device('cuda:%d' % (local_rank) if torch.cuda.is_available() else 'cpu')
    if local_rank == 0:
        print('Number of CUDA devices: %d' % (torch.cuda.device_count()))
        print('Number of CPU threads: %d' % (torch.get_num_threads()))

    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    if world_size > 1:
        print('rank: {}/{}'.format(local_rank + 1, world_size))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://',
                                             rank=local_rank,
                                             world_size=world_size)

    # Data loading code
    train_dataset = ImagesDataset(data_dir=args.data_dir, transform=transform)

    train_sampler = None
    if world_size > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               drop_last=False,
                                               sampler=train_sampler)

    net = torchvision.models.resnet50()
    net = net.to(device)
    if world_size > 1:
        net = torch.nn.parallel.DistributedDataParallel(net)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)

    net.train()
    for epoch in range(args.epochs):
        epoch_start = timeit.default_timer()
        if world_size > 1:
            train_sampler.set_epoch(epoch)

        train_loss = 0
        for index, (images, labels) in enumerate(train_loader, 1):
            # forward pass
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)

            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            if local_rank == 0:
                print('\repoch %3d batch %3d/%3d train loss %6.4f'
                      % (epoch + 1, index, len(train_loader), train_loss / index),
                      end='')

        if local_rank == 0:
            print('\repoch %3d batch %3d/%3d train loss %6.4f'
                  % (epoch + 1, index, len(train_loader),
                     train_loss / len(train_loader)), end='')
            print(' %5.3fsec' % (timeit.default_timer() - epoch_start))
def _setup(self, config):
    self.config = config
    print('NeuroCard config:')
    pprint.pprint(config)
    os.chdir(config['cwd'])
    for k, v in config.items():
        setattr(self, k, v)

    if config['__gpu'] == 0:
        torch.set_num_threads(config['__cpu'])

    # W&B.
    # Do wandb.init() after the os.chdir() above to make sure that the Git
    # diff file (diff.patch) is w.r.t. the directory this file is in,
    # rather than w.r.t. Ray's package dir.
    wandb_project = config['__run']
    wandb.init(name=os.path.basename(
        self.logdir if self.logdir[-1] != '/' else self.logdir[:-1]),
               sync_tensorboard=True,
               config=config,
               project=wandb_project)

    self.epoch = 0

    if isinstance(self.join_tables, int):
        # Hack to support training single-model tables.
        sorted_table_names = sorted(
            list(datasets.TPC_DS.GetTDSLightJoinKeys().keys()))
        self.join_tables = [sorted_table_names[self.join_tables]]

    # Try to make all the runs the same, except for input orderings.
    torch.manual_seed(0)
    np.random.seed(0)

    # Common attributes.
    self.loader = None
    self.join_spec = None
    join_iter_dataset = None
    table_primary_index = None

    # New datasets should be loaded here.
    assert self.dataset in ['tpcds']
    if self.dataset == 'tpcds':
        print('Training on Join({})'.format(self.join_tables))
        loaded_tables = []
        for t in self.join_tables:
            print('Loading', t)
            table = datasets.LoadTds(t, use_cols=self.use_cols)
            table.data.info()
            loaded_tables.append(table)

        if len(self.join_tables) > 1:
            join_spec, join_iter_dataset, loader, table = self.MakeSamplerDatasetLoader(
                loaded_tables)

            self.join_spec = join_spec
            self.train_data = join_iter_dataset
            self.loader = loader

            table_primary_index = [t.name for t in loaded_tables].index('title')

            table.cardinality = datasets.TPC_DS.GetFullOuterCardinalityOrFail(
                self.join_tables)
            self.train_data.cardinality = table.cardinality

            print('rows in full join', table.cardinality, 'cols in full join',
                  len(table.columns), 'cols:', table)
        else:
            # Train on a single table.
            table = loaded_tables[0]

    if self.dataset != 'tpcds' or len(self.join_tables) == 1:
        table.data.info()
        self.train_data = self.MakeTableDataset(table)

    self.table = table
    # Provide true cardinalities in a file or implement an oracle CardEst.
    self.oracle = None
    self.table_bits = 0

    # A fixed ordering?
    self.fixed_ordering = self.MakeOrdering(table)

    model = self.MakeModel(self.table,
                           self.train_data,
                           table_primary_index=table_primary_index)

    # NOTE: ReportModel()'s returned value is the true model size in
    # megabytes containing all *trainable* parameters. As an implementation
    # convenience, the saved ckpts on disk have a slightly bigger footprint
    # due to saving non-trainable constants (the masks in each layer) as
    # well. They can be deterministically reconstructed based on RNG seeds
    # and so should not be counted as model size.
    self.mb = train_utils.ReportModel(model)
    if not isinstance(model, transformer.Transformer):
        print('applying train_utils.weight_init()')
        model.apply(train_utils.weight_init)
    self.model = model

    if self.use_data_parallel:
        self.model = DataParallelPassthrough(self.model)

    wandb.watch(model, log='all')

    if self.use_transformer:
        opt = torch.optim.Adam(
            list(model.parameters()),
            2e-4,
            # betas=(0.9, 0.98),  # B in Lingvo; in Trfmr paper.
            betas=(0.9, 0.997),  # A in Lingvo.
            eps=1e-9,
        )
    else:
        if self.optimizer == 'adam':
            opt = torch.optim.Adam(list(model.parameters()), 2e-4)
        else:
            print('Using Adagrad')
            opt = torch.optim.Adagrad(list(model.parameters()), 2e-4)
    print('Optimizer:', opt)
    self.opt = opt

    total_steps = self.epochs * self.max_steps
    if self.lr_scheduler == 'CosineAnnealingLR':
        # Starts decaying to 0 immediately.
        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            opt, total_steps)
    elif self.lr_scheduler == 'OneCycleLR':
        # Warms up to max_lr, then decays to ~0.
        self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            opt, max_lr=2e-3, total_steps=total_steps)
    elif self.lr_scheduler is not None and self.lr_scheduler.startswith('OneCycleLR-'):
        warmup_percentage = float(self.lr_scheduler.split('-')[-1])
        # Warms up to max_lr, then decays to ~0.
        self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            opt,
            max_lr=2e-3,
            total_steps=total_steps,
            pct_start=warmup_percentage)
    elif self.lr_scheduler is not None and self.lr_scheduler.startswith('wd_'):
        # Warmups and decays.
        splits = self.lr_scheduler.split('_')
        assert len(splits) == 3, splits
        lr, warmup_fraction = float(splits[1]), float(splits[2])
        self.custom_lr_lambda = train_utils.get_cosine_learning_rate_fn(
            total_steps,
            learning_rate=lr,
            min_learning_rate_mult=1e-5,
            constant_fraction=0.,
            warmup_fraction=warmup_fraction)
    else:
        assert self.lr_scheduler is None, self.lr_scheduler

    self.tbx_logger = tune_logger.TBXLogger(self.config, self.logdir)
    if self.checkpoint_to_load:
        self.LoadCheckpoint()

    self.loaded_queries = None
    self.oracle_cards = None
    if self.dataset == 'tpcds' and len(self.join_tables) > 1:
        queries_job_format = utils.JobToQuery(self.queries_csv)
        self.loaded_queries, self.oracle_cards = utils.UnpackQueries(
            self.table, queries_job_format)

    if config['__gpu'] == 0:
        print('CUDA not available, using # cpu cores for intra-op:',
              torch.get_num_threads(), '; inter-op:',
              torch.get_num_interop_threads())
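The last print distinguishes the two CPU thread pools PyTorch maintains: intra-op (parallelism within a single op) and inter-op (parallelism across independent ops). A minimal sketch of the corresponding getters and setter; note that PyTorch requires torch.set_num_interop_threads() to be called before any inter-op parallel work starts, so it is not called here:

import torch

print('intra-op:', torch.get_num_threads())
print('inter-op:', torch.get_num_interop_threads())
torch.set_num_threads(2)  # the intra-op pool can be resized at any time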
from bayesmark import np_util
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from sklearn.preprocessing import power_transform

import numpy as np
import pandas as pd
import torch

from torch.quasirandom import SobolEngine
from pyDOE2 import lhs

from hebo.design_space.design_space import DesignSpace
from hebo.models.model_factory import get_model
from hebo.acquisitions.acq import LCB, Mean, Sigma, MOMeanSigmaLCB, MACE
from hebo.optimizers.evolution_optimizer import EvolutionOpt

# min(1, n) is always 1 for n >= 1, so this pins the process to one thread
torch.set_num_threads(min(1, torch.get_num_threads()))


class MACEBO(AbstractOptimizer):
    # Unclear what is the best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, model_name='gpy'):
        AbstractOptimizer.__init__(self, api_config)
        self.api_config = api_config
        self.space = self.parse_space(api_config)
        self.X = pd.DataFrame(columns=self.space.para_names)
        self.y = np.zeros((0, 1))
        self.model_name = model_name
        for k in api_config:
            print(k, api_config[k])
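Since min(1, n) evaluates to 1 for any n >= 1, the call above always pins the process to a single thread. If a floor of one thread was the intent instead, the usual clamp would be (this is a guess at intent, not a fix confirmed by the source):

import torch

torch.set_num_threads(max(1, torch.get_num_threads()))  # a floor, not a pin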
random.seed(666)
np.random.seed(666)
torch.cuda.manual_seed(666)
torch.manual_seed(666)

argparser = OptionParser()
argparser.add_option('--config_file', type="str", default='config.cfg')
argparser.add_option("--numthread", type="int", dest="nthread", default=4)
argparser.add_option('--use_cuda', action='store_true', default=True)
argparser.add_option('--parsingmodel', type="str", default='BaseParser')
(args, extra_args) = argparser.parse_args()
options = Configurable(args.config_file, extra_args)

torch.set_num_threads(args.nthread)
print("Pytorch using {} threads.".format(torch.get_num_threads()))

if options.external_embedding:
    print('Using external embedding: {}'.format(options.external_embedding))
if options.gpuFlag:
    print("Use GPU!")

print('Preparing vocab')
words, w2i, p2i, rels = vocab(options.conll_train)

'''
proportion setting
'''
with open(options.conll_train, 'r') as conllFP:
    sentencesData = list(read_conll(conllFP))
def generateArgs() -> argparse.Namespace:
    """
    This function parses and returns the arguments provided by the user.
    Use python main.py --help to get a full list of arguments.

    :return: Returns the parsed arguments given by the user.
    """
    # Argument parser. Most defaults are what the original paper outlined.
    arg_parser = argparse.ArgumentParser()

    # Groups for the arguments
    systemArgs = arg_parser.add_argument_group(
        'system',
        "Args that affect the system to be used during the running of the program.")
    trainingArgs = arg_parser.add_argument_group('training', "Args that affect training.")
    loggingArgs = arg_parser.add_argument_group('logging', "Args that affect logging.")
    fileArgs = arg_parser.add_argument_group(
        'files', "Args that deal with files (saving and loading).")
    testingArgs = arg_parser.add_argument_group('testing', "Args that deal with testing.")
    actionArgs = arg_parser.add_argument_group(
        'actions', "Args that deal with what the program does.")

    # Add flag argument to run with GPU or not.
    systemArgs.add_argument('-g', '--gpu', action='store_true', required=False,
                            help="Use GPU", default=False)
    systemArgs.add_argument('--version', action='version', dest='version',
                            version=Config.version)
    systemArgs.add_argument('-t', '--threads',
                            help="The number of threads that you want to use.",
                            type=int, required=False, default=1)
    systemArgs.add_argument('-T', '--max-threads',
                            help="Use the most number of threads possible.",
                            action="store_true", required=False, default=False)
    systemArgs.add_argument('--force-threads',
                            help="Force the program to not limit the number of threads from 1-7",
                            action="store_true", required=False, default=False)
    systemArgs.add_argument('--model-number',
                            help="The number of the model. To be used when saving.",
                            type=int, required=False, default=0)

    # Reduce learning rate by this rate: the gamma in the LR scheduler.
    trainingArgs.add_argument('-F', '--factor', help="Reduce learning rate by factor",
                              type=float, required=False, default=.1)
    # The base learning rate to start out with.
    trainingArgs.add_argument('-l', '--learning-rate', help="Standard learning rate",
                              type=float, required=False, default=1e-05)
    # The momentum for the network.
    trainingArgs.add_argument('-m', '--momentum', help="Momentum rate",
                              type=float, required=False, default=.9)
    # Batch size for the network.
    trainingArgs.add_argument('-b', '--batch-size', help="Batch size",
                              type=int, required=False, default=75)
    # Beta for the loss function.
    trainingArgs.add_argument('-B', '--beta', help="Beta for loss function",
                              type=int, required=False, default=500)
    trainingArgs.add_argument('-e', '--epochs',
                              help="Total number of epochs for this model",
                              type=int, required=False, default=10)
    trainingArgs.add_argument('-d', '--database-root', type=str,
                              help="The root folder of the database to be used.",
                              required=False, default="KingsCollege/")
    trainingArgs.add_argument('--threshold-factor',
                              help="When loss is less than beta times this number, "
                                   "halve the threshold. Should be (0,1]",
                              required=False, type=float, default=2.0 / 3.0)

    loggingArgs.add_argument('-v', '--verbose',
                             help="Print everything the neural network is doing.",
                             action='store_true', required=False, default=False)
    # Print progress every nth batch.
    loggingArgs.add_argument('-p', '--print-every',
                             help="Print progress every nth batch",
                             type=int, required=False, default=4)
    loggingArgs.add_argument('-L', '--log-config',
                             help="How much the program should log.", type=str,
                             choices=["all", "main", "min", "warn", "err", "none"],
                             required=False, default="main")

    fileArgs.add_argument('-f', "--model-file", help="Save model to this file",
                          type=str, required=False,
                          default="models/posenet-model-v{}-E{:04d}-N{:02d}.model")
    # Use the provided pretrained model.
    fileArgs.add_argument('-M', '--pretrained-model',
                          help="Resume using given pretrained model",
                          type=str, required=False, default=None)
    fileArgs.add_argument('-s', '--dont-save',
                          help="Don't save models after each epoch. "
                               "Default action is to save models after each epoch.",
                          required=False, action='store_true', default=False)
    fileArgs.add_argument('-r', '--resume', help="Resume from latest model",
                          action='store_true', required=False, default=False)

    testingArgs.add_argument('--num-of-tests',
                             help="Number of times to test the network to get the uncertainty.",
                             required=False, default=64, type=int)
    testingArgs.add_argument('--test-every', help="Test every given epochs.",
                             required=False, default=2, type=int)
    testingArgs.add_argument('--test-print-lots',
                             help="Print testing results with the frequency based on "
                                  "the batch-size (default) or not. "
                                  "\nWith the batch size, testing results will be printed less.",
                             required=False, action="store_true", default=False)

    actionArgs.add_argument('-i', '--image', type=str,
                            help="A single image to test the network on.",
                            required=False, default=None)
    actionArgs.add_argument('--skip-training',
                            help="Skips training and validation and goes straight to testing. "
                                 "Good to use if you keep getting memory errors.",
                            required=False, action="store_true", default=False)
    actionArgs.add_argument('--plot', help="Plot losses of specified model.",
                            required=False, action="store_true", default=False)
    actionArgs.add_argument('--skip-testing',
                            help="Skip testing (useful if you get memory errors only while testing).",
                            required=False, action="store_true", default=False)

    out: argparse.Namespace = arg_parser.parse_args()

    if out.verbose:
        out.log_config = "all"
    if out.resume and out.pretrained_model is None:
        out.pretrained_model = "models/posenet-latest-v{version}-N{num:02d}.model".format(
            version=Config.version, num=out.model_number)
    if out.max_threads or out.threads > torch.get_num_threads():
        out.threads = torch.get_num_threads()
    if out.threads >= 8 and not out.force_threads:
        out.threads = 7
    if out.threads == 0:  # 'is 0' relied on small-int interning; use '==' instead
        out.threads = 1
    return out
filename = save_dir + 'exp.log'
if not os.path.isfile(filename):
    f = open(filename, mode='w')
    f.close()
logger = get_logger(filename)

argument_file = save_dir + '.args'
argsDict = args.__dict__
with open(argument_file, 'w') as f:
    f.writelines('------------------ start ------------------' + '\n')
    for eachArg, value in argsDict.items():
        f.writelines(eachArg + ' : ' + str(value) + '\n')
    f.writelines('------------------- end -------------------' + '\n')

torch.set_num_threads(torch.get_num_threads())  # no-op: keeps the current count
sac(lambda: SoccerPLUS(visual=False),
    actor_critic=MLPActorCritic,
    ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
    gamma=args.gamma,
    seed=args.seed,
    epochs=args.epochs,
    policy_type=args.policy_type,
    replay_size=args.replay_size,
    lr=args.lr,
    alpha=args.alpha,
    batch_size=args.batch_size,
    start_steps=10000,
    steps_per_epoch=1000,
    polyak=0.995,
def evaluate(model, data_loader, device, maxDets=None, crop_inference_to_fov=False):
    # See: https://cocodataset.org/#detection-eval
    # NOTE: The coco evaluator (and what's reported in FasterRCNN and
    # maskrcnn papers) combines detection and classification by
    # considering something to be detected only if it's from the same
    # class. eg. If the model places a bounding box and labels it "traffic
    # light", but in reality that location has a "person", this is
    # considered a false positive traffic light and a false negative
    # person. We'd like to get this metric, sure, but we're also
    # interested in classic detection .. i.e. just "is there a nucleus?"
    # so we get AP using both the full set of classes AS WELL AS
    # a remapped class set where anything is considered a "nucleus"
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()

    # iou_types = _get_iou_types(model)
    iou_types = ['bbox']  # segm AP is meaningless in my hybrid bbox/segm dataset
    maxDets = [1, 10, 100] if maxDets is None else maxDets
    cropper = tvdt.Cropper() if crop_inference_to_fov else None

    # combined detection & classification precision/recall
    dst = data_loader.dataset
    coco = get_coco_api_from_dataset(dst, crop_inference_to_fov=crop_inference_to_fov)
    coco_evaluator = CocoEvaluator(coco, iou_types, maxDets=maxDets)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    # precision/recall for just detection (objectness)
    classification = dst.do_classification
    if classification:
        # IMPORTANT: REVERSE ME AFTER DEFINING COCO API
        dst.do_classification = False
        dst.set_labelmaps()
        metric_logger_objectness = utils.MetricLogger(delimiter=" ")
        coco_objectness = get_coco_api_from_dataset(
            dst, crop_inference_to_fov=crop_inference_to_fov)
        coco_evaluator_objectness = CocoEvaluator(
            coco_objectness, iou_types, maxDets=maxDets)
        # IMPORTANT: THIS LINE IS CRITICAL
        dst.do_classification = True
        dst.set_labelmaps()
    else:
        metric_logger_objectness = None
        # noinspection PyUnusedLocal
        coco_objectness = None
        coco_evaluator_objectness = None

    n_true = 0
    n_pred = 0
    n_matched = 0
    cltargets = []
    clprobabs = []
    cloutlabs = []
    seg_intersects = []
    seg_sums = []

    def _get_categnames(prefix):
        if prefix == '':
            return dst.categs_names
        return dst.supercategs_names

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)
        targets = list(targets)

        # uncomment if GPU
        # torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [
            {k: v.to(cpu_device) for k, v in t.items() if v is not None}
            for t in outputs
        ]
        model_time = time.time() - model_time

        if crop_inference_to_fov:
            images, targets, outputs = _crop_all_to_fov(
                images=images, targets=targets, outputs=outputs, cropper=cropper)

        # combined detection & classification precision/recall
        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        probabs_exist = 'probabs' in outputs[0]

        if classification:
            # IMPORTANT NOTE: The way that FasterRCNN is implemented
            # assigns each box prediction a confidence score and a label. This
            # is NOT the same as the "traditional" classifier where there is a
            # confidence score for ALL classes per object/pixel. Instead, here
            # the class logits are "flattened" so that each box-label
            # combination is considered separately, then the NMS is done
            # independently per class. Long story short, each box only has
            # one label and confidence

            # Match truth to outputs and only count matched objects for
            # classification accuracy stats
            for target, output in zip(targets, outputs):
                # Match, ignoring ambiguous nuclei. Note that the model
                # already filters out anything predicted as ignore_label
                # in inference mode, so we only need to do this for gtruth
                keep = target['iscrowd'] == 0
                cltrg_boxes = np.int32(target['boxes'][keep])
                cltrg_labels = np.int32(target['labels'][keep])
                keep_target, keep_output, _, _ = \
                    map_bboxes_using_hungarian_algorithm(
                        bboxes1=cltrg_boxes,
                        bboxes2=np.int32(output['boxes']),
                        min_iou=0.5)

                # classification performance
                n_true += cltrg_boxes.shape[0]
                n_pred += output['boxes'].shape[0]
                n_matched += len(keep_output)
                cltargets.extend(cltrg_labels[keep_target].tolist())
                if probabs_exist:
                    clprobabs.extend(
                        np.float32(output['probabs'])[keep_output, :].tolist()
                    )
                else:
                    cloutlabs.extend(
                        np.int32(output['labels'])[keep_output].tolist()
                    )

                # FIXME: for now, we just assess this if classification because
                # otherwise I'll need to refactor the function output

                # segmentation performance
                if 'masks' in target:
                    ismask = np.int32(target['ismask'])[keep_target] == 1
                    tmask = np.int32(target['masks'])[keep_target, ...][ismask, ...]
                    if not model.transform.densify_mask:
                        omask = np.int32(output['masks'] > 0.5)
                        omask = omask[:, 0, :, :]
                    else:
                        omask = np.int32(output['masks'])
                        obj_ids = np.arange(1, omask.max() + 1)
                        omask = omask == obj_ids[:, None, None]
                        omask = 0 + omask
                    omask = omask[keep_output, ...][ismask, ...]
                    for i in range(tmask.shape[0]):
                        sms = tmask[i, ...].sum() + omask[i, ...].sum()
                        isc = np.sum(0 + ((tmask[i, ...] + omask[i, ...]) == 2))
                        if (sms > 0) and (isc > 0):
                            seg_sums.append(sms)
                            seg_intersects.append(isc)

            # FIXME (low priority): have this use a map from the data loader
            # labelcodes to justdetection code (eg 2 -> 1, 3 -> 1, etc)
            # instead of hardcoding the assumption that "nucleus" will
            # always have the code 1. Note that the model already filters
            # out anything predicted as ignore_label.

            # remap predictions to just "nucleus". Note that the labels
            # have already been remapped during indexing of the coco API.
            # NEEDLESS TO SAY, this must happen AFTER we've assigned
            # the classifications to the classification_outputs list
            for _, output in res.items():
                output['labels'] = 1 + (0 * output['labels'])

            # precision/recall for just detection (objectness)
            evaluator_time = time.time()
            coco_evaluator_objectness.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger_objectness.update(
                model_time=model_time, evaluator_time=evaluator_time)

    # combined detection & classification precision/recall
    # gather the stats from all processes & accumulate preds from all imgs
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    if classification:
        # Init classification results
        classification_metrics = {
            'n_true_nuclei_excl_ambiguous': n_true,
            'n_predicted_nuclei': n_pred,
            'n_matched_for_classif': n_matched,
        }
        for prefix in ['', 'superCateg_']:
            categs_names = _get_categnames(prefix)
            classification_metrics.update({
                f'{prefix}{k}': np.nan
                for k in ['accuracy', 'auroc_micro', 'auroc_macro', 'mcc']
            })
            # Class-by-class
            classification_metrics.update({
                f'{prefix}accuracy_{cls_name}': np.nan
                for cls_name in categs_names
            })
            classification_metrics.update({
                f'{prefix}mcc_{cls_name}': np.nan
                for cls_name in categs_names
            })
            if probabs_exist:
                classification_metrics.update({
                    f'{prefix}aucroc_{cls_name}': np.nan
                    for cls_name in categs_names
                })
        for prefix in ['', 'superCateg_']:
            categs_names = _get_categnames(prefix)
            classification_metrics.update({
                f'{prefix}confusion_trueClass-{tc}_predictedClass-{pc}': 0
                for tc in categs_names for pc in categs_names
            })

        # segmentation -- restricted to matched nuclei with available seg
        if len(seg_sums) > 0:
            seg_intersects = np.array(seg_intersects)
            seg_sums = np.array(seg_sums)
            intersect = np.sum(seg_intersects)
            sums = np.sum(seg_sums)
            ious = seg_intersects / (seg_sums - seg_intersects)
            dices = 2. * seg_intersects / seg_sums
            classification_metrics.update({
                # overall
                'seg_intersect': intersect,
                'seg_sum': sums,
                'seg_IOU': intersect / (sums - intersect),
                'seg_DICE': 2. * intersect / sums,
                # by nucleus
                'seg_n': len(ious),
                'seg_medIOU': np.median(ious),
                'seg_medDICE': np.median(dices),
            })

        metric_logger_objectness.synchronize_between_processes()
        print("\nAveraged stats (OBJECTNESS):", metric_logger_objectness)
        coco_evaluator_objectness.synchronize_between_processes()
        coco_evaluator_objectness.accumulate()
        coco_evaluator_objectness.summarize()

        # NOTE: WE MAKE SURE ALL LABELMAPS BELOW START AT ZERO SINCE THE
        # FUNCTION _update_classification_metrics DOES AN ARGMAX INTERNALLY
        # SO FIRST COLUMN CORRESPONDS TO ZERO'TH CLASS, WHICH CORRESPONDS TO
        # LABEL = 1 IN OUR DATASET AND MODEL

        # classification accuracy without remapping
        clkwargs = {
            'metrics_dict': classification_metrics,
            'all_labels': np.array(cltargets) - 1,
            'rlabelcodes': {
                k - 1: v
                for k, v in dst.rlabelcodes.items() if v != 'AMBIGUOUS'
            },
            'codemap': None,
            'prefix': 'superCateg_' if dst.use_supercategs else '',
        }
        if probabs_exist:
            clkwargs['all_scores'] = np.array(clprobabs)
        else:
            clkwargs['output_labels'] = np.array(cloutlabs)
        _update_classification_metrics(**clkwargs)

        # FIXME (low priority): this hard-codes the name of ambiguous categ
        # classification accuracy mapped to supercategs
        if not dst.use_supercategs:
            clkwargs.update({
                'rlabelcodes': {
                    k - 1: v
                    for k, v in dst.supercategs_rlabelcodes.items()
                    if v != 'AMBIGUOUS'
                },
                'codemap': {
                    k - 1: v - 1
                    for k, v in dst.main_codes_to_supercategs_codes.items()
                    if dst.supercategs_rlabelcodes[v] != 'AMBIGUOUS'
                },
                'prefix': 'superCateg_',
            })
            _update_classification_metrics(**clkwargs)
    else:
        classification_metrics = {}

    torch.set_num_threads(n_threads)

    return coco_evaluator, coco_evaluator_objectness, classification_metrics