def __init__(self, output_dir, model_path, dataset_name, dataset_kwargs=None, save_annotations=True):
    """Set up output folders, test dataset, model and metrics for a test run.

    Args:
        output_dir: directory receiving the results; it is passed through
            `coerce_to_path_and_create_dir`.
        model_path: checkpoint location handed to `load_model_from_path`.
        dataset_name: key resolved through `get_dataset`; the dataset is
            instantiated with `split="test"`.
        dataset_kwargs: optional keyword arguments for the dataset
            constructor; a falsy value is replaced by an empty dict.
        save_annotations: when truthy, the "seg_map" and "blend"
            sub-directories are created under the output directory.
    """
    print_info("Tester initialized for model {} and dataset {}".format(
        model_path, dataset_name))

    # --- Output directory ---
    self.save_annotations = save_annotations
    self.output_dir = coerce_to_path_and_create_dir(output_dir)
    print_info("Output dir is {}".format(self.output_dir))

    # --- Dataset ---
    self.dataset_kwargs = dataset_kwargs if dataset_kwargs else {}
    dataset_cls = get_dataset(dataset_name)
    self.dataset = dataset_cls(split="test", **self.dataset_kwargs)
    n_samples = len(self.dataset)
    print_info("Dataset {} loaded with kwargs {}: {} samples".format(
        dataset_name, self.dataset_kwargs, n_samples))

    # --- Model ---
    # XXX: at inference, input images are usually not fixed
    torch.backends.cudnn.benchmark = False
    if torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    self.device = torch.device(device_name)
    self.model = load_model_from_path(model_path, device=self.device)
    self.model.eval()
    print_info("Model {} created and checkpoint state loaded".format(
        self.model.name))

    # --- Metrics: only available when the dataset exposes label files ---
    if self.dataset.label_files is None:
        self.metrics = None
        print_info(
            "No labels found, performance metrics won't be computed")
    else:
        self.metrics = RunningMetrics(self.dataset.restricted_labels,
                                      self.dataset.metric_labels)
        print_info("Labels found, metrics instantiated")

    # --- Outputs ---
    # Probability maps are heavy on disk; uncomment the next line to get a
    # dedicated directory for them.
    # self.prob_dir = coerce_to_path_and_create_dir(self.output_dir / "prob_map")
    self.prob_maps = []
    self.seg_maps = []
    if self.save_annotations:
        self.seg_dir = coerce_to_path_and_create_dir(self.output_dir / "seg_map")
        self.blend_dir = coerce_to_path_and_create_dir(self.output_dir / "blend")
def __init__(self, output_dir=SYNTHETIC_DOCUMENT_DATASET_PATH, merged_labels=True, baseline_as_label=False, verbose=True):
    """Store the generation options and create the split sub-directories.

    The output directory goes through `coerce_to_path_and_create_dir`, then
    its 'train', 'val' and 'test' children are created if missing.
    """
    self.output_dir = coerce_to_path_and_create_dir(output_dir)
    self.merged_labels, self.baseline_as_label = merged_labels, baseline_as_label
    self.verbose = verbose

    # One folder per dataset split; existing folders are left untouched.
    for split_name in ('train', 'val', 'test'):
        split_dir = self.output_dir / split_name
        split_dir.mkdir(exist_ok=True)
def __init__(self, input_dir, output_dir, json_file='via_region_data.json', out_ext='png', color=ILLUSTRATION_COLOR, verbose=True):
    """Load the annotation file and record the rendering options.

    Args:
        input_dir: existing directory that contains `json_file`.
        output_dir: directory created if needed to receive the outputs.
        json_file: name of the annotation file inside `input_dir`.
        out_ext: extension stored in `self.out_ext`.
        color: an `int` selects mode 'L' with background 0; any other value
            selects mode 'RGB' with background (0, 0, 0).
        verbose: stored in `self.verbose`.
    """
    source_dir = coerce_to_path_and_check_exist(input_dir)
    self.input_dir = source_dir
    self.annotations = self.load_json(source_dir / json_file)
    self.output_dir = coerce_to_path_and_create_dir(output_dir)

    self.out_ext = out_ext
    self.color = color
    # Mode and background are both driven by the type of `color`.
    if isinstance(color, int):
        self.mode = 'L'
        self.background_color = 0
    else:
        self.mode = 'RGB'
        self.background_color = (0, 0, 0)
    self.verbose = verbose
def __init__(self, input_dir, output_dir, color_label_mapping=COLOR_TO_LABEL_MAPPING, img_extension='png', verbose=True):
    """Collect the input files and prepare the output directory.

    Args:
        input_dir: existing directory scanned with `get_files_from_dir`.
        output_dir: directory created if needed.
        color_label_mapping: stored as `self.color_label_mapping`.
        img_extension: forwarded as `valid_extensions` when listing files.
        verbose: stored in `self.verbose`.
    """
    source_dir = coerce_to_path_and_check_exist(input_dir)
    self.input_dir = source_dir
    self.files = get_files_from_dir(source_dir, valid_extensions=img_extension)
    self.output_dir = coerce_to_path_and_create_dir(output_dir)
    self.color_label_mapping, self.verbose = color_label_mapping, verbose
def __init__(self, input_dir, output_dir, suffix_fmt='-{}', out_ext='jpg', create_sub_dir=False, verbose=True):
    """List the pdf files of `input_dir` and store the conversion options.

    Args:
        input_dir: existing directory; files with the 'pdf' extension are
            collected through `get_files_from_dir`.
        output_dir: directory created if needed.
        suffix_fmt: stored in `self.suffix_fmt`.
        out_ext: stored in `self.out_ext`.
        create_sub_dir: stored in `self.create_sub_dir`.
        verbose: when truthy, the number of files found is printed.
    """
    source_dir = coerce_to_path_and_check_exist(input_dir)
    self.input_dir = source_dir
    self.files = get_files_from_dir(source_dir, valid_extensions='pdf')
    self.output_dir = coerce_to_path_and_create_dir(output_dir)

    self.suffix_fmt, self.out_ext = suffix_fmt, out_ext
    self.create_sub_dir = create_sub_dir
    self.verbose = verbose

    if not self.verbose:
        return
    n_files = len(self.files)
    print_info("Pdf2Image initialised: found {} files".format(n_files))
def run(self):
    """Download the resources listed by each manifest in `self.manifest_urls`.

    For every manifest that `self.get_json` manages to return, a directory
    named after the parent segment of the manifest '@id' path is created
    under `self.output_dir`. Each resource URL gets its third-from-last
    path segment replaced by `self.size` before being fetched, and the
    payload is streamed to `<5th-from-last URL segment><suffix>`.

    Manifests for which `self.get_json` returns None are skipped. Resources
    whose HTTP response is an error (status >= 400) are reported and
    skipped instead of having the error body saved as an image file.
    """
    for url in self.manifest_urls:
        manifest = self.get_json(url)
        if manifest is None:
            continue

        manifest_id = Path(urlparse(manifest['@id']).path).parent.name
        print_info('Processing {}...'.format(manifest_id))
        output_path = coerce_to_path_and_create_dir(self.output_dir / manifest_id)

        for resource_url in self.get_resources(manifest):
            # Swap the size segment (third from the end) for the requested one.
            url_parts = resource_url.split('/')
            resource_url = '/'.join(url_parts[:-3] + [self.size] + url_parts[-2:])

            with requests.get(resource_url, stream=True) as response:
                if not response.ok:
                    # Writing the body of an error response would silently
                    # produce a corrupt output file.
                    print_info('Skipping {}: HTTP status {}'.format(
                        resource_url, response.status_code))
                    continue

                # Let urllib3 undo any transfer encoding (e.g. gzip) while
                # the raw stream is copied.
                response.raw.decode_content = True
                resrc_path = Path(urlparse(resource_url).path)
                name = '{}{}'.format(resrc_path.parts[-5], resrc_path.suffix)
                output_file = output_path / name
                print_info('Saving {}...'.format(output_file.relative_to(self.output_dir)))
                with open(output_file, mode='wb') as f:
                    shutil.copyfileobj(response.raw, f)
def __init__(self, input_dir, output_dir, tag="default", seg_fmt=SEG_GROUND_TRUTH_FMT, labels_to_eval=None, save_annotations=True, labels_to_annot=None, predict_bbox=False, verbose=True):
    """Load the model identified by `tag` and prepare an evaluation run.

    Args:
        input_dir: existing directory, scanned recursively for files with
            one of `VALID_EXTENSIONS`.
        output_dir: directory created if needed; it also hosts the logger.
        tag: sub-directory of `MODELS_PATH` containing `MODEL_FILE`.
        seg_fmt: stored in `self.seg_fmt`.
        labels_to_eval: labels to evaluate; defaults to
            `[ILLUSTRATION_LABEL]`. They are sorted before being stored.
        save_annotations: stored in `self.save_annotations`.
        labels_to_annot: labels to annotate; falls back to the evaluated
            labels when falsy.
        predict_bbox: stored in `self.predict_bbox`.
        verbose: stored in `self.verbose`.

    Raises:
        ValueError: if `labels_to_eval` contains duplicates or a label that
            is not among the labels the model was trained with.
    """
    self.input_dir = coerce_to_path_and_check_exist(input_dir).absolute()
    self.files = get_files_from_dir(self.input_dir,
                                    valid_extensions=VALID_EXTENSIONS,
                                    recursive=True,
                                    sort=True)
    self.output_dir = coerce_to_path_and_create_dir(output_dir).absolute()
    self.seg_fmt = seg_fmt
    self.logger = get_logger(self.output_dir, name='evaluator')

    # Model
    model_path = coerce_to_path_and_check_exist(MODELS_PATH / tag / MODEL_FILE)
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    self.model, (self.img_size, restricted_labels, self.normalize) = load_model_from_path(
        model_path,
        device=self.device,
        attributes_to_return=[
            'train_resolution', 'restricted_labels', 'normalize'
        ])
    self.model.eval()

    # Labels
    self.restricted_labels = sorted(restricted_labels)
    self.labels_to_eval = [
        ILLUSTRATION_LABEL
    ] if labels_to_eval is None else sorted(labels_to_eval)
    self.labels_to_rm = set(self.restricted_labels).difference(
        self.labels_to_eval)
    # Explicit exception rather than `assert`: the check must survive
    # `python -O`, and it matches how the extractor reports the same
    # mismatch. The condition itself is unchanged.
    n_known = len(set(self.labels_to_eval).intersection(self.restricted_labels))
    if n_known != len(self.labels_to_eval):
        raise ValueError(
            'Incompatible `labels_to_eval` and `tag` arguments: labels must '
            'be distinct and model was trained using {} labels only'.format(
                self.restricted_labels))

    # Colors: label indices are the 1-based positions in the sorted
    # restricted labels.
    self.restricted_colors = [
        LABEL_TO_COLOR_MAPPING[label] for label in self.restricted_labels
    ]
    self.label_idx_color_mapping = {
        self.restricted_labels.index(label) + 1: color
        for label, color in zip(self.restricted_labels, self.restricted_colors)
    }
    self.color_label_idx_mapping = {
        color: label_idx
        for label_idx, color in self.label_idx_color_mapping.items()
    }

    # One RunningMetrics instance is created lazily per requested key.
    self.metrics = defaultdict(lambda: RunningMetrics(
        self.restricted_labels, self.labels_to_eval))
    self.save_annotations = save_annotations
    self.labels_to_annot = labels_to_annot or self.labels_to_eval
    self.predict_bbox = predict_bbox
    self.verbose = verbose

    self.print_and_log_info('Output dir: {}'.format(
        self.output_dir.absolute()))
    self.print_and_log_info('Evaluator initialised with kwargs {}'.format({
        'labels_to_eval': self.labels_to_eval,
        'save_annotations': save_annotations
    }))
    self.print_and_log_info('Model tag: {}'.format(model_path.parent.name))
    self.print_and_log_info(
        'Model characteristics: train_resolution={}, restricted_labels={}'.
        format(self.img_size, self.restricted_labels))
    self.print_and_log_info('Found {} input files to process'.format(
        len(self.files)))
def qualitative_eval(self):
    """Save qualitative results for every cluster.

    The training set is traversed once, unshuffled: each sample is assigned
    to the prototype with the smallest model distance, the mean minimum
    distance is logged and written to `FINAL_SCORES_FILE`, and per-cluster
    averages of the transformed images are accumulated. Then, under
    `run_dir / 'clusters'`, the cluster counts are written to
    'cluster_counts.tsv' and each cluster folder receives its closest
    samples, a random selection of samples and the average image.
    """
    scores_path = self.run_dir / FINAL_SCORES_FILE
    loss = AverageMeter()
    with open(scores_path, mode="w") as f:
        f.write("loss\n")

    cluster_path = coerce_to_path_and_create_dir(self.run_dir / 'clusters')
    dataset = self.train_loader.dataset
    train_loader = DataLoader(dataset,
                              batch_size=self.batch_size,
                              num_workers=self.n_workers,
                              shuffle=False)
    prototype_ids = range(self.n_prototypes)

    def save_sample(sample_idx, out_dir, stem, k):
        # Raw input, plus (for non-identity transformers) the two
        # transformed views associated with prototype `k`.
        inp = dataset[sample_idx][0].unsqueeze(0).to(self.device)
        convert_to_img(inp).save(out_dir / f'{stem}_raw.png')
        if not self.model.transformer.is_identity:
            forward = self.model.transform(inp)[0, k]
            convert_to_img(forward).save(out_dir / f'{stem}_tsf.png')
            backward = self.model.transform(inp, inverse=True)[0, k]
            convert_to_img(backward).save(out_dir / f'{stem}_tsf_inp.png')

    # Compute results
    distances = np.array([])
    cluster_idx = np.array([], dtype=np.int32)
    averages = {k: AverageTensorMeter() for k in prototype_ids}
    for images, _ in train_loader:
        images = images.to(self.device)
        min_dist, min_idx = self.model(images)[1].min(1)
        dist_min_by_sample = min_dist.cpu().numpy()
        argmin_idx = min_idx.cpu().numpy().astype(np.int32)
        loss.update(dist_min_by_sample.mean(), n=len(dist_min_by_sample))

        distances = np.hstack([distances, dist_min_by_sample])
        cluster_idx = np.hstack([cluster_idx, argmin_idx])

        transformed_imgs = self.model.transform(images).cpu()
        for k, meter in averages.items():
            meter.update(transformed_imgs[argmin_idx == k, k])

    self.print_and_log_info("final_loss: {:.5}".format(loss.avg))
    with open(scores_path, mode="a") as f:
        f.write("{:.5}\n".format(loss.avg))

    # Save results
    header = '\t'.join(map(str, prototype_ids))
    counts = '\t'.join(str(averages[k].count) for k in prototype_ids)
    with open(cluster_path / 'cluster_counts.tsv', mode='w') as f:
        f.write(header + '\n')
        f.write(counts + '\n')

    for k in prototype_ids:
        path = coerce_to_path_and_create_dir(cluster_path / f'cluster{k}')
        indices = np.where(cluster_idx == k)[0]

        # Samples with the smallest distance within this cluster
        top_idx = np.argsort(distances[indices])[:N_CLUSTER_SAMPLES]
        for j, idx in enumerate(top_idx):
            save_sample(indices[idx], path, f'top{j}', k)

        # Random samples; the whole cluster when it is small enough
        if len(indices) > N_CLUSTER_SAMPLES:
            random_idx = np.random.choice(indices,
                                          N_CLUSTER_SAMPLES,
                                          replace=False)
        else:
            random_idx = indices
        for j, idx in enumerate(random_idx):
            save_sample(idx, path, f'random{j}', k)

        try:
            convert_to_img(averages[k].avg).save(path / 'avg.png')
        except AssertionError:
            print_warning(f'no image found in cluster {k}')
def save_training_metrics(self):
    """Turn the logged metrics and intermediate images into plots and gifs.

    Reads the train / val metrics and val scores TSV files, then writes
    the loss and cluster-proportion plots (plus the score plots when the
    validation set is not empty) as PDF files in the run directory. The
    per-prototype image folders (and variance folders for gmm models) are
    converted to gifs and removed. Transformation folders are either reset
    (identity transformer) or converted to gifs and removed.
    Returns early, after logging, when no train metric row was recorded.
    """
    df_train = pd.read_csv(self.train_metrics_path, sep="\t", index_col=0)
    df_val = pd.read_csv(self.val_metrics_path, sep="\t", index_col=0)
    df_scores = pd.read_csv(self.val_scores_path, sep="\t", index_col=0)
    if not len(df_train):
        self.print_and_log_info("No metrics or plots to save")
        return

    run_dir = self.run_dir

    # Losses
    losses = [s for s in self.train_metrics.names if s.startswith('loss')]
    df = df_train.join(df_val[['loss_val']], how="outer")
    plot_lines(df, losses + ['loss_val'], title="Loss").savefig(
        run_dir / "loss.pdf")

    # Cluster proportions
    names = [s for s in self.train_metrics.names if s.startswith('prop_')]
    plot_lines(df, names, title="Cluster proportions").savefig(
        run_dir / "cluster_proportions.pdf")
    s = df[names].iloc[-1]
    s.index = [n.replace('prop_clus', '') for n in names]
    plot_bar(s, title="Final cluster proportions").savefig(
        run_dir / "cluster_proportions_final.pdf")

    # Validation
    if not self.is_val_empty:
        names = [name for name in self.val_scores.names if 'cls' not in name]
        fig = plot_lines(df_scores,
                         names,
                         title="Global scores",
                         unit_yaxis=True)
        fig.savefig(run_dir / 'global_scores.pdf')

        cls_names = [f'acc_cls{i}' for i in range(self.n_classes)]
        fig = plot_lines(df_scores,
                         cls_names,
                         title="Scores by cls",
                         unit_yaxis=True)
        fig.savefig(run_dir / "scores_by_cls.pdf")

    # Prototypes & Variances
    if MAX_GIF_SIZE < max(self.img_size):
        size = MAX_GIF_SIZE
    else:
        size = self.img_size
    self.save_prototypes()
    if self.is_gmm:
        self.save_variances()
    for k in range(self.n_prototypes):
        proto_dir = self.prototypes_path / f'proto{k}'
        save_gif(proto_dir, f'prototype{k}.gif', size=size)
        shutil.rmtree(str(proto_dir))
        if self.is_gmm:
            var_dir = self.variances_path / f'var{k}'
            save_gif(var_dir, f'variance{k}.gif', size=size)
            shutil.rmtree(str(var_dir))

    # Transformation predictions
    if not self.model.transformer.is_identity:
        self.save_transformed_images()
        for i in range(self.images_to_tsf.size(0)):
            for k in range(self.n_prototypes):
                tsf_dir = self.transformation_path / f'img{i}' / f'tsf{k}'
                save_gif(tsf_dir, f'tsf{k}.gif', size=size)
                shutil.rmtree(str(tsf_dir))
    else:
        # no need to keep transformation predictions: reset the folder
        shutil.rmtree(str(self.transformation_path))
        coerce_to_path_and_create_dir(self.transformation_path)

    self.print_and_log_info("Training metrics and visuals saved")
def __init__(self, config_path, run_dir):
    """Build every component of a training run from a YAML config file.

    The config is copied into `run_dir`, then datasets, dataloaders, model,
    optimizer, scheduler, metric files, output folders and (optionally) a
    Visdom visualizer are instantiated from its "dataset", "model" and
    "training" sections.

    Args:
        config_path: path to an existing YAML configuration file.
        run_dir: directory created if needed; receives logs, metrics and
            visual outputs.

    Raises:
        ValueError: if the config defines neither `training.n_iterations`
            nor `training.n_epoches`.
        AssertionError: if both of them are defined, if both `pretrained`
            and `resume` are defined, or if the scheduler `update_range`
            is not "epoch" or "batch".
    """
    self.config_path = coerce_to_path_and_check_exist(config_path)
    self.run_dir = coerce_to_path_and_create_dir(run_dir)
    self.logger = get_logger(self.run_dir, name="trainer")
    self.print_and_log_info(
        "Trainer initialisation: run directory is {}".format(run_dir))

    shutil.copy(self.config_path, self.run_dir)
    self.print_and_log_info("Config {} copied to run directory".format(
        self.config_path))

    # NOTE(review): yaml.FullLoader is not meant for untrusted documents;
    # consider yaml.safe_load if configs can come from external sources.
    with open(self.config_path) as fp:
        cfg = yaml.load(fp, Loader=yaml.FullLoader)

    if torch.cuda.is_available():
        type_device = "cuda"
        nb_device = torch.cuda.device_count()
    else:
        type_device = "cpu"
        nb_device = None
    self.device = torch.device(type_device)
    self.print_and_log_info("Using {} device, nb_device is {}".format(
        type_device, nb_device))

    # Datasets and dataloaders
    self.dataset_kwargs = cfg["dataset"]
    self.dataset_name = self.dataset_kwargs.pop("name")
    train_dataset = get_dataset(self.dataset_name)("train",
                                                   **self.dataset_kwargs)
    val_dataset = get_dataset(self.dataset_name)("val",
                                                 **self.dataset_kwargs)
    self.n_classes = train_dataset.n_classes
    self.is_val_empty = len(val_dataset) == 0
    self.print_and_log_info("Dataset {} instantiated with {}".format(
        self.dataset_name, self.dataset_kwargs))
    self.print_and_log_info(
        "Found {} classes, {} train samples, {} val samples".format(
            self.n_classes, len(train_dataset), len(val_dataset)))

    self.img_size = train_dataset.img_size
    self.batch_size = cfg["training"]["batch_size"]
    self.n_workers = cfg["training"].get("n_workers", 4)
    self.train_loader = DataLoader(train_dataset,
                                   batch_size=self.batch_size,
                                   num_workers=self.n_workers,
                                   shuffle=True)
    self.val_loader = DataLoader(val_dataset,
                                 batch_size=self.batch_size,
                                 num_workers=self.n_workers)
    self.print_and_log_info(
        "Dataloaders instantiated with batch_size={} and n_workers={}".
        format(self.batch_size, self.n_workers))

    # Training duration: exactly one of n_iterations / n_epoches is
    # expected, the other one is derived from the number of batches.
    self.n_batches = len(self.train_loader)
    self.n_iterations, self.n_epoches = cfg["training"].get(
        "n_iterations"), cfg["training"].get("n_epoches")
    assert not (self.n_iterations is not None
                and self.n_epoches is not None)
    if self.n_iterations is not None:
        self.n_epoches = max(self.n_iterations // self.n_batches, 1)
    elif self.n_epoches is not None:
        self.n_iterations = self.n_epoches * self.n_batches
    else:
        # Previously this surfaced as an opaque `None * int` TypeError.
        raise ValueError(
            "Config must define either `training.n_iterations` or "
            "`training.n_epoches`")

    # Model
    self.model_kwargs = cfg["model"]
    self.model_name = self.model_kwargs.pop("name")
    self.is_gmm = 'gmm' in self.model_name
    self.model = get_model(self.model_name)(
        self.train_loader.dataset, **self.model_kwargs).to(self.device)
    self.print_and_log_info("Using model {} with kwargs {}".format(
        self.model_name, self.model_kwargs))
    self.print_and_log_info('Number of trainable parameters: {}'.format(
        f'{count_parameters(self.model):,}'))
    self.n_prototypes = self.model.n_prototypes

    # Optimizer: cluster and transformer parameters form two groups, each
    # with its own optional keyword overrides.
    opt_params = cfg["training"]["optimizer"] or {}
    optimizer_name = opt_params.pop("name")
    cluster_kwargs = opt_params.pop('cluster', {})
    tsf_kwargs = opt_params.pop('transformer', {})
    self.optimizer = get_optimizer(optimizer_name)([
        dict(params=self.model.cluster_parameters(), **cluster_kwargs),
        dict(params=self.model.transformer_parameters(), **tsf_kwargs)
    ], **opt_params)
    self.model.set_optimizer(self.optimizer)
    self.print_and_log_info("Using optimizer {} with kwargs {}".format(
        optimizer_name, opt_params))
    self.print_and_log_info("cluster kwargs {}".format(cluster_kwargs))
    self.print_and_log_info("transformer kwargs {}".format(tsf_kwargs))

    # Scheduler
    scheduler_params = cfg["training"].get("scheduler", {}) or {}
    scheduler_name = scheduler_params.pop("name", None)
    self.scheduler_update_range = scheduler_params.pop(
        "update_range", "epoch")
    assert self.scheduler_update_range in ["epoch", "batch"]
    if scheduler_name == "multi_step" and isinstance(
            scheduler_params["milestones"][0], float):
        # Float milestones are fractions of the total number of steps.
        n_tot = self.n_epoches if self.scheduler_update_range == "epoch" else self.n_iterations
        scheduler_params["milestones"] = [
            round(m * n_tot) for m in scheduler_params["milestones"]
        ]
    self.scheduler = get_scheduler(scheduler_name)(self.optimizer,
                                                   **scheduler_params)
    self.cur_lr = self.scheduler.get_last_lr()[0]
    self.print_and_log_info("Using scheduler {} with parameters {}".format(
        scheduler_name, scheduler_params))

    # Pretrained / Resume
    checkpoint_path = cfg["training"].get("pretrained")
    checkpoint_path_resume = cfg["training"].get("resume")
    assert not (checkpoint_path is not None
                and checkpoint_path_resume is not None)
    if checkpoint_path is not None:
        self.load_from_tag(checkpoint_path)
    elif checkpoint_path_resume is not None:
        self.load_from_tag(checkpoint_path_resume, resume=True)
    else:
        self.start_epoch, self.start_batch = 1, 1

    # Train metrics & check_cluster interval
    metric_names = ['time/img', 'loss']
    metric_names += [f'prop_clus{i}' for i in range(self.n_prototypes)]
    train_iter_interval = cfg["training"]["train_stat_interval"]
    self.train_stat_interval = train_iter_interval
    self.train_metrics = Metrics(*metric_names)
    self.train_metrics_path = self.run_dir / TRAIN_METRICS_FILE
    with open(self.train_metrics_path, mode="w") as f:
        f.write("iteration\tepoch\tbatch\t" +
                "\t".join(self.train_metrics.names) + "\n")
    self.check_cluster_interval = cfg["training"]["check_cluster_interval"]

    # Val metrics & scores
    val_iter_interval = cfg["training"]["val_stat_interval"]
    self.val_stat_interval = val_iter_interval
    self.val_metrics = Metrics('loss_val')
    self.val_metrics_path = self.run_dir / VAL_METRICS_FILE
    with open(self.val_metrics_path, mode="w") as f:
        f.write("iteration\tepoch\tbatch\t" +
                "\t".join(self.val_metrics.names) + "\n")

    self.val_scores = Scores(self.n_classes, self.n_prototypes)
    self.val_scores_path = self.run_dir / VAL_SCORES_FILE
    with open(self.val_scores_path, mode="w") as f:
        f.write("iteration\tepoch\tbatch\t" +
                "\t".join(self.val_scores.names) + "\n")

    # Prototypes & Variances: one sub-directory per prototype
    self.prototypes_path = coerce_to_path_and_create_dir(self.run_dir /
                                                         'prototypes')
    for k in range(self.n_prototypes):
        coerce_to_path_and_create_dir(self.prototypes_path / f'proto{k}')

    if self.is_gmm:
        self.variances_path = coerce_to_path_and_create_dir(self.run_dir /
                                                            'variances')
        for k in range(self.n_prototypes):
            coerce_to_path_and_create_dir(self.variances_path / f'var{k}')

    # Transformation predictions: the first images of one training batch
    # are saved as inputs, with one sub-directory per prototype.
    self.transformation_path = coerce_to_path_and_create_dir(
        self.run_dir / 'transformations')
    self.images_to_tsf = next(iter(
        self.train_loader))[0][:N_TRANSFORMATION_PREDICTIONS].to(
            self.device)
    for i in range(self.images_to_tsf.size(0)):
        out = coerce_to_path_and_create_dir(self.transformation_path /
                                            f'img{i}')
        convert_to_img(self.images_to_tsf[i]).save(out / 'input.png')
        for k in range(self.n_prototypes):
            coerce_to_path_and_create_dir(out / f'tsf{k}')

    # Visdom
    viz_port = cfg["training"].get("visualizer_port")
    if viz_port is not None:
        from visdom import Visdom
        # Clear the proxy setting for this process before connecting.
        os.environ["http_proxy"] = ""
        self.visualizer = Visdom(
            port=viz_port,
            env=f'{self.run_dir.parent.name}_{self.run_dir.name}')
        self.visualizer.delete_env(
            self.visualizer.env)  # Clean env before plotting
        self.print_and_log_info(f"Visualizer initialised at {viz_port}")
    else:
        self.visualizer = None
        self.print_and_log_info("No visualizer initialized")
def __init__(self, manifest_urls, output_dir, width=None, height=None):
    """Record the manifests to fetch and where to store the downloads.

    Args:
        manifest_urls: stored as `self.manifest_urls`.
        output_dir: directory created if needed.
        width: forwarded to `self.get_formatted_size`.
        height: forwarded to `self.get_formatted_size`.
    """
    self.manifest_urls = manifest_urls
    target_dir = coerce_to_path_and_create_dir(output_dir)
    self.output_dir = target_dir
    # The size string is computed once from the requested dimensions.
    formatted_size = self.get_formatted_size(width, height)
    self.size = formatted_size
def __init__(self, config_path, run_dir):
    """Build every component of a training run from a YAML config file.

    The config is copied into `run_dir`, then datasets, dataloaders, model,
    optimizer, scheduler, loss and metric files are instantiated from its
    "dataset", "model" and "training" sections.

    Args:
        config_path: path to an existing YAML configuration file.
        run_dir: directory created if needed; receives logs and metrics.

    Raises:
        ValueError: if the config defines neither `training.n_iterations`
            nor `training.n_epoches`.
        AssertionError: if both of them are defined, if both `pretrained`
            and `resume` are defined, or if the scheduler `update_range`
            is not "epoch" or "batch".
    """
    self.config_path = coerce_to_path_and_check_exist(config_path)
    self.run_dir = coerce_to_path_and_create_dir(run_dir)
    self.logger = get_logger(self.run_dir, name="trainer")
    self.print_and_log_info(
        "Trainer initialisation: run directory is {}".format(run_dir))

    shutil.copy(self.config_path, self.run_dir)
    self.print_and_log_info("Config {} copied to run directory".format(
        self.config_path))

    # NOTE(review): yaml.FullLoader is not meant for untrusted documents;
    # consider yaml.safe_load if configs can come from external sources.
    with open(self.config_path) as fp:
        cfg = yaml.load(fp, Loader=yaml.FullLoader)

    if torch.cuda.is_available():
        type_device = "cuda"
        nb_device = torch.cuda.device_count()
        # XXX: set to False when input image sizes are not fixed
        torch.backends.cudnn.benchmark = cfg["training"].get(
            "cudnn_benchmark", True)
    else:
        type_device = "cpu"
        nb_device = None
    self.device = torch.device(type_device)
    self.print_and_log_info("Using {} device, nb_device is {}".format(
        type_device, nb_device))

    # Datasets and dataloaders
    self.dataset_kwargs = cfg["dataset"]
    self.dataset_name = self.dataset_kwargs.pop("name")
    train_dataset = get_dataset(self.dataset_name)("train",
                                                   **self.dataset_kwargs)
    val_dataset = get_dataset(self.dataset_name)("val",
                                                 **self.dataset_kwargs)
    self.restricted_labels = sorted(
        self.dataset_kwargs["restricted_labels"])
    # One class per restricted label, plus one extra class.
    self.n_classes = len(self.restricted_labels) + 1
    self.is_val_empty = len(val_dataset) == 0
    self.print_and_log_info("Dataset {} instantiated with {}".format(
        self.dataset_name, self.dataset_kwargs))
    self.print_and_log_info(
        "Found {} classes, {} train samples, {} val samples".format(
            self.n_classes, len(train_dataset), len(val_dataset)))

    self.batch_size = cfg["training"]["batch_size"]
    self.n_workers = cfg["training"]["n_workers"]
    self.train_loader = DataLoader(train_dataset,
                                   batch_size=self.batch_size,
                                   num_workers=self.n_workers,
                                   shuffle=True)
    self.val_loader = DataLoader(val_dataset,
                                 batch_size=self.batch_size,
                                 num_workers=self.n_workers)
    self.print_and_log_info(
        "Dataloaders instantiated with batch_size={} and n_workers={}".
        format(self.batch_size, self.n_workers))

    # Training duration: exactly one of n_iterations / n_epoches is
    # expected, the other one is derived from the number of batches.
    self.n_batches = len(self.train_loader)
    self.n_iterations, self.n_epoches = cfg["training"].get(
        "n_iterations"), cfg["training"].get("n_epoches")
    assert not (self.n_iterations is not None
                and self.n_epoches is not None)
    if self.n_iterations is not None:
        self.n_epoches = max(self.n_iterations // self.n_batches, 1)
    elif self.n_epoches is not None:
        self.n_iterations = self.n_epoches * self.n_batches
    else:
        # Previously this surfaced as an opaque `None * int` TypeError.
        raise ValueError(
            "Config must define either `training.n_iterations` or "
            "`training.n_epoches`")

    # Model: the optimizer is built from the unwrapped module's parameters,
    # the DataParallel wrapper is what gets stored on the trainer.
    self.model_kwargs = cfg["model"]
    self.model_name = self.model_kwargs.pop("name")
    model = get_model(self.model_name)(self.n_classes,
                                       **self.model_kwargs).to(self.device)
    self.model = torch.nn.DataParallel(model,
                                       device_ids=range(
                                           torch.cuda.device_count()))
    self.print_and_log_info("Using model {} with kwargs {}".format(
        self.model_name, self.model_kwargs))
    self.print_and_log_info('Number of trainable parameters: {}'.format(
        f'{count_parameters(self.model):,}'))

    # Optimizer
    optimizer_params = cfg["training"]["optimizer"] or {}
    optimizer_name = optimizer_params.pop("name", None)
    self.optimizer = get_optimizer(optimizer_name)(model.parameters(),
                                                   **optimizer_params)
    self.print_and_log_info("Using optimizer {} with kwargs {}".format(
        optimizer_name, optimizer_params))

    # Scheduler
    scheduler_params = cfg["training"].get("scheduler", {}) or {}
    scheduler_name = scheduler_params.pop("name", None)
    self.scheduler_update_range = scheduler_params.pop(
        "update_range", "epoch")
    assert self.scheduler_update_range in ["epoch", "batch"]
    if scheduler_name == "multi_step" and isinstance(
            scheduler_params["milestones"][0], float):
        # Float milestones are fractions of the total number of steps.
        n_tot = self.n_epoches if self.scheduler_update_range == "epoch" else self.n_iterations
        scheduler_params["milestones"] = [
            round(m * n_tot) for m in scheduler_params["milestones"]
        ]
    self.scheduler = get_scheduler(scheduler_name)(self.optimizer,
                                                   **scheduler_params)
    self.cur_lr = -1
    self.print_and_log_info("Using scheduler {} with parameters {}".format(
        scheduler_name, scheduler_params))

    # Loss
    loss_name = cfg["training"]["loss"]
    self.criterion = get_loss(loss_name)()
    self.print_and_log_info("Using loss {}".format(self.criterion))

    # Pretrained / Resume
    checkpoint_path = cfg["training"].get("pretrained")
    checkpoint_path_resume = cfg["training"].get("resume")
    assert not (checkpoint_path is not None
                and checkpoint_path_resume is not None)
    if checkpoint_path is not None:
        self.load_from_tag(checkpoint_path)
    elif checkpoint_path_resume is not None:
        self.load_from_tag(checkpoint_path_resume, resume=True)
    else:
        self.start_epoch, self.start_batch = 1, 1

    # Train metrics. The default interval is clamped to at least 1: for
    # runs shorter than 200 iterations the integer division yields 0,
    # which is not a usable interval. An explicit config value is kept
    # untouched.
    train_iter_interval = cfg["training"].get(
        "train_stat_interval",
        max(self.n_epoches * self.n_batches // 200, 1))
    self.train_stat_interval = train_iter_interval
    self.train_time = AverageMeter()
    self.train_loss = AverageMeter()
    self.train_metrics_path = self.run_dir / TRAIN_METRICS_FILE
    with open(self.train_metrics_path, mode="w") as f:
        f.write(
            "iteration\tepoch\tbatch\ttrain_loss\ttrain_time_per_img\n")

    # Val metrics. Same clamping as above for the default interval.
    val_iter_interval = cfg["training"].get(
        "val_stat_interval",
        max(self.n_epoches * self.n_batches // 100, 1))
    self.val_stat_interval = val_iter_interval
    self.val_loss = AverageMeter()
    self.val_metrics = RunningMetrics(self.restricted_labels)
    self.val_current_score = None
    self.val_metrics_path = self.run_dir / VAL_METRICS_FILE
    with open(self.val_metrics_path, mode="w") as f:
        f.write("iteration\tepoch\tbatch\tval_loss\t" +
                "\t".join(self.val_metrics.names) + "\n")
def __init__(self, output_dir=SYNTHETIC_LINE_DATASET_PATH, verbose=True):
    """Create the output directory together with its split sub-directories.

    The output directory goes through `coerce_to_path_and_create_dir`, then
    its 'train', 'val' and 'test' children are created if missing.
    """
    root = coerce_to_path_and_create_dir(output_dir)
    self.output_dir = root
    self.verbose = verbose

    # One folder per dataset split; existing folders are left untouched.
    for split_name in ('train', 'val', 'test'):
        (root / split_name).mkdir(exist_ok=True)
def __init__(self, input_dir, output_dir, labels_to_extract=None, in_ext=VALID_EXTENSIONS, out_ext='jpg', tag='default', save_annotations=True, straight_bbox=False, add_margin=True, draw_margin=False, verbose=True):
    """Load the model identified by `tag` and prepare an extraction run.

    Args:
        input_dir: existing directory, scanned recursively for files whose
            extension is in `in_ext`.
        output_dir: directory created if needed; it also hosts the logger.
        labels_to_extract: labels to extract, sorted before being stored;
            defaults to `[1, 4]`.
        in_ext: valid input extensions.
        out_ext: stored in `self.out_extension`.
        tag: sub-directory of `MODELS_PATH` containing `MODEL_FILE`.
        save_annotations: stored in `self.save_annotations`.
        straight_bbox: stored in `self.straight_bbox`.
        add_margin: stored in `self.add_margin`.
        draw_margin: only effective when `add_margin` is truthy.
        verbose: stored in `self.verbose`.

    Raises:
        ValueError: if some label to extract is not among the labels the
            model was trained with.
    """
    source_dir = coerce_to_path_and_check_exist(input_dir).absolute()
    self.input_dir = source_dir
    self.files = get_files_from_dir(source_dir,
                                    valid_extensions=in_ext,
                                    recursive=True,
                                    sort=True)
    self.output_dir = coerce_to_path_and_create_dir(output_dir).absolute()
    self.out_extension = out_ext
    self.logger = get_logger(self.output_dir, name='extractor')

    # Model and the attributes stored along with its checkpoint
    model_path = coerce_to_path_and_check_exist(MODELS_PATH / tag / MODEL_FILE)
    if torch.cuda.is_available():
        device_name = 'cuda'
    else:
        device_name = 'cpu'
    self.device = torch.device(device_name)
    wanted_attributes = ['train_resolution', 'restricted_labels', 'normalize']
    self.model, (self.img_size, restricted_labels, self.normalize) = load_model_from_path(
        model_path,
        device=self.device,
        attributes_to_return=wanted_attributes)
    self.model.eval()

    # Labels: every label to extract must be known to the model
    self.restricted_labels = sorted(restricted_labels)
    if labels_to_extract is None:
        self.labels_to_extract = [1, 4]
    else:
        self.labels_to_extract = sorted(labels_to_extract)
    if not set(self.labels_to_extract) <= set(self.restricted_labels):
        raise ValueError(
            'Incompatible `labels_to_extract` and `tag` arguments: '
            f'model was trained using {self.restricted_labels} labels only'
        )

    self.save_annotations, self.straight_bbox = save_annotations, straight_bbox
    self.add_margin = add_margin
    self.draw_margin = add_margin and draw_margin
    self.verbose = verbose

    init_kwargs = {
        'tag': tag,
        'labels_to_extract': self.labels_to_extract,
        'save_annotations': save_annotations,
        'straight_bbox': straight_bbox,
        'add_margin': add_margin,
        'draw_margin': draw_margin
    }
    self.print_and_log_info('Extractor initialised with kwargs {}'.format(init_kwargs))
    self.print_and_log_info(
        'Model characteristics: train_resolution={}, restricted_labels={}'.
        format(self.img_size, self.restricted_labels))
    n_files = len(self.files)
    self.print_and_log_info('Found {} input files to process'.format(n_files))