def main(args: RunConfiguration):
    """Run WSOL evaluation from saved CAMs and write the performance to JSON."""
    cam_loader_getter = get_cam_loader_getter(args=args)

    # The box-v2 metric averages over several IoU thresholds and contours;
    # v1 evaluates a single contour at IoU 50 only.
    if args.box_v2_metric:
        multi_contour_eval = multi_iou_eval = True
        iou_threshold_list = [30, 50, 70]
    else:
        multi_contour_eval = multi_iou_eval = False
        iou_threshold_list = [50]

    performance = evaluate_wsol_from_cam_loader(
        cam_loader_getter=cam_loader_getter,
        metadata_root=args.metadata_path,
        mask_root=args.dataset_path,
        dataset_name=args.dataset,
        split=args.dataset_split,
        multi_contour_eval=multi_contour_eval,
        multi_iou_eval=multi_iou_eval,
        iou_threshold_list=iou_threshold_list,
        cam_curve_interval=args.cam_curve_interval,
    )

    # Default output location lives under the CASM base path.
    output_base_path = (
        args.output_base_path
        if args.output_base_path
        else os.path.join(args.casm_base_path, "wsoleval")
    )
    os.makedirs(output_base_path, exist_ok=True)
    metric_version = "v2" if args.box_v2_metric else "v1"
    file_name = "{}___{}___{}.json".format(
        args.dataset,
        args.dataset_split,
        metric_version,
    )
    io.write_json(
        {"performance": performance},
        os.path.join(output_base_path, file_name),
    )
def save_model_with_metadata(model: nn.Module, metadata: dict, output_dir: str, file_name="model"):
    """Persist a model's state dict plus a sibling JSON metadata file.

    Writes ``<output_dir>/<file_name>.p`` (weights) and
    ``<output_dir>/<file_name>.metadata.json`` (metadata).
    """
    weights_path = os.path.join(output_dir, f"{file_name}.p")
    metadata_path = os.path.join(output_dir, f"{file_name}.metadata.json")
    torch.save(model.state_dict(), weights_path)
    io.write_json(metadata, metadata_path)
def subsample_train(base_config_path, out_config_path, out_data_path,
                    out_metadata_path, num_samples_per_class=None,
                    num_samples=None):
    """Subsample a training set and write a new (config, data, metadata) triple.

    Exactly one of ``num_samples`` (uniform over all examples) or
    ``num_samples_per_class`` (stratified by label) must be given.

    :param base_config_path: path to the source task config JSON
    :param out_config_path: where to write the new config (train path swapped)
    :param out_data_path: where to write the subsampled JSONL data
    :param out_metadata_path: where to write the selected-index metadata
    :param num_samples_per_class: per-label sample count (stratified mode)
    :param num_samples: total sample count (uniform mode)
    :raises RuntimeError: if neither or both sampling arguments are given
    """
    config = io.read_json(base_config_path)
    raw_train_examples = io.read_jsonl(config["paths"]["train"])
    # Rebuild the nested "paths" dict instead of assigning into it:
    # config.copy() is shallow, so mutating config["paths"] would also
    # change the config object read from disk.
    new_config = {**config, "paths": {**config["paths"], "train": out_data_path}}

    if num_samples_per_class is None and num_samples is not None:
        # NOTE(review): random.choices samples WITH replacement, so the
        # subsample may contain duplicates — confirm this is intended
        # (random.sample would sample without replacement).
        selected_examples = random.choices(
            list(range(len(raw_train_examples))),
            k=num_samples,
        )
        sub_examples = [raw_train_examples[i] for i in selected_examples]
        # Record the selected indices (previously `[sub_examples]`, which
        # dumped the full examples — inconsistent with the stratified
        # branch below, which records indices).
        metadata = selected_examples
    elif num_samples_per_class is not None and num_samples is None:
        index_label_list = [
            {"idx": i, "label": example["label"]}
            for i, example in enumerate(raw_train_examples)
        ]
        grouped = datastructures.group_by(index_label_list, lambda _: _["label"])
        # Sort labels so output ordering is deterministic across runs.
        sorted_keys = sorted(list(grouped.keys()))
        sub_examples = []
        metadata = {}
        for key in sorted_keys:
            key_examples = grouped[key]
            indices = [_["idx"] for _ in key_examples]
            # See NOTE above: with-replacement sampling here too.
            selected_key_examples = random.choices(indices, k=num_samples_per_class)
            sub_examples += [raw_train_examples[i] for i in selected_key_examples]
            metadata[key] = selected_key_examples
    else:
        raise RuntimeError(
            "Provide exactly one of num_samples_per_class / num_samples")

    io.create_containing_folder(out_config_path)
    io.create_containing_folder(out_data_path)
    io.create_containing_folder(out_metadata_path)
    io.write_json(new_config, out_config_path)
    io.write_jsonl(sub_examples, out_data_path)
    io.write_json(metadata, out_metadata_path)
def set_args(args):
    """Prepare output directories, derive run paths, and persist the args.

    Side effects: creates ``args.output_path`` and ``args.casms_path``,
    sets ``args.casms_path`` / ``args.log_path`` on the args object,
    optionally applies reproduction settings, and writes ``args.json``.
    """
    os.makedirs(args.output_path, exist_ok=True)
    args.casms_path = os.path.join(args.output_path, args.name)
    os.makedirs(args.casms_path, exist_ok=True)
    args.log_path = os.path.join(args.casms_path, 'log')
    if args.reproduce != '':
        set_reproduction(args)
    # (Removed an unused `string_args` accumulator that was built from
    # vars(args) in a loop but never read or written anywhere.)
    io.write_json(args.to_dict(), os.path.join(args.casms_path, "args.json"))
def create_input_texts_and_configs(config_path, config_output_path, output_base_path, verbose=True):
    """Write label-scrubbed train examples and per-field text dumps for a task.

    Produces one JSONL of scrubbed examples plus a ``train_<field>.txt`` file
    for every eligible string field, then writes an "unsup" config pointing
    at the scrubbed data.
    """
    task = tasks.create_task_from_config_path(config_path)
    train_examples = task.get_train_examples()
    unsup_config = {"task": task.name, "orig": None, "aug": []}

    # Remove labels in place before anything is written out.
    for example in train_examples:
        unsup_load_data.scrub_label(example, task)
    if verbose:
        print(task.name)

    # Scrubbed examples as JSONL.
    orig_data_path = os.path.join(
        output_base_path, task.name, "orig", "train.unsup.jsonl")
    io.create_containing_folder(orig_data_path)
    io.write_jsonl(
        [example.asdict() for example in train_examples],
        orig_data_path,
    )
    unsup_config["orig"] = orig_data_path

    # One text file per eligible string field of the task's Example dataclass.
    for field_name, field in task.Example.__dataclass_fields__.items():
        if field.type != str:
            continue
        if field_name in GLOBAL_EXCLUSION:
            continue
        if field_name in TASK_EXCLUSION_DICT.get(task.name, []):
            continue
        if verbose:
            print(f"  {field_name}")
        field_txt_path = os.path.join(
            output_base_path, task.name, "orig", f"train_{field_name}.txt")
        io.create_containing_folder(field_txt_path)
        field_lines = [
            getattr(example, field_name).strip()
            for example in train_examples
        ]
        io.write_file("\n".join(field_lines), field_txt_path)

    io.create_containing_folder(config_output_path)
    io.write_json(unsup_config, config_output_path)
def main(args):
    """Compare masks from two CASME models over a validation set.

    Runs both models on every batch, scores mask agreement, and writes the
    mean scores as JSON to ``args.output_path``.
    """
    val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        imagenet_utils.NORMALIZATION,
    ])
    data_loader = torch.utils.data.DataLoader(
        ImagePathDataset.from_path(
            config_path=args.val_json,
            transform=val_transform,
            return_paths=True,
        ),
        batch_size=72,
        shuffle=False,
        num_workers=4,
        pin_memory=False,
    )
    models = [
        casme_load_model(path, classifier_load_mode="pickled", verbose=False)
        for path in (args.casm_path1, args.casm_path2)
    ]

    all_results = {}
    for (input_, target), paths in tqdm.tqdm(data_loader):
        input_ = input_.cuda()
        with torch.no_grad():
            masker_outputs = []
            for model in models:
                _, layers = model['classifier'](input_, return_intermediate=True)
                masker_outputs.append(
                    model['masker'](layers, use_p=None, class_ids=None, no_sigmoid=True))
        score = get_scores(
            masker_outputs[0].cpu().squeeze(1).numpy(),
            masker_outputs[1].cpu().squeeze(1).numpy(),
            reduce=False,
        )
        add_scores(all_results, score)

    io.write_json(pd.DataFrame(all_results).mean().to_dict(), args.output_path)
def preprocess_all_glue_data(input_base_path, output_base_path):
    """Convert every GLUE task into JSONL data files plus a per-task config.

    Layout: ``<output_base_path>/data/<task>/<phase>.jsonl`` and
    ``<output_base_path>/configs/<task>.json``.
    """
    data_base = os.path.join(output_base_path, "data")
    configs_base = os.path.join(output_base_path, "configs")
    for folder in (output_base_path, data_base, configs_base):
        os.makedirs(folder, exist_ok=True)

    for task_name in tqdm.tqdm(GLUE_CONVERSION):
        task_data_path = os.path.join(data_base, task_name)
        os.makedirs(task_data_path, exist_ok=True)
        task_all_examples = get_full_examples(
            task_name=task_name,
            input_base_path=input_base_path,
        )
        config = {"task": task_name, "paths": {}}
        for phase, phase_data in task_all_examples.items():
            phase_data_path = os.path.join(task_data_path, f"{phase}.jsonl")
            io.write_jsonl(data=phase_data, path=phase_data_path)
            config["paths"][phase] = phase_data_path
        io.write_json(
            data=config,
            path=os.path.join(configs_base, f"{task_name}.json"),
        )
def main(args: RunConfiguration):
    """Score localization for one of several mask sources on a validation set.

    Builds the requested model (a special baseline, a loaded CASME model, or
    externally supplied bboxes), scores it against ground-truth boxes, and
    writes results (and optionally candidate boxes) as JSON.
    """
    # data loading code
    data_loader = torch.utils.data.DataLoader(
        ImagePathDataset.from_path(
            config_path=args.val_json,
            transform=transforms.Compose([
                transforms.Resize([224, 224] if args.break_ratio else 224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                imagenet_utils.NORMALIZATION,
            ]),
            return_paths=True,
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False,
    )
    original_classifier = archs.resnet50shared(pretrained=True).eval().to(device)

    # Modes that only need the shared classifier plus a 'special' tag.
    simple_special_modes = {
        "max": "max",
        "min": "min",
        "center": "center",
        "ground_truth": "ground_truth",
        "torchray_grad_cam": "grad_cam",
        "torchray_guided_backprop": "guided_backprop",
    }
    if args.mode in simple_special_modes:
        model = {
            'special': simple_special_modes[args.mode],
            'classifier': original_classifier,
        }
    elif args.mode == "casme":
        model = casme_load_model(
            args.casm_path,
            classifier_load_mode=args.classifier_load_mode,
        )
    elif args.mode == "external":
        model = {
            'special': 'external',
            'classifier': original_classifier,
            'bboxes': io.read_json(args.casm_path),
        }
    else:
        raise KeyError(args.mode)

    gt_bboxes = io.read_json(args.bboxes_path)
    results, candidate_bbox_ls = score(
        args=args,
        model=model,
        data_loader=data_loader,
        bboxes=gt_bboxes,
        original_classifier=original_classifier,
        record_bboxes=args.record_bboxes,
    )
    io.write_json(results, args.output_path)
    if args.record_bboxes:
        assert candidate_bbox_ls
        io.write_json(
            [bbox.to_dict() for bbox in candidate_bbox_ls],
            args.record_bboxes,
        )
def main(args):
    """Train/evaluate a UDA+LLP runner for a task loaded from a UDA config.

    Flow: init environment -> load model -> assemble labeled + (optionally
    subsampled) unlabeled train data -> build schedule/loss/optimizer ->
    construct the UDALLPRunner -> optionally train, save, validate, test.
    """
    quick_init_out = initialization.quick_init(args=args, verbose=True)
    task, uda_task_data = uda_load_data.load_task_data_from_path(
        args.uda_task_config_path)
    # Only rank 0 downloads/initializes the model; other ranks wait.
    with distributed.only_first_process(local_rank=args.local_rank):
        # load the model
        model_wrapper = llp_model_setup.setup_model(
            model_type=args.model_type,
            task=task,
            llp_embedding_dim=args.llp_embedding_dim,
            config_path=args.model_config_path,
            tokenizer_path=args.model_tokenizer_path,
        )
        llp_model_setup.load_model(
            model=model_wrapper.model,
            state_dict=torch.load(args.model_path),
            load_mode=args.model_load_mode,
        )
        model_wrapper.model.to(quick_init_out.device)
    # === Train Data Setup [START] === #
    labeled_examples = uda_task_data["sup"]["train"]
    # Unlabeled pool may be capped by count or by fraction; `indices` is
    # None when no subsampling happened.
    unlabeled_examples, indices = train_setup.maybe_subsample_train(
        train_examples=uda_task_data["unsup"]["orig"],
        train_examples_number=args.unlabeled_train_examples_number,
        train_examples_fraction=args.unlabeled_train_examples_fraction,
    )
    if indices is not None:
        # Persist which unlabeled examples were picked, for reproducibility.
        write_json(indices, os.path.join(args.output_dir, "sampled_indices.json"))
    train_examples = labeled_examples + unlabeled_examples
    num_train_examples = len(train_examples)
    # === Train Data Setup [END] === #
    train_schedule = train_setup.get_train_schedule(
        num_train_examples=num_train_examples,
        max_steps=args.max_steps,
        num_train_epochs=args.num_train_epochs,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        per_gpu_train_batch_size=args.train_batch_size,
        n_gpu=quick_init_out.n_gpu,
    )
    print("t_total", train_schedule.t_total)
    loss_criterion = train_setup.resolve_loss_function(
        task_type=task.TASK_TYPE)
    optimizer_scheduler = shared_model_setup.create_optimizer(
        model=model_wrapper.model,
        learning_rate=args.learning_rate,
        t_total=train_schedule.t_total,
        warmup_steps=args.warmup_steps,
        warmup_proportion=args.warmup_proportion,
        optimizer_type=args.optimizer_type,
        verbose=True,
    )
    # I don't think this works for LLP...
    shared_model_setup.special_model_setup(
        model_wrapper=model_wrapper,
        optimizer_scheduler=optimizer_scheduler,
        fp16=args.fp16,
        fp16_opt_level=args.fp16_opt_level,
        n_gpu=quick_init_out.n_gpu,
        local_rank=args.local_rank,
    )
    rparams = llp_runner.RunnerParameters(
        feat_spec=model_resolution.build_featurization_spec(
            model_type=args.model_type,
            max_seq_length=args.max_seq_length,
        ),
        local_rank=args.local_rank,
        n_gpu=quick_init_out.n_gpu,
        fp16=args.fp16,
        learning_rate=args.learning_rate,
        eval_batch_size=args.eval_batch_size,
        max_grad_norm=args.max_grad_norm,
    )
    llp_params = llp_runner.LlpParameters(
        num_labeled=len(labeled_examples),
        llp_embedding_dim=args.llp_embedding_dim,
        llp_const_k=args.llp_const_k,
        llp_const_t=args.llp_const_t,
        llp_const_tau=args.llp_const_tau,
        llp_prop_chunk_size=args.llp_prop_chunk_size,
        llp_mem_bank_t=args.llp_mem_bank_t,
        llp_rep_global_agg_loss_lambda=args.llp_rep_global_agg_loss_lambda,
        llp_embedding_norm_loss=args.llp_embedding_norm_loss,
        llp_compute_global_agg_loss_mode=args.llp_compute_global_agg_loss_mode,
    )
    llpuda_params = uda_llp_runner.LLPUDAParameters(
        uda_coeff=args.uda_coeff,
        # unsup_ratio == 0 disables the unsupervised objective entirely.
        use_unsup=args.unsup_ratio != 0,
        unsup_ratio=args.unsup_ratio,
    )
    with quick_init_out.log_writer.log_context():
        runner = uda_llp_runner.UDALLPRunner(
            task=task,
            model_wrapper=model_wrapper,
            optimizer_scheduler=optimizer_scheduler,
            loss_criterion=loss_criterion,
            device=quick_init_out.device,
            rparams=rparams,
            llp_params=llp_params,
            llpuda_params=llpuda_params,
            train_schedule=train_schedule,
            log_writer=quick_init_out.log_writer,
        )
        if args.do_train:
            # LLP state (embedding bank etc.) must be initialized before training.
            runner.init_llp_state(train_examples)
            runner.run_train(train_examples, uda_task_data)
        if args.do_save:
            torch.save(model_wrapper.model.state_dict(),
                       os.path.join(args.output_dir, "model.p"))
        if args.do_val:
            val_examples = task.get_val_examples()
            results = runner.run_val(val_examples)
            evaluate.write_val_results(
                results=results,
                output_dir=args.output_dir,
                verbose=True,
            )
        if args.do_test:
            test_examples = task.get_test_examples()
            logits = runner.run_test(test_examples)
            evaluate.write_preds(
                logits=logits,
                output_path=os.path.join(args.output_dir, "test_preds.csv"),
            )
def generate_jsons(train_path, val_path, output_base_path,
                   num_per_class_in_a=50, seed=1234):
    """Write dataset-config JSONs for an image folder: train, a per-class
    train_val/train_train split, a label-shuffled train, and val.

    :param train_path: root of the training image folder
    :param val_path: root of the validation image folder
    :param output_base_path: directory receiving the JSON files
    :param num_per_class_in_a: samples per class routed to train_val
    :param seed: seed for the local RandomState used for all randomness
    """
    random_state = np.random.RandomState(seed=seed)
    classes, class_to_idx = find_classes(train_path)
    samples = make_dataset(train_path, class_to_idx, IMG_EXTENSIONS)
    random_state.shuffle(samples)

    # Train
    io.write_json(
        {
            "root": train_path,
            "samples": samples,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train.json"),
    )

    # Resampled Train: group shuffled samples by class.
    class_dict = {}
    for path, class_idx in samples:
        if class_idx not in class_dict:
            class_dict[class_idx] = []
        class_dict[class_idx].append((path, class_idx))

    # Per class, route `num_per_class_in_a` samples to split A (train_val)
    # and the rest to split B (train_train).
    samples_a, samples_b = [], []
    for class_idx in range(len(class_dict)):
        class_samples = class_dict[class_idx]
        chosen = set(
            random_state.choice(np.arange(len(class_samples)),
                                num_per_class_in_a,
                                replace=False))
        for i, sample in enumerate(class_samples):
            if i in chosen:
                samples_a.append(sample)
            else:
                samples_b.append(sample)
    io.write_json(
        {
            "root": train_path,
            "samples": samples_a,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train_val.json"),
    )
    io.write_json(
        {
            "root": train_path,
            "samples": samples_b,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train_train.json"),
    )

    # Shuffled Train: was np.random.randint (the global RNG), which ignored
    # `seed` and made this output non-reproducible; use the seeded state.
    random_classes = random_state.randint(1000, size=len(samples))
    io.write_json(
        {
            "root": train_path,
            "samples": [(path, int(c))
                        for (path, _), c in zip(samples, random_classes)],
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train_shuffle.json"),
    )

    # Val
    classes, class_to_idx = find_classes(val_path)
    val_samples = make_dataset(val_path, class_to_idx, IMG_EXTENSIONS)
    io.write_json(
        {
            "root": val_path,
            "samples": val_samples,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "val.json"),
    )
def generate_jsons_with_extended_annot(train_path, val_path,
                                       val_annotation_path,
                                       output_base_path,
                                       extended_annot_base_path=None,
                                       num_per_class_in_train_val=50,
                                       seed=1234):
    """Write dataset-config JSONs, splitting train by annotation availability.

    Like generate_jsons, but (when extended annotations are given) train_val
    is drawn only from samples that have extended annotations, and the
    corresponding ground-truth boxes are exported alongside the splits.

    :param train_path: root of the training image folder
    :param val_path: root of the validation image folder
    :param val_annotation_path: annotation source for the val bbox export
    :param output_base_path: directory receiving all JSON outputs
    :param extended_annot_base_path: optional root of extended annotations;
        when None, no train_val/train_train split is produced
    :param num_per_class_in_train_val: annotated samples per class in train_val
    :param seed: seed for the local RandomState used for all randomness
    """
    os.makedirs(output_base_path, exist_ok=True)
    random_state = np.random.RandomState(seed=seed)
    classes, class_to_idx = find_classes(train_path)
    train_samples = make_dataset(train_path, class_to_idx, IMG_EXTENSIONS)
    random_state.shuffle(train_samples)

    # 1. Train
    io.write_json(
        {
            "root": train_path,
            "samples": train_samples,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train.json"),
    )

    # 2. New Train, TrainVal split
    if extended_annot_base_path is not None:
        annot_data = get_extended_annot_data(extended_annot_base_path)
        # 2a. Put samples that have annotations into class_dict, others into
        # remaining_samples.
        class_dict = {}
        remaining_samples = []
        for path, class_idx in train_samples:
            if class_idx not in class_dict:
                class_dict[class_idx] = []
            folder_id, class_id, file_id = split_path(path)
            assert folder_id == class_id
            if (class_id, file_id) in annot_data:
                class_dict[class_idx].append((path, class_idx))
            else:
                remaining_samples.append((path, class_idx))
        print(len(train_samples), len(remaining_samples))

        # 2b. Construct splits: the first num_per_class_in_train_val annotated
        # samples of each class go to train_val, the rest join train_train.
        train_val_samples = []
        train_train_samples: list = remaining_samples.copy()
        for annot_class_samples in class_dict.values():
            random_state.shuffle(annot_class_samples)
            train_val_samples += annot_class_samples[:num_per_class_in_train_val]
            train_train_samples += annot_class_samples[num_per_class_in_train_val:]
        random_state.shuffle(train_val_samples)
        random_state.shuffle(train_train_samples)

        # 2c. Construct annot subsamples: ground-truth boxes for every
        # train_val sample, keyed by "<class>_<file>".
        subsampled_annot_data = {}
        for path, _ in train_val_samples:
            _, class_id, file_id = split_path(path)
            subsampled_annot_data[
                f"{class_id}_{file_id}"], metadata = export_bboxes.get_gt_boxes(
                    ann_path=annot_data[(class_id, file_id)],
                    category=class_id,
                    break_ratio=False,
                    html_lib="html.parser",
                )
        print(len(subsampled_annot_data))

        # 2d. Write all
        io.write_json(
            {
                "root": train_path,
                "samples": train_val_samples,
                "classes": classes,
                "class_to_idx": class_to_idx,
            },
            os.path.join(output_base_path, "train_val.json"),
        )
        io.write_json(
            {
                "root": train_path,
                "samples": train_train_samples,
                "classes": classes,
                "class_to_idx": class_to_idx,
            },
            os.path.join(output_base_path, "train_train.json"),
        )
        io.write_json(
            subsampled_annot_data,
            os.path.join(output_base_path, "train_val_bboxes.json"),
        )

    # 3. Shuffled Train: was np.random.randint (the global RNG), which ignored
    # `seed` and made this output non-reproducible; use the seeded state.
    random_classes = random_state.randint(1000, size=len(train_samples))
    shuffled_samples = [(path, int(c))
                        for (path, _), c in zip(train_samples, random_classes)]
    io.write_json(
        {
            "root": train_path,
            "samples": shuffled_samples,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "train_shuffle.json"),
    )

    # Val
    classes, class_to_idx = find_classes(val_path)
    val_samples = make_dataset(val_path, class_to_idx, IMG_EXTENSIONS)
    io.write_json(
        {
            "root": val_path,
            "samples": val_samples,
            "classes": classes,
            "class_to_idx": class_to_idx,
        },
        os.path.join(output_base_path, "val.json"),
    )
    export_bboxes.get_annotations_and_write(
        data_path=val_path,
        annotation_path=val_annotation_path,
        break_ratio=False,
        output_path=os.path.join(output_base_path, "val_bboxes.json"),
    )
def main(args: RunConfiguration):
    """Parameter-randomization sanity check for a CASME model.

    For each of the first ``args.num_batches`` batches, compares masks from
    the intact model against masks produced after cascading and independent
    randomization of classifier layers, optionally saving per-layer plots,
    and writes compiled scores to ``<output_path>/scores.json``.
    """
    data_loader = torch.utils.data.DataLoader(
        ImagePathDataset.from_path(
            config_path=args.val_json,
            transform=transforms.Compose([
                transforms.Resize([224, 224] if args.break_ratio else 224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                imagenet_utils.NORMALIZATION,
            ]),
            return_paths=True,
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False,
    )
    model = casme_load_model(args.casm_path)
    set_no_grad(model["classifier"])
    os.makedirs(args.output_path, exist_ok=True)

    all_results_dict = {
        "cascading": {},
        "independent": {},
    }
    plot_path_dict = {"cascading": [], "independent": []}
    for batch_i, ((input_, target), paths) in enumerate(tqdm.tqdm(
            data_loader, desc="batch", total=args.num_batches)):
        if batch_i >= args.num_batches:
            break
        img = get_image_arr(input_)[args.plot_img_i]
        plot_base_path = os.path.join(
            args.output_path, "plot__{}__{}".format(batch_i, args.plot_img_i))
        print(plot_base_path)
        input_ = input_.to(device)

        # Baseline: masks from the untouched model.
        continuous, _, _, _, _, _ = get_masks_and_check_predictions(
            input_, target, model, no_sigmoid=True)
        gold_continuous = continuous
        scores = get_scores(gold_continuous, continuous, reduce=False)
        if args.do_plot:
            os.makedirs(plot_base_path, exist_ok=True)
            plot_file_name = "{}.png".format("normal")
            save_fig(
                img=img,
                mask=continuous[args.plot_img_i],
                title="normal",
                path=os.path.join(plot_base_path, plot_file_name),
            )
            plot_path_dict["cascading"].append(plot_file_name)
            plot_path_dict["independent"].append(plot_file_name)

        # Record normal scores
        if "normal" not in all_results_dict["cascading"]:
            all_results_dict["cascading"]["normal"] = {}
            all_results_dict["independent"]["normal"] = {}
        add_scores(d=all_results_dict["cascading"]["normal"], new_d=scores)
        add_scores(d=all_results_dict["independent"]["normal"], new_d=scores)

        # Cascading: randomize layers cumulatively, rescoring after each.
        for layer_i, layer_name in enumerate(tqdm.tqdm(
                cascading_parameter_randomization_generator(
                    model["classifier"], depth=args.layer_depth),
                desc="cascading")):
            if layer_name not in all_results_dict["cascading"]:
                all_results_dict["cascading"][layer_name] = {}
            continuous, _, _, _, _, _ = get_masks_and_check_predictions(
                input_, target, model, no_sigmoid=True)
            add_scores(
                d=all_results_dict["cascading"][layer_name],
                new_d=get_scores(gold_continuous, continuous, reduce=False),
            )
            if args.do_plot:
                # Fix: the template previously had no placeholder for
                # layer_name ("cascading__{:02d}__.png"), so the extra
                # format argument was silently dropped from the filename.
                plot_file_name = "cascading__{:02d}__{}.png".format(
                    layer_i, layer_name)
                save_fig(
                    img=img,
                    mask=continuous[args.plot_img_i],
                    title=layer_name,
                    path=os.path.join(plot_base_path, plot_file_name),
                )
                plot_path_dict["cascading"].append(plot_file_name)

        # Independent: randomize one layer at a time.
        for layer_i, layer_name in enumerate(tqdm.tqdm(
                independent_parameter_randomization_generator(
                    model["classifier"], depth=args.layer_depth),
                desc="independent")):
            if layer_name not in all_results_dict["independent"]:
                all_results_dict["independent"][layer_name] = {}
            # NOTE(review): unlike the baseline and cascading paths, this call
            # omits no_sigmoid=True — confirm whether that asymmetry is
            # intentional before relying on cross-mode comparisons.
            continuous, _, _, _, _, _ = get_masks_and_check_predictions(
                input_, target, model)
            add_scores(
                d=all_results_dict["independent"][layer_name],
                new_d=get_scores(gold_continuous, continuous, reduce=False),
            )
            if args.do_plot:
                # Fix: same missing {} placeholder as the cascading plots.
                plot_file_name = "independent__{:02d}__{}.png".format(
                    layer_i, layer_name)
                save_fig(
                    img=img,
                    mask=continuous[args.plot_img_i],
                    title=layer_name,
                    path=os.path.join(plot_base_path, plot_file_name),
                )
                plot_path_dict["independent"].append(plot_file_name)

        write_html(plot_path_dict, plot_base_path)

    results = compile_results(all_results_dict)
    io.write_json(results, os.path.join(args.output_path, "scores.json"))