def label(self, report_pairs, reports_path, extract_impression):
    """Label the provided report(s).

    Runs the load -> extract -> classify -> attribute pipeline over the
    reports, mutating the loaded collection in place at each step.

    Returns:
        A tuple of (reports, attributes): the loaded reports and the
        per-report attribute output produced by the aggregator.
    """
    # Pull the reports into an annotatable collection.
    report_loader = Loader(report_pairs, reports_path, extract_impression)
    report_loader.load()

    # Mark observation mentions within the loaded collection (in place).
    self.extractor.extract(report_loader.collection)

    # Attach classifications (e.g. negation/category) to those mentions (in place).
    self.classifier.classify(report_loader.collection)

    # Emit mentions/categories/negation/attributes for every report.
    attributes = self.aggregator.getAttributeOutput(report_loader.collection)

    return report_loader.reports, attributes
def main(output_dir, model_name, batch_size, num_workers, augment_epoch,
         unaugment_epoch, device, label_type, confidence, num_classes,
         epochs_per_save, teacher_noise, data_config, model_config):
    """Run the multi-phase noisy-student training loop.

    Each phase trains a student model; once a phase finishes, the trained
    model becomes the teacher that pseudo-labels data for the next phase's
    student (via NS_DataLoader), until every model listed in
    ``model_config["models"]`` has been trained.

    Args:
        output_dir: Root directory; a ``model_name`` subdirectory is created
            under it for checkpoints and logs.
        model_name: Name of the run; also used as the output subdirectory.
        batch_size / num_workers: DataLoader settings.
        augment_epoch / unaugment_epoch: Epoch boundaries forwarded to
            ``train_model``.
        device: Device to train on.
        label_type: Pseudo-label style; must be "soft" or "hard".
        confidence: Confidence threshold forwarded to NS_DataLoader.
        num_classes: Number of output classes.
        epochs_per_save: Checkpoint frequency forwarded to ``train_model``.
        teacher_noise: If truthy, the first (teacher-less) phase trains on
            the augmented split; otherwise on the unaugmented split.
        data_config: Kwargs for ``load_dataset`` (includes N, M).
        model_config: Per-phase model settings ("models", "epochs", "ratio",
            learning-rate schedule, dropout, stochastic depth).

    Raises:
        ValueError: If ``label_type`` is not "soft" or "hard".
    """
    # Validate with a real exception: `assert` is stripped under `python -O`.
    if label_type not in ["soft", "hard"]:
        raise ValueError(f"label_type must be 'soft' or 'hard', got {label_type!r}")

    # setup for directory and log
    output_dir = os.path.join(output_dir, model_name)
    if not os.path.isdir(output_dir):
        # exist_ok avoids a TOCTOU race if the directory appears between
        # the isdir() check and makedirs(); chmod only on the create path,
        # matching the original behavior.
        os.makedirs(output_dir, exist_ok=True)
        os.chmod(output_dir, 0o775)
    # `print` may have been replaced by a logging shim that tees to a file.
    if hasattr(print, "set_log_dir"):
        print.set_log_dir(output_dir)
    print("==========Settings==========")
    print(f"batch size: {batch_size}")
    print(f"augment epoch: {augment_epoch}")
    print(f"unaugment epoch: {unaugment_epoch}")
    print(f"label type: {label_type}")
    print(f"confidence: {confidence}")
    print(f"models: {model_config['models']}")
    print(f"epochs: {model_config['epochs']}")
    print(f"ratio: {model_config['ratio']}")
    print(f"learning rate: {model_config['learning_rate']}")
    print(f"lr_decay_rate: {model_config['lr_decay_rate']}")
    print(f"lr_decay_epoch: {model_config['lr_decay_epoch']}")
    print(f"N, M: {data_config['N']}, {data_config['M']}")
    print(f"teacher noise: {teacher_noise}")
    print(f"dropout_prob: {model_config['dropout_prob']}")
    print(f"stoc_depth_prob: {model_config['stochastic_depth_prob']}")
    print("============================")
    logger = Logger(os.path.join(output_dir, 'logs'))

    # dataset, dataloader
    dataset = load_dataset(**data_config)
    dataloaders = {}
    dataloaders['train_unaugmented'] = DataLoader(dataset['train_unaugmented'],
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=num_workers)
    dataloaders['train_augmented'] = DataLoader(dataset['train_augmented'],
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    dataloaders['test'] = DataLoader(dataset['test'],
                                     batch_size=batch_size,
                                     shuffle=True,
                                     num_workers=num_workers)

    # load model (resumes from the saved phase, if any)
    loader = Loader(**model_config, device=device, num_classes=num_classes)
    (teacher, student, optimizer, lr_scheduler, start_epoch, end_epoch, phase,
     best_acc, best_epoch, best_model) = loader.load(0)
    model_name = model_config["models"][phase]
    ratio = model_config["ratio"][phase]
    save_path = os.path.join(output_dir, f"{model_name}.pt")
    logger.set_model_name(model_name)

    ############################
    ###### Training phase ######
    ############################
    if teacher is None:
        # First phase has no teacher: train directly on the (un)augmented
        # split, chosen by the teacher_noise flag.
        dataloaders['train'] = dataloaders[
            'train_augmented'] if teacher_noise else dataloaders[
            'train_unaugmented']
    else:
        # Resumed mid-pipeline: rebuild the pseudo-labeled loader from the
        # restored teacher.
        dataloaders['train'] = NS_DataLoader(dataloaders, dataset, teacher,
                                             device, label_type, confidence,
                                             ratio, num_workers, batch_size,
                                             num_classes, print)

    while True:
        # The freshly trained student becomes the teacher for the next phase.
        teacher = train_model(student, optimizer, lr_scheduler, dataloaders,
                              start_epoch, end_epoch, best_acc, best_epoch,
                              best_model, device, logger, phase,
                              epochs_per_save, save_path, augment_epoch,
                              unaugment_epoch)
        phase += 1
        if phase < len(model_config["models"]):
            (_, student, optimizer, lr_scheduler, start_epoch, end_epoch, _,
             best_acc, best_epoch, best_model) = loader.load(phase)
            model_name = model_config["models"][phase]
            ratio = model_config["ratio"][phase]
            logger.set_model_name(model_name)
            save_path = os.path.join(output_dir, f"{model_name}.pt")
            dataloaders['train'] = NS_DataLoader(dataloaders, dataset, teacher,
                                                 device, label_type, confidence,
                                                 ratio, num_workers, batch_size,
                                                 num_classes, print)
            # Pseudo-labels are baked into the loader; free the teacher
            # before the next training run to reduce peak memory.
            del teacher
        else:
            break