# Example 1
    def label(self, report_pairs, reports_path, extract_impression):
        """Run the full labeling pipeline over the given report(s).

        Loads the reports, then extracts and classifies observation
        mentions, and finally collects the per-report attribute output
        (mentions/categories/negation/attributes).

        Returns a tuple of (loaded reports, attribute output).
        """
        report_loader = Loader(report_pairs, reports_path, extract_impression)
        report_loader.load()

        # Both steps mutate the loaded collection in place.
        self.extractor.extract(report_loader.collection)
        self.classifier.classify(report_loader.collection)

        # Per-report attribute output; full label aggregation
        # (aggregator.aggregate) is intentionally not used here.
        attributes = self.aggregator.getAttributeOutput(report_loader.collection)

        return report_loader.reports, attributes
# Example 2
def main(output_dir, model_name, batch_size, num_workers, augment_epoch,
         unaugment_epoch, device, label_type, confidence, num_classes,
         epochs_per_save, teacher_noise, data_config, model_config):
    """Run an iterative (noisy-student style) self-training loop.

    Trains a sequence of student models described by ``model_config``;
    after each phase the trained model becomes the teacher that
    pseudo-labels the training data for the next student via
    ``NS_DataLoader``. Checkpoints and logs are written under
    ``output_dir/model_name``.

    Args:
        output_dir: root directory for run artifacts (a per-run
            subdirectory named after ``model_name`` is created inside).
        model_name: name of this run; also the subdirectory name.
        batch_size, num_workers: DataLoader settings.
        augment_epoch, unaugment_epoch: forwarded to ``train_model``.
        device: compute device passed to the model loader / trainer.
        label_type: pseudo-label type, must be "soft" or "hard".
        confidence: pseudo-label confidence threshold.
        num_classes: number of target classes.
        epochs_per_save: checkpoint frequency, forwarded to ``train_model``.
        teacher_noise: if truthy, the first (teacher-less) phase trains
            on the augmented split instead of the unaugmented one.
        data_config: kwargs for ``load_dataset`` (includes N, M).
        model_config: per-phase lists ("models", "epochs", "ratio", ...)
            plus optimizer/regularization settings.
    """
    assert label_type in ["soft", "hard"]

    # Per-run output directory; chmod only when we created it so an
    # existing directory's permissions are left untouched.
    output_dir = os.path.join(output_dir, model_name)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
        os.chmod(output_dir, 0o775)

    # NOTE(review): `print` appears to be a project logging shim (it is
    # also handed to NS_DataLoader below) — confirm with the caller.
    if hasattr(print, "set_log_dir"):
        print.set_log_dir(output_dir)

    print("==========Settings==========")
    print(f"batch size:      {batch_size}")
    print(f"augment epoch:   {augment_epoch}")
    print(f"unaugment epoch: {unaugment_epoch}")
    print(f"label type:      {label_type}")
    print(f"confidence:      {confidence}")
    print(f"models:          {model_config['models']}")
    print(f"epochs:          {model_config['epochs']}")
    print(f"ratio:           {model_config['ratio']}")
    print(f"learning rate:   {model_config['learning_rate']}")
    print(f"lr_decay_rate:   {model_config['lr_decay_rate']}")
    print(f"lr_decay_epoch:  {model_config['lr_decay_epoch']}")
    print(f"N, M:            {data_config['N']}, {data_config['M']}")
    print(f"teacher noise:   {teacher_noise}")
    print(f"dropout_prob:    {model_config['dropout_prob']}")
    print(f"stoc_depth_prob: {model_config['stochastic_depth_prob']}")
    print("============================")

    logger = Logger(os.path.join(output_dir, 'logs'))

    # Dataset and the three fixed dataloaders (the 'train' entry is
    # (re)assigned per phase below).
    dataset = load_dataset(**data_config)
    dataloaders = {}
    dataloaders['train_unaugmented'] = DataLoader(dataset['train_unaugmented'],
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=num_workers)
    dataloaders['train_augmented'] = DataLoader(dataset['train_augmented'],
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    dataloaders['test'] = DataLoader(dataset['test'],
                                     batch_size=batch_size,
                                     shuffle=True,
                                     num_workers=num_workers)

    def _make_ns_loader(teacher_model, mix_ratio):
        """Build the pseudo-labeled train loader for the next student."""
        return NS_DataLoader(dataloaders, dataset, teacher_model, device,
                             label_type, confidence, mix_ratio, num_workers,
                             batch_size, num_classes, print)

    # Load phase-0 model/optimizer state (may resume mid-run).
    loader = Loader(**model_config, device=device, num_classes=num_classes)
    (teacher, student, optimizer, lr_scheduler, start_epoch, end_epoch, phase,
     best_acc, best_epoch, best_model) = loader.load(0)
    model_name = model_config["models"][phase]
    ratio = model_config["ratio"][phase]
    save_path = os.path.join(output_dir, f"{model_name}.pt")
    logger.set_model_name(model_name)

    ############################
    ###### Training phase ######
    ############################
    if teacher is None:
        # First phase has no teacher: train directly on labeled data,
        # augmented or not depending on `teacher_noise`.
        dataloaders['train'] = (dataloaders['train_augmented']
                                if teacher_noise
                                else dataloaders['train_unaugmented'])
    else:
        dataloaders['train'] = _make_ns_loader(teacher, ratio)

    while True:
        # The freshly trained student becomes the next phase's teacher.
        teacher = train_model(student, optimizer, lr_scheduler, dataloaders,
                              start_epoch, end_epoch, best_acc, best_epoch,
                              best_model, device, logger, phase,
                              epochs_per_save, save_path, augment_epoch,
                              unaugment_epoch)
        phase += 1
        if phase >= len(model_config["models"]):
            break

        # Load the next student's model/optimizer state and retarget
        # the logger and checkpoint path.
        (_, student, optimizer, lr_scheduler, start_epoch, end_epoch, _,
         best_acc, best_epoch, best_model) = loader.load(phase)
        model_name = model_config["models"][phase]
        ratio = model_config["ratio"][phase]

        logger.set_model_name(model_name)
        save_path = os.path.join(output_dir, f"{model_name}.pt")

        dataloaders['train'] = _make_ns_loader(teacher, ratio)
        # Release the teacher once its pseudo-labels have been set up;
        # `teacher` is rebound by train_model on the next iteration.
        del teacher