Example no. 1
def main():
    args = get_args()
    rng = np.random.RandomState(1223)

    # Get context
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    iterations = []
    mean_iou = []
    model_dir = args.model_load_path
    for filename in os.listdir(model_dir):
        args.model_load_path = os.path.join(model_dir, filename)
        miou = eval.validate(args)
        iterations.append(filename.split('.')[0])
        mean_iou.append(miou)

    # Strip the 'param_' prefix so only the iteration number remains
    iterations = [name.replace('param_', '') for name in iterations]
    itr = list(map(int, iterations))

    # Plot Iterations Vs mIOU
    # plt.axis expects [xmin, xmax, ymin, ymax]; plt.axes would create a new
    # axes rectangle instead of setting the plot limits.
    plt.axis([0, max(itr), 0.0, 1.0])
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy - mIOU')
    plt.scatter(itr, mean_iou)
    plt.show()

    print(iterations)
    print(mean_iou)
    with open('iterations.txt', 'w') as f:
        for item in iterations:
            f.write('%s\n' % item)
    with open('miou.txt', 'w') as f2:
        for item in mean_iou:
            f2.write('%s\n' % item)
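The snippet relies on a project-specific `get_args()` helper and an `eval.validate()` routine that are not shown. A minimal sketch of what such an argument parser might look like, based only on the attributes the loop reads; the flag names and defaults are assumptions, not the project's actual CLI:

import argparse

def get_args():
    # Hypothetical parser: only the attributes used above are defined.
    parser = argparse.ArgumentParser(description='Evaluate saved checkpoints')
    parser.add_argument('--context', default='cudnn')
    parser.add_argument('--device-id', default='0')
    parser.add_argument('--type-config', default='float')
    parser.add_argument('--model-load-path',
                        default='./checkpoints/')  # directory of param_* files
    return parser.parse_args()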
Example no. 2
def train_kgatt(args: Args,
                kg_train: KnowledgeGraph,
                kg_test: KnowledgeGraph,
                kg_val: KnowledgeGraph,
                total_triplets=None):

    n_ent, n_rel = kg_train.n_ent, kg_train.n_rel

    if total_triplets is None:
        total_triplets = get_valid_triplets(kg_train, kg_test, kg_val)

    dataloader = DataLoader(kg_train,
                            batch_size=args.batch_size,
                            shuffle=False,
                            pin_memory=cuda.is_available())
    model = MultiHeadKGAtt(n_ent,
                           n_rel,
                           100,
                           200,
                           100,
                           args.num_heads,
                           device=args.device).to(args.device)
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, eps=1e-3)
    elif args.optimizer == 'sgd':
        optimizer = SGD(model.parameters(), lr=args.lr)
    elif args.optimizer == 'adamw':
        optimizer = AdamW(model.parameters(), lr=args.lr, eps=1e-3)
    else:
        raise ValueError(f'Unsupported optimizer: {args.optimizer}')

    ent_embed, rel_embed = get_init_embed()
    ent_embed, rel_embed = ent_embed.to(args.device), rel_embed.to(args.device)

    loss = 0
    model.train()
    for epoch in range(args.n_epochs):

        losses = []
        ent_embeds = [0]
        rel_embeds = [0]

        for i, batch in enumerate(dataloader):

            triplets = torch.stack(batch)
            triplets, labels, nodes, edges = negative_sampling(
                triplets, n_ent, args.negative_rate)
            triplets, labels = triplets.to(args.device), labels.to(args.device)

            model.zero_grad()

            # start = time.time()
            model.train()
            ent_embed_, rel_embed_ = model(triplets, ent_embed, rel_embed,
                                           nodes, edges)
            loss = loss_func2(triplets,
                              args.negative_rate,
                              ent_embed_,
                              rel_embed_,
                              device=args.device)

            # loss.backward(retain_graph=True)
            loss.backward()
            optimizer.step()
            # print(f"Finished {time.time() - start}")

            losses.append(loss.item())

            ent_embeds[0] = ent_embed_
            rel_embeds[0] = rel_embed_

            # if i % 100 == 0:
            #     print(loss.item())
            # print(loss.item())

        loss = sum(losses) / (len(losses))
        print(f'Epoch {epoch} Loss: {loss}')
        # writer.add_scalar("Train Loss", loss, epoch)

        if epoch > 10:
            model.eval()
            validate(model, kg_val, total_triplets, 100, args.device)

    return loss
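Names such as `MultiHeadKGAtt`, `negative_sampling`, `loss_func2`, and `get_init_embed` come from the surrounding project. A hedged sketch of the `Args` container this signature expects, with field names taken from the attributes read inside the function and defaults that are purely assumptions:

from dataclasses import dataclass
import torch

@dataclass
class Args:
    batch_size: int = 128          # assumed default
    num_heads: int = 2
    optimizer: str = 'adam'        # one of 'adam', 'sgd', 'adamw'
    lr: float = 1e-3
    negative_rate: int = 10
    n_epochs: int = 100
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'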
Example no. 3
def train_net(model: UNet3D,
              device,
              loss_fnc=DiceLoss(sigmoid_normalization=False),
              eval_criterion=MeanIoU(),
              epochs=5,
              batch_size=1,
              learning_rate=0.0002,
              val_percent=0.04,
              test_percent=0.1,
              name='U-Net',
              save_cp=True,
              tests=None):
    data_set = BasicDataset(dir_img, dir_mask, 'T1', device)
    train_loader, val_loader, test_loader = data_set.split_to_loaders(
        val_percent, test_percent, batch_size, test_files=tests)

    writer = SummaryWriter(comment=f'LR_{learning_rate}_BS_{batch_size}')
    global_step = 0
    logging.info(f'''Starting {name} training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {learning_rate}
        Training size:   {len(train_loader)}
        Validation size: {len(val_loader)}
        Testing size:    {len(test_loader)}
        Checkpoints:     {save_cp}
        Device:          {device.type}
    ''')

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=0.00001)
    losses = []
    val_scores = []

    for epoch in range(epochs):

        epoch_loss = 0
        for batch in train_loader:
            model.train()
            start_time = timeit.default_timer()

            img = batch['image']
            mask = batch['mask']

            masks_pred = model(img)

            loss = loss_fnc(masks_pred, mask)

            epoch_loss += loss.item()
            losses.append(loss.item())

            writer.add_scalar('Loss/train', loss.item(), global_step)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            global_step += 1
            elapsed = timeit.default_timer() - start_time
            logging.info(
                f'I: {global_step}, Loss: {loss.item()} in {elapsed} seconds')

            # Guard against a zero interval when the training set is small
            if global_step % max(1, len(train_loader) // (5 * batch_size)) == 0:
                val_score = validate(model, val_loader, loss_fnc,
                                     eval_criterion)
                val_scores.append(val_score)

                writer.add_scalar('Validation/test', val_score, global_step)

        if save_cp:
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(model.state_dict(),
                       dir_checkpoint + f'{name}_epoch{epoch + 1}.pth')
            logging.info(f'Epoch: {epoch + 1} Loss: {epoch_loss}')
            logging.info(f'Checkpoint {epoch + 1} saved !')
            plot_cost(losses, name='Loss' + str(epoch), model_name=name)
            plot_cost(val_scores,
                      name='Validation' + str(epoch),
                      model_name=name)

    writer.close()
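The `validate` call above returns a single scalar that is logged as 'Validation/test'. A minimal sketch of what such a helper could look like; the project's real version may differ, and `eval_criterion` is assumed to map (prediction, target) to a score, as MeanIoU does:

import torch

def validate(model, val_loader, loss_fnc, eval_criterion):
    # Hypothetical sketch: average the evaluation criterion over the
    # validation loader without tracking gradients.
    model.eval()
    scores = []
    with torch.no_grad():
        for batch in val_loader:
            pred = model(batch['image'])
            scores.append(eval_criterion(pred, batch['mask']).item())
    model.train()
    return sum(scores) / max(1, len(scores))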
Example no. 4
def train(loaders, dist, args):
    # use checkpoint model if given
    if args.m is None:
        checkpoint = torch.load(args.checkpoint)
        # key matches what this function saves below ('model_name')
        model_name = checkpoint['model_name']
        model = Model(model_name)
        model.load_state_dict(checkpoint['model_state_dict'])
    else:  # else init model
        model_name = args.m
        model = Model(model_name)

    # loss and device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    dist = torch.FloatTensor(dist).to(
        device
    )  # no epsilon needs to be added, each category has at least one sample
    if args.wl:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.CrossEntropyLoss(weight=1 / dist)

    data_parallel = False
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        data_parallel = True
    model.to(device)
    path_to_best_model = ""

    # optimizer
    optimizer = optim.Adadelta(model.parameters(),
                               lr=args.lr,
                               rho=0.95,
                               eps=1e-08)
    best_loss = sys.maxsize
    early_stop = False
    # epochs
    iternum = 1
    for epoch in range(args.epoch_num):
        epoch_loss = 0
        num_corrects = 0
        tbar = tqdm(loaders['train'])
        # iterate through images
        for i, (imgs, labels) in enumerate(tbar):
            model.train()
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            _, preds = torch.max(outputs, 1)

            num_corrects += torch.sum(preds == labels.data)
            loss = criterion(outputs, labels)

            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            # current training accuracy of epoch
            epoch_acc = num_corrects.double() / ((i + 1) * args.batch_size)

            tbar.set_description(
                'Epoch: [{}/{}], Epoch_loss: {:.5f}, Epoch_acc: {:.5f}'.format(
                    epoch + 1, args.epoch_num, epoch_loss / (i + 1),
                    epoch_acc))
            # early stopping
            if iternum % args.num_iter_to_validate == 0:
                print("Validating model ...")
                if epoch > args.num_iter_to_validate:
                    print('Best validation loss: {}'.format(best_loss))
                val_loss, val_acc = validate(loaders['val'], model, device)
                # if we have the best model so far
                if val_loss < best_loss:
                    best_loss = val_loss
                    path_to_checkpoint = os.path.abspath(
                        os.path.join(args.checkpoint,
                                     f'model_{model_name}_epoch_{epoch}.pth'))
                    if path_to_best_model:
                        os.remove(path_to_best_model)
                    path_to_best_model = path_to_checkpoint
                    num_checks = 0
                    state_dict = model.module.state_dict(
                    ) if data_parallel else model.state_dict()
                    torch.save(
                        {
                            'model_state_dict': state_dict,
                            'model_name': model_name
                        }, path_to_checkpoint)
                else:  # else we increase patience, if patience reaches the limit we stop
                    num_checks += 1
                    if num_checks >= args.patience:
                        print("Early stopping ...")
                        early_stop = True
                print(
                    'Validation loss: {}\n Validation acc: {}'.format(
                        val_loss, val_acc),
                    'Number of checks: {}'.format(num_checks))
            if early_stop:
                break
            iternum += 1
        if early_stop:
            # propagate early stopping out of the epoch loop as well
            break
    return model
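The early-stopping loop assumes a `validate(loader, model, device)` helper that returns a (loss, accuracy) pair. A hedged sketch of such a helper; the project's implementation may weight the loss or average differently:

import torch
import torch.nn as nn

def validate(loader, model, device, criterion=nn.CrossEntropyLoss()):
    # Hypothetical (val_loss, val_acc) helper matching the call site above.
    model.eval()
    total_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            total_loss += criterion(outputs, labels).item() * labels.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)
    return total_loss / seen, correct / seen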
Example no. 5
def train(encoder,
          decoder,
          train_loader,
          val_loader,
          optimizer,
          criterion,
          id2word,
          lr_scheduler=None,
          num_epochs=1,
          print_every=100,
          device='cpu',
          early_stop=False):
    """
    Function for training
    
    Inputs:
    - encoder, decoder
    - train_loader, val_loader: DataLoader for training set and validation set
    - optimizer: a torch.optim optimizer (e.g. torch.optim.Adam(...))
    - criterion: loss function (e.g. nn.CrossEntropyLoss())
    - id2word: id2word for target training set
    - lr_scheduler: learning rate scheduler (e.g. torch.optim.lr_scheduler.StepLR)
    - num_epochs
    - print_every
    - device: 'cpu' or 'cuda'
    """
    encoder.train()
    decoder.train()
    best_bleu = 0
    best_statedict = {
        'encoder': encoder.state_dict(),
        'decoder': decoder.state_dict()
    }
    for epoch in range(num_epochs):
        print('Epoch ', epoch + 1)
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device=device, dtype=torch.long)
            y = y.to(device=device, dtype=torch.long)
            enc_out, enc_hidden = encoder(x)
            dec_hidden = enc_hidden
            dec_input = y[:, 0]
            loss = 0
            optimizer.zero_grad()
            for t in range(1, y.size(1)):
                out, dec_hidden = decoder(dec_input, dec_hidden, enc_out)
                dec_input = y[:, t]
                loss += criterion(out.squeeze(1), y[:, t])
            loss.backward()
            optimizer.step()
            if i % print_every == 0:
                print('Iter %d, loss = %f' % (i, loss.item() / y.size(1)))
        if lr_scheduler is not None:
            lr_scheduler.step()
        bleu = validate(val_loader, encoder, decoder, id2word, device)
        print('Validation BLEU score: %f\n' % bleu)
        if bleu > best_bleu:
            best_statedict = {
                'encoder': encoder.state_dict(),
                'decoder': decoder.state_dict()
            }
            best_bleu = bleu
        elif early_stop:
            print('=== BLEU begins to decrease, training exits ===')
            return best_statedict
    return best_statedict
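The function returns a dict of the best encoder/decoder state dicts rather than saving them. A hypothetical follow-up for persisting and restoring that dict; the tiny GRU modules and file name here are stand-ins so the snippet runs on its own:

import torch
import torch.nn as nn

# Stand-ins for the real encoder/decoder
encoder = nn.GRU(input_size=8, hidden_size=16, batch_first=True)
decoder = nn.GRU(input_size=8, hidden_size=16, batch_first=True)

best_statedict = {'encoder': encoder.state_dict(), 'decoder': decoder.state_dict()}
torch.save(best_statedict, 'seq2seq_best.pth')

state = torch.load('seq2seq_best.pth', map_location='cpu')
encoder.load_state_dict(state['encoder'])
decoder.load_state_dict(state['decoder'])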
Example no. 6
def train_net(model: UNet3D,
              epochs=5,
              learning_rate=0.0002,
              val_percent=0.1,
              test_percent=0.1,
              name='U-Net',
              tests=None,
              patch_size=16,
              testing_memory=False,
              mask_model=False):

    data_set = BrainDataset(dir_img,
                            'T1',
                            dir_mask,
                            stack_size=patch_size,
                            mask_net=mask_model)
    loader = BrainLoaders(data_set,
                          ratios=[val_percent, test_percent],
                          files=[None, tests])

    train_loader = loader.train_loader()
    val_loader = loader.validation_loader()
    test_loader = loader.test_loader()

    num_images = data_set.num_files()
    log_interval = len(train_loader) if num_images < 10 else len(
        data_set.slices) * (num_images // 10)
    global_step = 0
    logging.info(f'''Starting {name} training:
        Epochs:          {epochs}
        Learning rate:   {learning_rate}
        Training size:   {len(train_loader)} slices
        Validation size: {len(val_loader)} images
        Testing size:    {len(test_loader)} images
        Log Interval:    {log_interval}
    ''')

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=0.00001)
    losses = []
    val_scores = {}
    for fnc in METRICS:
        val_scores[fnc] = []

    for epoch in range(epochs):

        epoch_loss = 0
        epoch_start_time = timeit.default_timer()
        log_start_time = timeit.default_timer()
        log_loss = RunningAverage()
        for batch in train_loader:
            model.train()

            img = batch['image']
            mask = batch['mask']

            masks_pred = model(img)

            loss = loss_fnc(masks_pred, mask)

            epoch_loss += loss.item()
            log_loss.update(loss.item(), n=1)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            if testing_memory:  # When testing patch sizes only one iteration is enough
                return

            global_step += 1
            if global_step % log_interval == 0:
                elapsed = timeit.default_timer() - log_start_time
                losses.append(log_loss.avg)
                logging.info(
                    f'I: {global_step}, Avg. Loss: {log_loss.avg} in {elapsed} seconds'
                )
                log_start_time = timeit.default_timer()
                log_loss = RunningAverage()

        scores = validate(model, loader, is_validation=True, loss_fnc=loss_fnc)
        for fnc in METRICS:
            val_scores[fnc].append(scores[fnc])

        make_dir(dir_checkpoint)
        torch.save(model.state_dict(),
                   dir_checkpoint + f'{name}_epoch{epoch + 1}.pth')
        elapsed = timeit.default_timer() - epoch_start_time
        logging.info(
            f'Epoch: {epoch + 1} Total Loss: {epoch_loss} in {elapsed} seconds'
        )
        logging.info(f'Checkpoint {epoch + 1} saved !')
        plot_cost(losses, name='Loss', model_name=name + str(epoch) + '_')
        for fnc in METRICS:
            plot_cost(val_scores[fnc],
                      name='Validation_' + type(fnc).__name__,
                      model_name=name + str(epoch) + '_')

    logging.info('Starting Testing')
    validate(model,
             loader,
             is_validation=False,
             loss_fnc=loss_fnc,
             quiet=False)
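`RunningAverage`, `make_dir`, `METRICS`, and the module-level `loss_fnc` are project helpers not shown here. The logging loop only relies on `update(value, n)` and `.avg`, so a minimal stand-in could look like this; it is an assumption about the project's actual implementation:

class RunningAverage:
    # Hypothetical minimal version: keeps a running mean of the values
    # passed to update(), weighted by n.
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        self.total += value * n
        self.count += n

    @property
    def avg(self):
        return self.total / self.count if self.count else 0.0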
Example no. 7
File: data.py Project: yyht/PRS
    def _eval_model(self, model: Model, writer: SummaryWriter, step, t,
                    eval_title, results_dict):

        training = model.training
        model.eval()

        if t in self.config['schedule_simple']:
            t_idx = self.config['schedule_simple'].index(t)
        else:
            t_idx = len(self.config['schedule_simple']) - 1

        # for calculating total performance
        targets_total = []
        probs_total = []

        # Accuracy of each subset
        for order_i, t_i in enumerate(self.config['schedule_simple'][:t_idx + 1]):
            subset_name = t_i
            last_id = self.config['schedule_simple'][
                -1]  # XXX should be -1. -2 for debugging.
            subset = self.subsets[t_i]
            data = DataLoader(
                subset,
                batch_size=self.config['eval_batch_size'],
                num_workers=self.config['eval_num_workers'],
                collate_fn=self.collate_fn,
            )

            # results is dict. {method: group_averagemeter_object}
            results, targets, probs = validate(subset_name, model, data,
                                               self.category_map, results_dict,
                                               last_id, self.split_cats_dict)

            targets_total.append(targets)
            probs_total.append(probs)

            if subset_name in results_dict:
                results_dict[subset_name].append(results)
            else:
                results_dict[subset_name] = [results]

            for metric in results.keys():
                results[metric].write_to_excel(
                    os.path.join(writer.logdir,
                                 'results_{}.xlsx'.format(metric)),
                    sheet_name='task {}'.format(subset_name),
                    column_name='task {}'.format(
                        self.config['schedule_simple'][t_idx]),
                    info='avg')

        # =================================================================================================================
        # calculate scores for trained tasks.
        prefix = 'tally_'  # prefix for tensorboard plotting and csv filename

        targets_total = torch.cat(targets_total, axis=0)
        probs_total = torch.cat(probs_total, axis=0)
        predicts_total = probs_total > 0.5  # BCE style predicts
        total_metric = ['CP', 'CR', 'CF1', 'OP', 'OR', 'OF1', 'mAP']
        results = dict()  # reset results

        CP, CR, CF1, OP, OR, OF1, mAP = (AverageMeter()
                                         for _ in range(len(total_metric)))

        ncats = targets_total.sum(axis=0)
        # ignore classes in future tasks
        cats_in_task_idx = ncats > 0
        cats_in_task_name = self.category_map[cats_in_task_idx].tolist()

        # calculate score
        precision_pc = torch.mean(
            precision_score_per_class(targets_total[:, cats_in_task_idx],
                                      predicts_total[:, cats_in_task_idx],
                                      zero_division=0))
        recall_pc = torch.mean(
            recall_score_per_class(targets_total[:, cats_in_task_idx],
                                   predicts_total[:, cats_in_task_idx],
                                   zero_division=0))
        # CF1. note that CF1 is not a mean value of categories' f1_score
        f1_pc = ((2 * precision_pc * recall_pc) / (precision_pc + recall_pc)
                 ) if (precision_pc + recall_pc) > 0 else torch.tensor([0.])
        precision_oa = precision_score_overall(
            targets_total[:, cats_in_task_idx],
            predicts_total[:, cats_in_task_idx],
            zero_division=0)
        recall_oa = recall_score_overall(targets_total[:, cats_in_task_idx],
                                         predicts_total[:, cats_in_task_idx],
                                         zero_division=0)
        f1_oa = f1_score_overall(targets_total[:, cats_in_task_idx],
                                 predicts_total[:, cats_in_task_idx],
                                 zero_division=0)
        map_ = mean_average_precision(targets_total[:, cats_in_task_idx],
                                      probs_total[:, cats_in_task_idx])
        # save to AverageMeter
        CP.update(precision_pc.item())
        CR.update(recall_pc.item())
        CF1.update(f1_pc.item())
        OP.update(precision_oa.item())
        OR.update(recall_oa.item())
        OF1.update(f1_oa.item())
        mAP.update(map_.item())

        results[prefix + 'CP'] = CP
        results[prefix + 'CR'] = CR
        results[prefix + 'CF1'] = CF1
        results[prefix + 'OP'] = OP
        results[prefix + 'OR'] = OR
        results[prefix + 'OF1'] = OF1
        results[prefix + 'mAP'] = mAP

        # for reporting major, moderate, minor category performances
        for report_name in self.split_cats_dict.keys():
            reporter = Group_AverageMeter()

            # get report category idxes
            all_cats = self.category_map.tolist()
            task_cats = set(cats_in_task_name)
            report_cats = task_cats & set(self.split_cats_dict[report_name])
            report_cats_idx = torch.tensor(
                [all_cats.index(cat) for cat in report_cats], dtype=torch.long)

            # CP, CR, CF1 performance of report_categories.
            _class_precision = precision_score_per_class(
                targets_total[:, report_cats_idx],
                predicts_total[:, report_cats_idx],
                zero_division=0)
            _class_recall = recall_score_per_class(
                targets_total[:, report_cats_idx],
                predicts_total[:, report_cats_idx],
                zero_division=0)
            _class_precision = torch.mean(_class_precision)
            _class_recall = torch.mean(_class_recall)
            # CF1; note that CF1 is not the mean of per-category f1_scores
            _class_f1 = ((2 * _class_precision * _class_recall) /
                         (_class_precision + _class_recall)) \
                if (_class_precision + _class_recall) > 0 else torch.tensor([0.])

            # OP, OR, OF1 performance of report_categories.
            _overall_precision = precision_score_overall(
                targets_total[:, report_cats_idx],
                predicts_total[:, report_cats_idx],
                zero_division=0)
            _overall_recall = recall_score_overall(
                targets_total[:, report_cats_idx],
                predicts_total[:, report_cats_idx],
                zero_division=0)
            _overall_f1 = f1_score_overall(targets_total[:, report_cats_idx],
                                           predicts_total[:, report_cats_idx],
                                           zero_division=0)

            # mAP performance of report_categories.
            _mAP = mean_average_precision(targets_total[:, report_cats_idx],
                                          probs_total[:, report_cats_idx])

            reporter.update(['CP'], [_class_precision.item()], [1])
            reporter.update(['CR'], [_class_recall.item()], [1])
            reporter.update(['CF1'], [_class_f1.item()], [1])
            reporter.update(['OP'], [_overall_precision.item()], [1])
            reporter.update(['OR'], [_overall_recall.item()], [1])
            reporter.update(['OF1'], [_overall_f1.item()], [1])
            reporter.update(['mAP'], [_mAP.item()], [1])

            reporter.total.reset()
            results[prefix + report_name] = reporter

        # write to tensorboard and csv.
        task_len = t_idx + 1
        for metric in results.keys():
            if metric not in [
                    prefix + 'CP', prefix + 'CR', prefix + 'OP', prefix + 'OR'
            ]:
                results[metric].write(
                    writer,
                    '%s/%s/%s/task_len(%d)' %
                    (metric, eval_title, self.name, task_len),
                    step,
                    info='avg')

            results[metric].write_to_excel(
                os.path.join(writer.logdir, 'results_{}.xlsx'.format(metric)),
                sheet_name=prefix,
                column_name='task {}'.format(
                    self.config['schedule_simple'][t_idx]),
                info='avg')

        # =================================================================================================================
        # print performances at the end
        if t_idx == len(self.config['schedule_simple']) - 1:
            src = writer.logdir
            csv_files = ['major', 'moderate', 'minor', 'OF1', 'CF1', 'mAP', \
                         prefix+'major', prefix+'moderate', prefix+'minor', prefix+'CF1', prefix+'OF1', prefix+'mAP', \
                         'forget']
            for csv_file in csv_files:
                try:
                    csv = pd.read_csv(os.path.join(
                        src, 'results_{}.csv'.format(csv_file)),
                                      index_col=0)

                    # print performance after training last task
                    pd.set_option('display.max_rows', None)
                    print(
                        colorful.bold_green(
                            '\n{:10} result'.format(csv_file)).styled_string)
                    print(csv.round(4).iloc[:, -1])

                    # save as txt
                    with open(os.path.join(src, 'summary.txt'),
                              'a') as summary_txt:
                        summary_txt.write('\n')
                        summary_txt.write('{:10} result\n'.format(csv_file))
                        summary_txt.write(csv.round(4).iloc[:, -1].to_string())
                        summary_txt.write('\n')

                except FileNotFoundError:
                    print("This excperiment doesn't have {} file!! continue.".
                          format(csv_file))
                    continue

        model.train(training)

        return results_dict
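`precision_score_overall`, `recall_score_overall`, `f1_score_overall`, and the per-class variants are project utilities. For orientation only, the "overall" (OP/OR/OF1) scores for multi-label 0/1 tensors can be sketched as micro-averages over all (sample, class) pairs; the project's own implementations may handle edge cases differently:

import torch

def precision_score_overall(targets, predicts, zero_division=0):
    # Micro-averaged precision: true positives over predicted positives.
    tp = (targets.bool() & predicts.bool()).sum().float()
    pred_pos = predicts.bool().sum().float()
    return tp / pred_pos if pred_pos > 0 else torch.tensor(float(zero_division))

def recall_score_overall(targets, predicts, zero_division=0):
    # Micro-averaged recall: true positives over actual positives.
    tp = (targets.bool() & predicts.bool()).sum().float()
    actual_pos = targets.bool().sum().float()
    return tp / actual_pos if actual_pos > 0 else torch.tensor(float(zero_division))

def f1_score_overall(targets, predicts, zero_division=0):
    p = precision_score_overall(targets, predicts, zero_division)
    r = recall_score_overall(targets, predicts, zero_division)
    return 2 * p * r / (p + r) if (p + r) > 0 else torch.tensor(float(zero_division))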