Beispiel #1
0
    def eval_final():
        """ Evaluated model on test set in an extended way: computes estimates over multiple samples of point clouds and stores predictions """
        model.eval()

        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(dbinfo['classes'])
        collected, predictions = defaultdict(list), {}

        # run the test set once per sampling seed and pool the raw outputs
        for seed in range(args.test_multisamp_n):
            test_dataset_ss = create_dataset(args, seed)[1]
            loader = torch.utils.data.DataLoader(
                test_dataset_ss, batch_size=1,
                collate_fn=spg.eccpc_collate, num_workers=args.nworkers)
            if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
                loader = tqdm(loader, ncols=100)

            # iterate over dataset in batches
            for bidx, (targets, GIs, clouds_data) in enumerate(loader):
                model.ecc.set_info(GIs, args.cuda)
                label_mode_cpu = targets[:, 0]
                label_vec_cpu = targets[:, 2:]
                segm_size_cpu = targets[:, 1:].sum(1).float()

                embeddings = ptnCloudEmbedder.run(model, *clouds_data)
                outputs = model.ecc(embeddings)

                # key collected outputs by file name with the extension stripped
                fname = clouds_data[0][0][:clouds_data[0][0].rfind('.')]
                collected[fname].append(
                    (outputs.data.cpu().numpy(), label_mode_cpu.numpy(),
                     label_vec_cpu.numpy()))

        # aggregate per-cloud outputs across seeds (mean of the logits)
        for fname, lst in collected.items():
            o_cpu, t_cpu, tvec_cpu = list(zip(*lst))
            if args.test_multisamp_n > 1:
                o_cpu = np.mean(np.stack(o_cpu, 0), 0)
            else:
                o_cpu = o_cpu[0]
            # targets are identical across seeds; keep the first copy
            t_cpu, tvec_cpu = t_cpu[0], tvec_cpu[0]
            predictions[fname] = np.argmax(o_cpu, 1)
            o_cpu, t_cpu, tvec_cpu = filter_valid(o_cpu, t_cpu, tvec_cpu)
            if t_cpu.size > 0:
                acc_meter.add(o_cpu, t_cpu)
                #Changed by Arthur#
                # *** WARNING: confusion matrix is commented for the ONERD because it doesn't have any label
                # confusion_matrix.count_predicted_batch(tvec_cpu, np.argmax(o_cpu,1))

        # NOTE(review): since the count above is disabled, all confusion-matrix
        # derived values below come from an empty matrix -- confirm intent.
        perclsiou = confusion_matrix.get_intersection_union_per_class()
        per_class_iou = {
            name: perclsiou[c]
            for c, name in dbinfo['inv_class_map'].items()
        }

        return (meter_value(acc_meter),
                confusion_matrix.get_overall_accuracy(),
                confusion_matrix.get_average_intersection_union(),
                per_class_iou, predictions,
                confusion_matrix.get_mean_class_accuracy(),
                confusion_matrix.confusion_matrix)
Beispiel #2
0
    def train():
        """ Trains for one epoch.

        Returns:
            (accuracy, mean loss, overall accuracy, average IoU) accumulated
            over all batches of the epoch.
        """
        model.train()

        loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=spg.eccpc_collate, num_workers=args.nworkers, shuffle=True, drop_last=True)
        # progress bar only when not running at DEBUG verbosity
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG: loader = tqdm(loader, ncols=100)

        loss_meter = tnt.meter.AverageValueMeter()
        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(dbinfo['classes'])
        t0 = time.time()

        # iterate over dataset in batches
        for bidx, (targets, GIs, clouds_data) in enumerate(loader):
            t_loader = 1000*(time.time()-t0)

            model.ecc.set_info(GIs, args.cuda)
            # column 0 is used as the class-index target for cross-entropy;
            # columns 2: feed the per-class confusion-matrix counts
            label_mode_cpu, label_vec_cpu, segm_size_cpu = targets[:,0], targets[:,2:], targets[:,1:].sum(1)
            if args.cuda:
                label_mode, label_vec, segm_size = label_mode_cpu.cuda(), label_vec_cpu.float().cuda(), segm_size_cpu.float().cuda()

            else:
                label_mode, label_vec, segm_size = label_mode_cpu, label_vec_cpu.float(), segm_size_cpu.float()

            optimizer.zero_grad()
            t0 = time.time()

            embeddings = ptnCloudEmbedder.run(model, *clouds_data)
            outputs = model.ecc(embeddings)

            loss = nn.functional.cross_entropy(outputs, Variable(label_mode))
            loss.backward()
            ptnCloudEmbedder.bw_hook()

            # element-wise in-place gradient clipping
            if args.grad_clip>0:
                for p in model.parameters():
                    p.grad.data.clamp_(-args.grad_clip, args.grad_clip)
            optimizer.step()

            t_trainer = 1000*(time.time()-t0)
            #loss_meter.add(loss.data[0]) # pytorch 0.3
            loss_meter.add(loss.item()) # pytorch 0.4

            o_cpu, t_cpu, tvec_cpu = filter_valid(outputs.data.cpu().numpy(), label_mode_cpu.numpy(), label_vec_cpu.numpy())
            acc_meter.add(o_cpu, t_cpu)
            confusion_matrix.count_predicted_batch(tvec_cpu, np.argmax(o_cpu,1))

            # BUGFIX: this line still used loss.data[0], which raises
            # IndexError on pytorch >= 0.4 (loss is a 0-dim tensor); use
            # loss.item() as already done for loss_meter above.
            logging.debug('Batch loss %f, Loader time %f ms, Trainer time %f ms.', loss.item(), t_loader, t_trainer)
            t0 = time.time()

        return acc_meter.value()[0], loss_meter.value()[0], confusion_matrix.get_overall_accuracy(), confusion_matrix.get_average_intersection_union()
Beispiel #3
0
 def eval(epoch):
     """ Evaluates the model on the test set.

     After args.fine_seg_epo epochs, superpoints whose argmax prediction is
     class 5 (treated as a "mixed" class, per the variable names -- confirm)
     are re-segmented point-wise by model.FineModule and scored separately.

     :param epoch: current epoch number, gates the fine-segmentation branch
     :return: (coarse acc, fine acc, overall acc, avg IoU, mean class acc,
               confusion matrix, elapsed seconds as str)
     """
     start = timer()
     model.eval()

     loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, collate_fn=spg.eccpc_collate, num_workers=args.nworkers)
     if logging.getLogger().getEffectiveLevel() > logging.DEBUG: loader = tqdm(loader, ncols=100)

     acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
     acc_meter2 = tnt.meter.ClassErrorMeter(accuracy=True)
     confusion_matrix = metrics.ConfusionMatrix(5)

     # iterate over dataset in batches
     for bidx, (targets, GIs, clouds_data) in enumerate(loader):
         model.ecc.set_info(GIs, args.cuda)
         label_mode_cpu, label_vec_cpu, segm_size_cpu = targets[:,0], targets[:,2:], targets[:,1:].sum(1).float()

         embeddings = ptnCloudEmbedder.run(model, *clouds_data[0:4])
         outputs = model.ecc(embeddings)
         # indices of superpoints predicted as class 5 vs. all the others
         mixed_node=np.where(np.argmax(np.array(outputs.data),axis=1)==5)
         Nomixed_node=np.where(np.argmax(np.array(outputs.data),axis=1)!=5)
         if epoch>args.fine_seg_epo and len(mixed_node[0])>0:
             for i in range(len(mixed_node[0])):
                 fname='/home/data2/qc/large_scalepcss/learning/datasets/tanker/parsed/'+clouds_data[0][mixed_node[0][i]].split('.')[0] + '.h5'
                 fname_G=clouds_data[0][mixed_node[0][i]].split('.')[1]
                 hf = h5py.File(fname,'r')
                 P = hf[fname_G]
                 if np.shape(P)[0]>args.fine_seg_point_num:
                     tempt1 = np.array(P[:,:13]).T
                     tempt = torch.from_numpy(tempt1.reshape((1,np.shape(tempt1)[0],np.shape(tempt1)[1])))
                     label_modebran = torch.from_numpy(P[:,-1]).long()
                     if args.cuda:
                         tempt = tempt.cuda()
                         label_modebran = label_modebran.cuda()
                     outputsbran = model.FineModule(Variable(tempt.float(), requires_grad=model.training, volatile=not model.training))
                     _outputsbran,_label_modebran=filter_valid(outputsbran.data.cpu().numpy(),label_modebran.cpu().numpy())
                     acc_meter2.add(_outputsbran,_label_modebran)
                     confusion_matrix.count_predicted_batch_branch(_outputsbran, _label_modebran)
                 # BUGFIX: the h5 file was never closed, leaking a handle per
                 # mixed node per batch
                 hf.close()

         o_cpu, t_cpu, tvec_cpu = filter_valid(outputs.data.cpu().numpy(), label_mode_cpu.numpy(), label_vec_cpu.numpy())

         if t_cpu.size > 0:
             # BUGFIX: was `epoch>10 & len(mixed_node[0])>0`; `&` binds tighter
             # than `>`, so it evaluated `epoch > (10 & len(...)) > 0` -- not
             # the intended conjunction.
             # NOTE(review): 10 is hard-coded here while the branch above uses
             # args.fine_seg_epo -- confirm whether they should match.
             if epoch > 10 and len(mixed_node[0]) > 0:
                 acc_meter.add(o_cpu[Nomixed_node[0],:], t_cpu[[Nomixed_node[0]]])
             else:
                 acc_meter.add(o_cpu, t_cpu)
             confusion_matrix.count_predicted_batch(tvec_cpu[[Nomixed_node[0]]], np.argmax(o_cpu[Nomixed_node[0],:],1))
     end = timer()

     return meter_value(acc_meter), meter_value(acc_meter2),confusion_matrix.get_overall_accuracy(), confusion_matrix.get_average_intersection_union(), confusion_matrix.get_mean_class_accuracy(),confusion_matrix.get_confusion_matrix(), str(end - start)
Beispiel #4
0
def learningPhase(args, model,loader, w, optimizer, metrics):
    """Train `model` for one epoch over `loader`.

    :param args: namespace with `cuda`, `c` (class list) and `nodata` (ignore label)
    :param model: network to train (switched to train mode)
    :param loader: batch iterator yielding (images, ground truth, file name)
    :param w: per-class weights for the cross-entropy loss
    :param optimizer: optimizer stepped once per batch
    :param metrics: bool; when True the confusion matrix is filled
    :return: (confusion matrix, mean batch loss)
    """
    model.train() # model in training mode

    #initialize metric container
    loss_meter = tnt.meter.AverageValueMeter()
    cm = met.ConfusionMatrix(len(args.c), args.c,args.nodata)

    #loop through batch given by data reader, unfold tuple and drop file name (the last param)
    for batch_ndx, (imgs,gt,__) in enumerate(tqdm(loader)):
        optimizer.zero_grad() #put gradient to zero

        #if GPU, load batch on it
        if args.cuda :
            batch_tensor = imgs.cuda()
        else :
            batch_tensor=imgs

        #generate prediction
        prediction = model(batch_tensor)
        prediction = prediction.cpu() #switch it on CPU for calculation

        #get & save batch loss and do backward
        loss = nn.functional.cross_entropy(prediction,gt,weight=w, ignore_index=args.nodata)
        loss.backward()
        loss_meter.add(loss.item())

        # BUGFIX: clamp() is out-of-place and its result was discarded, so
        # gradients were never actually clipped; clamp_() clips in place.
        for p in model.parameters():
            p.grad.data.clamp_(-1,1)

        #actualize weight
        optimizer.step()

        #calculate metrics if epoch_number % args.mem=0
        if metrics:
            for i in range(prediction.size()[0]):
                pred=prediction[i].argmax(0).squeeze()
                cm.add_batch(gt[i].numpy(), pred.numpy())

        #free memory
        del imgs
        del gt
        del batch_tensor
        del prediction

    return cm, loss_meter.value()[0]
Beispiel #5
0
    def eval():
        """ Evaluated model on test set """
        model.eval()

        loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=1,
            collate_fn=spg.eccpc_collate_test,
            num_workers=args.nworkers, drop_last=False)

        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(
            dbinfo['classes'], ignore_label=args.metric_ignore_class)
        test_time = time.time()

        # iterate over dataset in batches
        for bidx, (targets, GIs, clouds_data, clouds_orig, edges_for_ext,
                   fnames) in enumerate(loader):
            model.ecc.set_info(GIs, args.cuda)
            # unpack the target columns one assignment at a time
            weak_label_mode_cpu = targets[:, 0]
            label_mode_cpu = targets[:, 1]
            label_vec_cpu = targets[:, 2:]
            segm_size_cpu = targets[:, 2:].sum(1)

            embeddings = ptnCloudEmbedder.run(model, *clouds_data)
            outputs, _ = model.ecc(embeddings)

            o_cpu, t_cpu, tvec_cpu = filter_valid(outputs.data.cpu().numpy(),
                                                  label_mode_cpu.numpy(),
                                                  label_vec_cpu.numpy())
            if t_cpu.size > 0:
                acc_meter.add(o_cpu, t_cpu)
                confusion_matrix.count_predicted_batch(tvec_cpu,
                                                       np.argmax(o_cpu, 1))
                # per-cloud progress report with running metrics
                print('{}/{}-{} outputs: {}, acc: {}, macc: {}'.format(
                    bidx, len(loader), fnames[0], outputs.shape,
                    confusion_matrix.get_overall_accuracy(),
                    confusion_matrix.get_mean_class_accuracy()))

        return (meter_value(acc_meter),
                confusion_matrix.get_overall_accuracy(),
                confusion_matrix.get_mean_class_accuracy(),
                confusion_matrix.get_average_intersection_union(),
                time.time() - test_time)
Beispiel #6
0
def evaluate(test_dataset):
    """Run the model over test_dataset and return its classification report."""
    confusion_matrix = metrics.ConfusionMatrix(model_config.n_classes)

    for batch, (speaker, utterance, emotion) in enumerate(test_dataset):
        # drop the singleton dim: (batch_size, dial_len)
        speaker = tf.squeeze(speaker)
        emotion = tf.squeeze(emotion)

        # 1 where the utterance token is non-padding, else 0
        mask = tf.cast(tf.math.not_equal(utterance, 0), dtype=tf.float32)

        utterance = encode_utterance(utterance)

        # training=False; result is (batch_size, dial_len, n_classes)
        predictions = model(utterance, False, mask)

        # turns whose token mask sums to zero are padding turns -> weight 0
        sample_weight = tf.cast(
            tf.math.not_equal(tf.math.reduce_sum(mask, axis=2), 0),
            dtype=tf.float32)
        pred_emotion = tf.math.argmax(predictions, axis=2)

        confusion_matrix(emotion, pred_emotion, sample_weight=sample_weight)

    return metrics.classification_report(confusion_matrix)
Beispiel #7
0
    def eval():
        """ Evaluated model on test set """
        model.eval()

        loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=1,
            collate_fn=spg.eccpc_collate, num_workers=args.nworkers)
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(dbinfo['classes'])

        # iterate over dataset in batches
        for bidx, (targets, GIs, clouds_data) in enumerate(loader):
            model.ecc.set_info(GIs, args.cuda)
            # unpack the target columns one assignment at a time
            label_mode_cpu = targets[:, 0]
            label_vec_cpu = targets[:, 2:]
            segm_size_cpu = targets[:, 1:].sum(1).float()

            embeddings = ptnCloudEmbedder.run(model, *clouds_data)
            outputs = model.ecc(embeddings)

            o_cpu, t_cpu, tvec_cpu = filter_valid(
                outputs.data.cpu().numpy(), label_mode_cpu.numpy(),
                label_vec_cpu.numpy())
            if t_cpu.size > 0:
                acc_meter.add(o_cpu, t_cpu)
                confusion_matrix.count_predicted_batch(
                    tvec_cpu, np.argmax(o_cpu, 1))

        return (meter_value(acc_meter),
                confusion_matrix.get_overall_accuracy(),
                confusion_matrix.get_average_intersection_union(),
                confusion_matrix.get_mean_class_accuracy())
Beispiel #8
0
def evalutionPhase(args, model,loader,w):
    """Evaluate `model` for one epoch over `loader` (no weight updates).

    :param args: namespace with `cuda`, `c` (class list) and `nodata` (ignore label)
    :param model: network to evaluate (switched to eval mode)
    :param loader: batch iterator yielding (images, ground truth, file name)
    :param w: per-class weights for the cross-entropy loss
    :return: (confusion matrix, mean batch loss)
    """
    model.eval() # model in evaluation mode (original comment wrongly said "training mode")

    #initialize metric container
    loss_meter = tnt.meter.AverageValueMeter()
    cm = met.ConfusionMatrix(len(args.c), args.c,args.nodata)

    # evaluation needs no gradients: torch.no_grad() skips building the
    # autograd graph, saving memory and time without changing the results
    with torch.no_grad():
        #loop through batch given by data reader, unfold tuple and drop file name (the last param)
        for batch_ndx, (imgs,gt,__) in enumerate(tqdm(loader)):

            #if GPU, load batch on it
            if args.cuda :
                batch_tensor = imgs.cuda()
            else :
                batch_tensor=imgs

            #generate prediction
            prediction = model(batch_tensor)
            prediction = prediction.cpu() #switch it on CPU for calculation

            #get & save batch loss
            loss = nn.functional.cross_entropy(prediction,gt,weight=w, ignore_index=args.nodata)
            loss_meter.add(loss.item())

            #calculate metrics
            for i in range(prediction.size()[0]):
                pred=prediction[i].argmax(0).squeeze()
                cm.add_batch(gt[i].numpy(), pred.numpy())

            #free memory
            del imgs
            del gt
            del batch_tensor
            del prediction

    return cm, loss_meter.value()[0]
Beispiel #9
0
def train(config, model_dir, writer):
    """
    Train and evaluate a part segmentation model, checkpointing the best
    state by test mIoU and early-stopping when it stops improving.

    NOTE(review): the original docstring said "Shapenet", but the loaders
    are built by ``ds.get_s3dis_dataloaders`` -- presumably S3DIS; confirm.
    The training parameters are specified in the config file
    (for more details see config/config.py).

    :param config: Dictionary with configuration paramters
    :param model_dir: Checkpoint save directory
    :param writer: Tensorboard SummaryWritter object
    """
    phases = ['train', 'test']
    # phases = ['test', 'train']
    datasets, dataloaders, num_classes = ds.get_s3dis_dataloaders(
        root_dir=config['root_dir'],
        phases=phases,
        batch_size=config['batch_size'],
        category=config['category'],
        augment=config['augment'])

    # add number of classes to config
    config['num_classes'] = num_classes

    # we now set GPU training parameters
    # if the given index is not available then we use index 0
    # also when using multi gpu we should specify index 0
    if config['gpu_index'] + 1 > torch.cuda.device_count(
    ) or config['multi_gpu']:
        config['gpu_index'] = 0
    logging.info('Using GPU cuda:{}, script PID {}'.format(
        config['gpu_index'], os.getpid()))
    if config['multi_gpu']:
        logging.info('Training on multi-GPU mode with {} devices'.format(
            torch.cuda.device_count()))
    device = torch.device('cuda:{}'.format(config['gpu_index']))

    # we load the model defined in the config file
    # todo: now the code is IO bound. No matter which network we use, it is similar speed.
    model = res.sfc_resnet_8(in_channels=config['in_channels'],
                             num_classes=config['num_classes'],
                             kernel_size=config['kernel_size'],
                             channels=config['channels'],
                             use_tnet=config['use_tnet'],
                             n_points=config['n_points']).to(device)
    logging.info('the number of params is {: .2f} M'.format(
        utl.count_model_params(model) / (1e6)))
    # if use multi_gpu then convert the model to DataParallel
    if config['multi_gpu']:
        model = nn.DataParallel(model)

    # create optimizer, loss function, and lr scheduler
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['lr'],
                                 weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().to(device)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=config['lr_decay'],
        patience=config['lr_patience'],
        verbose=True)  # verbose. recommended to use.

    logging.info('Config {}'.format(config))
    logging.info(
        'TB logs and checkpoint will be saved in {}'.format(model_dir))

    utl.dump_config_details_to_tensorboard(writer, config)

    # create metric trackers: we track loss, class accuracy, and overall accuracy
    # (one loss meter + one confusion matrix per phase)
    trackers = {
        x: {
            'loss': metrics.LossMean(),
            'cm':
            metrics.ConfusionMatrix(num_classes=int(config['num_classes']))
        }
        for x in phases
    }

    # create initial best state object; losses start at +inf and mIoU at 0
    # so the first completed epoch always becomes the first "best" state
    best_state = {
        'config': config,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict() if scheduler else None,
        'train_loss': float('inf'),
        'test_loss': float('inf'),
        'train_mIoU': 0.0,
        'test_mIoU': 0.0,
        'convergence_epoch': 0,
        'num_epochs_since_best_acc': 0
    }

    # now we train!
    for epoch in range(config['max_epochs']):
        for phase in phases:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            # reset metrics at the start of every epoch/phase
            trackers[phase]['loss'].reset()
            trackers[phase]['cm'].reset()

            # use tqdm to show progress and print message
            # this is for loadding our new data format
            for step_number, batchdata in enumerate(
                    tqdm(dataloaders[phase],
                         desc='[{}/{}] {} '.format(epoch + 1,
                                                   config['max_epochs'],
                                                   phase))):
                # concatenate positions and features, then move channels to
                # dim 1 -- assumes (batch, n_points, channels) inputs; confirm
                data = torch.cat((batchdata.pos, batchdata.x),
                                 dim=2).transpose(1, 2).to(device,
                                                           dtype=torch.float)
                label = batchdata.y.to(device, dtype=torch.long)
                # should we release the memory?
                # todo: add data augmentation

                # compute gradients on train only
                with torch.set_grad_enabled(phase == 'train'):
                    out = model(data)
                    loss = criterion(out, label)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # now we update metrics
                trackers[phase]['loss'].update(average_loss=loss,
                                               batch_size=data.size(0))
                trackers[phase]['cm'].update(y_true=label, y_logits=out)

            # compare with my metrics
            epoch_loss = trackers[phase]['loss'].result()
            epoch_iou = trackers[phase]['cm'].result(metric='iou').mean()

            # we update our learning rate scheduler if loss does not improve
            if phase == 'train' and scheduler:
                scheduler.step(epoch_loss)
                writer.add_scalar('params/lr', optimizer.param_groups[0]['lr'],
                                  epoch + 1)

            # log current results and dump in Tensorboard
            logging.info(
                '[{}/{}] {} Loss: {:.2e}. mIOU {:.4f} \t best testing mIOU {:.4f}'
                .format(epoch + 1, config['max_epochs'], phase, epoch_loss,
                        epoch_iou, best_state['test_mIoU']))

            writer.add_scalar('loss/epoch_{}'.format(phase), epoch_loss,
                              epoch + 1)
            writer.add_scalar('mIoU/epoch_{}'.format(phase), epoch_iou,
                              epoch + 1)

        # after each epoch we update best state values as needed
        # first we save our state when we get better test accuracy
        # NOTE(review): the strict `>` below means an epoch that only *ties*
        # the best mIoU still replaces the best state and resets the early
        # stopping counter -- confirm this is intended
        test_iou = trackers['test']['cm'].result(metric='iou').mean()
        if best_state['test_mIoU'] > test_iou:
            best_state['num_epochs_since_best_acc'] += 1
        else:
            logging.info(
                'Got a new best model with iou {:.4f}'.format(test_iou))
            best_state = {
                'config': config,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict() if scheduler else None,
                'train_loss': trackers['train']['loss'].result(),
                'test_loss': trackers['test']['loss'].result(),
                'train_mIoU':
                trackers['train']['cm'].result(metric='iou').mean(),
                'test_mIoU': test_iou,
                'convergence_epoch': epoch + 1,
                'num_epochs_since_best_acc': 0
            }

            file_name = os.path.join(model_dir, 'best_state.pth')
            torch.save(best_state, file_name)
            logging.info('saved checkpoint in {}'.format(file_name))

        # we check for early stopping when we have trained a min number of epochs
        if epoch >= config['min_epochs'] and best_state[
                'num_epochs_since_best_acc'] >= config['early_stopping']:
            logging.info('Accuracy did not improve for {} iterations!'.format(
                config['early_stopping']))
            logging.info('[Early stopping]')
            break

    utl.dump_best_model_metrics_to_tensorboard(writer, phases, best_state)

    logging.info('************************** DONE **************************')
    # pred.shape == (batch_size, dial_len, n_classes)
    # mask.shape == (batch_size, dial_len, sent_len)

    sample_weight = tf.gather(train_config.loss_weights,
                              real)  # (batch_size, dial_len)
    loss = loss_object(real, pred,
                       sample_weight=sample_weight)  # (batch_size, dial_len)
    mask = tf.cast(tf.math.not_equal(tf.math.reduce_sum(mask, -1), 0),
                   dtype=loss.dtype)  # (batch_size, dial_len)
    loss *= mask
    return tf.math.reduce_sum(loss) / tf.math.reduce_sum(mask)


# running mean of the training loss, reported per epoch
train_loss = tf.keras.metrics.Mean(name='train_loss')
# train_accuracy = tf.keras.metrics.Accuracy(name='train_accuracy')
# confusion matrix accumulated over training-step predictions
train_confusion_matrix = metrics.ConfusionMatrix(model_config.n_classes)


def train_step(speaker, utterance, emotion):
    # speaker.shape == (batch_size, 1, dial_len)
    # emotion.shape == (batch_size, 1, dial_len)
    # utterance.shape == (batch_size, dial_len, sent_len)

    speaker = tf.squeeze(speaker)  # (batch_size, dial_len)
    emotion = tf.squeeze(emotion)  # (batch_size, dial_len)

    mask = tf.cast(tf.math.not_equal(utterance, 0), dtype=tf.float32)

    with tf.GradientTape() as tape:
        predictions = model(utterance, True,
                            mask)  # (batch_size, dial_len, n_classes)
Beispiel #11
0
if __name__ == '__main__':
    # load the run configuration and the distributed-training ids from the
    # environment (PaddleSeg-style cfg object)
    cfg.update_from_file("testunet.yaml")

    cfg.TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", 0))
    cfg.NUM_TRAINERS = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))

    cfg.check_and_infer()
    # print(pprint.pformat(cfg))

    dataset = myDataset.SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                                   shuffle=True,
                                   mode=ModelPhase.TRAIN,
                                   data_dir=cfg.DATASET.DATA_DIR)

    conf_mat = metrics.ConfusionMatrix(cfg.DATASET.NUM_CLASSES,
                                       streaming=False)
    i = 0
    # inspect metrics over the first 22 samples only (see break below)
    for img, grt, ignore, imgssrc in dataset.generator():

        # NOTE(review): the ground truth `grt` is passed as BOTH prediction
        # and label, so iou/acc should be ~1.0 wherever `ignore` does not
        # mask -- presumably a sanity check of the metric and the ignore
        # mask rather than a real evaluation; confirm intent.
        conf_mat.calculate(grt[np.newaxis, :, :,
                               np.newaxis], grt[np.newaxis, :, :, np.newaxis],
                           ignore[np.newaxis, :, :, np.newaxis])
        _, iou = conf_mat.mean_iou()
        _, acc = conf_mat.accuracy()
        print(iou, acc)
        if i > 20:
            break
        i += 1

#%%
# notebook-cell residue: evaluates (and in a notebook, displays) the shape of
# the last `ignore` array left over from the loop above; no effect as a script
ignore.shape
Beispiel #12
0
def train(dataset, model_dir, writer):
    """Train and evaluate a point-cloud segmentation model (unet or deeplab).

    Runs a train and a test phase every epoch, tracks loss / overall accuracy
    / IoU through a confusion matrix, checkpoints the best state by test
    mIoU, steps an LR-on-plateau scheduler on the test loss, and applies
    early stopping after a configurable number of non-improving epochs.

    :param dataset: dataset object exposing .config and .get_dataloaders()
    :param model_dir: directory where the best checkpoint is written
    :param writer: Tensorboard SummaryWriter
    """
    dataloaders = dataset.get_dataloaders()

    # we now set GPU training parameters
    # if the given index is not available then we use index 0
    # also when using multi gpu we should specify index 0
    if dataset.config.gpu_index + 1 > torch.cuda.device_count(
    ) or dataset.config.multi_gpu:
        dataset.config.gpu_index = 0

    logging.info('Using GPU cuda:{}, script PID {}'.format(
        dataset.config.gpu_index, os.getpid()))
    if dataset.config.multi_gpu:
        logging.info('Training on multi-GPU mode with {} devices'.format(
            torch.cuda.device_count()))
    device = torch.device('cuda:{}'.format(dataset.config.gpu_index))

    # select the architecture: anything other than 'unet' falls back to deeplab
    if dataset.config.model == 'unet':
        model = unet(input_size=dataset.config.num_feats,
                     num_classes=dataset.config.num_classes,
                     kernel_size=dataset.config.kernel_size).to(device)
    else:
        model = deeplab(backbone=dataset.config.backbone,
                        input_size=dataset.config.num_feats,
                        num_classes=dataset.config.num_classes,
                        kernel_size=dataset.config.kernel_size,
                        sigma=dataset.config.sigma).to(device)

    # if using multi_gpu then convert the model to DataParallel
    if dataset.config.multi_gpu:
        model = nn.DataParallel(model)

    # create optimizer, loss function, and lr scheduler
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=dataset.config.lr,
                                 weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=dataset.config.lr_decay,
        patience=dataset.config.lr_patience,
        verbose=True)

    logging.info('Config {}'.format(dataset.config))
    logging.info(
        'TB logs and checkpoint will be saved in {}'.format(model_dir))

    phases = ['train', 'test']

    # create metric trackers: we track loss, class accuracy, and overall accuracy
    # ('acc' and 'iou' entries are created here but only 'loss' and 'cm' are
    # updated below)
    trackers = {
        x: {
            'loss':
            metrics.LossMean(),
            'acc':
            metrics.Accuracy(),
            'iou':
            None,
            'cm':
            metrics.ConfusionMatrix(
                num_classes=int(dataset.config.num_classes))
        }
        for x in phases
    }

    # create initial best state object
    best_state = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict() if scheduler else None,
        'train_loss': float('inf'),
        'test_loss': float('inf'),
        'train_acc': 0.0,
        'test_acc': 0.0,
        'train_mIoU': 0.0,
        'test_mIoU': 0.0,
        'convergence_epoch': 0,
        'num_epochs_since_best_acc': 0
    }

    # now we train!
    for epoch in range(dataset.config.max_epochs):
        for phase in phases:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            # reset metrics
            trackers[phase]['loss'].reset()
            trackers[phase]['cm'].reset()

            for step_number, inputs in enumerate(
                    tqdm(dataloaders[phase],
                         desc='[{}/{}] {} '.format(epoch + 1,
                                                   dataset.config.max_epochs,
                                                   phase))):
                # (batch, feats, points) layout expected by the model
                data = inputs[0].to(device, dtype=torch.float).permute(0, 2, 1)
                coords = inputs[1].to(device,
                                      dtype=torch.float).permute(0, 2, 1)
                label = inputs[2].to(device, dtype=torch.long)

                # compute gradients on train only
                with torch.set_grad_enabled(phase == 'train'):
                    out = model(data, coords)
                    loss = criterion(out, label)
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # now we update metrics
                trackers[phase]['loss'].update(average_loss=loss,
                                               batch_size=data.size(0))
                trackers[phase]['cm'].update(y_true=label, y_logits=out)

            logging.info('Computing accuracy...')

            # epoch-level results derived from the accumulated confusion matrix
            epoch_loss = trackers[phase]['loss'].result()
            epoch_overall_acc = trackers[phase]['cm'].result(metric='accuracy')
            epoch_iou = trackers[phase]['cm'].result(metric='iou')
            epoch_miou = epoch_iou.mean()

            logging.info(
                '--------------------------------------------------------------------------------'
            )
            logging.info(
                '[{}/{}] {} Loss: {:.2e}. Overall Acc: {:.4f}. mIoU {:.4f}'.
                format(epoch + 1, dataset.config.max_epochs, phase, epoch_loss,
                       epoch_overall_acc, epoch_miou))
            iou_per_class_str = ' '.join(
                ['{:.4f}'.format(s) for s in epoch_iou])
            logging.info('IoU per class: {}'.format(iou_per_class_str))
            logging.info(
                '--------------------------------------------------------------------------------'
            )

            # we update our learning rate scheduler if loss does not improve
            if phase == 'test' and scheduler:
                scheduler.step(epoch_loss)
                writer.add_scalar('params/lr', optimizer.param_groups[0]['lr'],
                                  epoch + 1)

            writer.add_scalar('loss/epoch_{}'.format(phase), epoch_loss,
                              epoch + 1)
            writer.add_scalar('miou/epoch_{}'.format(phase), epoch_miou,
                              epoch + 1)
            writer.add_scalar('acc_all/epoch_{}'.format(phase),
                              epoch_overall_acc, epoch + 1)

        # after each epoch we update best state values as needed
        # first we save our state when we get better test mIoU
        # (an epoch that merely ties the best mIoU is also saved as "new best")
        if best_state['test_mIoU'] > trackers['test']['cm'].result(
                metric='iou').mean():
            best_state['num_epochs_since_best_acc'] += 1
        else:
            logging.info('Got a new best model with mIoU {:.4f}'.format(
                trackers['test']['cm'].result(metric='iou').mean()))
            best_state = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict() if scheduler else None,
                'train_loss': trackers['train']['loss'].result(),
                'test_loss': trackers['test']['loss'].result(),
                'train_acc': trackers['train']['cm'].result(metric='accuracy'),
                'test_acc': trackers['test']['cm'].result(metric='accuracy'),
                'train_mIoU':
                trackers['train']['cm'].result(metric='iou').mean(),
                'test_mIoU':
                trackers['test']['cm'].result(metric='iou').mean(),
                'convergence_epoch': epoch + 1,
                'num_epochs_since_best_acc': 0
            }

            file_name = os.path.join(model_dir, 'best_state.pth')
            torch.save(best_state, file_name)
            logging.info('saved checkpoint in {}'.format(file_name))

        # we check for early stopping when we have trained a min number of epochs
        if epoch >= dataset.config.min_epochs and best_state[
                'num_epochs_since_best_acc'] >= dataset.config.early_stopping:
            logging.info('Accuracy did not improve for {} iterations!'.format(
                dataset.config.early_stopping))
            logging.info('[Early stopping]')
            break

    utl.dump_best_model_metrics_to_tensorboard(writer, phases, best_state)

    logging.info('************************** DONE **************************')
# Beispiel #13
# 0
    def eval_final():
        # Evaluates the model on the test set in an extended way: averages
        # estimates over multiple point-cloud samplings, routes superpoints
        # predicted as the "mixed" class (index 5) through the Fine-grained
        # Module, and stores per-file predictions.
        start = timer()
        """ Evaluated model on test set in an extended way: computes estimates over multiple samples of point clouds and stores predictions """
        model.eval()
        acc_meter2 = tnt.meter.ClassErrorMeter(accuracy=True)  # fine-grained (point-wise) accuracy
        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)  # coarse (superpoint) accuracy
        confusion_matrix = metrics.ConfusionMatrix(5)  # NOTE(review): class count hard-coded to 5 -- confirm
        collected, predictions = defaultdict(list), {}
    
        # collect predictions over multiple sampling seeds
        for ss in range(args.test_multisamp_n):
            test_dataset_ss = create_dataset(args, ss)[1]
            loader = torch.utils.data.DataLoader(test_dataset_ss, batch_size=1, collate_fn=spg.eccpc_collate, num_workers=args.nworkers)
            if logging.getLogger().getEffectiveLevel() > logging.DEBUG: loader = tqdm(loader, ncols=100)
    
            # iterate over dataset in batches
            for bidx, (targets, GIs, clouds_data) in enumerate(loader):
                
                model.ecc.set_info(GIs, args.cuda)
                # per-superpoint targets: majority label, per-class histogram, point count
                label_mode_cpu, label_vec_cpu, segm_size_cpu = targets[:,0], targets[:,2:], targets[:,1:].sum(1).float()
                fname = clouds_data[0][0][:clouds_data[0][0].rfind('.')]
                embeddings = ptnCloudEmbedder.run(model, *clouds_data[0:4])
                outputs = model.ecc(embeddings)
                
                # C: superpoints predicted as the mixed class (5); Cother: the rest
                C=np.where(np.argmax(np.array(outputs.data),axis=1)==5)
                Cother=np.where(np.argmax(np.array(outputs.data),axis=1)!=5)       
                
                # refine each "mixed" superpoint point-wise with the Fine-grained Module
                for i in range(len(C[0])):
                    fname_G=clouds_data[0][C[0][i]].split('.')[1]
                    # NOTE(review): absolute dataset path is hard-coded -- parameterize before reuse
                    hf = h5py.File('/home/data2/qc/large_scalepcss/learning/datasets/tanker/parsed/'+fname + '.h5','r')
                    P = hf[fname_G]
                    if np.shape(P)[0]>2000:
                        # columns 0..12 are point features; the last column is the label
                        tempt1 = np.array(P[:,:13]).T
                        tempt = torch.from_numpy(tempt1.reshape((1,np.shape(tempt1)[0],np.shape(tempt1)[1])))
                        label_modebran = torch.from_numpy(P[:,-1]).long()
                        if args.cuda:
                            tempt = tempt.cuda()
                            label_modebran = label_modebran.cuda()
                        # `volatile` implies pytorch <= 0.3 semantics
                        outputsbran = model.FineModule(Variable(tempt.float(), requires_grad=model.training, volatile=not model.training))
                        _outputsbran,_label_modebran=filter_valid(outputsbran.data.cpu().numpy(),label_modebran.cpu().numpy())
                        acc_meter2.add(_outputsbran,_label_modebran)
                        confusion_matrix.count_predicted_batch_branch(_outputsbran, _label_modebran)
  
                # keep only non-mixed superpoints for coarse aggregation when any were refined
                if len(C[0])>0:
                    collected[fname].append((outputs.data.cpu().numpy()[Cother[0],:], label_mode_cpu.numpy()[[Cother[0]]], label_vec_cpu.numpy()[Cother[0],:]))
                else:
                    collected[fname].append((outputs.data.cpu().numpy(), label_mode_cpu.numpy(), label_vec_cpu.numpy()))

        # aggregate predictions (mean)
        for fname, lst in collected.items():
            o_cpu, t_cpu, tvec_cpu = list(zip(*lst))
            if args.test_multisamp_n > 1:
                o_cpu = np.mean(np.stack(o_cpu,0),0)
            else:
                o_cpu = o_cpu[0]
            t_cpu, tvec_cpu = t_cpu[0], tvec_cpu[0]
            predictions[fname] = np.argmax(o_cpu,1)
            o_cpu, t_cpu, tvec_cpu = filter_valid(o_cpu, t_cpu, tvec_cpu)
            if t_cpu.size > 0:
                acc_meter.add(o_cpu, t_cpu)
                confusion_matrix.count_predicted_batch(tvec_cpu, np.argmax(o_cpu,1))
    
        per_class_iou = {}
        # TODO: restore per-class IoU computation (original note "xuyaohuifu" = needs restoring):
        # perclsiou = confusion_matrix.get_intersection_union_per_class()
        # for c, name in dbinfo['inv_class_map'].items():
        #     per_class_iou[name] = perclsiou[c]
        end = timer()
        return meter_value(acc_meter),meter_value(acc_meter2), confusion_matrix.get_overall_accuracy(), confusion_matrix.get_average_intersection_union(), per_class_iou, predictions,  confusion_matrix.get_mean_class_accuracy(), confusion_matrix.get_confusion_matrix(), str(end - start)
# Beispiel #14
# 0
    def train(epoch):
        """Train the two-stage model for one epoch.

        The Coarse-grained Module (ECC over superpoints) is always trained;
        once ``epoch > args.fine_seg_epo``, superpoints predicted as the
        "mixed" class (index 5) are additionally refined point-wise by the
        Fine-grained Module and excluded from the coarse loss.

        Returns (coarse acc, fine acc, coarse loss, fine loss,
        overall accuracy, average IoU).
        """
        model.train()
    
        loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=spg.eccpc_collate, num_workers=args.nworkers, shuffle=True, drop_last=True)
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG: loader = tqdm(loader, ncols=100)
    
        loss_meter = tnt.meter.AverageValueMeter()  # corresponding to Coarse-grained Module
        loss2_meter = tnt.meter.AverageValueMeter()  # corresponding to Fine-grained Module
        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        acc_meter2 = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(5)  # NOTE(review): class count hard-coded to 5 -- confirm
        t0 = time.time()

        # iterate over dataset in batches
        for bidx, (targets, GIs, clouds_data) in enumerate(loader):
            t_loader = 1000*(time.time()-t0)
            model.ecc.set_info(GIs, args.cuda)  # pass graph/edge information to the ECC module
            # per-superpoint targets: majority label, per-class histogram, point count
            label_mode_cpu, label_vec_cpu, segm_size_cpu = targets[:,0], targets[:,2:], targets[:,1:].sum(1)
            if args.cuda:
                label_mode, label_vec, segm_size = label_mode_cpu.cuda(), label_vec_cpu.float().cuda(), segm_size_cpu.float().cuda()
    
            else:
                label_mode, label_vec, segm_size = label_mode_cpu, label_vec_cpu.float(), segm_size_cpu.float()
    
            optimizer.zero_grad()
            t0 = time.time()
    
            embeddings = ptnCloudEmbedder.run(model, *clouds_data[0:4])
            outputs = model.ecc(embeddings)
            loss = nn.functional.cross_entropy(outputs, Variable(label_mode))

            """ Node determination """
            mixed_node=np.where(np.argmax(np.array(outputs.data),axis=1)==5)  # superpoints predicted as the mixed class
            Nomixed_node=np.where(np.argmax(np.array(outputs.data),axis=1)!=5)
            
            if epoch>args.fine_seg_epo and len(mixed_node[0])>0:
                # coarse loss is recomputed on the non-mixed superpoints only
                loss = nn.functional.cross_entropy(outputs[Nomixed_node[0],:], Variable(label_mode[[Nomixed_node[0]]]))
                Sum_slice=[]
                Sum_slice=np.hstack((Sum_slice,mixed_node[0]))
                
                # refine each mixed superpoint point-wise with the Fine-grained Module
                for i in range(len(Sum_slice)):
                    fname=args.TANKER_PATH+'/parsed/'+clouds_data[0][mixed_node[0][i]].split('.')[0] + '.h5'
                    fname_G=clouds_data[0][mixed_node[0][i]].split('.')[1]
                    hf = h5py.File(fname,'r')
                    P = hf[fname_G]
                    if np.shape(P)[0]>args.fine_seg_point_num:
                        # columns 0..12 are point features; the last column is the label
                        tempt1 = np.array(P[:,:13]).T
                        tempt = torch.from_numpy(tempt1.reshape((1,np.shape(tempt1)[0],np.shape(tempt1)[1])))
                        label_modebran = torch.from_numpy(P[:,-1]).long()
                        if args.cuda:
                            tempt = tempt.cuda()
                            label_modebran = label_modebran.cuda()
                        # `volatile` implies pytorch <= 0.3 semantics
                        outputsbran = model.FineModule(Variable(tempt.float(), requires_grad=model.training, volatile=not model.training))
                        loss2 = nn.functional.cross_entropy(outputsbran, Variable(label_modebran))
                        _outputsbran,_label_modebran=filter_valid(outputsbran.data.cpu().numpy(),label_modebran.cpu().numpy())
                        loss2.backward()
                        loss2_meter.add(loss2.data[0])
                        acc_meter2.add(_outputsbran,_label_modebran)
                        confusion_matrix.count_predicted_batch_branch(_outputsbran, _label_modebran)

            loss.backward()
            ptnCloudEmbedder.bw_hook()
    
            if args.grad_clip>0:
                for p in model.parameters():
                    if p.grad is not None:
                        p.grad.data.clamp_(-args.grad_clip, args.grad_clip)
            optimizer.step()
    
            t_trainer = 1000*(time.time()-t0)
            loss_meter.add(loss.data[0]) # pytorch 0.3
    
            # NOTE(review): filter_valid may drop rows, which could misalign the
            # Nomixed_node indices used below -- confirm filter_valid preserves order/length here.
            o_cpu, t_cpu, tvec_cpu = filter_valid(outputs.data.cpu().numpy(), label_mode_cpu.numpy(), label_vec_cpu.numpy())
            
            # BUGFIX: was 'epoch>args.fine_seg_epo & len(mixed_node[0])>0'; '&'
            # binds tighter than '>', so it parsed as the chained comparison
            # 'epoch > (fine_seg_epo & len(...)) > 0'. Use 'and' to match the
            # identical check above.
            if epoch>args.fine_seg_epo and len(mixed_node[0])>0:
                acc_meter.add(o_cpu[Nomixed_node[0],:], t_cpu[[Nomixed_node[0]]])
            else:
                acc_meter.add(o_cpu, t_cpu)
            confusion_matrix.count_predicted_batch(tvec_cpu[[Nomixed_node[0]]], np.argmax(o_cpu[Nomixed_node[0],:],1))
    
            logging.debug('Batch loss %f, Loader time %f ms, Trainer time %f ms.', loss.data[0], t_loader, t_trainer)
            t0 = time.time()
    
        return acc_meter.value()[0], meter_value(acc_meter2),loss_meter.value()[0], loss2_meter.value()[0],confusion_matrix.get_overall_accuracy(), confusion_matrix.get_average_intersection_union()
def train(config, model_dir, writer):
    """
    Function to train and evaluate a classification model for
    the ModelNet40 dataset. The training parameters are specified
    in the config file (for more details see config/config.py).

    :param config: Dictionary with configuration parameters
    :param model_dir: Checkpoint save directory
    :param writer: Tensorboard SummaryWriter object
    """
    phases = ['train', 'test']

    # NOTE(review): reads the global `args.root_dir` rather than config --
    # confirm this is intended.
    datasets, dataloaders = ds.get_modelnet40_dataloaders(
        root_dir=args.root_dir,
        phases=phases,
        batch_size=config['batch_size'],
        augment=config['augment'])

    # add number of classes to config
    config['num_classes'] = 40

    # we now set GPU training parameters
    # if the given index is not available then we use index 0
    # also when using multi gpu we should specify index 0
    if config['gpu_index'] + 1 > torch.cuda.device_count(
    ) or config['multi_gpu']:
        config['gpu_index'] = 0

    logging.info('Using GPU cuda:{}, script PID {}'.format(
        config['gpu_index'], os.getpid()))
    if config['multi_gpu']:
        logging.info('Training on multi-GPU mode with {} devices'.format(
            torch.cuda.device_count()))
    device = torch.device('cuda:{}'.format(config['gpu_index']))

    # we load the model defined in the config file
    model = res.resnet101(in_channels=config['in_channels'],
                          num_classes=config['num_classes'],
                          kernel_size=config['kernel_size']).to(device)

    # if use multi_gpu then convert the model to DataParallel
    if config['multi_gpu']:
        model = nn.DataParallel(model)

    # create optimizer, loss function, and lr scheduler
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['lr'],
                                 weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=config['lr_decay'],
        patience=config['lr_patience'],
        verbose=True)

    logging.info('Config {}'.format(config))
    logging.info(
        'TB logs and checkpoint will be saved in {}'.format(model_dir))

    utl.dump_config_details_to_tensorboard(writer, config)

    # create metric trackers: we track loss, class accuracy, and overall accuracy
    # ('acc' and 'iou' entries are created here but only 'loss' and 'cm' are
    # updated below)
    trackers = {
        x: {
            'loss': metrics.LossMean(),
            'acc': metrics.Accuracy(),
            'iou': None,
            'cm':
            metrics.ConfusionMatrix(num_classes=int(config['num_classes']))
        }
        for x in phases
    }

    # create initial best state object
    best_state = {
        'config': config,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict() if scheduler else None,
        'train_loss': float('inf'),
        'test_loss': float('inf'),
        'train_acc': 0.0,
        'test_acc': 0.0,
        'train_class_acc': 0.0,
        'test_class_acc': 0.0,
        'convergence_epoch': 0,
        'num_epochs_since_best_acc': 0
    }

    # now we train!
    for epoch in range(config['max_epochs']):
        for phase in phases:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            # reset metrics
            trackers[phase]['loss'].reset()
            trackers[phase]['cm'].reset()

            for step_number, (data, label) in enumerate(
                    tqdm(dataloaders[phase],
                         desc='[{}/{}] {} '.format(epoch + 1,
                                                   config['max_epochs'],
                                                   phase))):
                # (batch, channels, points) layout expected by the model
                data = data.to(device, dtype=torch.float).permute(0, 2, 1)
                label = label.to(device, dtype=torch.long).squeeze()

                # compute gradients on train only
                with torch.set_grad_enabled(phase == 'train'):
                    out = model(data)
                    loss = criterion(out, label)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # now we update metrics
                trackers[phase]['loss'].update(average_loss=loss,
                                               batch_size=data.size(0))
                trackers[phase]['cm'].update(y_true=label, y_logits=out)

            # logging.info('Computing accuracy...')

            # epoch-level results derived from the accumulated confusion matrix
            epoch_loss = trackers[phase]['loss'].result()
            epoch_overall_acc = trackers[phase]['cm'].result(metric='accuracy')
            epoch_class_acc = trackers[phase]['cm'].result(
                metric='class_accuracy').mean()

            # we update our learning rate scheduler if loss does not improve
            if phase == 'test' and scheduler:
                scheduler.step(epoch_loss)
                writer.add_scalar('params/lr', optimizer.param_groups[0]['lr'],
                                  epoch + 1)

            # log current results and dump in Tensorboard
            logging.info(
                '[{}/{}] {} Loss: {:.2e}. Overall Acc: {:.4f}. Class Acc {:.4f}'
                .format(epoch + 1, config['max_epochs'], phase, epoch_loss,
                        epoch_overall_acc, epoch_class_acc))

            writer.add_scalar('loss/epoch_{}'.format(phase), epoch_loss,
                              epoch + 1)
            writer.add_scalar('acc_class/epoch_{}'.format(phase),
                              epoch_class_acc, epoch + 1)
            writer.add_scalar('acc_all/epoch_{}'.format(phase),
                              epoch_overall_acc, epoch + 1)

        # after each epoch we update best state values as needed
        # first we save our state when we get better test accuracy
        # (an epoch that merely ties the best accuracy is also saved as "new best")
        if best_state['test_acc'] > trackers['test']['cm'].result(
                metric='accuracy'):
            best_state['num_epochs_since_best_acc'] += 1
        else:
            logging.info('Got a new best model with accuracy {:.4f}'.format(
                trackers['test']['cm'].result(metric='accuracy')))
            best_state = {
                'config':
                config,
                'model':
                model.state_dict(),
                'optimizer':
                optimizer.state_dict(),
                'scheduler':
                scheduler.state_dict() if scheduler else None,
                'train_loss':
                trackers['train']['loss'].result(),
                'test_loss':
                trackers['test']['loss'].result(),
                'train_acc':
                trackers['train']['cm'].result(metric='accuracy'),
                'test_acc':
                trackers['test']['cm'].result(metric='accuracy'),
                'train_class_acc':
                trackers['train']['cm'].result(metric='class_accuracy').mean(),
                'test_class_acc':
                trackers['test']['cm'].result(metric='class_accuracy').mean(),
                'convergence_epoch':
                epoch + 1,
                'num_epochs_since_best_acc':
                0
            }

            file_name = os.path.join(model_dir, 'best_state.pth')
            torch.save(best_state, file_name)
            logging.info('saved checkpoint in {}'.format(file_name))

        # we check for early stopping when we have trained a min number of epochs
        if epoch >= config['min_epochs'] and best_state[
                'num_epochs_since_best_acc'] >= config['early_stopping']:
            logging.info('Accuracy did not improve for {} iterations!'.format(
                config['early_stopping']))
            logging.info('[Early stopping]')
            break

    utl.dump_best_model_metrics_to_tensorboard(writer, phases, best_state)

    logging.info('************************** DONE **************************')
# Beispiel #16
# 0
    def validate(self, epoch):
        """
        Evaluate the model on the validation set.

        Runs the recurrent-attention model for ``self.num_glimpses`` steps per
        sample, averaging predictions over ``self.M`` Monte-Carlo duplicates.

        Returns (avg loss, avg accuracy, confusion matrix, AUC tracker).
        """
        losses = AverageMeter()
        accs = AverageMeter()

        cmat = metrics.ConfusionMatrix(self.num_classes)
        auc = metrics.AUC(self.num_classes)

        for i, (x, y) in enumerate(self.valid_loader):
            y = y.squeeze()
            if self.use_gpu:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # duplicate M times (Monte-Carlo sampling of glimpse locations)
            x = x.repeat(self.M, 1, 1, 1)

            # initialize location vector and hidden state
            self.batch_size = x.shape[0]
            h_t, l_t = self.reset()

            # extract the glimpses
            log_pi = []
            baselines = []
            for t in range(self.num_glimpses - 1):

                # forward pass through model
                h_t, l_t, b_t, p = self.model(x, l_t, h_t)

                # store baseline values and location log-probabilities
                baselines.append(b_t)
                log_pi.append(p)

            # last iteration also emits class log-probabilities
            h_t, l_t, b_t, log_probas, p = self.model(x, l_t, h_t, last=True)
            log_pi.append(p)
            baselines.append(b_t)

            # convert list to tensors and reshape
            baselines = torch.stack(baselines).transpose(0, 2)
            log_pi = torch.stack(log_pi).transpose(0, 2)

            # average predictions over the M duplicates
            log_probas = log_probas.view(self.M, -1, log_probas.shape[-1])
            log_probas = torch.mean(log_probas, dim=0)

            baselines = baselines.contiguous().view(self.M, -1,
                                                    self.num_stacks,
                                                    baselines.shape[-1])
            baselines = torch.mean(baselines, dim=0)

            log_pi = log_pi.contiguous().view(self.M, -1, self.num_stacks,
                                              log_pi.shape[-1])
            log_pi = torch.mean(log_pi, dim=0)

            # calculate reward: 1 for a correct prediction, 0 otherwise,
            # broadcast over stacks and glimpses
            predicted = torch.max(log_probas, 1)[1]
            R = (predicted.detach() == y).float()
            R = R.view(R.size(0), 1, 1).repeat(1, self.num_stacks,
                                               self.num_glimpses)

            # compute losses for differentiable modules
            loss_action = F.nll_loss(log_probas, y)
            loss_baseline = F.mse_loss(baselines, R)

            # compute reinforce loss
            adjusted_reward = R - baselines.detach()
            loss_reinforce = torch.mean(-log_pi * adjusted_reward)

            # sum up into a hybrid loss
            loss = loss_action + loss_baseline + loss_reinforce

            # compute accuracy
            correct = (predicted == y).float()
            acc = 100 * (correct.sum() / len(y))

            # store (the .data[0] indexing implies pytorch <= 0.3)
            losses.update(loss.data[0], x.size()[0])
            accs.update(acc.data[0], x.size()[0])

            cmat.add(predicted, y)
            auc.add(y, log_probas.exp())

            # log to tensorboard
            if self.use_tensorboard:
                iteration = epoch * len(self.valid_loader) + i
                log_value('valid_loss', losses.avg, iteration)
                log_value('valid_acc', accs.avg, iteration)

        return losses.avg, accs.avg, cmat, auc
# Beispiel #17
# 0
def inference(ind, model, loader, args):
    """Produce inference artefacts for one data loader and write them to disk.

    Args:
        ind: loop position; indexes the split name ("train"/"val"/"test")
            when ``args.train_set`` is set.
        model: trained network applied to each batch.
        loader: data loader yielding ``(imgs, gt, names)`` batches.
        args: parsed command-line options (out, inf, dif, proba, probaH5,
            color, metric, cuda, noGT, c, nodata, train_set, ...).

    Returns:
        int: always 0; all results are written to the filesystem as a
        side effect (inference maps, difference maps, per-class probability
        maps, probability HDF5 files, and optional metrics).
    """
    # Output destinations: [inference dir, difference dir,
    # list of per-class probability dirs, probability-H5 dir].
    # With a train/val/test split, one subfolder per split is created.
    outs = ["", "", [], []]
    name = ["train", "val", "test"]  # must match the order of the data loaders
    if args.inf:
        # NOTE(review): the non-split branches below do not call createDir —
        # presumably the base folder already exists; confirm against callers.
        outs[0] = ost.createDir(
            args.out + "/inf/" +
            name[ind]) if args.train_set else args.out + "/inf"
    if args.dif:
        outs[1] = ost.createDir(
            args.out + "/dif/" +
            name[ind]) if args.train_set else args.out + "/dif"
    if args.proba:
        outHM = [ost.createDir(args.out + "/proba/" + c) for c in args.c]
        outs[2] = [ost.createDir(p + "/" + name[ind])
                   for p in outHM] if args.train_set else outHM
    if args.probaH5:
        outs[3] = ost.createDir(
            args.out + "/probaH5/" +
            name[ind]) if args.train_set else args.out + "/probaH5"

    # Build the class-id -> colour-tuple table for the colortable, if one was
    # supplied. (The identity check against False is kept on purpose:
    # args.color apparently defaults to False and otherwise holds a CSV path.)
    if args.color is not False:
        table = np.loadtxt(args.color, delimiter=",", dtype=int)
        colors = {int(row[0]): tuple(row[1:5]) for row in table}
    else:
        colors = None

    # Metric container accumulated over every batch of this loader.
    if args.metric:
        cm = m.ConfusionMatrix(len(args.c), args.c, args.nodata)

    # One worker pool for the whole run: the previous version forked a fresh
    # multiprocessing.Pool for every batch, which is needlessly expensive.
    with multiprocessing.Pool() as pool:
        # loop through the batches given by the data reader, unfolding the tuple
        for batch_ndx, (imgs, gt, names) in enumerate(tqdm(loader)):
            # if CUDA is requested, move the batch to the GPU
            batch_tensor = imgs.cuda() if args.cuda else imgs

            # forward pass; bring predictions back to the CPU for writing
            prediction = model(batch_tensor).cpu()

            # one work item per tile of the batch; ground truth is omitted
            # when running without labels (args.noGT)
            mpArg = [(prediction[i].detach(),
                      None if args.noGT else gt[i],
                      names[i], outs, args, colors)
                     for i in range(prediction.shape[0])]

            # write out the inference data of the current batch in parallel
            pool.map(multiprocessing_func, mpArg)

            # calculate the batch metric and add it to the metric container
            if args.metric:
                for i in range(prediction.size()[0]):
                    pred = prediction[i].argmax(0).squeeze()
                    cm.add_batch(gt[i].numpy(), pred.numpy())

            # free memory before the next batch
            del imgs
            del gt
            del batch_tensor
            del prediction

    # dump the metrics of this dataset to the metric output folder
    if args.metric:
        out_perf = ost.createDir(
            args.out + "/" + name[ind] +
            "_perf_inf") if args.train_set else ost.createDir(args.out +
                                                              "/perf_inf")
        cm.printPerf(out_perf)
    return 0
Beispiel #18
0
    def train(epoch):
        """Train the model for one epoch with weak-label extension.

        Runs the plain weak-label cross-entropy loss every epoch; every
        ``args.ext_epoch_gap`` epochs it additionally extends pseudo labels
        to unlabeled superpoints via ``extension_accum2``, filters the
        extensions by distance to a per-class cluster centre (extension
        dropout), adds two coupled-attention losses, and persists the
        extended labels to ``args.extension_dir`` for the next round.

        Args:
            epoch: current epoch index (also stored into ``args.ext_epoch``).

        Returns:
            Tuple of (accuracy, overall accuracy, mean class accuracy,
            mean IoU, mean loss, epoch wall time, extension overall
            accuracy, extension mean class accuracy, extension mean IoU).
        """
        args.ext_epoch = epoch
        model.train()

        # Fresh shuffled loader each epoch; drop_last keeps batch sizes fixed.
        loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             collate_fn=spg.eccpc_collate,
                                             num_workers=args.nworkers,
                                             shuffle=True,
                                             drop_last=True)

        # Meters: main loss, attention-on-extension loss, attention-on-labeled
        # loss, and three confusion matrices (all points / all retained
        # extension points / extensions made this epoch only).
        loss_meter = tnt.meter.AverageValueMeter()
        loss_ext_meter = tnt.meter.AverageValueMeter()
        loss_att_meter = tnt.meter.AverageValueMeter()
        acc_meter = tnt.meter.ClassErrorMeter(accuracy=True)
        confusion_matrix = metrics.ConfusionMatrix(
            dbinfo['classes'], ignore_label=args.metric_ignore_class)
        confusion_matrix_ext = metrics.ConfusionMatrix(
            dbinfo['classes'], ignore_label=args.metric_ignore_class)
        confusion_matrix_ext_epoch = metrics.ConfusionMatrix(
            dbinfo['classes'], ignore_label=args.metric_ignore_class)
        t0 = time.time()
        epoch_time = time.time()
        batch_time = AverageMeter()
        end = time.time()

        # iterate over dataset in batches
        for bidx, (targets, GIs, clouds_data, clouds_orig, edges_for_ext,
                   fnames, ext_data, num_sp_list) in enumerate(loader):
            print('fnames: {}'.format(fnames))
            t_loader = time.time() - t0

            model.ecc.set_info(GIs, args.cuda)
            # Unpack target columns: col 0 = weak label, col 1 = full label,
            # cols 2.. = per-class label histogram; segm_size is its row sum.
            weak_label_mode_cpu, label_mode_cpu, label_vec_cpu, segm_size_cpu = targets[:,
                                                                                        0], targets[:,
                                                                                                    1], targets[:,
                                                                                                                2:], targets[:, 2:].sum(
                                                                                                                    1
                                                                                                                )
            ext_mask, extension_sub_list, extension_full_list = ext_data

            if args.cuda:
                label_mode, label_vec, segm_size, weak_label_mode = label_mode_cpu.cuda(
                ), label_vec_cpu.float().cuda(), segm_size_cpu.float().cuda(
                ), weak_label_mode_cpu.cuda()
            else:
                label_mode, label_vec, segm_size, weak_label_mode = label_mode_cpu, label_vec_cpu.float(
                ), segm_size_cpu.float(), weak_label_mode_cpu

            optimizer.zero_grad()
            t0 = time.time()

            print('num_weak_label/num_sp_all: {}/{}'.format(
                torch.sum(weak_label_mode < args.n_labels + 1),
                weak_label_mode.shape[0]))

            # Forward pass: point-net embeddings -> ECC graph conv outputs
            # plus the intermediate RNN features used by the attention heads.
            embeddings = ptnCloudEmbedder.run(model, *clouds_data)
            outputs, rnn_fea = model.ecc(embeddings)
            o_cpu, t_cpu, tvec_cpu = filter_valid(outputs.data.cpu().numpy(),
                                                  label_mode_cpu.numpy(),
                                                  label_vec_cpu.numpy())

            # extension: replace labels of previously-extended superpoints
            # with the current argmax prediction
            input = clouds_orig.cuda()
            edges_for_ext = edges_for_ext.cuda()
            ext_weak_label_cuda = torch.argmax(outputs, dim=1, keepdim=False)

            # NOTE(review): this binds, it does not copy — the in-place
            # assignment below also mutates weak_label_mode; confirm intended.
            weak_label_cat = weak_label_mode
            weak_label_cat[ext_mask > 0] = ext_weak_label_cuda[ext_mask > 0]

            # Only extend labels every ext_epoch_gap epochs (and never at 0).
            if (epoch > 0) and (epoch % args.ext_epoch_gap == 0):
                output1, weak_label1, output2, weak_label2, extend_idx, _ = extension_accum2(
                    input,
                    outputs,
                    embeddings,
                    weak_label_cat,
                    edges_for_ext,
                    th=args.extension_th,
                    ext_max=args.single_ext_max)
                print('{}/{} ~ {:.2f} points with labels.'.format(
                    outputs.shape[0], output1.shape[0],
                    output1.shape[0] / outputs.shape[0] * 100))
                print('extension points: {}'.format(extend_idx.shape))
            else:
                extend_idx = torch.Tensor([])
                output2 = torch.Tensor([])
                weak_label2 = torch.Tensor([])

            # loss on the (weakly) labeled superpoints
            mask1 = weak_label_mode != args.ignore_label
            outputs_valid1 = outputs[mask1, :]
            # NOTE(review): this overwrites the weak_label1 returned by
            # extension_accum2 above — apparently intentional (the labeled
            # set is re-derived from the mask), but worth confirming.
            weak_label1 = weak_label_mode[mask1]
            loss1_cro = nn.functional.cross_entropy(
                outputs_valid1, weak_label1, ignore_index=args.ignore_label)

            ###### extension dropout
            # labeled points: outputs_valid1, weak_label1
            labeled_idx = torch.nonzero(mask1).squeeze().long()  # Nsp_label
            # previous extension points:
            if torch.sum(ext_mask > 0) > 1:
                pre_ext_outputs = outputs[ext_mask > 0, :]  # Nsp_pre_ext
                pre_ext_fea = rnn_fea[ext_mask > 0, :]
                pre_ext_pseudo_label = torch.argmax(pre_ext_outputs,
                                                    dim=1,
                                                    keepdim=False)
                pre_ext_idx = torch.nonzero(ext_mask > 0).squeeze().long()
            # current extension points:
            if extend_idx.shape[0] > 1:
                cur_ext_outputs = output2
                cur_ext_pred_label = weak_label2
                cur_ext_idx = extend_idx
                cur_ext_fea = rnn_fea[extend_idx, :]

            # extension concat: merge previous and current extension sets
            # (either may be empty; None signals "no extensions at all")
            if (torch.sum(ext_mask > 0) > 1) & (extend_idx.shape[0] > 1):
                ext_outputs_cat = torch.cat((pre_ext_outputs, cur_ext_outputs),
                                            0)
                ext_label_cat = torch.cat((pre_ext_pseudo_label.unsqueeze(-1),
                                           cur_ext_pred_label.unsqueeze(-1)),
                                          0).squeeze(-1)
                ext_idx_cat = torch.cat(
                    (pre_ext_idx.unsqueeze(-1), cur_ext_idx.unsqueeze(-1)),
                    0).squeeze(-1)
                ext_fea_cat = torch.cat((pre_ext_fea, cur_ext_fea), 0)
            elif extend_idx.shape[0] > 1:  # only current extension points
                ext_outputs_cat = cur_ext_outputs
                ext_label_cat = cur_ext_pred_label
                ext_idx_cat = cur_ext_idx
                ext_fea_cat = cur_ext_fea
            elif torch.sum(ext_mask > 0) > 1:  # only previous extension points
                ext_outputs_cat = pre_ext_outputs
                ext_label_cat = pre_ext_pseudo_label
                ext_idx_cat = pre_ext_idx
                ext_fea_cat = pre_ext_fea
            else:
                ext_outputs_cat = None
                ext_label_cat = None
                ext_idx_cat = None
                ext_fea_cat = None

            # compute the cluster center and the distances and then dropout with ratio:
            # per class, keep only the args.ext_drop fraction of extension
            # points closest (in output space) to the class cluster centre
            if (ext_idx_cat is not None) and (ext_idx_cat.shape[0] > 20):
                ext_idxs_sample = [
                ]  # sampled id of each extension points in all the extension points
                unique_classes = torch.unique(weak_label1)
                ext_idx_idx = torch.Tensor(
                    list(range(ext_idx_cat.shape[0]))
                ).cuda(
                )  # id of each extension points in all the extension points
                for i in range(unique_classes.shape[0]):
                    sp = unique_classes[i]
                    fea_label = outputs_valid1[weak_label1 ==
                                               sp]  # Nsp_label_class*13
                    fea_ext = ext_outputs_cat[ext_label_cat ==
                                              sp]  # Nsp_ext_class*13
                    ext_idxs = ext_idx_idx[ext_label_cat ==
                                           sp]  # Nsp_ext_class

                    if (fea_ext.shape[0] > 5) & (fea_label.shape[0] > 0):
                        num_retain = math.floor(fea_ext.shape[0] *
                                                args.ext_drop)

                        # weighted centre: labeled points count fully,
                        # extension points with weight 0.5
                        cluster_center = torch.sum(
                            fea_label, dim=0, keepdim=True) + 0.5 * torch.sum(
                                fea_ext, dim=0, keepdim=True)
                        cluster_center = cluster_center / (
                            fea_label.shape[0] + fea_ext.shape[0])  # 1*13

                        dis = fea_ext - cluster_center  # Nsp_ext*13
                        dis = torch.norm(dis, dim=1)  # Nsp
                        _, idxs = torch.sort(dis, dim=0, descending=False)
                        ext_idxs_sample.append(
                            ext_idxs[idxs[:num_retain]].unsqueeze(-1))
                    elif len(ext_idxs) > 0:
                        # too few extensions for this class: keep them all
                        ext_idxs_sample.append(ext_idxs.unsqueeze(-1))

                ext_idxs_sample = torch.cat(ext_idxs_sample,
                                            0).squeeze(-1).long()

                ext_idxs_retain = ext_idx_cat[ext_idxs_sample]
                ext_output_retain = ext_outputs_cat[ext_idxs_sample, :]
                ext_fea_retain = ext_fea_cat[ext_idxs_sample, :]
                ext_label_retain = ext_label_cat[ext_idxs_sample]

            else:
                # 20 or fewer extension points: skip dropout entirely
                ext_idxs_retain = ext_idx_cat
                ext_output_retain = ext_outputs_cat
                ext_fea_retain = ext_fea_cat
                ext_label_retain = ext_label_cat

            # coupled-attention losses, only when enough extensions survived
            if (ext_idxs_retain
                    is not None) and (ext_idxs_retain.shape[0] > 2):
                lab_fea = rnn_fea[
                    labeled_idx, :]  # M*352, including the previous extension points
                lab_lab = weak_label1
                # subsample both sides to cap the attention cost
                if lab_fea.shape[0] > args.max_labeled_att:
                    ii = random.sample(range(lab_fea.shape[0]),
                                       k=args.max_labeled_att)
                    lab_fea = lab_fea[ii, :]
                    lab_lab = lab_lab[ii]
                    lab_idxs_sample = ii
                if ext_idxs_retain.shape[0] > args.max_ext_att_loss:
                    ii = random.sample(range(ext_idxs_retain.shape[0]),
                                       k=args.max_ext_att_loss)
                    # ext_idxs_retain_sample = ext_idxs_retain[ii]
                    ext_idxs_retain_sample = ii
                else:
                    # ext_idxs_retain_sample = ext_idxs_retain
                    ext_idxs_retain_sample = list(
                        range(ext_idxs_retain.shape[0]))
                ext_fea = rnn_fea[ext_idxs_retain_sample, :]  # N*352

                # coupled attention: each side attends over the other
                outputs_att_lab = model.att_lab(lab_fea, ext_fea)
                outputs_att_ext = model.att_ext(ext_fea, lab_fea)

                loss_att_lab_cro = nn.functional.cross_entropy(
                    outputs_att_lab, lab_lab)
                loss_att_meter.add(loss_att_lab_cro.item())

                loss_att_ext_cro = nn.functional.cross_entropy(
                    outputs_att_ext, ext_label_retain[ext_idxs_retain_sample])
                loss_ext_meter.add(loss_att_ext_cro.item())

                # hybrid loss: weighted weak-label CE + attention terms
                loss = args.loss_w1 * loss1_cro + args.loss_w2 * (
                    loss_att_lab_cro + loss_att_ext_cro)

                # track extension quality against the full ground truth
                confusion_matrix_ext.count_predicted_batch(
                    tvec_cpu[ext_idxs_retain.data.cpu().numpy(), :],
                    np.argmax(outputs[ext_idxs_retain, :].data.cpu().numpy(),
                              1))
                print('{} point extend. acc: {:3f}, macc: {:3f}'.format(
                    ext_idxs_retain.shape[0],
                    confusion_matrix_ext.get_overall_accuracy(),
                    confusion_matrix_ext.get_mean_class_accuracy()))
                if extend_idx.shape[0] > 1:
                    confusion_matrix_ext_epoch.count_predicted_batch(
                        tvec_cpu[extend_idx.data.cpu().numpy(), :],
                        np.argmax(outputs[extend_idx, :].data.cpu().numpy(),
                                  1))
                    print(
                        '{} point extend current epoch. acc: {:3f}, macc: {:3f}'
                        .format(
                            extend_idx.shape[0],
                            confusion_matrix_ext_epoch.get_overall_accuracy(),
                            confusion_matrix_ext_epoch.get_mean_class_accuracy(
                            )))

                # update the extension: persist the retained extended labels
                # per source file so the next epoch round can reload them
                extend_idx = ext_idxs_retain.data.cpu().numpy().astype(
                    np.int32)
                weak_label2 = ext_label_retain.data.cpu().numpy().astype(
                    np.int32)
                num_sp_array = np.cumsum(np.array(num_sp_list))

                # num_sp_array holds cumulative superpoint counts; map the
                # batch-global extension indices back to each sample's range
                for b in range(num_sp_array.shape[0]):
                    if b == 0:
                        sp_start = 0
                    else:
                        sp_start = num_sp_array[b - 1]
                    sp_end = num_sp_array[b]

                    mask = (extend_idx >= sp_start) & (extend_idx < sp_end)
                    if np.sum(mask) > 0:
                        extend_idx_batch = extend_idx[mask] - sp_start
                        extend_label_batch = weak_label2[mask]

                        extension_sub_batch = extension_sub_list[b].astype(
                            np.int32)  # Nsp*2
                        extension_full_batch = extension_full_list[b].astype(
                            np.int32)  # N*2
                        # scatter the superpoint labels onto the full cloud
                        # through the sub->full index mapping
                        extension_full_batch[extension_sub_batch[
                            extend_idx_batch, 0]] = extend_label_batch
                        current_fname = fnames[b]
                        np.savetxt(os.path.join(
                            args.extension_dir, 'epoch_{:d}'.format(
                                int(epoch // args.ext_epoch_gap)),
                            '{}.txt'.format(current_fname)),
                                   extension_full_batch,
                                   fmt='%d')

            else:
                # no usable extensions this batch: plain weak-label loss only
                loss = loss1_cro

            loss.backward()
            ptnCloudEmbedder.bw_hook()

            # optional gradient clipping (element-wise clamp)
            if args.grad_clip > 0:
                for p in model.parameters():
                    if p.grad is not None:
                        p.grad.data.clamp_(-args.grad_clip, args.grad_clip)
            optimizer.step()

            t_trainer = time.time() - t0
            # loss_meter.add(loss.data[0]) # pytorch 0.3
            loss_meter.add(loss.item())  # pytorch 0.4

            acc_meter.add(o_cpu, t_cpu)
            confusion_matrix.count_predicted_batch(tvec_cpu,
                                                   np.argmax(o_cpu, 1))

            batch_time.update(time.time() - end)
            end = time.time()

            print(
                'Batch {}/{} - loss {:.3f}/{:.3f}, acc {:.3f}, lr {:.3f}, Loader time {:.3f}, Trainer time {:.3f}, Batch time {:.3f}/{:.3f}.'
                .format(bidx + 1, len(loader), loss.item(),
                        loss_meter.value()[0],
                        confusion_matrix.get_overall_accuracy(),
                        get_lr(optimizer), t_loader, t_trainer, batch_time.val,
                        batch_time.avg))
            t0 = time.time()

        # Last epoch of an extension round: seed the next round's extension
        # folder with a copy of this round's saved extensions.
        if args.ext_epoch % args.ext_epoch_gap == (
                args.ext_epoch_gap -
                1):  # a new extension folder need to be built
            shutil.copytree(
                os.path.join(
                    args.extension_dir, 'epoch_{}'.format(
                        int(args.ext_epoch // args.ext_epoch_gap))),
                os.path.join(
                    args.extension_dir, 'epoch_{}'.format(
                        int(args.ext_epoch // args.ext_epoch_gap) + 1)))
        return acc_meter.value()[0], confusion_matrix.get_overall_accuracy(), confusion_matrix.get_mean_class_accuracy(), confusion_matrix.get_average_intersection_union(), loss_meter.value()[0], time.time()-epoch_time, \
                    confusion_matrix_ext_epoch.get_overall_accuracy(), confusion_matrix_ext_epoch.get_mean_class_accuracy(), confusion_matrix_ext_epoch.get_average_intersection_union()