Esempio n. 1
0
def process(data_tuple, model, output_folder, training_args, overwrite):
    """Partition one batch with the trained model and write its SPG file.

    Args:
        data_tuple: One collated batch, unpacked as ``(fname, edg_source,
            edg_target, is_transition, labels, objects, clouds_data, xyz)``.
            ``fname[0]`` is the output path relative to ``output_folder`` and
            ``clouds_data`` is a 3-tuple ``(clouds, clouds_global, nei)``.
        model: Network handed to the embedder's ``run_batch``.
        output_folder: Root directory under which the SPG file is written.
        training_args: Options object; ``cuda`` and ``dist_type`` are read
            here and the object is forwarded to the helper functions.
        overwrite: When false, an SPG file that already exists is left
            untouched and the function returns early.

    Returns:
        None. The result is written to disk through ``provider.write_spg``.
    """
    # is_transition and objects belong to the batch layout but are not needed
    # to compute a partition, so they are neither kept nor moved to the GPU.
    fname, edg_source, edg_target, _, labels, _, clouds_data, xyz = data_tuple
    spg_file = os.path.join(output_folder, fname[0])
    logging.info("\nGenerating SPG file %s...", spg_file)

    # Test the file itself: checking only its directory would skip every
    # remaining file of a folder as soon as that folder had been created.
    if os.path.exists(spg_file) and not overwrite:
        logging.info("Already exists, skipping")
        return

    spg_dir = os.path.dirname(spg_file)
    if spg_dir:  # empty when the file goes straight into the working directory
        os.makedirs(spg_dir, exist_ok=True)

    if training_args.cuda:
        clouds, clouds_global, nei = clouds_data
        clouds_data = (
            clouds.to('cuda', non_blocking=True),
            clouds_global.to('cuda', non_blocking=True),
            nei,
        )

    ptnCloudEmbedder = get_embedder(training_args)
    num_classes = get_num_classes(training_args)

    embeddings = ptnCloudEmbedder.run_batch(model, *clouds_data, xyz)

    diff = losses.compute_dist(embeddings, edg_source, edg_target, training_args.dist_type)

    pred_components, pred_in_component = losses.compute_partition(
        training_args, embeddings, edg_source, edg_target, diff, xyz)

    # NOTE(review): the meaning of the constant 100 is defined by
    # graphs.compute_sp_graph and is not visible here - confirm before changing.
    graph_sp = graphs.compute_sp_graph(xyz, 100, pred_in_component, pred_components, labels, num_classes)

    provider.write_spg(spg_file, graph_sp, pred_components, pred_in_component)
    def evaluate(i_epoch):
        """Run the model over the test set and return aggregate metrics.

        ``i_epoch`` is accepted for interface symmetry but is not used.

        Batches whose ``is_transition`` holds at most one entry still go
        through the embedder and the distance computation, but contribute to
        none of the meters.

        Returns:
            A 5-tuple: mean loss, mean number of predicted components,
            ``100 * trace / sum`` of the class confusion matrix, and the
            values of the boundary-recall and boundary-precision meters.
        """
        model.eval()

        avg_loss = tnt.meter.AverageValueMeter()
        avg_n_clusters = tnt.meter.AverageValueMeter()
        recall_meter = tnt.meter.AverageValueMeter()
        precision_meter = tnt.meter.AverageValueMeter()
        class_cm = metrics.ConfusionMatrix(dbinfo['classes'])

        with torch.no_grad():
            batches = torch.utils.data.DataLoader(
                test_dataset,
                batch_size=1,
                collate_fn=graph_collate,
                num_workers=args.nworkers,
            )

            # Show a progress bar unless debug logging is active.
            if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
                batches = tqdm(batches, ncols=100)

            for fname, edg_source, edg_target, is_transition, labels, objects, clouds_data, xyz in batches:

                if args.cuda:
                    is_transition = is_transition.to('cuda', non_blocking=True)
                    objects = objects.to('cuda', non_blocking=True)
                    local_clouds, global_clouds, neighbors = clouds_data
                    clouds_data = (
                        local_clouds.to('cuda', non_blocking=True),
                        global_clouds.to('cuda', non_blocking=True),
                        neighbors,
                    )

                embeddings = ptnCloudEmbedder.run_batch(model, *clouds_data, xyz)
                diff = compute_dist(embeddings, edg_source, edg_target, args.dist_type)

                if len(is_transition) <= 1:
                    # Too few edges to score: skip the metrics for this batch.
                    continue

                weights_loss, pred_components, pred_in_component = compute_weight_loss(
                    args, embeddings, objects, edg_source, edg_target, is_transition, diff, True, xyz)
                loss_a, loss_b = compute_loss(args, diff, is_transition, weights_loss)
                batch_loss = (loss_a + loss_b) / weights_loss.shape[0]

                # An edge is a predicted transition when its endpoints fall in different components.
                pred_transition = pred_in_component[edg_source] != pred_in_component[edg_target]
                class_cm.count_predicted_batch(labels[:, 1:], perfect_prediction(pred_components, labels))

                avg_loss.add(batch_loss.item())
                is_transition = is_transition.cpu().numpy()
                avg_n_clusters.add(len(pred_components))

                # Recall: each batch is weighted by its number of true transitions.
                n_true = is_transition.sum()
                relaxed_pred = relax_edge_binary(pred_transition, edg_source, edg_target,
                                                 xyz.shape[0], args.BR_tolerance)
                recall_meter.add(n_true * compute_boundary_recall(is_transition, relaxed_pred), n=n_true)

                # Precision: each batch is weighted by its number of predicted transitions.
                n_pred = pred_transition.sum()
                relaxed_true = relax_edge_binary(is_transition, edg_source, edg_target,
                                                 xyz.shape[0], args.BR_tolerance)
                precision_meter.add(n_pred * compute_boundary_precision(relaxed_true, pred_transition), n=n_pred)

        cm = class_cm.confusion_matrix
        return (
            avg_loss.value()[0],
            avg_n_clusters.value()[0],
            100 * cm.trace() / cm.sum(),
            recall_meter.value()[0],
            precision_meter.value()[0],
        )
    def train(i_epoch):
        """Train the model for one epoch over ``train_dataset``.

        ``i_epoch`` is accepted for interface symmetry but is not used.

        Returns:
            A 2-tuple: the mean (unscaled) batch loss, and the mean over
            batches of ``embeddings.shape[0] / len(predicted components)``.
        """
        model.train()
        loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            collate_fn=graph_collate,
            num_workers=args.nworkers,
            shuffle=True,
            drop_last=True,
        )

        # Show a progress bar unless debug logging is active.
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        loss_meter = tnt.meter.AverageValueMeter()
        n_clusters_meter = tnt.meter.AverageValueMeter()

        # Scaling for better usage of float precision; divided out again
        # wherever the loss is logged or metered.
        factor = 1000

        t0 = time.time()

        for fname, edg_source, edg_target, is_transition, labels, objects, clouds_data, xyz in loader:

            if args.cuda:
                is_transition = is_transition.to('cuda', non_blocking=True)
                objects = objects.to('cuda', non_blocking=True)
                clouds, clouds_global, nei = clouds_data
                clouds_data = (
                    clouds.to('cuda', non_blocking=True),
                    clouds_global.to('cuda', non_blocking=True),
                    nei,
                )

            # Time since the end of the previous step: loading plus transfer.
            t_loader = 1000 * (time.time() - t0)
            optimizer.zero_grad()
            t0 = time.time()

            embeddings = ptnCloudEmbedder.run_batch(model, *clouds_data, xyz)

            diff = compute_dist(embeddings, edg_source, edg_target, args.dist_type)

            weights_loss, pred_comp, in_comp = compute_weight_loss(
                args, embeddings, objects, edg_source, edg_target, is_transition, diff, True, xyz)

            loss1, loss2 = compute_loss(args, diff, is_transition, weights_loss)

            loss = (loss1 + loss2) / weights_loss.shape[0] * factor

            loss.backward()

            if args.grad_clip > 0:
                # The clip threshold is scaled like the loss. clip_grad_value_
                # skips parameters without a gradient, where the manual
                # p.grad.data.clamp_ loop would raise AttributeError.
                torch.nn.utils.clip_grad_value_(model.parameters(), args.grad_clip * factor)

            optimizer.step()

            t_trainer = 1000 * (time.time() - t0)
            loss_meter.add(loss.item() / factor)
            n_clusters_meter.add(embeddings.shape[0] / len(pred_comp))

            logging.debug('Batch loss %f, Loader time %f ms, Trainer time %f ms.', loss.item() / factor, t_loader, t_trainer)
            t0 = time.time()

        return loss_meter.value()[0], n_clusters_meter.value()[0]
    def evaluate_final():
        """Evaluate the model on the test set and persist the results.

        For every test batch the partition is computed. When ``args.spg_out``
        is set, the resulting superpoint graph is written below
        ``folder_hierarchy.spg_folder``, replacing any previous file. Batches
        with more than one entry in ``is_transition`` also feed the class and
        boundary confusion matrices.

        If at least one batch was scored, the confusion matrices and the mean
        number of clusters are stored in ``res.h5`` inside
        ``folder_hierarchy.outputdir``.

        Returns:
            None.
        """
        print("Final evaluation")
        model.eval()

        n_clusters_meter = tnt.meter.AverageValueMeter()
        confusion_matrix_classes = metrics.ConfusionMatrix(dbinfo['classes'])
        confusion_matrix_BR = metrics.ConfusionMatrix(2)
        confusion_matrix_BP = metrics.ConfusionMatrix(2)

        # Set once any batch contributes to the metrics. The decision to write
        # res.h5 must not hinge on the loop variable of the last batch, which
        # is undefined for an empty loader and arbitrary otherwise.
        any_scored = False

        with torch.no_grad():

            loader = torch.utils.data.DataLoader(test_dataset,
                                                 batch_size=1,
                                                 collate_fn=graph_collate,
                                                 num_workers=args.nworkers)

            # Show a progress bar unless debug logging is active.
            if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
                loader = tqdm(loader, ncols=100)

            # iterate over dataset in batches
            for (fname, edg_source, edg_target, is_transition, labels,
                 objects, clouds_data, xyz) in loader:

                if args.cuda:
                    is_transition = is_transition.to('cuda', non_blocking=True)
                    objects = objects.to('cuda', non_blocking=True)
                    clouds, clouds_global, nei = clouds_data
                    clouds_data = (clouds.to('cuda', non_blocking=True),
                                   clouds_global.to('cuda', non_blocking=True),
                                   nei)

                # The 'sema3d' dataset goes through the embedder's CPU path.
                if args.dataset == 'sema3d':
                    embeddings = ptnCloudEmbedder.run_batch_cpu(
                        model, *clouds_data, xyz)
                else:
                    embeddings = ptnCloudEmbedder.run_batch(
                        model, *clouds_data, xyz)

                diff = compute_dist(embeddings, edg_source, edg_target,
                                    args.dist_type)

                pred_components, pred_in_component = compute_partition(
                    args, embeddings, edg_source, edg_target, diff, xyz)

                if len(is_transition) > 1:
                    any_scored = True
                    # An edge is a predicted transition when its endpoints
                    # fall in different components.
                    pred_transition = (pred_in_component[edg_source]
                                       != pred_in_component[edg_target])
                    is_transition = is_transition.cpu().numpy()

                    n_clusters_meter.add(len(pred_components))

                    per_pred = perfect_prediction(pred_components, labels)
                    confusion_matrix_classes.count_predicted_batch(
                        labels[:, 1:], per_pred)
                    confusion_matrix_BR.count_predicted_batch_hard(
                        is_transition,
                        relax_edge_binary(pred_transition, edg_source,
                                          edg_target, xyz.shape[0],
                                          args.BR_tolerance).astype('uint8'))
                    confusion_matrix_BP.count_predicted_batch_hard(
                        relax_edge_binary(is_transition, edg_source,
                                          edg_target, xyz.shape[0],
                                          args.BR_tolerance),
                        pred_transition.astype('uint8'))

                if args.spg_out:
                    graph_sp = compute_sp_graph(xyz, 100, pred_in_component,
                                                pred_components, labels,
                                                dbinfo["classes"])
                    spg_file = os.path.join(folder_hierarchy.spg_folder,
                                            fname[0])
                    spg_dir = os.path.dirname(spg_file)
                    if spg_dir:
                        os.makedirs(spg_dir, exist_ok=True)
                    # Best-effort removal of a stale file; a missing file is
                    # the normal case and any other failure will surface when
                    # write_spg tries to write.
                    try:
                        os.remove(spg_file)
                    except OSError:
                        pass
                    write_spg(spg_file, graph_sp, pred_components,
                              pred_in_component)

        if any_scored:
            res_name = os.path.join(folder_hierarchy.outputdir, 'res.h5')
            # The context manager closes the file even if a dataset write fails.
            with h5py.File(res_name, 'w') as res_file:
                res_file.create_dataset(
                    'confusion_matrix_classes',
                    data=confusion_matrix_classes.confusion_matrix,
                    dtype='uint64')
                res_file.create_dataset(
                    'confusion_matrix_BR',
                    data=confusion_matrix_BR.confusion_matrix,
                    dtype='uint64')
                res_file.create_dataset(
                    'confusion_matrix_BP',
                    data=confusion_matrix_BP.confusion_matrix,
                    dtype='uint64')
                # NOTE(review): the meter mean is stored as uint64, so any
                # fractional part is dropped - confirm this is intended.
                res_file.create_dataset('n_clusters',
                                        data=n_clusters_meter.value()[0],
                                        dtype='uint64')