Code example #1
Votes: 0
def save_config(_config, _log, config_filename='config.json'):
    """
    Store the updated configuration in a file.

    By default uses the filename "config.json", but that can be changed by
    setting the config_filename config entry.
    """
    # Work on a shallow copy so the caller's config dict is not mutated
    # when the bookkeeping 'config_filename' entry is dropped (Sacred hands
    # in a shared config object).
    _config = dict(_config)
    if 'config_filename' in _config:
        del _config['config_filename']
    _log.info('Saving config to "{}"'.format(config_filename))
    save_config_file(flatten(_config), config_filename)
Code example #2
Votes: 0
File: commands.py — Project: elanmart/sacred
def save_config(_config, _log, config_filename='config.json'):
    """
    Store the updated configuration in a file.

    By default uses the filename "config.json", but that can be changed by
    setting the config_filename config entry.
    """
    # Copy first: deleting 'config_filename' in place would mutate the
    # shared config dict that Sacred injects into every captured function.
    _config = dict(_config)
    # pop() with a default removes the key only when it is present.
    _config.pop('config_filename', None)
    _log.info('Saving config to "{}"'.format(config_filename))
    save_config_file(flatten(_config), config_filename)
Code example #3
Votes: 0
File: commands.py — Project: vishalbelsare/sacred
def save_config(_config, _log, config_filename="config.json"):
    """
    Store the updated configuration in a file.

    By default uses the filename "config.json", but that can be changed by
    setting the config_filename config entry.
    """
    # Deep-copy the (read-only) Sacred config so it can be edited locally.
    mutable_config = copy.deepcopy(_config)
    # Drop the bookkeeping entry; it should not be persisted to disk.
    mutable_config.pop("config_filename", None)
    _log.info('Saving config to "{}"'.format(config_filename))
    save_config_file(flatten(mutable_config), config_filename)
Code example #4
Votes: 0
File: train.py — Project: valeoai/FKAConv
def main(_run, _config):
    """Sacred entry point: train an FKAConv segmentation network (13 classes).

    Args:
        _run: Sacred run object, used to log per-epoch metrics.
        _config: Sacred configuration dict with 'training', 'dataset',
            'misc' and 'network' sections.

    Side effects: writes config.yaml, checkpoint.pth and logs.txt into
    the configured save directory.
    """
    print(_config)

    savedir_root = _config['training']['savedir']
    device = torch.device(_config['misc']['device'])

    # save the config file
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) round-trips Sacred's read-only
    # container back into plain dicts; eval is fragile — confirm whether a
    # dedicated conversion helper can be used instead.
    save_config_file(eval(str(_config)),
                     os.path.join(savedir_root, "config.yaml"))

    # create the path to data
    rootdir = _config['dataset']['dir']

    N_CLASSES = 13

    # create the network
    print("Creating the network...", end="", flush=True)
    if "Fusion" == _config["network"]["model"]:

        def network_function():
            # Fusion variant: separate conv and search backends.
            return NetworkFusion(3,
                                 N_CLASSES,
                                 get_conv(_config["network"]["backend_conv"]),
                                 get_search(
                                     _config["network"]["backend_search"]),
                                 config=_config)
    else:

        def network_function():
            return Network(3,
                           N_CLASSES,
                           get_conv(_config["network"]["backend_conv"]),
                           get_search(_config["network"]["backend_search"]),
                           config=_config)

    net = network_function()
    net.to(device)
    print("Done")

    training_transformations_data = [
        lcp_transfo.PillarSelection(_config["dataset"]["pillar_size"]),
        lcp_transfo.RandomSubSample(_config["dataset"]["num_points"])
    ]
    validation_transformations_data = [
        lcp_transfo.PillarSelection(_config["dataset"]["pillar_size"]),
        lcp_transfo.RandomSubSample(_config["dataset"]["num_points"])
    ]

    # color jitter is a train-time-only augmentation
    training_transformations_features = [
        lcp_transfo.ColorJittering(_config["training"]['jitter'])
    ]
    validation_transformations_features = []

    if not _config['training']['rgb']:
        training_transformations_features.append(lcp_transfo.NoColor())
        validation_transformations_features.append(lcp_transfo.NoColor())

    ds = Dataset(rootdir,
                 _config,
                 split='training',
                 network_function=network_function,
                 transformations_data=training_transformations_data,
                 transformations_features=training_transformations_features)
    train_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=_config['training']['batch_size'],
        shuffle=True,
        num_workers=_config['misc']['threads'])

    ds_val = Dataset(
        rootdir,
        _config,
        split='validation',
        network_function=network_function,
        transformations_data=validation_transformations_data,
        transformations_features=validation_transformations_features)
    test_loader = torch.utils.data.DataLoader(
        ds_val,
        batch_size=_config['training']['batch_size'],
        shuffle=False,
        num_workers=_config['misc']['threads'])
    if _config['training']['weights']:
        # class-balancing weights computed on the training set
        weights = ds.get_class_weights().to(device)
    else:
        weights = torch.ones_like(ds.get_class_weights()).to(device)
    print("Done")

    print("Creating optimizer...", end="", flush=True)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=_config['training']['lr_start'])
    print("done")

    # iterate over epochs
    for epoch in range(0, _config['training']['epoch_nbr']):

        #######
        # training
        net.train()

        train_loss = 0
        cm = np.zeros((N_CLASSES, N_CLASSES))
        t = tqdm(train_loader,
                 ncols=100,
                 desc="Epoch {}".format(epoch),
                 disable=_config['misc']['disable_tqdm'])
        for data in t:

            pts = data['pts'].to(device)
            features = data['features'].to(device)
            seg = data['target'].to(device)
            net_ids = data["net_indices"]
            net_pts = data["net_support"]
            # move the per-layer index/support tensors to the device
            for i in range(len(net_ids)):
                net_ids[i] = net_ids[i].to(device)
            for i in range(len(net_pts)):
                net_pts[i] = net_pts[i].to(device)

            optimizer.zero_grad()
            outputs = net(features,
                          pts,
                          indices=net_ids,
                          support_points=net_pts)
            loss = F.cross_entropy(outputs, seg, weight=weights)
            loss.backward()
            optimizer.step()

            output_np = np.argmax(outputs.cpu().detach().numpy(),
                                  axis=1).copy()
            target_np = seg.cpu().numpy().copy()

            # accumulate the confusion matrix over the epoch
            cm_ = confusion_matrix(target_np.ravel(),
                                   output_np.ravel(),
                                   labels=list(range(N_CLASSES)))
            cm += cm_

            oa = f"{metrics.stats_overall_accuracy(cm):.5f}"
            aa = f"{metrics.stats_accuracy_per_class(cm)[0]:.5f}"
            iou = f"{metrics.stats_iou_per_class(cm)[0]:.5f}"

            train_loss += loss.detach().cpu().item()

            t.set_postfix(OA=wblue(oa),
                          AA=wblue(aa),
                          IOU=wblue(iou),
                          LOSS=wblue(f"{train_loss/cm.sum():.4e}"))

        ######
        ## validation
        net.eval()
        cm_test = np.zeros((N_CLASSES, N_CLASSES))
        test_loss = 0
        t = tqdm(test_loader,
                 ncols=80,
                 desc="  Test epoch {}".format(epoch),
                 disable=_config['misc']['disable_tqdm'])
        with torch.no_grad():
            for data in t:

                pts = data['pts'].to(device)
                features = data['features'].to(device)
                seg = data['target'].to(device)
                net_ids = data["net_indices"]
                net_pts = data["net_support"]
                for i in range(len(net_ids)):
                    net_ids[i] = net_ids[i].to(device)
                for i in range(len(net_pts)):
                    net_pts[i] = net_pts[i].to(device)

                outputs = net(features,
                              pts,
                              indices=net_ids,
                              support_points=net_pts)
                # validation loss is unweighted, unlike training
                loss = F.cross_entropy(outputs, seg)

                output_np = np.argmax(outputs.cpu().detach().numpy(),
                                      axis=1).copy()
                target_np = seg.cpu().numpy().copy()

                cm_ = confusion_matrix(target_np.ravel(),
                                       output_np.ravel(),
                                       labels=list(range(N_CLASSES)))
                cm_test += cm_

                oa_val = f"{metrics.stats_overall_accuracy(cm_test):.5f}"
                aa_val = f"{metrics.stats_accuracy_per_class(cm_test)[0]:.5f}"
                iou_val = f"{metrics.stats_iou_per_class(cm_test)[0]:.5f}"

                test_loss += loss.detach().cpu().item()

                t.set_postfix(OA=wgreen(oa_val),
                              AA=wgreen(aa_val),
                              IOU=wgreen(iou_val),
                              LOSS=wgreen(f"{test_loss/cm_test.sum():.4e}"))

        # create the root folder
        os.makedirs(savedir_root, exist_ok=True)

        # save the checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, os.path.join(savedir_root, "checkpoint.pth"))

        # write the logs (append mode keeps history across restarts)
        with open(os.path.join(savedir_root, "logs.txt"), "a+") as logs:
            logs.write(f"{epoch} {oa} {aa} {iou} {oa_val} {aa_val} {iou_val}\n")

        # log train values
        # NOTE(review): "testAIoU" vs "trainIoU" key naming is inconsistent;
        # kept as-is in case dashboards depend on it — confirm intended.
        _run.log_scalar("trainOA", oa, epoch)
        _run.log_scalar("trainAA", aa, epoch)
        _run.log_scalar("trainIoU", iou, epoch)
        _run.log_scalar("testOA", oa_val, epoch)
        _run.log_scalar("testAA", aa_val, epoch)
        _run.log_scalar("testAIoU", iou_val, epoch)
Code example #5
Votes: 0
File: train.py — Project: valeoai/LightConvPoint
def main(_run, _config):
    """Sacred entry point: train a LightConvPoint network on S3DIS (13 classes).

    Args:
        _run: Sacred run object, used to log per-epoch metrics.
        _config: Sacred configuration dict with 'training', 'dataset',
            'misc' and 'network' sections.

    Side effects: writes config.yaml, checkpoint.pth and logs.txt into
    the configured save directory.
    """
    print(_config)

    savedir_root = _config['training']['savedir']
    device = torch.device(_config['misc']['device'])

    # save the config file
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) converts Sacred's read-only config
    # into plain dicts; eval is fragile — confirm a safer conversion exists.
    save_config_file(eval(str(_config)),
                     os.path.join(savedir_root, "config.yaml"))

    # create the path to data
    rootdir = os.path.join(_config['dataset']['datasetdir'],
                           _config['dataset']['dataset'])

    N_CLASSES = 13

    # create the network
    print("Creating the network...", end="", flush=True)

    def network_function():
        return get_network(_config["network"]["model"],
                           in_channels=3,
                           out_channels=N_CLASSES,
                           backend_conv=_config["network"]["backend_conv"],
                           backend_search=_config["network"]["backend_search"],
                           config=_config)

    net = network_function()
    net.to(device)
    print("Done")

    # create the filelists (train / val): the configured area is held out
    # for validation, all other areas are used for training.
    print("Create filelist...", end="")
    filelist_train = []
    filelist_test = []
    for area_idx in range(1, 7):
        folder = os.path.join(rootdir, f"Area_{area_idx}")
        datasets = [
            os.path.join(f"Area_{area_idx}", dataset)
            for dataset in os.listdir(folder)
        ]
        if area_idx == _config['dataset']['area']:
            filelist_test = filelist_test + datasets
        else:
            filelist_train = filelist_train + datasets
    filelist_train.sort()
    filelist_test.sort()
    print(
        f"done, {len(filelist_train)} train files, {len(filelist_test)} test files"
    )

    print("Creating dataloader and optimizer...", end="", flush=True)
    ds = Dataset(filelist_train,
                 rootdir,
                 training=True,
                 block_size=_config['dataset']['pillar_size'],
                 npoints=_config['dataset']['npoints'],
                 iteration_number=_config['training']['batchsize'] *
                 _config['training']['epoch_iter'],
                 jitter=_config['training']['jitter'],
                 scaling_param=_config['training']['scaling_param'],
                 rgb_dropout=_config['training']['rgb_dropout'],
                 rgb=_config['training']['rgb'],
                 network_function=network_function)
    train_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=_config['training']['batchsize'],
        shuffle=True,
        num_workers=_config['misc']['threads'])

    ds_val = Dataset(filelist_test,
                     rootdir,
                     training=False,
                     block_size=_config['dataset']['pillar_size'],
                     npoints=_config['dataset']['npoints'],
                     iteration_number=_config['training']['batchsize'] * 100,
                     rgb=_config['training']['rgb'],
                     network_function=network_function)
    test_loader = torch.utils.data.DataLoader(
        ds_val,
        batch_size=_config['training']['batchsize'],
        shuffle=False,
        num_workers=_config['misc']['threads'])
    print("Done")

    print("Creating optimizer...", end="", flush=True)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=_config['training']['lr_start'])
    print("done")

    print("Weights")
    # NOTE(review): the held-out area is read as _config['area'] here but as
    # _config['dataset']['area'] above — confirm both keys exist in configs.
    if _config['training']['weights']:  # computed on the train set
        if _config['area'] == 1:
            weights = torch.Tensor([
                0.7615, 0.3969, 0.4546, 0.2727, 6.7376, 4.1650, 1.6270, 3.2547,
                2.3042, 2.1289, 17.7709, 1.1333, 6.7996
            ])
        elif _config['area'] == 2:
            weights = torch.Tensor([
                0.7366, 0.4071, 0.4866, 0.2736, 4.0031, 3.3682, 1.6507, 2.5912,
                2.0347, 3.0115, 17.2155, 1.1268, 5.9607
            ])
        elif _config['area'] == 3:
            weights = torch.Tensor([
                0.7499, 0.3991, 0.4636, 0.2758, 4.4585, 3.7786, 1.6039, 2.9821,
                2.2443, 2.1931, 20.1374, 1.2197, 6.2980
            ])
        elif _config['area'] == 4:
            weights = torch.Tensor([
                0.7543, 0.3921, 0.4622, 0.2818, 3.8026, 3.8313, 1.7192, 3.0418,
                2.1892, 2.1827, 19.7227, 1.2032, 5.5455
            ])
        elif _config['area'] == 5:
            weights = torch.Tensor([
                0.7045, 0.4006, 0.4644, 0.2815, 3.1686, 3.6080, 1.4001, 3.6230,
                2.3671, 1.8859, 15.7542, 1.6276, 6.0848
            ])
        elif _config['area'] == 6:
            weights = torch.Tensor([
                0.7508, 0.3955, 0.4576, 0.2720, 5.9368, 4.1264, 1.6474, 3.0501,
                2.5304, 2.2307, 18.0194, 1.1336, 6.5966
            ])
        else:
            raise Exception('Unknown area')
    else:
        # no balancing: uniform class weights
        weights = torch.ones(N_CLASSES).float()
    weights = weights.to(device)
    print("Done")

    # iterate over epochs
    for epoch in range(0, _config['training']['epoch_nbr']):

        #######
        # training
        net.train()

        train_loss = 0
        cm = np.zeros((N_CLASSES, N_CLASSES))
        t = tqdm(train_loader,
                 ncols=100,
                 desc="Epoch {}".format(epoch),
                 disable=_config['misc']['disable_tqdm'])
        for data in t:

            pts = data['pts'].to(device)
            features = data['features'].to(device)
            seg = data['target'].to(device)
            net_ids = data["net_indices"]
            net_pts = data["net_support"]
            # move the per-layer index/support tensors to the device
            for i in range(len(net_ids)):
                net_ids[i] = net_ids[i].to(device)
            for i in range(len(net_pts)):
                net_pts[i] = net_pts[i].to(device)

            optimizer.zero_grad()
            outputs = net(features,
                          pts,
                          indices=net_ids,
                          support_points=net_pts)
            loss = F.cross_entropy(outputs, seg, weight=weights)
            loss.backward()
            optimizer.step()

            output_np = np.argmax(outputs.cpu().detach().numpy(),
                                  axis=1).copy()
            target_np = seg.cpu().numpy().copy()

            # accumulate the confusion matrix over the epoch
            cm_ = confusion_matrix(target_np.ravel(),
                                   output_np.ravel(),
                                   labels=list(range(N_CLASSES)))
            cm += cm_

            oa = f"{metrics.stats_overall_accuracy(cm):.5f}"
            aa = f"{metrics.stats_accuracy_per_class(cm)[0]:.5f}"
            iou = f"{metrics.stats_iou_per_class(cm)[0]:.5f}"

            train_loss += loss.detach().cpu().item()

            t.set_postfix(OA=wblue(oa),
                          AA=wblue(aa),
                          IOU=wblue(iou),
                          LOSS=wblue(f"{train_loss/cm.sum():.4e}"))

        ######
        ## validation
        net.eval()
        cm_test = np.zeros((N_CLASSES, N_CLASSES))
        test_loss = 0
        t = tqdm(test_loader,
                 ncols=80,
                 desc="  Test epoch {}".format(epoch),
                 disable=_config['misc']['disable_tqdm'])
        with torch.no_grad():
            for data in t:

                pts = data['pts'].to(device)
                features = data['features'].to(device)
                seg = data['target'].to(device)
                net_ids = data["net_indices"]
                net_pts = data["net_support"]
                for i in range(len(net_ids)):
                    net_ids[i] = net_ids[i].to(device)
                for i in range(len(net_pts)):
                    net_pts[i] = net_pts[i].to(device)

                outputs = net(features,
                              pts,
                              indices=net_ids,
                              support_points=net_pts)
                # validation loss is unweighted, unlike training
                loss = F.cross_entropy(outputs, seg)

                output_np = np.argmax(outputs.cpu().detach().numpy(),
                                      axis=1).copy()
                target_np = seg.cpu().numpy().copy()

                cm_ = confusion_matrix(target_np.ravel(),
                                       output_np.ravel(),
                                       labels=list(range(N_CLASSES)))
                cm_test += cm_

                oa_val = f"{metrics.stats_overall_accuracy(cm_test):.5f}"
                aa_val = f"{metrics.stats_accuracy_per_class(cm_test)[0]:.5f}"
                iou_val = f"{metrics.stats_iou_per_class(cm_test)[0]:.5f}"

                test_loss += loss.detach().cpu().item()

                t.set_postfix(OA=wgreen(oa_val),
                              AA=wgreen(aa_val),
                              IOU=wgreen(iou_val),
                              LOSS=wgreen(f"{test_loss/cm_test.sum():.4e}"))

        # create the root folder
        os.makedirs(savedir_root, exist_ok=True)

        # save the checkpoint
        torch.save(
            {
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, os.path.join(savedir_root, "checkpoint.pth"))

        # write the logs (append mode keeps history across restarts)
        with open(os.path.join(savedir_root, "logs.txt"), "a+") as logs:
            logs.write(f"{epoch} {oa} {aa} {iou} {oa_val} {aa_val} {iou_val}\n")

        # log train values
        _run.log_scalar("trainOA", oa, epoch)
        _run.log_scalar("trainAA", aa, epoch)
        _run.log_scalar("trainIoU", iou, epoch)
        _run.log_scalar("testOA", oa_val, epoch)
        _run.log_scalar("testAA", aa_val, epoch)
        _run.log_scalar("testAIoU", iou_val, epoch)
Code example #6
Votes: 0
File: train.py — Project: valeoai/FKAConv
def main(_run, _config):
    """Sacred entry point: train an FKAConv part-segmentation network
    (ShapeNet-style, 50 part classes).

    Args:
        _run: Sacred run object, used to log per-epoch metrics.
        _config: Sacred configuration dict with 'training', 'dataset',
            'misc' and 'network' sections.

    Side effects: writes config.yaml, checkpoint.pth and log.txt into
    the configured save directory.
    """
    print(_config)

    savedir_root = _config["training"]["savedir"]
    device = torch.device(_config["misc"]["device"])

    # save the config file
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) converts Sacred's read-only config
    # into plain dicts; eval is fragile — confirm a safer conversion exists.
    save_config_file(eval(str(_config)), os.path.join(savedir_root, "config.yaml"))

    print("get the data path...", end="", flush=True)
    rootdir = _config["dataset"]["dir"]
    print("done")

    N_CLASSES = 50

    print("Creating network...", end="", flush=True)

    def network_function():
        return Network(
            1, N_CLASSES,
            get_conv(_config["network"]["backend_conv"]),
            get_search(_config["network"]["backend_search"]),
        )

    net = network_function()
    net.to(device)
    network_parameters = count_parameters(net)
    print("parameters", network_parameters)

    training_transformations = [
        lcp_transfo.UnitBallNormalize(),
        lcp_transfo.RandomSubSample(_config["dataset"]["npoints"]),
        lcp_transfo.NormalPerturbation(sigma=0.001)
    ]
    # no perturbation at test time
    test_transformations = [
        lcp_transfo.UnitBallNormalize(),
        lcp_transfo.RandomSubSample(_config["dataset"]["npoints"]),
    ]

    print("Creating dataloader...", end="", flush=True)
    ds = Dataset(
        rootdir,
        'training',
        network_function=network_function,
        transformations_points=training_transformations
    )
    train_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=_config["training"]["batchsize"],
        shuffle=True,
        num_workers=_config["misc"]["threads"],
    )
    ds_test = Dataset(
        rootdir,
        'test',
        network_function=network_function,
        transformations_points=test_transformations
    )
    test_loader = torch.utils.data.DataLoader(
        ds_test,
        batch_size=_config["training"]["batchsize"],
        shuffle=False,
        num_workers=_config["misc"]["threads"],
    )
    print("Done")

    # define weights
    print("Computing weights...", end="", flush=True)
    weights = torch.from_numpy(ds.get_weights()).float().to(device)
    print("Done")

    print("Creating optimizer...", end="", flush=True)
    optimizer = torch.optim.Adam(net.parameters(), lr=_config["training"]["lr_start"], eps=1e-3)
    epoch_start = 0
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        _config["training"]["milestones"],
        gamma=_config["training"]["gamma"],
        last_epoch=epoch_start - 1,
    )
    print("Done")

    def get_data(data):
        """Unpack a batch and move all tensors to the target device."""
        pts = data["pts"].to(device)
        features = data["features"].to(device)
        seg = data["seg"].to(device)
        labels = data["label"]
        net_ids = data["net_indices"]
        net_pts = data["net_support"]
        for i in range(len(net_ids)):
            net_ids[i] = net_ids[i].to(device)
        for i in range(len(net_pts)):
            net_pts[i] = net_pts[i].to(device)

        return pts, features, seg, labels, net_ids, net_pts

    # create the log file
    for epoch in range(epoch_start, _config["training"]["epoch_nbr"]):

        # train
        net.train()
        cm = np.zeros((N_CLASSES, N_CLASSES))
        t = tqdm(
            train_loader,
            ncols=120,
            desc=f"Epoch {epoch}",
            disable=_config["misc"]["disable_tqdm"],
        )
        for data in t:

            pts, features, seg, labels, net_ids, net_pts = get_data(data)

            optimizer.zero_grad()
            outputs = net(features, pts, support_points=net_pts, indices=net_ids)
            loss = F.cross_entropy(outputs, seg, weight=weights)
            loss.backward()
            optimizer.step()

            outputs_np = outputs.cpu().detach().numpy()
            for i in range(pts.size(0)):
                # mask out part logits that do not belong to this object's
                # category before taking the argmax
                object_label = labels[i]
                part_start, part_end = ds.category_range[object_label]

                outputs_np[i, :part_start] = -1e7
                outputs_np[i, part_end:] = -1e7

            output_np = np.argmax(outputs_np, axis=1).copy()
            target_np = seg.cpu().numpy().copy()

            cm_ = confusion_matrix(
                target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES))
            )
            cm += cm_

            oa = "{:.3f}".format(metrics.stats_overall_accuracy(cm))
            aa = "{:.3f}".format(metrics.stats_accuracy_per_class(cm)[0])
            iou = "{:.3f}".format(metrics.stats_iou_per_class(cm)[0])

            t.set_postfix(OA=oa, AA=aa, IOU=iou)

        # eval (this is not the final evaluation, see dedicated evaluation)
        net.eval()
        with torch.no_grad():
            cm = np.zeros((N_CLASSES, N_CLASSES))
            t = tqdm(
                test_loader,
                ncols=120,
                desc=f"Test {epoch}",
                disable=_config["misc"]["disable_tqdm"],
            )
            for data in t:

                pts, features, seg, labels, net_ids, net_pts = get_data(data)

                outputs = net(features, pts, support_points=net_pts, indices=net_ids)
                loss = 0

                # per-shape loss restricted to the category's part range
                for i in range(pts.size(0)):
                    object_label = labels[i]
                    part_start, part_end = ds_test.category_range[object_label]

                    outputs_ = (outputs[i, part_start:part_end]).unsqueeze(0)
                    seg_ = (seg[i] - part_start).unsqueeze(0)

                    loss = loss + weights[object_label] * F.cross_entropy(
                        outputs_, seg_
                    )

                outputs_np = outputs.cpu().detach().numpy()
                for i in range(pts.size(0)):
                    # mask out parts outside the object's category
                    object_label = labels[i]
                    part_start, part_end = ds_test.category_range[object_label]

                    outputs_np[i, :part_start] = -1e7
                    outputs_np[i, part_end:] = -1e7

                output_np = np.argmax(outputs_np, axis=1).copy()
                target_np = seg.cpu().numpy().copy()

                cm_ = confusion_matrix(
                    target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES))
                )
                cm += cm_

                oa_test = "{:.3f}".format(metrics.stats_overall_accuracy(cm))
                aa_test = "{:.3f}".format(metrics.stats_accuracy_per_class(cm)[0])
                iou_test = "{:.3f}".format(metrics.stats_iou_per_class(cm)[0])

                t.set_postfix(OA=oa_test, AA=aa_test, IOU=iou_test)

        # scheduler update
        scheduler.step()

        # save the model
        os.makedirs(savedir_root, exist_ok=True)
        torch.save(
            {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            os.path.join(savedir_root, "checkpoint.pth"),
        )

        # write the logs; the context manager closes the file each epoch
        # (the original re-closed an already-closed handle after the loop,
        # which would also raise NameError when the loop runs zero times)
        with open(os.path.join(savedir_root, "log.txt"), "a+") as logs:
            logs.write(f"{epoch} {oa} {aa} {iou} {oa_test} {aa_test} {iou_test} \n")

        _run.log_scalar("trainOA", oa, epoch)
        _run.log_scalar("trainAA", aa, epoch)
        _run.log_scalar("trainIoU", iou, epoch)
        _run.log_scalar("testOA", oa_test, epoch)
        _run.log_scalar("testAA", aa_test, epoch)
        _run.log_scalar("testIoU", iou_test, epoch)
Code example #7
Votes: 0
File: train.py — Project: valeoai/LightConvPoint
def main(_run, _config):
    """Sacred entry point: train a shape-classification network on
    ModelNet40 (40 classes).

    Args:
        _run: Sacred run object, used to log per-epoch metrics.
        _config: Sacred configuration dict with 'training', 'dataset',
            'misc' and 'network' sections.

    Side effects: writes config.yaml, checkpoint.pth and logs.txt into
    the configured save directory.
    """
    print(_config)
    savedir_root = _config["training"]["savedir"]
    device = torch.device(_config["misc"]["device"])

    # save the config file in the directory to restore the configuration
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) converts Sacred's read-only config
    # into plain dicts; eval is fragile — confirm a safer conversion exists.
    save_config_file(eval(str(_config)), os.path.join(savedir_root, "config.yaml"))

    # parameters for training
    N_LABELS = 40
    input_channels = 1

    print("Creating network...", end="", flush=True)

    def network_function():
        return get_network(
            _config["network"]["model"],
            input_channels,
            N_LABELS,
            _config["network"]["backend_conv"],
            _config["network"]["backend_search"],
        )

    net = network_function()
    net.to(device)
    print("Number of parameters", count_parameters(net))

    print("get the data path...", end="", flush=True)
    rootdir = os.path.join(_config["dataset"]["dir"])
    print("done")

    training_transformations = [
        lcp_transfo.UnitBallNormalize(),
        lcp_transfo.RandomSubSample(_config["dataset"]["npoints"]),
        lcp_transfo.NormalPerturbation(sigma=0.01)
    ]
    # no perturbation at test time
    test_transformations = [
        lcp_transfo.UnitBallNormalize(),
        lcp_transfo.RandomSubSample(_config["dataset"]["npoints"]),
    ]

    print("Creating dataloaders...", end="", flush=True)
    # pick the dataset class matching the on-disk format
    if _config['dataset']['name'] == "Modelnet40_normal_resampled":
        Dataset = Modelnet40_normal_resampled
    elif _config['dataset']['name'] == "Modelnet40_ply_hdf5_2048":
        Dataset = Modelnet40_ply_hdf5_2048
    ds = Dataset(
        rootdir,
        split='training',
        network_function=network_function,
        transformations_points=training_transformations,
    )
    train_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=_config["training"]["batchsize"],
        shuffle=True,
        num_workers=_config["misc"]["threads"],
    )
    ds_test = Dataset(
        rootdir,
        split='test',
        network_function=network_function,
        transformations_points=test_transformations,
    )
    test_loader = torch.utils.data.DataLoader(
        ds_test,
        batch_size=_config["training"]["batchsize"],
        shuffle=False,
        num_workers=_config["misc"]["threads"],
    )
    print("done")

    print("Creating optimizer...", end="")
    optimizer = torch.optim.Adam(net.parameters(), lr=_config["training"]["lr_start"])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, _config["training"]["milestones"], gamma=0.5
    )
    print("done")

    def get_data(data):
        """Unpack a batch and move all tensors to the target device."""
        pts = data["pts"]
        features = data["features"]
        targets = data["target"]
        net_ids = data["net_indices"]
        net_support = data["net_support"]

        features = features.to(device)
        pts = pts.to(device)
        targets = targets.to(device)
        for i in range(len(net_ids)):
            net_ids[i] = net_ids[i].to(device)
        for i in range(len(net_support)):
            net_support[i] = net_support[i].to(device)

        return pts, features, targets, net_ids, net_support

    for epoch in range(_config["training"]["epoch_nbr"]):

        net.train()
        error = 0
        cm = np.zeros((N_LABELS, N_LABELS))

        # defaults in case the loader yields no batches
        train_aloss = "0"
        train_oa = "0"
        train_aa = "0"
        train_aiou = "0"

        t = tqdm(
            train_loader,
            desc="Epoch " + str(epoch),
            ncols=130,
            disable=_config["misc"]["disable_tqdm"],
        )
        for data in t:

            pts, features, targets, net_ids, net_support = get_data(data)

            optimizer.zero_grad()
            outputs = net(features, pts, support_points=net_support, indices=net_ids)
            loss = F.cross_entropy(outputs, targets)
            loss.backward()
            optimizer.step()

            # compute scores
            output_np = np.argmax(outputs.cpu().detach().numpy(), axis=1)
            target_np = targets.cpu().numpy()
            cm_ = confusion_matrix(
                target_np.ravel(), output_np.ravel(), labels=list(range(N_LABELS))
            )
            cm += cm_
            error += loss.item()

            # point wise scores on training
            train_oa = "{:.5f}".format(metrics.stats_overall_accuracy(cm))
            train_aa = "{:.5f}".format(metrics.stats_accuracy_per_class(cm)[0])
            train_aiou = "{:.5f}".format(metrics.stats_iou_per_class(cm)[0])
            train_aloss = "{:.5e}".format(error / cm.sum())

            t.set_postfix(OA=train_oa, AA=train_aa, AIOU=train_aiou, ALoss=train_aloss)

        net.eval()
        error = 0
        cm = np.zeros((N_LABELS, N_LABELS))
        test_aloss = "0"
        test_oa = "0"
        test_aa = "0"
        test_aiou = "0"
        with torch.no_grad():

            t = tqdm(
                test_loader,
                desc="  Test " + str(epoch),
                ncols=100,
                disable=_config["misc"]["disable_tqdm"],
            )
            for data in t:

                pts, features, targets, net_ids, net_support = get_data(data)

                outputs = net(
                    features, pts, support_points=net_support, indices=net_ids
                )
                loss = F.cross_entropy(outputs, targets)

                outputs_np = outputs.cpu().detach().numpy()
                pred_labels = np.argmax(outputs_np, axis=1)
                cm_ = confusion_matrix(
                    targets.cpu().numpy(), pred_labels, labels=list(range(N_LABELS))
                )
                cm += cm_
                error += loss.item()

                # point-wise scores on testing
                test_oa = "{:.5f}".format(metrics.stats_overall_accuracy(cm))
                test_aa = "{:.5f}".format(metrics.stats_accuracy_per_class(cm)[0])
                test_aiou = "{:.5f}".format(metrics.stats_iou_per_class(cm)[0])
                test_aloss = "{:.5e}".format(error / cm.sum())

                t.set_postfix(OA=test_oa, AA=test_aa, AIOU=test_aiou, ALoss=test_aloss)

        scheduler.step()

        # create the root folder
        os.makedirs(savedir_root, exist_ok=True)

        # save the checkpoint
        torch.save(
            {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            os.path.join(savedir_root, "checkpoint.pth"),
        )

        # write the logs; the context manager flushes and closes the file
        with open(os.path.join(savedir_root, "logs.txt"), "a+") as logs:
            logs.write(str(epoch) + " ")
            logs.write(train_aloss + " ")
            logs.write(train_oa + " ")
            logs.write(train_aa + " ")
            logs.write(train_aiou + " ")
            logs.write(test_aloss + " ")
            logs.write(test_oa + " ")
            logs.write(test_aa + " ")
            logs.write(test_aiou + "\n")

        # log for Sacred
        _run.log_scalar("trainOA", train_oa, epoch)
        _run.log_scalar("trainAA", train_aa, epoch)
        _run.log_scalar("trainAIoU", train_aiou, epoch)
        _run.log_scalar("trainLoss", train_aloss, epoch)
        _run.log_scalar("testOA", test_oa, epoch)
        _run.log_scalar("testAA", test_aa, epoch)
        _run.log_scalar("testAIoU", test_aiou, epoch)
        _run.log_scalar("testLoss", test_aloss, epoch)
Code example #8
Votes: 0
File: train.py — Project: valeoai/LightConvPoint
def main(_run, _config):
    """Train a point-cloud semantic-segmentation network (8 classes, Semantic3D voxels).

    Sacred entry point: ``_run`` is the Sacred run object used for metric
    logging, ``_config`` the (read-only) experiment configuration.  The
    function saves the config, builds the network and dataloaders, trains
    for ``training.epoch_nbr`` epochs, and after each epoch checkpoints the
    model and appends metrics to ``logs.txt`` and to Sacred.
    """
    print(_config)

    savedir_root = _config['training']['savedir']
    device = torch.device(_config['misc']['device'])

    # save the config file
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) rebuilds a plain mutable dict from the
    # read-only Sacred config.  It only ever sees our own config repr, but
    # eval() on any external string would be unsafe -- do not copy this pattern.
    save_config_file(eval(str(_config)), os.path.join(
        savedir_root, "config.yaml"))

    # create the path to data
    rootdir = os.path.join(_config['dataset']['datasetdir'], _config['dataset']['dataset'], 'train_voxel_0_05m/pointcloud')

    N_CLASSES = 8

    # create the network
    print("Creating the network...", end="", flush=True)

    def network_function():
        # Factory (not a bare instance) so the Dataset can build the
        # network-specific support points / indices on the fly.
        return get_network(
            _config["network"]["model"],
            in_channels=3,
            out_channels=N_CLASSES,
            backend_conv=_config["network"]["backend_conv"],
            backend_search=_config["network"]["backend_search"],
            config=_config
        )
    net = network_function()
    net.to(device)
    print("Done")

    filelist_train = [
            "bildstein_station1_xyz_intensity_rgb_voxels.npy",
            "bildstein_station3_xyz_intensity_rgb_voxels.npy",
            "bildstein_station5_xyz_intensity_rgb_voxels.npy",
            "domfountain_station1_xyz_intensity_rgb_voxels.npy",
            "domfountain_station2_xyz_intensity_rgb_voxels.npy",
            "domfountain_station3_xyz_intensity_rgb_voxels.npy",
            "neugasse_station1_xyz_intensity_rgb_voxels.npy",
            "sg27_station1_intensity_rgb_voxels.npy",
            "sg27_station2_intensity_rgb_voxels.npy",
            "sg27_station4_intensity_rgb_voxels.npy",
            "sg27_station5_intensity_rgb_voxels.npy",
            "sg27_station9_intensity_rgb_voxels.npy",
            "sg28_station4_intensity_rgb_voxels.npy",
            "untermaederbrunnen_station1_xyz_intensity_rgb_voxels.npy",
            "untermaederbrunnen_station3_xyz_intensity_rgb_voxels.npy",
        ]
    # Empty by default: all scenes are used for training, so the validation
    # branches below are skipped unless files are moved into this list.
    filelist_val = []

    print("Creating dataloader and optimizer...", end="", flush=True)
    ds = Dataset(filelist_train, rootdir,
                             training=True, block_size=_config['dataset']['pillar_size'],
                             npoints=_config['dataset']['npoints'],
                             iteration_number=_config['training']['batchsize']*_config['training']['epoch_iter'],
                             jitter=_config['training']['jitter'],
                             rgb_dropout=_config['training']['rgb_dropout'],
                             rgb=_config['training']['rgb'], network_function=network_function)
    train_loader = torch.utils.data.DataLoader(ds, batch_size=_config['training']['batchsize'], shuffle=True,
                                        num_workers=_config['misc']['threads']
                                        )

    if len(filelist_val) > 0:
        # FIX: this previously referenced the undefined name `filelist_test`,
        # which raised a NameError as soon as a validation list was supplied.
        ds_val = Dataset(filelist_val, rootdir,
                                training=False, block_size=_config['dataset']['pillar_size'],
                                npoints=_config['dataset']['npoints'],
                                iteration_number=_config['training']['batchsize']*100,
                                rgb=_config['training']['rgb'],
                                network_function=network_function)
        test_loader = torch.utils.data.DataLoader(ds_val, batch_size=_config['training']['batchsize'], shuffle=False,
                                            num_workers=_config['misc']['threads']
                                            )
    print("Done")


    print("Creating optimizer...", end="", flush=True)
    optimizer = torch.optim.Adam(net.parameters(), lr=_config['training']['lr_start'])
    print("done")


    print("Weights")
    if _config['training']['weights']: # computed on the train set
        # Per-class inverse-frequency weights for the 8 Semantic3D classes.
        weights = torch.Tensor([ 0.7772,  0.7216,  0.4977,  2.9913,  0.3884,  4.2342,  9.2966, 15.1820])
    else:
        weights = torch.ones(N_CLASSES).float()
    weights = weights.to(device)
    print("Done")


    # iterate over epochs
    for epoch in range(0, _config['training']['epoch_nbr']):

        #######
        # training
        net.train()

        train_loss = 0
        # Running confusion matrix, accumulated over the whole epoch.
        cm = np.zeros((N_CLASSES, N_CLASSES))
        t = tqdm(train_loader, ncols=100, desc="Epoch {}".format(epoch), disable=_config['misc']['disable_tqdm'])
        for data in t:

            pts = data['pts'].to(device)
            features = data['features'].to(device)
            seg = data['target'].to(device)
            # Per-layer neighborhood indices / support points precomputed by
            # the dataset via network_function(); move each tensor to device.
            net_ids = data["net_indices"]
            net_pts = data["net_support"]
            for i in range(len(net_ids)):
                net_ids[i] = net_ids[i].to(device)
            for i in range(len(net_pts)):
                net_pts[i] = net_pts[i].to(device)

            optimizer.zero_grad()
            outputs = net(features, pts, indices=net_ids, support_points=net_pts)
            loss = F.cross_entropy(outputs, seg, weight=weights)
            loss.backward()
            optimizer.step()

            output_np = np.argmax(outputs.cpu().detach().numpy(), axis=1).copy()
            target_np = seg.cpu().numpy().copy()

            cm_ = confusion_matrix(target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES)))
            cm += cm_

            oa = f"{metrics.stats_overall_accuracy(cm):.5f}"
            aa = f"{metrics.stats_accuracy_per_class(cm)[0]:.5f}"
            iou = f"{metrics.stats_iou_per_class(cm)[0]:.5f}"

            train_loss += loss.detach().cpu().item()

            # Loss is averaged per point (cm.sum() == number of points seen).
            t.set_postfix(OA=wblue(oa), AA=wblue(aa), IOU=wblue(iou), LOSS=wblue(f"{train_loss/cm.sum():.4e}"))

        ######
        ## validation (only when a validation split is configured)
        if len(filelist_val) > 0:
            net.eval()
            cm_test = np.zeros((N_CLASSES, N_CLASSES))
            test_loss = 0
            t = tqdm(test_loader, ncols=80, desc="  Test epoch {}".format(epoch), disable=_config['misc']['disable_tqdm'])
            with torch.no_grad():
                for data in t:

                    pts = data['pts'].to(device)
                    features = data['features'].to(device)
                    seg = data['target'].to(device)
                    net_ids = data["net_indices"]
                    net_pts = data["net_support"]
                    for i in range(len(net_ids)):
                        net_ids[i] = net_ids[i].to(device)
                    for i in range(len(net_pts)):
                        net_pts[i] = net_pts[i].to(device)

                    outputs = net(features, pts, indices=net_ids, support_points=net_pts)
                    # Unweighted loss at validation time (weights are a
                    # training-only rebalancing device).
                    loss = F.cross_entropy(outputs, seg)

                    output_np = np.argmax(outputs.cpu().detach().numpy(), axis=1).copy()
                    target_np = seg.cpu().numpy().copy()

                    cm_ = confusion_matrix(target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES)))
                    cm_test += cm_

                    oa_val = f"{metrics.stats_overall_accuracy(cm_test):.5f}"
                    aa_val = f"{metrics.stats_accuracy_per_class(cm_test)[0]:.5f}"
                    iou_val = f"{metrics.stats_iou_per_class(cm_test)[0]:.5f}"

                    test_loss += loss.detach().cpu().item()

                    t.set_postfix(OA=wgreen(oa_val), AA=wgreen(aa_val), IOU=wgreen(iou_val), LOSS=wgreen(f"{test_loss/cm_test.sum():.4e}"))

        # create the root folder
        os.makedirs(savedir_root, exist_ok=True)

        # save the checkpoint (epoch+1 so a resume restarts at the next epoch)
        torch.save({
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, os.path.join(savedir_root, "checkpoint.pth"))

        # write the logs
        with open(os.path.join(savedir_root, "logs.txt"), "a+") as logs:
            logs.write(f"{epoch} {oa} {aa} {iou}")
            if len(filelist_val) > 0:
                # FIX: this write was missing its f-prefix and used to log the
                # literal text "{oa_val} {aa_val} {iou_val}".
                logs.write(f" {oa_val} {aa_val} {iou_val}")
            logs.write("\n")

        # log train values to Sacred
        _run.log_scalar("trainOA", oa, epoch)
        _run.log_scalar("trainAA", aa, epoch)
        _run.log_scalar("trainIoU", iou, epoch)
        if len(filelist_val) > 0:
            _run.log_scalar("testOA", oa_val, epoch)
            _run.log_scalar("testAA", aa_val, epoch)
            _run.log_scalar("testAIoU", iou_val, epoch)
コード例 #9
0
def main(_run, _config):
    """Train a ShapeNet part-segmentation network (50 part classes).

    Sacred entry point: ``_run`` is the Sacred run object used for metric
    logging, ``_config`` the experiment configuration.  Loads the train/val
    splits (merged for training) and the test split, computes per-part
    class weights from shape frequencies, trains for ``training.epoch_nbr``
    epochs with a MultiStepLR schedule, and after each epoch checkpoints
    the model and appends metrics to ``log.txt`` and to Sacred.
    """
    print(_config)

    savedir_root = _config["training"]["savedir"]
    device = torch.device(_config["misc"]["device"])

    # save the config file
    os.makedirs(savedir_root, exist_ok=True)
    # NOTE(review): eval(str(_config)) rebuilds a plain mutable dict from the
    # read-only Sacred config.  It only ever sees our own config repr, but
    # eval() on any external string would be unsafe -- do not copy this pattern.
    save_config_file(eval(str(_config)), os.path.join(savedir_root, "config.yaml"))

    print("get the data path...", end="", flush=True)
    rootdir = os.path.join(_config["dataset"]["datasetdir"], _config["dataset"]["dataset"])
    print("done")

    filelist_train = os.path.join(rootdir, "train_files.txt")
    filelist_val = os.path.join(rootdir, "val_files.txt")
    filelist_test = os.path.join(rootdir, "test_files.txt")

    N_CLASSES = 50

    # (category name, number of parts) for the 16 ShapeNet categories;
    # the part counts sum to N_CLASSES == 50.
    shapenet_labels = [
        ["Airplane", 4],
        ["Bag", 2],
        ["Cap", 2],
        ["Car", 4],
        ["Chair", 4],
        ["Earphone", 3],
        ["Guitar", 3],
        ["Knife", 2],
        ["Lamp", 4],
        ["Laptop", 2],
        ["Motorbike", 6],
        ["Mug", 2],
        ["Pistol", 3],
        ["Rocket", 3],
        ["Skateboard", 3],
        ["Table", 3],
    ]
    # category_range[c] = [first_part_label, one_past_last_part_label] for
    # category c in the global 50-part label space.
    category_range = []
    count = 0
    for element in shapenet_labels:
        part_start = count
        count += element[1]
        part_end = count
        category_range.append([part_start, part_end])

    # Prepare inputs
    print("Preparing datasets...", end="", flush=True)
    (
        data_train,
        labels_shape_train,
        data_num_train,
        labels_pts_train,
        _,
    ) = data_utils.load_seg(filelist_train)
    data_val, labels_shape_val, data_num_val, labels_pts_val, _ = data_utils.load_seg(
        filelist_val
    )
    (
        data_test,
        labels_shape_test,
        data_num_test,
        labels_pts_test,
        _,
    ) = data_utils.load_seg(filelist_test)
    # Standard ShapeNet protocol: train on train+val, evaluate on test.
    data_train = np.concatenate([data_train, data_val], axis=0)
    labels_shape_train = np.concatenate([labels_shape_train, labels_shape_val], axis=0)
    data_num_train = np.concatenate([data_num_train, data_num_val], axis=0)
    labels_pts_train = np.concatenate([labels_pts_train, labels_pts_val], axis=0)
    print("Done", data_train.shape)

    # define weights: inverse shape frequency per category, normalized to
    # mean 1, then repeated so every part of a category shares its weight.
    print("Computing weights...", end="", flush=True)
    frequences = [0 for i in range(len(shapenet_labels))]
    for i in range(len(shapenet_labels)):
        frequences[i] += (labels_shape_train == i).sum()
    for i in range(len(shapenet_labels)):
        frequences[i] /= shapenet_labels[i][1]
    frequences = np.array(frequences)
    frequences = frequences.mean() / frequences
    repeat_factor = [sh[1] for sh in shapenet_labels]
    frequences = np.repeat(frequences, repeat_factor)
    weights = torch.from_numpy(frequences).float().to(device)
    print("Done")

    print("Creating network...", end="", flush=True)

    def network_function():
        # Factory (not a bare instance) so the Dataset can build the
        # network-specific support points / indices on the fly.
        return get_network(
            _config["network"]["model"],
            in_channels=1,
            out_channels=N_CLASSES,
            backend_conv=_config["network"]["backend_conv"],
            backend_search=_config["network"]["backend_search"],
        )

    net = network_function()
    net.to(device)
    network_parameters = count_parameters(net)
    print("parameters", network_parameters)

    print("Creating dataloader...", end="", flush=True)
    ds = Dataset(
        data_train,
        data_num_train,
        labels_pts_train,
        labels_shape_train,
        npoints=_config["dataset"]["npoints"],
        training=True,
        network_function=network_function,
    )
    train_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=_config["training"]["batchsize"],
        shuffle=True,
        num_workers=_config["misc"]["threads"],
    )
    ds_test = Dataset(
        data_test,
        data_num_test,
        labels_pts_test,
        labels_shape_test,
        npoints=_config["dataset"]["npoints"],
        training=False,
        network_function=network_function,
    )
    test_loader = torch.utils.data.DataLoader(
        ds_test,
        batch_size=_config["training"]["batchsize"],
        shuffle=False,
        num_workers=_config["misc"]["threads"],
    )
    print("Done")

    print("Creating optimizer...", end="", flush=True)
    optimizer = torch.optim.Adam(net.parameters(), lr=_config["training"]["lr_start"], eps=1e-3)
    epoch_start = 0
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        _config["training"]["milestones"],
        gamma=_config["training"]["gamma"],
        last_epoch=epoch_start - 1,
    )
    print("Done")

    # iterate over epochs
    for epoch in range(epoch_start, _config["training"]["epoch_nbr"]):

        # train
        net.train()
        cm = np.zeros((N_CLASSES, N_CLASSES))
        t = tqdm(
            train_loader,
            ncols=120,
            desc=f"Epoch {epoch}",
            disable=_config["misc"]["disable_tqdm"],
        )
        for data in t:

            pts = data["pts"].to(device)
            features = data["features"].to(device)
            seg = data["seg"].to(device)
            labels = data["label"]
            # Per-layer neighborhood indices / support points precomputed by
            # the dataset via network_function(); move each tensor to device.
            net_ids = data["net_indices"]
            net_pts = data["net_support"]
            for i in range(len(net_ids)):
                net_ids[i] = net_ids[i].to(device)
            for i in range(len(net_pts)):
                net_pts[i] = net_pts[i].to(device)

            optimizer.zero_grad()
            outputs = net(features, pts, support_points=net_pts, indices=net_ids)
            loss = F.cross_entropy(outputs, seg, weight=weights)

            loss.backward()
            optimizer.step()

            # Mask out parts that do not belong to each shape's category so
            # the argmax can only pick a valid part label.
            outputs_np = outputs.cpu().detach().numpy()
            for i in range(pts.size(0)):
                # get the number of part for the shape
                object_label = labels[i]
                part_start, part_end = category_range[object_label]

                outputs_np[i, :part_start] = -1e7
                outputs_np[i, part_end:] = -1e7

            output_np = np.argmax(outputs_np, axis=1).copy()
            target_np = seg.cpu().numpy().copy()

            cm_ = confusion_matrix(
                target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES))
            )
            cm += cm_

            oa = "{:.3f}".format(metrics.stats_overall_accuracy(cm))
            aa = "{:.3f}".format(metrics.stats_accuracy_per_class(cm)[0])
            iou = "{:.3f}".format(metrics.stats_iou_per_class(cm)[0])

            t.set_postfix(OA=oa, AA=aa, IOU=iou)

        # eval (this is not the final evaluation, see dedicated evaluation)
        net.eval()
        with torch.no_grad():
            cm = np.zeros((N_CLASSES, N_CLASSES))
            t = tqdm(
                test_loader,
                ncols=120,
                desc=f"Test {epoch}",
                disable=_config["misc"]["disable_tqdm"],
            )
            for data in t:
                pts = data["pts"].to(device)
                features = data["features"].to(device)
                seg = data["seg"].to(device)
                labels = data["label"]
                net_ids = data["net_indices"]
                net_pts = data["net_support"]
                for i in range(len(net_ids)):
                    net_ids[i] = net_ids[i].to(device)
                for i in range(len(net_pts)):
                    net_pts[i] = net_pts[i].to(device)

                outputs = net(features, pts, support_points=net_pts, indices=net_ids)
                # NOTE(review): this per-category loss is accumulated but
                # never logged or returned anywhere below -- kept for parity
                # with the original; consider logging or removing it.
                loss = 0

                for i in range(pts.size(0)):
                    # restrict loss to the parts of this shape's category
                    object_label = labels[i]
                    part_start, part_end = category_range[object_label]

                    outputs_ = (outputs[i, part_start:part_end]).unsqueeze(0)
                    seg_ = (seg[i] - part_start).unsqueeze(0)

                    loss = loss + weights[object_label] * F.cross_entropy(
                        outputs_, seg_
                    )

                # Same category masking as in training before the argmax.
                outputs_np = outputs.cpu().detach().numpy()
                for i in range(pts.size(0)):
                    # get the number of part for the shape
                    object_label = labels[i]
                    part_start, part_end = category_range[object_label]

                    outputs_np[i, :part_start] = -1e7
                    outputs_np[i, part_end:] = -1e7

                output_np = np.argmax(outputs_np, axis=1).copy()
                target_np = seg.cpu().numpy().copy()

                cm_ = confusion_matrix(
                    target_np.ravel(), output_np.ravel(), labels=list(range(N_CLASSES))
                )
                cm += cm_

                oa_test = "{:.3f}".format(metrics.stats_overall_accuracy(cm))
                aa_test = "{:.3f}".format(metrics.stats_accuracy_per_class(cm)[0])
                iou_test = "{:.3f}".format(metrics.stats_iou_per_class(cm)[0])

                t.set_postfix(OA=oa_test, AA=aa_test, IOU=iou_test)

        # scheduler update
        scheduler.step()

        # save the model (epoch+1 so a resume restarts at the next epoch)
        os.makedirs(savedir_root, exist_ok=True)
        torch.save(
            {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            os.path.join(savedir_root, "checkpoint.pth"),
        )

        # write the logs.  FIX: the file is now managed per epoch by a `with`
        # block; the original also called logs.close() after the epoch loop,
        # which re-closed an already-closed file and raised NameError when
        # epoch_nbr == 0.
        with open(os.path.join(savedir_root, "log.txt"), "a+") as logs:
            logs.write(f"{epoch} {oa} {aa} {iou} {oa_test} {aa_test} {iou_test} \n")

        _run.log_scalar("trainOA", oa, epoch)
        _run.log_scalar("trainAA", aa, epoch)
        _run.log_scalar("trainIoU", iou, epoch)
        _run.log_scalar("testOA", oa_test, epoch)
        _run.log_scalar("testAA", aa_test, epoch)
        _run.log_scalar("testIoU", iou_test, epoch)