Ejemplo n.º 1
0
def main() -> None:
    dist.init()

    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = {}
    for split in dataset:
        sampler = torch.utils.data.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'),
        )
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size // dist.size(),
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
        )

    model = builder.make_model()
    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
    )

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = ClassificationTrainer(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        amp_enabled=configs.amp.enabled,
    )
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.num_epochs,
        callbacks=[
            SaverRestore(),
            InferenceRunner(
                dataflow['test'],
                callbacks=[
                    TopKCategoricalAccuracy(k=1, name='acc/top1'),
                    TopKCategoricalAccuracy(k=5, name='acc/top5'),
                ],
            ),
            MaxSaver('acc/top1'),
            Saver(),
        ],
    )
Ejemplo n.º 2
0
def main() -> None:
    # dist.init()
    torch.backends.cudnn.benchmark = True
    # torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    parser.add_argument('--pdb', action='store_true', help='pdb')
    parser.add_argument('--gpu', type=str, help='gpu ids', default=None)
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if configs.debug.pdb or args.pdb:
        pdb.set_trace()

    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if configs.debug.set_seed:
        torch.manual_seed(configs.debug.seed)
        np.random.seed(configs.debug.seed)

    if configs.run.device == 'gpu':
        device = torch.device('cuda')
    elif configs.run.device == 'cpu':
        device = torch.device('cpu')
    else:
        raise ValueError(configs.run.device)

    if isinstance(configs.optimizer.lr, str):
        configs.optimizer.lr = eval(configs.optimizer.lr)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = dict()

    for split in dataset:
        sampler = torch.utils.data.RandomSampler(dataset[split])
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.run.bsz,
            sampler=sampler,
            num_workers=configs.run.workers_per_gpu,
            pin_memory=True)

    model = builder.make_model()
    model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(
    #     model.cuda(),
    #     device_ids=[dist.local_rank()],
    #     find_unused_parameters=True)

    total_params = sum(p.numel() for p in model.parameters())
    logger.info(f'Model Size: {total_params}')

    # logger.info(f'Model MACs: {profile_macs(model, inputs)}')

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = LayerRegressionTrainer(model=model,
                                     criterion=criterion,
                                     optimizer=optimizer,
                                     scheduler=scheduler)
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.run.n_epochs,
        callbacks=[
            # SaverRestore(),
            InferenceRunner(dataflow=dataflow['valid'], callbacks=[]),
            MaxSaver('loss/valid'),
            # Saver(),
        ])
Ejemplo n.º 3
0
def main() -> None:
    dist.init()

    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    parser.add_argument('--name', type=str, help='model name')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = dict()
    for split in dataset:
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'))
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size if split == 'train' else 1,
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
            collate_fn=dataset[split].collate_fn)

    
    if 'spvnas' in args.name.lower():
        model = spvnas_specialized(args.name)
    elif 'spvcnn' in args.name.lower():
        model = spvcnn(args.name)
    elif 'mink' in args.name.lower():
        model = minkunet(args.name)
    else:
        raise NotImplementedError
    
    #model = builder.make_model()
    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
        find_unused_parameters=True)
    model.eval()

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)
    meter = MeanIoU(configs.data.num_classes, 
                    configs.data.ignore_label)

    trainer = SemanticKITTITrainer(model=model,
                          criterion=criterion,
                          optimizer=optimizer,
                          scheduler=scheduler,
                          num_workers=configs.workers_per_gpu,
                          seed=configs.train.seed
                          )
    callbacks=Callbacks([
                          SaverRestore(),
                          MeanIoU(
                              configs.data.num_classes, 
                              configs.data.ignore_label
                          )
                      ])
    callbacks._set_trainer(trainer)
    trainer.callbacks = callbacks
    trainer.dataflow = dataflow['test']
    
    
    trainer.before_train()
    trainer.before_epoch()
    
    # important
    model.eval()
    
    for feed_dict in tqdm(dataflow['test'], desc='eval'):
        _inputs = dict()
        for key, value in feed_dict.items():
            if not 'name' in key:
                _inputs[key] = value.cuda()

        inputs = _inputs['lidar']
        targets = feed_dict['targets'].F.long().cuda(non_blocking=True)
        outputs = model(inputs)
       
        invs = feed_dict['inverse_map']
        all_labels = feed_dict['targets_mapped']
        _outputs = []
        _targets = []
        for idx in range(invs.C[:, -1].max()+1):
            cur_scene_pts = (inputs.C[:, -1] == idx).cpu().numpy()
            cur_inv = invs.F[invs.C[:, -1] == idx].cpu().numpy()
            cur_label = (all_labels.C[:, -1] == idx).cpu().numpy()
            outputs_mapped = outputs[cur_scene_pts][
                cur_inv].argmax(1)
            targets_mapped = all_labels.F[cur_label]
            _outputs.append(outputs_mapped)
            _targets.append(targets_mapped)
        outputs = torch.cat(_outputs, 0)
        targets = torch.cat(_targets, 0)
        output_dict = {
            'outputs': outputs,
            'targets': targets
        }
        trainer.after_step(output_dict)
    
    trainer.after_epoch()
Ejemplo n.º 4
0
def main() -> None:
    # dist.init()
    torch.backends.cudnn.benchmark = True
    # torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--ckpt-dir', metavar='DIR', help='run directory')
    parser.add_argument('--pdb', action='store_true', help='pdb')
    parser.add_argument('--gpu', type=str, help='gpu ids', default=None)
    parser.add_argument('--print-configs',
                        action='store_true',
                        help='print ALL configs')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if configs.debug.pdb or args.pdb:
        pdb.set_trace()

    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if configs.debug.set_seed:
        torch.manual_seed(configs.debug.seed)
        np.random.seed(configs.debug.seed)

    if configs.run.device == 'gpu':
        device = torch.device('cuda')
    elif configs.run.device == 'cpu':
        device = torch.device('cpu')
    else:
        raise ValueError(configs.run.device)

    if isinstance(configs.optimizer.lr, str):
        configs.optimizer.lr = eval(configs.optimizer.lr)

    # set the run dir according to config file's name
    args.run_dir = 'runs/' + args.config.replace('/', '.').replace(
        'examples.', '').replace('.yml', '').replace('configs.', '')
    set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))

    if args.print_configs:
        print_conf = configs
    else:
        print_conf = get_cared_configs(configs, 'train')

    logger.info(f'Training started: "{args.run_dir}".' + '\n' +
                f'{print_conf}')

    dataset = builder.make_dataset()
    dataflow = dict()

    # for split in dataset:
    #     sampler = torch.utils.data.distributed.DistributedSampler(
    #         dataset[split],
    #         num_replicas=dist.size(),
    #         rank=dist.rank(),
    #         shuffle=(split == 'train'))
    #     dataflow[split] = torch.utils.data.DataLoader(
    #         dataset[split],
    #         batch_size=configs.run.bsz // dist.size(),
    #         sampler=sampler,
    #         num_workers=configs.run.workers_per_gpu,
    #         pin_memory=True)

    for split in dataset:
        if split == 'train':
            sampler = torch.utils.data.RandomSampler(dataset[split])
            batch_size = configs.run.bsz
        else:
            # for valid and test, use SequentialSampler to make the train.py
            # and eval.py results consistent
            sampler = torch.utils.data.SequentialSampler(dataset[split])
            batch_size = getattr(configs.run, 'eval_bsz', configs.run.bsz)

        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=batch_size,
            sampler=sampler,
            num_workers=configs.run.workers_per_gpu,
            pin_memory=True)

    model = builder.make_model()

    state_dict = {}
    solution = None
    score = None
    if configs.ckpt.load_ckpt:
        logger.warning('Loading checkpoint!')
        state_dict = io.load(os.path.join(args.ckpt_dir, configs.ckpt.name),
                             map_location='cpu')
        if getattr(state_dict, 'model_arch', None) is not None:
            model_load = state_dict['model_arch']
            for module_load, module in zip(model_load.modules(),
                                           model.modules()):
                if isinstance(module, tq.RandomLayer):
                    # random layer, need to restore the architecture
                    module.rebuild_random_layer_from_op_list(
                        n_ops_in=module_load.n_ops,
                        wires_in=module_load.wires,
                        op_list_in=module_load.op_list,
                    )

        if not configs.ckpt.weight_from_scratch:
            model.load_state_dict(state_dict['model'], strict=False)
        else:
            logger.warning(f"DO NOT load weight, train weights from scratch!")

        if 'solution' in state_dict.keys():
            solution = state_dict['solution']
            logger.info(f"Loading the solution {solution}")
            logger.info(f"Original score: {state_dict['score']}")
            model.set_sample_arch(solution['arch'])
            score = state_dict['score']

        if 'v_c_reg_mapping' in state_dict.keys():
            try:
                model.measure.set_v_c_reg_mapping(
                    state_dict['v_c_reg_mapping'])
            except AttributeError:
                logger.warning(f"Cannot set v_c_reg_mapping.")

        if configs.model.load_op_list:
            assert state_dict['q_layer_op_list'] is not None
            logger.warning(f"Loading the op_list, will replace the q_layer in "
                           f"the original model!")
            q_layer = build_module_from_op_list(state_dict['q_layer_op_list'])
            model.q_layer = q_layer

    if configs.model.transpile_before_run:
        # transpile the q_layer
        logger.warning(f"Transpile the q_layer to basis gate set before "
                       f"training, will replace the q_layer!")
        processor = builder.make_qiskit_processor()

        if getattr(model, 'q_layer', None) is not None:
            circ = tq2qiskit(model.q_device, model.q_layer)
            """
            add measure because the transpile process may permute the wires, 
            so we need to get the final q reg to c reg mapping 
            """
            circ.measure(list(range(model.q_device.n_wires)),
                         list(range(model.q_device.n_wires)))
            logger.info("Transpiling circuit...")

            if solution is not None:
                processor.set_layout(solution['layout'])
                logger.warning(
                    f"Set layout {solution['layout']} for transpile!")

            circ_transpiled = processor.transpile(circs=circ)
            q_layer = qiskit2tq(circ=circ_transpiled)

            model.measure.set_v_c_reg_mapping(
                get_v_c_reg_mapping(circ_transpiled))
            model.q_layer = q_layer

            if configs.trainer.add_noise:
                # noise-aware training
                noise_model_tq = builder.make_noise_model_tq()
                noise_model_tq.is_add_noise = True
                noise_model_tq.v_c_reg_mapping = get_v_c_reg_mapping(
                    circ_transpiled)
                noise_model_tq.p_c_reg_mapping = get_p_c_reg_mapping(
                    circ_transpiled)
                noise_model_tq.p_v_reg_mapping = get_p_v_reg_mapping(
                    circ_transpiled)
                model.set_noise_model_tq(noise_model_tq)

        elif getattr(model, 'nodes', None) is not None:
            # every node has a noise model because it is possible that
            # different nodes run on different QC
            for node in model.nodes:
                circ = tq2qiskit(node.q_device, node.q_layer)
                circ.measure(list(range(node.q_device.n_wires)),
                             list(range(node.q_device.n_wires)))
                circ_transpiled = processor.transpile(circs=circ)
                q_layer = qiskit2tq(circ=circ_transpiled)

                node.measure.set_v_c_reg_mapping(
                    get_v_c_reg_mapping(circ_transpiled))
                node.q_layer = q_layer

                if configs.trainer.add_noise:
                    # noise-aware training
                    noise_model_tq = builder.make_noise_model_tq()
                    noise_model_tq.is_add_noise = True
                    noise_model_tq.v_c_reg_mapping = get_v_c_reg_mapping(
                        circ_transpiled)
                    noise_model_tq.p_c_reg_mapping = get_p_c_reg_mapping(
                        circ_transpiled)
                    noise_model_tq.p_v_reg_mapping = get_p_v_reg_mapping(
                        circ_transpiled)
                    node.set_noise_model_tq(noise_model_tq)

    if getattr(configs.model.arch, 'sample_arch', None) is not None and \
            not configs.model.transpile_before_run:
        sample_arch = configs.model.arch.sample_arch
        logger.warning(f"Setting sample arch {sample_arch} from config file!")
        if isinstance(sample_arch, str):
            # this is the name of arch
            sample_arch = get_named_sample_arch(model.arch_space, sample_arch)
            logger.warning(f"Decoded sample arch: {sample_arch}")
        model.set_sample_arch(sample_arch)

    if configs.trainer.name == 'pruning_trainer':
        """
        in pruning, convert the super layers to module list, otherwise the 
        pruning ratio is difficulty to set
        """
        logger.warning(f"Convert sampled layer to module list layer!")
        model.q_layer = build_module_from_op_list(
            build_module_op_list(model.q_layer))

    model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(
    #     model.cuda(),
    #     device_ids=[dist.local_rank()],
    #     find_unused_parameters=True)
    if getattr(model, 'sample_arch', None) is not None and \
            not configs.model.transpile_before_run and \
            not configs.trainer.name == 'pruning_trainer':
        n_params = model.count_sample_params()
        logger.info(f"Number of sampled params: {n_params}")

    total_params = sum(p.numel() for p in model.parameters())
    logger.info(f'Model Size: {total_params}')

    # logger.info(f'Model MACs: {profile_macs(model, inputs)}')

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)
    trainer = builder.make_trainer(model=model,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler)
    trainer.solution = solution
    trainer.score = score

    # trainer state_dict will be loaded in a callback
    callbacks = builder.make_callbacks(dataflow, state_dict)

    trainer.train_with_defaults(dataflow['train'],
                                num_epochs=configs.run.n_epochs,
                                callbacks=callbacks)
Ejemplo n.º 5
0
def main() -> None:
    dist.init()

    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    # seed
    if ('seed' not in configs.train) or (configs.train.seed is None):
        configs.train.seed = torch.initial_seed() % (2**32 - 1)

    seed = configs.train.seed + dist.rank(
    ) * configs.workers_per_gpu * configs.num_epochs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    dataset = builder.make_dataset()
    dataflow = dict()
    for split in dataset:
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'))
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size,
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
            collate_fn=dataset[split].collate_fn)

    model = builder.make_model()
    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
        find_unused_parameters=True)

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = SemanticKITTITrainer(model=model,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   num_workers=configs.workers_per_gpu,
                                   seed=seed)
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.num_epochs,
        callbacks=[
            InferenceRunner(dataflow[split],
                            callbacks=[
                                MeanIoU(name=f'iou/{split}',
                                        num_classes=configs.data.num_classes,
                                        ignore_label=configs.data.ignore_label)
                            ]) for split in ['test']
        ] + [
            MaxSaver('iou/test'),
            Saver(),
        ])