def main() -> None:
    dist.init()
    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = {}
    for split in dataset:
        sampler = torch.utils.data.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'),
        )
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size // dist.size(),
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
        )

    model = builder.make_model()
    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
    )

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = ClassificationTrainer(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        amp_enabled=configs.amp.enabled,
    )
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.num_epochs,
        callbacks=[
            SaverRestore(),
            InferenceRunner(
                dataflow['test'],
                callbacks=[
                    TopKCategoricalAccuracy(k=1, name='acc/top1'),
                    TopKCategoricalAccuracy(k=5, name='acc/top5'),
                ],
            ),
            MaxSaver('acc/top1'),
            Saver(),
        ],
    )
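
# Standalone sketch (not part of the script above): how DistributedSampler shards a
# dataset so each replica sees a disjoint subset, and why the per-GPU batch size above
# is configs.batch_size // dist.size(). The world size, batch size, and toy dataset
# here are illustrative assumptions, not values from the project.
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.arange(16).float().unsqueeze(1))
world_size, global_batch_size = 4, 8

for rank in range(world_size):
    sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=True)
    sampler.set_epoch(0)  # reshuffles deterministically per epoch; the trainer is assumed to call this
    loader = DataLoader(dataset, batch_size=global_batch_size // world_size, sampler=sampler)
    for batch, in loader:
        print(f'rank {rank}: {batch.squeeze(1).tolist()}')  # 2 batches of 2 samples per rank
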
def main() -> None:
    # dist.init()
    torch.backends.cudnn.benchmark = True
    # torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    parser.add_argument('--pdb', action='store_true', help='pdb')
    parser.add_argument('--gpu', type=str, help='gpu ids', default=None)
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if configs.debug.pdb or args.pdb:
        pdb.set_trace()

    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if configs.debug.set_seed:
        torch.manual_seed(configs.debug.seed)
        np.random.seed(configs.debug.seed)

    if configs.run.device == 'gpu':
        device = torch.device('cuda')
    elif configs.run.device == 'cpu':
        device = torch.device('cpu')
    else:
        raise ValueError(configs.run.device)

    if isinstance(configs.optimizer.lr, str):
        configs.optimizer.lr = eval(configs.optimizer.lr)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = dict()
    for split in dataset:
        sampler = torch.utils.data.RandomSampler(dataset[split])
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.run.bsz,
            sampler=sampler,
            num_workers=configs.run.workers_per_gpu,
            pin_memory=True)

    model = builder.make_model()
    model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(
    #     model.cuda(),
    #     device_ids=[dist.local_rank()],
    #     find_unused_parameters=True)

    total_params = sum(p.numel() for p in model.parameters())
    logger.info(f'Model Size: {total_params}')
    # logger.info(f'Model MACs: {profile_macs(model, inputs)}')

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = LayerRegressionTrainer(model=model,
                                     criterion=criterion,
                                     optimizer=optimizer,
                                     scheduler=scheduler)
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.run.n_epochs,
        callbacks=[
            # SaverRestore(),
            InferenceRunner(dataflow=dataflow['valid'], callbacks=[]),
            MaxSaver('loss/valid'),
            # Saver(),
        ])
def main() -> None:
    dist.init()
    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    parser.add_argument('--name', type=str, help='model name')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    dataset = builder.make_dataset()
    dataflow = dict()
    for split in dataset:
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'))
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size if split == 'train' else 1,
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
            collate_fn=dataset[split].collate_fn)

    if 'spvnas' in args.name.lower():
        model = spvnas_specialized(args.name)
    elif 'spvcnn' in args.name.lower():
        model = spvcnn(args.name)
    elif 'mink' in args.name.lower():
        model = minkunet(args.name)
    else:
        raise NotImplementedError
    # model = builder.make_model()

    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
        find_unused_parameters=True)
    model.eval()

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)
    meter = MeanIoU(configs.data.num_classes, configs.data.ignore_label)

    trainer = SemanticKITTITrainer(model=model,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   num_workers=configs.workers_per_gpu,
                                   seed=configs.train.seed)
    callbacks = Callbacks([
        SaverRestore(),
        MeanIoU(configs.data.num_classes, configs.data.ignore_label)
    ])
    callbacks._set_trainer(trainer)
    trainer.callbacks = callbacks
    trainer.dataflow = dataflow['test']

    trainer.before_train()
    trainer.before_epoch()

    # important
    model.eval()

    for feed_dict in tqdm(dataflow['test'], desc='eval'):
        _inputs = dict()
        for key, value in feed_dict.items():
            if 'name' not in key:
                _inputs[key] = value.cuda()

        inputs = _inputs['lidar']
        targets = feed_dict['targets'].F.long().cuda(non_blocking=True)
        outputs = model(inputs)

        invs = feed_dict['inverse_map']
        all_labels = feed_dict['targets_mapped']
        _outputs = []
        _targets = []
        # map per-voxel predictions back to the raw points of each scene in the batch
        for idx in range(invs.C[:, -1].max() + 1):
            cur_scene_pts = (inputs.C[:, -1] == idx).cpu().numpy()
            cur_inv = invs.F[invs.C[:, -1] == idx].cpu().numpy()
            cur_label = (all_labels.C[:, -1] == idx).cpu().numpy()
            outputs_mapped = outputs[cur_scene_pts][cur_inv].argmax(1)
            targets_mapped = all_labels.F[cur_label]
            _outputs.append(outputs_mapped)
            _targets.append(targets_mapped)
        outputs = torch.cat(_outputs, 0)
        targets = torch.cat(_targets, 0)
        output_dict = {'outputs': outputs, 'targets': targets}
        trainer.after_step(output_dict)

    trainer.after_epoch()
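
# Standalone sketch of the inverse-map step above: voxelization collapses several points
# into one voxel, the network predicts once per voxel, and the inverse map re-expands
# those predictions to the original points before computing point-wise metrics. The
# tensors below are toy values, not the project's sparse data structures.
import torch

voxel_logits = torch.tensor([[2.0, 0.1],    # voxel 0 -> class 0
                             [0.0, 3.0],    # voxel 1 -> class 1
                             [4.0, 1.0]])   # voxel 2 -> class 0
inverse_map = torch.tensor([0, 0, 1, 2, 1])  # voxel index of each original point

point_preds = voxel_logits[inverse_map].argmax(dim=1)
print(point_preds)  # tensor([0, 0, 1, 0, 1]) -- per-point labels recovered from per-voxel outputs
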
def main() -> None:
    # dist.init()
    torch.backends.cudnn.benchmark = True
    # torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--ckpt-dir', metavar='DIR', help='checkpoint directory')
    parser.add_argument('--pdb', action='store_true', help='pdb')
    parser.add_argument('--gpu', type=str, help='gpu ids', default=None)
    parser.add_argument('--print-configs', action='store_true',
                        help='print ALL configs')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if configs.debug.pdb or args.pdb:
        pdb.set_trace()

    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if configs.debug.set_seed:
        torch.manual_seed(configs.debug.seed)
        np.random.seed(configs.debug.seed)

    if configs.run.device == 'gpu':
        device = torch.device('cuda')
    elif configs.run.device == 'cpu':
        device = torch.device('cpu')
    else:
        raise ValueError(configs.run.device)

    if isinstance(configs.optimizer.lr, str):
        configs.optimizer.lr = eval(configs.optimizer.lr)

    # set the run dir according to the config file's name
    args.run_dir = 'runs/' + args.config.replace('/', '.').replace(
        'examples.', '').replace('.yml', '').replace('configs.', '')
    set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))

    if args.print_configs:
        print_conf = configs
    else:
        print_conf = get_cared_configs(configs, 'train')

    logger.info(f'Training started: "{args.run_dir}".' + '\n' + f'{print_conf}')

    dataset = builder.make_dataset()
    dataflow = dict()
    # for split in dataset:
    #     sampler = torch.utils.data.distributed.DistributedSampler(
    #         dataset[split],
    #         num_replicas=dist.size(),
    #         rank=dist.rank(),
    #         shuffle=(split == 'train'))
    #     dataflow[split] = torch.utils.data.DataLoader(
    #         dataset[split],
    #         batch_size=configs.run.bsz // dist.size(),
    #         sampler=sampler,
    #         num_workers=configs.run.workers_per_gpu,
    #         pin_memory=True)
    for split in dataset:
        if split == 'train':
            sampler = torch.utils.data.RandomSampler(dataset[split])
            batch_size = configs.run.bsz
        else:
            # for valid and test, use SequentialSampler to keep the train.py
            # and eval.py results consistent
            sampler = torch.utils.data.SequentialSampler(dataset[split])
            batch_size = getattr(configs.run, 'eval_bsz', configs.run.bsz)
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=batch_size,
            sampler=sampler,
            num_workers=configs.run.workers_per_gpu,
            pin_memory=True)

    model = builder.make_model()

    state_dict = {}
    solution = None
    score = None
    if configs.ckpt.load_ckpt:
        logger.warning('Loading checkpoint!')
        state_dict = io.load(os.path.join(args.ckpt_dir, configs.ckpt.name),
                             map_location='cpu')
        if state_dict.get('model_arch', None) is not None:
            model_load = state_dict['model_arch']
            for module_load, module in zip(model_load.modules(),
                                           model.modules()):
                if isinstance(module, tq.RandomLayer):
                    # random layer, need to restore the architecture
                    module.rebuild_random_layer_from_op_list(
                        n_ops_in=module_load.n_ops,
                        wires_in=module_load.wires,
                        op_list_in=module_load.op_list,
                    )

        if not configs.ckpt.weight_from_scratch:
            model.load_state_dict(state_dict['model'], strict=False)
        else:
            logger.warning('NOT loading weights; training from scratch!')

        if 'solution' in state_dict.keys():
            solution = state_dict['solution']
            logger.info(f"Loading the solution {solution}")
            logger.info(f"Original score: {state_dict['score']}")
            model.set_sample_arch(solution['arch'])
            score = state_dict['score']

        if 'v_c_reg_mapping' in state_dict.keys():
            try:
                model.measure.set_v_c_reg_mapping(
                    state_dict['v_c_reg_mapping'])
            except AttributeError:
                logger.warning('Cannot set v_c_reg_mapping.')

    if configs.model.load_op_list:
        assert state_dict['q_layer_op_list'] is not None
        logger.warning('Loading the op_list; will replace the q_layer in '
                       'the original model!')
        q_layer = build_module_from_op_list(state_dict['q_layer_op_list'])
        model.q_layer = q_layer

    if configs.model.transpile_before_run:
        # transpile the q_layer
        logger.warning('Transpiling the q_layer to the basis gate set before '
                       'training; will replace the q_layer!')
        processor = builder.make_qiskit_processor()

        if getattr(model, 'q_layer', None) is not None:
            circ = tq2qiskit(model.q_device, model.q_layer)
            # add measurements because transpilation may permute the wires,
            # so we need the final qubit-to-classical-register mapping
            circ.measure(list(range(model.q_device.n_wires)),
                         list(range(model.q_device.n_wires)))

            logger.info("Transpiling circuit...")

            if solution is not None:
                processor.set_layout(solution['layout'])
                logger.warning(f"Set layout {solution['layout']} for transpile!")

            circ_transpiled = processor.transpile(circs=circ)
            q_layer = qiskit2tq(circ=circ_transpiled)

            model.measure.set_v_c_reg_mapping(
                get_v_c_reg_mapping(circ_transpiled))
            model.q_layer = q_layer

            if configs.trainer.add_noise:
                # noise-aware training
                noise_model_tq = builder.make_noise_model_tq()
                noise_model_tq.is_add_noise = True
                noise_model_tq.v_c_reg_mapping = get_v_c_reg_mapping(
                    circ_transpiled)
                noise_model_tq.p_c_reg_mapping = get_p_c_reg_mapping(
                    circ_transpiled)
                noise_model_tq.p_v_reg_mapping = get_p_v_reg_mapping(
                    circ_transpiled)
                model.set_noise_model_tq(noise_model_tq)
        elif getattr(model, 'nodes', None) is not None:
            # every node has its own noise model because it is possible that
            # different nodes run on different QC
            for node in model.nodes:
                circ = tq2qiskit(node.q_device, node.q_layer)
                circ.measure(list(range(node.q_device.n_wires)),
                             list(range(node.q_device.n_wires)))
                circ_transpiled = processor.transpile(circs=circ)
                q_layer = qiskit2tq(circ=circ_transpiled)

                node.measure.set_v_c_reg_mapping(
                    get_v_c_reg_mapping(circ_transpiled))
                node.q_layer = q_layer

                if configs.trainer.add_noise:
                    # noise-aware training
                    noise_model_tq = builder.make_noise_model_tq()
                    noise_model_tq.is_add_noise = True
                    noise_model_tq.v_c_reg_mapping = get_v_c_reg_mapping(
                        circ_transpiled)
                    noise_model_tq.p_c_reg_mapping = get_p_c_reg_mapping(
                        circ_transpiled)
                    noise_model_tq.p_v_reg_mapping = get_p_v_reg_mapping(
                        circ_transpiled)
                    node.set_noise_model_tq(noise_model_tq)

    if getattr(configs.model.arch, 'sample_arch', None) is not None and \
            not configs.model.transpile_before_run:
        sample_arch = configs.model.arch.sample_arch
        logger.warning(f"Setting sample arch {sample_arch} from config file!")
        if isinstance(sample_arch, str):
            # this is the name of an arch
            sample_arch = get_named_sample_arch(model.arch_space, sample_arch)
            logger.warning(f"Decoded sample arch: {sample_arch}")
        model.set_sample_arch(sample_arch)

    if configs.trainer.name == 'pruning_trainer':
        # in pruning, convert the super layers to a module list; otherwise the
        # pruning ratio is difficult to set
        logger.warning('Converting sampled layer to module list layer!')
        model.q_layer = build_module_from_op_list(
            build_module_op_list(model.q_layer))

    model.to(device)
    # model = torch.nn.parallel.DistributedDataParallel(
    #     model.cuda(),
    #     device_ids=[dist.local_rank()],
    #     find_unused_parameters=True)

    if getattr(model, 'sample_arch', None) is not None and \
            not configs.model.transpile_before_run and \
            configs.trainer.name != 'pruning_trainer':
        n_params = model.count_sample_params()
        logger.info(f"Number of sampled params: {n_params}")

    total_params = sum(p.numel() for p in model.parameters())
    logger.info(f'Model Size: {total_params}')
    # logger.info(f'Model MACs: {profile_macs(model, inputs)}')

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = builder.make_trainer(model=model,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler)
    trainer.solution = solution
    trainer.score = score

    # trainer state_dict will be loaded in a callback
    callbacks = builder.make_callbacks(dataflow, state_dict)

    trainer.train_with_defaults(dataflow['train'],
                                num_epochs=configs.run.n_epochs,
                                callbacks=callbacks)
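
# Standalone sketch of the strict=False checkpoint loading used above: tensors whose
# names match are restored, while layers missing from the checkpoint are reported rather
# than raising, which is what lets a partially matching architecture reuse weights.
# The two-layer model and the fake checkpoint dict are illustrative assumptions.
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))
ckpt = {'model': nn.Sequential(nn.Linear(4, 4)).state_dict()}  # stand-in for io.load(...)

missing, unexpected = model.load_state_dict(ckpt['model'], strict=False)
print(missing)     # ['1.weight', '1.bias'] -- absent from the checkpoint, kept at init
print(unexpected)  # []
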
def main() -> None:
    dist.init()
    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(dist.local_rank())

    parser = argparse.ArgumentParser()
    parser.add_argument('config', metavar='FILE', help='config file')
    parser.add_argument('--run-dir', metavar='DIR', help='run directory')
    args, opts = parser.parse_known_args()

    configs.load(args.config, recursive=True)
    configs.update(opts)

    if args.run_dir is None:
        args.run_dir = auto_set_run_dir()
    else:
        set_run_dir(args.run_dir)

    logger.info(' '.join([sys.executable] + sys.argv))
    logger.info(f'Experiment started: "{args.run_dir}".' + '\n' + f'{configs}')

    # seed
    if ('seed' not in configs.train) or (configs.train.seed is None):
        configs.train.seed = torch.initial_seed() % (2**32 - 1)
    seed = configs.train.seed + dist.rank(
    ) * configs.workers_per_gpu * configs.num_epochs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    dataset = builder.make_dataset()
    dataflow = dict()
    for split in dataset:
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset[split],
            num_replicas=dist.size(),
            rank=dist.rank(),
            shuffle=(split == 'train'))
        dataflow[split] = torch.utils.data.DataLoader(
            dataset[split],
            batch_size=configs.batch_size,
            sampler=sampler,
            num_workers=configs.workers_per_gpu,
            pin_memory=True,
            collate_fn=dataset[split].collate_fn)

    model = builder.make_model()
    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[dist.local_rank()],
        find_unused_parameters=True)

    criterion = builder.make_criterion()
    optimizer = builder.make_optimizer(model)
    scheduler = builder.make_scheduler(optimizer)

    trainer = SemanticKITTITrainer(model=model,
                                   criterion=criterion,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   num_workers=configs.workers_per_gpu,
                                   seed=seed)
    trainer.train_with_defaults(
        dataflow['train'],
        num_epochs=configs.num_epochs,
        callbacks=[
            InferenceRunner(
                dataflow[split],
                callbacks=[
                    MeanIoU(name=f'iou/{split}',
                            num_classes=configs.data.num_classes,
                            ignore_label=configs.data.ignore_label)
                ])
            for split in ['test']
        ] + [
            MaxSaver('iou/test'),
            Saver(),
        ])
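
# Standalone sketch of the seeding scheme above: the base seed is offset by the process
# rank (scaled by workers_per_gpu * num_epochs) so that every distributed worker draws a
# distinct but reproducible random stream. The helper name seed_all and the concrete
# numbers are illustrative assumptions, not part of the project.
import random
import numpy as np
import torch

def seed_all(base_seed: int, rank: int, workers_per_gpu: int, num_epochs: int) -> int:
    seed = base_seed + rank * workers_per_gpu * num_epochs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # deferred no-op when CUDA is unavailable
    return seed

print(seed_all(base_seed=42, rank=1, workers_per_gpu=8, num_epochs=15))  # 42 + 1*8*15 = 162
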