def get_args():
    """Build the CLI argument parser and return the parsed arguments.

    Combines the generic training flags defined here with the
    model-specific flags contributed by ``CGCNModel.add_model_specific_args``.
    """
    arg_parser = HyperOptArgumentParser()

    # Output / experiment bookkeeping.
    arg_parser.add_argument('--work_dir', metavar='DIR', default="./work_dir",
                            type=str, help='path to save output')
    arg_parser.add_argument('--proj_name', type=str)
    arg_parser.add_argument('--name', type=str)

    # Hardware / distributed-training configuration.
    arg_parser.add_argument('--gpus', type=str, default='7', help='how many gpus')
    arg_parser.add_argument('--dist_bd', type=str, default='dp',
                            choices=('dp', 'ddp', 'ddp2'),
                            help='supports three options dp, ddp, ddp2')
    arg_parser.add_argument('--use_16bit', dest='use_16bit', action='store_true',
                            help='if true uses 16 bit precision')

    # Run-mode switches.
    arg_parser.add_argument('--eval', '--evaluate', dest='evaluate',
                            action='store_true',
                            help='evaluate model on validation set')
    arg_parser.add_argument('--seed', default=1, type=int)
    arg_parser.add_argument('--load_mem', action='store_true')
    arg_parser.add_argument('--track_grad_norm', action='store_true')

    # Let the model contribute its own hyper-parameter flags.
    arg_parser = CGCNModel.add_model_specific_args(arg_parser)
    return arg_parser.parse_args()
def main(params, gpus=None, results_dict=None):
    """Configure and run training/evaluation on the gqa_graph dataset.

    Bug fix: the original body constructed the ``pl.Trainer`` and then
    returned without ever invoking it, so no training or evaluation
    happened.  The fit/evaluate dispatch used by the sibling ``main``
    definitions in this file is restored here.
    """
    init_seed(params)
    params.gpu_num = len(params.gpus.split(','))
    params.dataset = 'gqa_graph'
    # Only specialize the work dir when the user kept the default.
    if params.work_dir == './work_dir':
        params.work_dir = params.work_dir + f'/{params.dataset}'
    # Lightning convention: -1 disables grad-norm tracking, 1 tracks L1 norm.
    if not params.track_grad_norm:
        params.track_grad_norm = -1
    else:
        params.track_grad_norm = 1
    # Linear scaling of LR and batch size with the number of GPUs.
    params.lr = params.lr * params.gpu_num
    params.batch_size = params.batch_size * params.gpu_num
    auto_set_name(params)
    logger = TestTubeLogger(params.work_dir, name=params.name)
    model = CGCNModel(params)
    trainer = pl.Trainer(
        logger=logger,
        default_save_path=params.work_dir,
        gpus=params.gpus,
        max_nb_epochs=params.epochs,
        distributed_backend=params.dist_bd,
        use_amp=params.use_16bit,
        early_stop_callback=False,
        max_epochs=params.epochs,
        track_grad_norm=params.track_grad_norm,
    )
    # Fix: actually run the trainer (the original created it and returned).
    if params.evaluate:
        trainer.run_evaluation()
    else:
        trainer.fit(model)
def main(params):
    """Seed all RNGs, then train or evaluate the CGCN model.

    Fix: the original seeded ``random`` and ``torch`` twice each; the
    duplicate calls are removed (seeding is idempotent, so behavior is
    unchanged).
    """
    if params.seed is not None:
        # Seed every RNG source once for reproducibility.
        random.seed(params.seed)
        np.random.seed(params.seed)
        torch.manual_seed(params.seed)
        torch.cuda.manual_seed(params.seed)
        # Deterministic cuDNN kernels; may reduce throughput.
        cudnn.deterministic = True
    logger = TestTubeLogger(params.save_path, name=params.dataset)
    model = CGCNModel(params)
    trainer = pl.Trainer(
        logger=logger,
        default_save_path=params.save_path,
        gpus=params.gpus,
        max_nb_epochs=params.epochs,
        distributed_backend=params.dist_backend,
        use_amp=params.use_16bit,
        early_stop_callback=False,
    )
    if params.evaluate:
        trainer.run_evaluation()
    else:
        trainer.fit(model)
def main(params, gpus=None, results_dict=None):
    """Configure the trainer and run training or evaluation.

    Fix: the original built an ``idx2name`` dict from
    ``model.named_parameters()`` that was never read — dead code removed.

    NOTE(review): unlike the other ``main`` variants in this file, this
    one scales only the learning rate by GPU count, not the batch size —
    confirm that asymmetry is intentional.
    """
    init_seed(params)
    params.padding_idx = 0
    # Only specialize the work dir when the user kept the default.
    if params.work_dir == './work_dir':
        params.work_dir = params.work_dir + f'/{params.dataset}'
    auto_set_name(params)
    # Lightning convention: -1 disables grad-norm tracking, 1 tracks L1 norm.
    if not params.track_grad_norm:
        params.track_grad_norm = -1
    else:
        params.track_grad_norm = 1
    logger = TestTubeLogger(params.work_dir, name=params.name)
    model = CGCNModel(params)
    # Linear learning-rate scaling with the number of GPUs.
    gpu_num = len(params.gpus.split(','))
    params.lr = params.lr * gpu_num
    trainer = pl.Trainer(
        logger=logger,
        default_save_path=params.work_dir,
        gpus=params.gpus,
        max_nb_epochs=params.epochs,
        distributed_backend=params.dist_bd,
        use_amp=params.use_16bit,
        early_stop_callback=False,
        max_epochs=params.epochs,
        track_grad_norm=params.track_grad_norm,
    )
    if params.evaluate:
        trainer.run_evaluation()
    else:
        trainer.fit(model)
def main(params, gpus=None, results_dict=None):
    """Set up hyper-parameters, build the CGCN model, and train/evaluate it.

    Scales the learning rate with both GPU count and batch size
    (relative to a base batch of 64), then dispatches to either
    ``trainer.run_evaluation()`` or ``trainer.fit()``.
    """
    init_seed(params)

    # Derive run configuration from the raw CLI params.
    params.gpu_num = len(params.gpus.split(','))
    params.dataset = 'gqa_graph'
    if params.proj_name is None:
        params.proj_name = params.dataset
    params.work_dir = params.work_dir + f'/{params.proj_name}'

    # Lightning convention: -1 disables grad-norm tracking, 1 tracks L1 norm.
    params.track_grad_norm = 1 if params.track_grad_norm else -1

    # LR scales linearly with GPU count and with batch size (base 64);
    # the effective batch size scales linearly with GPU count.
    params.lr = params.lr * params.gpu_num * (params.batch_size / 64.)
    params.batch_size = params.batch_size * params.gpu_num
    auto_set_name(params)

    model = CGCNModel(params)
    logger = TestTubeLogger(params.work_dir, name=params.name)
    trainer = pl.Trainer(
        logger=logger,
        default_save_path=params.work_dir,
        gpus=params.gpus,
        distributed_backend=params.dist_bd,
        use_amp=params.use_16bit,
        early_stop_callback=False,
        max_epochs=params.epochs,
        track_grad_norm=params.track_grad_norm,
        row_log_interval=100,
        gradient_clip_val=params.grad_clip,
    )

    if params.evaluate:
        trainer.run_evaluation()
    else:
        trainer.fit(model)