import os

import torch

# MMDataLoader, AMIO, and ATIO are provided by the surrounding repo
# (data loading, model wrapper, and trainer registry, respectively).


def run(args):
    if not os.path.exists(args.model_save_path):
        os.makedirs(args.model_save_path)
    # device
    using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
    print("Let's use %d GPUs!" % len(args.gpu_ids))
    device = torch.device('cuda:%d' % args.gpu_ids[0] if using_cuda else 'cpu')
    args.device = device
    # data
    dataloader = MMDataLoader(args)
    model = AMIO(args).to(device)
    # using multiple gpus
    if using_cuda and len(args.gpu_ids) > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=args.gpu_ids,
                                      output_device=args.gpu_ids[0])
    # do train
    atio = ATIO().getTrain(args)
    atio.do_train(model, dataloader)
    # load the checkpoint saved during training
    pretrained_path = os.path.join(args.model_save_path,
                                   f'{args.modelName}-{args.datasetName}-{args.tasks}.pth')
    assert os.path.exists(pretrained_path)
    model.load_state_dict(torch.load(pretrained_path))
    model.to(device)
    # do test
    if args.debug_mode:
        # use the validation set to debug hyper-parameters
        results = atio.do_test(model, dataloader['valid'], mode="VAL")
    else:
        results = atio.do_test(model, dataloader['test'], mode="TEST")
    return results
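# For context: a minimal, hypothetical driver for the run() variant above,
# assuming only the attribute names it reads (gpu_ids, model_save_path,
# modelName, datasetName, tasks, debug_mode). The field values are
# illustrative assumptions; MMDataLoader/AMIO will generally require
# additional dataset- and model-specific fields that are omitted here.
from argparse import Namespace

example_args = Namespace(
    gpu_ids=[0],                       # [] falls back to CPU
    model_save_path='results/models',  # checkpoint directory (assumption)
    modelName='tfn',                   # example model key (assumption)
    datasetName='mosi',                # example dataset key (assumption)
    tasks='M',                         # example task flag (assumption)
    debug_mode=False,                  # True -> evaluate on the valid split
)
results = run(example_args)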
def run(args):
    if not os.path.exists(args.model_save_path):
        os.makedirs(args.model_save_path)
    # device
    using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
    print("Let's use %d GPUs!" % len(args.gpu_ids))
    device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
    args.device = device
    # data
    dataloader = MMDataLoader(args)
    model = AMIO(args).to(device)

    def count_parameters(model):
        # count only trainable parameters
        answer = 0
        for p in model.parameters():
            if p.requires_grad:
                answer += p.numel()
        return answer

    print(f'The model has {count_parameters(model)} trainable parameters')
    # using multiple gpus
    if using_cuda and len(args.gpu_ids) > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=args.gpu_ids,
                                      output_device=args.gpu_ids[0])
    # do train
    atio = ATIO().getTrain(args)
    atio.do_train(model, dataloader)
    # load the checkpoint saved during training
    pretrained_path = os.path.join(args.model_save_path,
                                   f'{args.modelName}-{args.datasetName}-{args.tasks}.pth')
    assert os.path.exists(pretrained_path)
    model.load_state_dict(torch.load(pretrained_path))
    model.to(device)
    # do test
    if args.debug_mode:
        # use the validation set to debug hyper-parameters
        results = atio.do_test(model, dataloader['valid'], mode="VALID")
    else:
        results = atio.do_test(model, dataloader['test'], mode="TEST")
    # structure of the returned eval results, for reference:
    # {
    #     "has0_acc_2": acc2,
    #     "has0_F1_score": f_score,
    #     "non0_acc_2": non_zeros_acc2,
    #     "non0_F1_score": non_zeros_f1_score,
    #     "Mult_acc_5": mult_a5,
    #     "Mult_acc_7": mult_a7,
    #     "MAE": mae,
    #     "Correlation Coefficient": corr,
    # }
    return results
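# Design note: the nested parameter-counting helper above can be written as
# a one-liner over any torch.nn.Module; a minimal equivalent sketch (the
# name count_trainable_parameters is ours, not the repo's):
import torch

def count_trainable_parameters(model: torch.nn.Module) -> int:
    # Same result as the explicit loop: total number of trainable scalars.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# e.g. count_trainable_parameters(torch.nn.Linear(10, 5)) == 55
# (50 weights + 5 biases)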
def run(args):
    # device
    using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
    print("Let's use %d GPUs!" % len(args.gpu_ids))
    device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
    args.device = device
    # data
    dataloader = MMDataLoader(args)
    model = AMIO(args).to(device)

    def count_parameters(model):
        # count only trainable parameters
        answer = 0
        for p in model.parameters():
            if p.requires_grad:
                answer += p.numel()
        return answer

    print(f'The model has {count_parameters(model)} trainable parameters')
    # using multiple gpus
    if using_cuda and len(args.gpu_ids) > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=args.gpu_ids,
                                      output_device=args.gpu_ids[0])
    # do train, keeping per-epoch results
    atio = ATIO().getTrain(args)
    epoch_results = atio.do_train(model, dataloader)
    # load the checkpoint saved during training
    assert os.path.exists(args.model_save_path)
    model.load_state_dict(torch.load(args.model_save_path))
    model.to(device)
    # evaluate on all three splits, keeping per-sample details
    final_results = {}
    final_results['train'] = atio.do_test(model, dataloader['train'], mode="TRAIN", need_details=True)
    final_results['valid'] = atio.do_test(model, dataloader['valid'], mode="VALID", need_details=True)
    final_results['test'] = atio.do_test(model, dataloader['test'], mode="TEST", need_details=True)
    # remove the saved checkpoint when tuning hyper-parameters
    if args.run_mode == 'Tune':
        os.remove(args.model_save_path)
    return {"epoch_results": epoch_results, "final_results": final_results}
def run(args):
    if not os.path.exists(args.model_save_path):
        os.makedirs(args.model_save_path)

    "Step1. device preparation"
    using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
    print("Let's use %d GPUs!" % len(args.gpu_ids))
    print(using_cuda)
    device = torch.device('cuda:%d' % args.gpu_ids[0] if using_cuda else 'cpu')
    args.device = device

    "Step2. model and data preparation"
    dataloader = MMDataLoader(args)
    model = AMIO(args).to(device)
    # using multiple gpus
    if using_cuda and len(args.gpu_ids) > 1:
        model = torch.nn.DataParallel(
            model,
            device_ids=args.gpu_ids,
            output_device=args.gpu_ids[0]
        )

    "Step3. do training"
    atio = ATIO().get_train(args)
    if not args.debug_mode:
        atio.do_train(model, dataloader)
    else:
        # load pretrained model
        pretrained_path = os.path.join(
            args.model_save_path,
            f'{args.modelName}-{args.datasetName}-{args.tasks}-{args.seed}.pth'
        )
        assert os.path.exists(pretrained_path)
        model.load_state_dict(torch.load(pretrained_path))
        model.to(device)

    "Step4. do validating or testing"
    # if args.debug_mode:
    #     results = atio.do_test(model, dataloader['valid'], mode="VAL")
    # else:
    results = atio.do_test(model, dataloader['test'], mode="TEST")
    return results
import gc
import time

import pynvml

# `logger` is assumed to be a module-level logging.Logger configured by the repo.


def run(args):
    if not os.path.exists(args.model_save_dir):
        os.makedirs(args.model_save_dir)
    args.model_save_path = os.path.join(
        args.model_save_dir,
        f'{args.modelName}-{args.datasetName}-{args.train_mode}.pth'
    )
    # if no gpu was specified, pick the one with the least memory in use
    if len(args.gpu_ids) == 0 and torch.cuda.is_available():
        pynvml.nvmlInit()
        dst_gpu_id, min_mem_used = 0, 1e16
        for g_id in [0, 1, 2, 3]:  # hardcoded to 4 GPUs; see the device-count-aware sketch after this function
            handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
            meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            mem_used = meminfo.used
            if mem_used < min_mem_used:
                min_mem_used = mem_used
                dst_gpu_id = g_id
        print(f'Found gpu {dst_gpu_id}, memory used: {min_mem_used} bytes!')
        logger.info(f'Found gpu {dst_gpu_id} with the least memory used: {min_mem_used} bytes!')
        args.gpu_ids.append(dst_gpu_id)
    # device
    using_cuda = len(args.gpu_ids) > 0 and torch.cuda.is_available()
    logger.info("Let's use %d GPUs!" % len(args.gpu_ids))
    device = torch.device('cuda:%d' % int(args.gpu_ids[0]) if using_cuda else 'cpu')
    args.device = device
    # claim a bit of GPU memory up front while data and models are built
    tmp_tensor = torch.zeros((100, 100)).to(args.device)
    # load data and models
    dataloader = MMDataLoader(args)
    model = AMIO(args).to(device)
    del tmp_tensor

    def count_parameters(model):
        # count only trainable parameters
        answer = 0
        for p in model.parameters():
            if p.requires_grad:
                answer += p.numel()
        return answer

    logger.info(f'The model has {count_parameters(model)} trainable parameters')
    # using multiple gpus
    # if using_cuda and len(args.gpu_ids) > 1:
    #     model = torch.nn.DataParallel(model,
    #                                   device_ids=args.gpu_ids,
    #                                   output_device=args.gpu_ids[0])
    atio = ATIO().getTrain(args)
    # do train
    atio.do_train(model, dataloader)
    # load the checkpoint saved during training
    assert os.path.exists(args.model_save_path)
    model.load_state_dict(torch.load(args.model_save_path))
    model.to(device)
    # do test
    if args.is_tune:
        # use the validation set to tune hyper-parameters
        results = atio.do_test(model, dataloader['valid'], mode="VALID")
    else:
        results = atio.do_test(model, dataloader['test'], mode="TEST")
    # free GPU memory before the next run
    del model
    torch.cuda.empty_cache()
    gc.collect()
    time.sleep(5)
    return results
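# The GPU auto-selection loop above hardcodes indices [0, 1, 2, 3]. A
# stand-alone sketch of the same idea that queries the actual device count
# instead (pick_freest_gpu is our name, not the repo's):
import pynvml

def pick_freest_gpu() -> int:
    # Return the index of the GPU with the least memory currently in use.
    pynvml.nvmlInit()
    try:
        best_id, best_used = 0, float('inf')
        for g_id in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(g_id)
            used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
            if used < best_used:
                best_id, best_used = g_id, used
        return best_id
    finally:
        pynvml.nvmlShutdown()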