def main(args):
    """Load a dataset, build the attribute network, then train / eval / test.

    Attributes read from ``args``: ``data_name``, ``train``, ``eval``,
    ``test``, ``model_path``, ``model_name``, ``pretrain_model_file`` and
    (for eval/test) ``model_file``. When ``args.train`` is set,
    ``args.model_file`` is overwritten with a timestamped checkpoint path
    inside ``<model_path>/<data_name>_<model_name>``.

    Args:
        args: Parsed run configuration (namespace-like object).

    Raises:
        ValueError: If ``args.data_name`` contains none of the known dataset
            keywords (previously this surfaced later as an unbound-variable
            error).
    """
    seed = 1111
    set_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        print('device', device, torch.cuda.current_device())
    else:
        # torch.cuda.current_device() raises without CUDA, so skip it here.
        print('device', device)

    data_obj = _DATA()

    # Keyword -> loader method name. Order matters: when several keywords
    # occur in data_name the LAST match wins, as in the original if-chain.
    # Names are resolved lazily so a loader that is not needed is never
    # looked up on the data object.
    loader_by_keyword = (
        ("yelp", "f_load_data_yelp_restaurant"),
        ("movie", "f_load_data_movie"),
        ("beer", "f_load_data_beer"),
        ("wine", "f_load_data_wine"),
        ("lthing", "f_load_data_movie"),
    )
    loader_name = None
    for keyword, candidate in loader_by_keyword:
        if keyword in args.data_name:
            loader_name = candidate
    if loader_name is None:
        raise ValueError(
            "unsupported data_name {!r}; expected it to contain one of {}".format(
                args.data_name, [keyword for keyword, _ in loader_by_keyword]))
    train_data, valid_data, vocab_obj = getattr(data_obj, loader_name)(args)

    if args.train:
        now_time = datetime.now()
        time_name = "{}_{}_{}_{}".format(
            now_time.month, now_time.day, now_time.hour, now_time.minute)

        model_file = os.path.join(args.model_path,
                                  args.data_name + "_" + args.model_name)
        if not os.path.isdir(model_file):
            print("create a directory", model_file)
            # makedirs also creates a missing model_path parent.
            os.makedirs(model_file, exist_ok=True)

        args.model_file = os.path.join(
            model_file, "model_best_" + time_name + ".pt")
        print("model_file", model_file)

    print("vocab_size", vocab_obj.vocab_size)
    print("user num", vocab_obj.user_num)
    print("item num", vocab_obj.item_num)

    pretrain_model_file = args.pretrain_model_file
    pretrain_network = None
    # Truthiness check: both "" and None mean "no pretrained model".
    if pretrain_model_file:
        pretrain_network = BPR(vocab_obj, args, device)
        pretrain_model_abs_file = os.path.join(args.model_path,
                                               pretrain_model_file)
        print("pretrain_model_abs_file", pretrain_model_abs_file)

        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(pretrain_model_abs_file, map_location=device)
        pretrain_network.load_state_dict(checkpoint['model'])

    network = _ATTR_NETWORK(vocab_obj, args, device)

    # Report the size of every trainable parameter and the overall total.
    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)
    print("total parameters num", total_param_num)

    if args.train:
        logger_obj = _LOGGER()
        logger_obj.f_add_writer(args)
        try:
            optimizer = _OPTIM(network.parameters(), args)
            trainer = _TRAINER(vocab_obj, args, device)
            trainer.f_train(pretrain_network, train_data, valid_data, network,
                            optimizer, logger_obj)
        finally:
            # Close the writer even if training raises.
            logger_obj.f_close_writer()

    if args.eval:
        print("=" * 10, "eval", "=" * 10)

        eval_obj = _EVAL(vocab_obj, args, device)

        network = network.to(device)
        eval_obj.f_init_eval(network, args.model_file, reload_model=True)

        # Alternative entry point kept for reference:
        # eval_obj.f_eval_new_user(train_data, valid_data)
        eval_obj.f_eval(train_data, valid_data)

    if args.test:
        # NOTE(review): banner text says "eval" in the test branch; left
        # unchanged so existing log output stays the same.
        print("=" * 10, "eval", "=" * 10)

        infer_obj = _INFER(vocab_obj, args, device)

        network = network.to(device)
        infer_obj.f_init_infer(network, args.model_file, reload_model=True)

        infer_obj.f_infer(train_data, valid_data)
def main(args):
    """Load a dataset, build the attribute network, then train and/or eval.

    Supports optional multi-process training: when ``args.parallel`` is set,
    the NCCL process group is initialised, the process is pinned to
    ``cuda:<args.local_rank>`` and the network is wrapped in
    ``DistributedDataParallel`` before training.

    Attributes read from ``args``: ``data_name``, ``parallel``,
    ``local_rank`` (only when parallel), ``train``, ``eval``, ``model_path``,
    ``model_name`` and (for eval) ``model_file``. When ``args.train`` is set,
    ``args.model_file`` is overwritten with a timestamped checkpoint path
    inside ``<model_path>/<data_name>_<model_name>``.

    Args:
        args: Parsed run configuration (namespace-like object).

    Raises:
        ValueError: If ``args.data_name`` contains none of the known dataset
            keywords (previously this surfaced later as an unbound-variable
            error).
    """
    seed = 1111
    set_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device', device)

    local_rank = None
    if args.parallel:
        local_rank = args.local_rank
        # Bind this process to its own GPU before creating the NCCL group so
        # collectives and default CUDA allocations do not all land on GPU 0.
        torch.cuda.set_device(local_rank)
        torch.distributed.init_process_group(backend="nccl")
        device = torch.device('cuda:{}'.format(local_rank))

    data_obj = _DATA()

    # Keyword -> loader method name. Order matters: when several keywords
    # occur in data_name the LAST match wins, as in the original if-chain.
    # Names are resolved lazily so a loader that is not needed is never
    # looked up on the data object.
    loader_by_keyword = (
        ("beer", "f_load_data_movie"),
        ("wine", "f_load_data_movie"),
        ("yelp", "f_load_data_yelp"),
        ("movie", "f_load_data_movie"),
        ("lthing", "f_load_data_movie"),
    )
    loader_name = None
    for keyword, candidate in loader_by_keyword:
        if keyword in args.data_name:
            loader_name = candidate
    if loader_name is None:
        raise ValueError(
            "unsupported data_name {!r}; expected it to contain one of {}".format(
                args.data_name, [keyword for keyword, _ in loader_by_keyword]))
    train_data, valid_data, vocab_obj = getattr(data_obj, loader_name)(args)

    if args.train:
        now_time = datetime.now()
        time_name = "{}_{}_{}_{}".format(
            now_time.month, now_time.day, now_time.hour, now_time.minute)

        model_file = os.path.join(args.model_path,
                                  args.data_name + "_" + args.model_name)
        if not os.path.isdir(model_file):
            print("create a directory", model_file)
            # exist_ok avoids FileExistsError when several ranks race to
            # create the same directory; makedirs also creates parents.
            os.makedirs(model_file, exist_ok=True)

        args.model_file = os.path.join(
            model_file, "model_best_" + time_name + ".pt")
        print("model_file", model_file)

    print("vocab_size", vocab_obj.vocab_size)
    print("user num", vocab_obj.user_num)
    print("item num", vocab_obj.item_num)

    network = _ATTR_NETWORK(vocab_obj, args, device)

    # Report the size of every trainable parameter and the overall total.
    total_param_num = 0
    for name, param in network.named_parameters():
        if param.requires_grad:
            param_num = param.numel()
            total_param_num += param_num
            print(name, "\t", param_num)
    print("total parameters num", total_param_num)

    if args.train:
        logger_obj = _LOGGER()
        logger_obj.f_add_writer(args)
        try:
            if args.parallel:
                # DDP with device_ids requires the module to already live on
                # that device; .to() is a no-op if it is there already.
                network = network.to(device)
                network = torch.nn.parallel.DistributedDataParallel(
                    network,
                    device_ids=[local_rank],
                    output_device=local_rank,
                    find_unused_parameters=True)

            optimizer = _OPTIM(network.parameters(), args)
            trainer = _TRAINER(args, device)
            trainer.f_train(train_data, valid_data, network, optimizer,
                            logger_obj, local_rank)
        finally:
            # Close the writer even if training raises.
            logger_obj.f_close_writer()

    if args.eval:
        print("="*10, "eval", "="*10)

        eval_obj = _EVAL(vocab_obj, args, device)

        network = network.to(device)
        eval_obj.f_init_eval(network, args.model_file, reload_model=True)

        # Alternative entry point kept for reference:
        # eval_obj.f_eval_new_user(train_data, valid_data)
        eval_obj.f_eval(train_data, valid_data)