def test(**kwargs):
    """Evaluate a trained rating-prediction model on the Test split.

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'AmazonDigitalMusic'. All other keys are forwarded to
            ``opt.parse`` as option overrides.

    Raises:
        ValueError: if ``--pth_path`` is missing or ``--num_fea`` does not
            match the chosen model.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'AmazonDigitalMusic_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)
    # BUGFIX: was `assert len(opt.pth_path) > 0`, which is silently stripped
    # under `python -O`; a checkpoint path is mandatory here.
    if len(opt.pth_path) == 0:
        raise ValueError("a checkpoint path (--pth_path) is required for test()")

    # Seed every RNG source so evaluation is reproducible.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()

    # BUGFIX: the num_fea check and checkpoint load must happen BEFORE the
    # nn.DataParallel wrap -- the wrapper exposes neither `.net` nor the
    # custom `.load()` method, so the original order raised AttributeError
    # whenever gpu_ids was non-empty.
    if model.net.num_fea != opt.num_fea:
        raise ValueError(f"the num_fea of {opt.model} is error, please specific --num_fea={model.net.num_fea}")
    model.load(opt.pth_path)
    print(f"load model: {opt.pth_path}")

    if len(opt.gpu_ids) > 0:
        model = nn.DataParallel(model, device_ids=opt.gpu_ids)

    test_data = ReviewData(opt.data_root, mode="Test")
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, collate_fn=collate_fn)
    print(f"{now()}: test in the test datset")
    predict_loss, test_mse, test_mae = predict(model, test_data_loader, opt)
def test(**kwargs):
    """Evaluate a trained review-helpfulness model on the Test split.

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'Gourmet_Food_data'. All other keys are forwarded to
            ``opt.parse`` as option overrides.

    Side effects: configures root logging to ``logs/<opt>.log`` (the ``logs/``
    directory is assumed to exist -- TODO confirm) and prints/loads via the
    project's ``predict`` helper.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'Gourmet_Food_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    logging.basicConfig(
        filename=f"logs/{opt}.log",
        filemode="w",
        format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
        datefmt="%d-%m-%Y %H:%M:%S",
        level=logging.DEBUG)

    # Seed every RNG source so evaluation is reproducible.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    # BUGFIX: `.cuda()` was called unconditionally, crashing on CPU-only
    # machines even though opt.use_gpu is honored everywhere else.
    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()

    print("load...")
    # NOTE(review): checkpoint path is hard-coded to one experiment; consider
    # exposing it as an option (e.g. opt.pth_path) like the sibling test().
    model.load(
        "./checkpoints/DPHP_Gourmet_Food_data_cfg-Gourmet_Food_data-poolatt-lr0.001-wd0.0005-drop0.1-id32-hidden100.pth"
    )

    test_data = ReviewData(opt.data_root, mode="Test")
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, collate_fn=collate_fn)
    auc, corr, predict_loss = predict(model, test_data_loader, opt, logging)
def generate_conditional_sentence(**kwargs):
    """Print the highest-attention user and item review for one test sample.

    Runs the model in "Generate" mode on the sample at index 10 of the Test
    split (batch_size=1) and prints the plain-text review with the largest
    attention weight on the user side and on the item side.

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'AmazonDigitalMusic'. Other keys are forwarded to ``opt.parse``.

    Raises:
        ValueError: if ``--pth_path`` is missing or ``--num_fea`` does not
            match the chosen model.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'AmazonDigitalMusic_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)
    # BUGFIX: was a bare assert, which disappears under `python -O`.
    if len(opt.pth_path) == 0:
        raise ValueError("a checkpoint path (--pth_path) is required")

    # Seed every RNG source for a reproducible pick.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()

    # BUGFIX: check num_fea and load the checkpoint BEFORE wrapping in
    # nn.DataParallel; the wrapper exposes neither `.net` nor the custom
    # `.load()`, so the original order failed whenever gpu_ids was set.
    if model.net.num_fea != opt.num_fea:
        raise ValueError(f"the num_fea of {opt.model} is error, please specific --num_fea={model.net.num_fea}")
    model.load(opt.pth_path)
    print(f"load model: {opt.pth_path}")

    if len(opt.gpu_ids) > 0:
        model = nn.DataParallel(model, device_ids=opt.gpu_ids)

    test_data = ReviewData(opt.data_root, mode="Test")
    # batch_size=1: we inspect a single (user, item) interaction.
    test_data_loader = DataLoader(test_data, batch_size=1,
                                  shuffle=False, collate_fn=collate_fn)
    print(f"{now()}: generating conditional sentence...")

    model.eval()
    with torch.no_grad():
        # NOTE(review): raw-review lookup paths are hard-coded to the
        # AmazonDigitalMusic dataset even though `dataset` is configurable;
        # consider deriving them from opt.data_root.
        user_review_dict = np.load("./dataset/AmazonDigitalMusic/train/plainUserReviews.npy",
                                   allow_pickle=True).item()
        item_review_dict = np.load("./dataset/AmazonDigitalMusic/train/plainItemReviews.npy",
                                   allow_pickle=True).item()
        cnt = 10  # index of the test sample to visualize
        for idx, (test_input, scores) in enumerate(test_data_loader):
            if idx == cnt:
                test_input = unpack_input(opt, test_input)
                output = model(test_input, mode="Generate")
                # test_input[2]/[3] hold the user id / item id -- TODO confirm
                # against unpack_input's layout.
                uid = test_input[2].item()
                user_reviews = user_review_dict[uid]
                iid = test_input[3].item()
                item_reviews = item_review_dict[iid]
                # output[0]/output[1]: attention weights over the user's /
                # item's reviews; argmax picks the most influential one.
                imp_user_review_id = output[0].cpu().numpy().squeeze()
                imp_user_review_id = np.argmax(imp_user_review_id)
                print(user_reviews[imp_user_review_id])
                imp_item_review_id = output[1].cpu().numpy().squeeze()
                imp_item_review_id = np.argmax(imp_item_review_id)
                print(item_reviews[imp_item_review_id])
                break
def train(**kwargs):
    """Train a rating-prediction model, validating each epoch on the Val split.

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'AmazonDigitalMusic'. Other keys are forwarded to ``opt.parse``.

    Checkpoints (via ``Model.save``) whenever validation loss improves and
    prints per-epoch train MSE plus the val MSE/MAE history at the end.

    Raises:
        ValueError: if ``--num_fea`` does not match the model, or
            ``--loss_method`` is not one of mse/rmse/mae/smooth_mae.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'AmazonDigitalMusic_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    # Seed every RNG source for reproducible training.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()

    # BUGFIX: validate num_fea BEFORE wrapping in nn.DataParallel; the
    # wrapper does not expose `.net`, so the original order raised
    # AttributeError whenever gpu_ids was non-empty.
    if model.net.num_fea != opt.num_fea:
        raise ValueError(f"the num_fea of {opt.model} is error, please specific --num_fea={model.net.num_fea}")
    if len(opt.gpu_ids) > 0:
        model = nn.DataParallel(model, device_ids=opt.gpu_ids)
    # DataParallel also hides the custom `.save()`; keep a handle to the
    # underlying wrapper for checkpointing.
    saveable = model.module if isinstance(model, nn.DataParallel) else model

    # 3 data
    train_data = ReviewData(opt.data_root, mode="Train")
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size,
                                   shuffle=True, collate_fn=collate_fn)
    val_data = ReviewData(opt.data_root, mode="Val")
    val_data_loader = DataLoader(val_data, batch_size=opt.batch_size,
                                 shuffle=False, collate_fn=collate_fn)
    print(f'train data: {len(train_data)}; test data: {len(val_data)}')

    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    # training
    print("start training....")
    min_loss = 1e+10   # best (lowest) validation loss so far
    best_res = 1e+10   # best (lowest) validation MSE so far
    mse_func = nn.MSELoss()
    mae_func = nn.L1Loss()
    smooth_mae_func = nn.SmoothL1Loss()

    train_mse_list = []
    val_mse_list = []
    val_mae_list = []
    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        total_maeloss = 0.0
        model.train()
        print(f"{now()} Epoch {epoch}...")
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            scores = torch.FloatTensor(scores)
            if opt.use_gpu:
                scores = scores.cuda()
            train_datas = unpack_input(opt, train_datas)
            optimizer.zero_grad()
            output = model(train_datas)
            mse_loss = mse_func(output, scores)
            # Accumulate per-sample totals for the epoch-level averages.
            total_loss += mse_loss.item() * len(scores)
            mae_loss = mae_func(output, scores)
            total_maeloss += mae_loss.item()
            # BUGFIX: use an if/elif chain with an explicit error -- the
            # original plain-if chain left `loss` undefined (NameError)
            # for an unrecognized --loss_method.
            if opt.loss_method == 'mse':
                loss = mse_loss
            elif opt.loss_method == 'rmse':
                loss = torch.sqrt(mse_loss) / 2.0
            elif opt.loss_method == 'mae':
                loss = mae_loss
            elif opt.loss_method == 'smooth_mae':
                loss = smooth_mae_func(output, scores)
            else:
                raise ValueError(f"unknown loss_method: {opt.loss_method}")
            loss.backward()
            optimizer.step()
            if opt.fine_step:
                if idx % opt.print_step == 0 and idx > 0:
                    print("\t{}, {} step finised;".format(now(), idx))
                    val_loss, val_mse, val_mae = predict(model, val_data_loader, opt)
                    if val_loss < min_loss:
                        saveable.save(name=opt.dataset, opt=opt.print_opt)
                        min_loss = val_loss
                        print("\tmodel save")
                    # BUGFIX: dropped the original
                    #   `if val_loss > min_loss: best_res = min_loss`
                    # which clobbered best_res (a val-MSE tracker) with a
                    # loss value.
                    model.train()  # predict() may leave the model in eval mode
        scheduler.step()
        mse = total_loss * 1.0 / len(train_data)
        print(f"\ttrain data: loss:{total_loss:.4f}, mse: {mse:.4f};")

        val_loss, val_mse, val_mae = predict(model, val_data_loader, opt)
        train_mse_list.append(mse)
        val_mse_list.append(val_mse)
        val_mae_list.append(val_mae)
        if val_loss < min_loss:
            saveable.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = val_loss
            print("model save")
        if val_mse < best_res:
            best_res = val_mse
        print("*"*30)

    print("----"*20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res: {best_res}")
    print("----"*20)
    print("Train MSE:", train_mse_list)
    print("Val MSE:", val_mse_list)
    print("Val MAE:", val_mae_list)
def train(**kwargs):
    """Train a rating-prediction model, evaluating each epoch on the test split.

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'Digital_Music_data'. Other keys are forwarded to ``opt.parse``.

    Checkpoints (via ``Model.save``) whenever prediction loss improves and
    prints per-epoch train MSE/RMSE/MAE plus the best test MSE at the end.

    Raises:
        ValueError: if ``--loss_method`` is not one of mse/rmse/mae/smooth_mae.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'Digital_Music_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    # Seed every RNG source for reproducible training.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()
    if len(opt.gpu_ids) > 0:
        model = nn.DataParallel(model, device_ids=opt.gpu_ids)
    # nn.DataParallel hides the custom `.save()`; keep a handle to the
    # underlying wrapper for checkpointing.
    saveable = model.module if isinstance(model, nn.DataParallel) else model

    # 3 data
    train_data = ReviewData(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size,
                                   shuffle=True, num_workers=opt.num_workers,
                                   collate_fn=collate_fn)
    test_data = ReviewData(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('{}: train data: {}; test data: {}'.format(now(), len(train_data), len(test_data)))

    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

    # training
    print("start training....")
    min_loss = 1e+10   # best (lowest) prediction loss so far
    best_res = 1e+10   # best (lowest) test MSE so far
    mse_func = nn.MSELoss()
    mae_func = nn.L1Loss()
    smooth_mae_func = nn.SmoothL1Loss()

    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        total_maeloss = 0.0
        model.train()
        print("{} Epoch {}: start".format(now(), epoch))
        for idx, (train_datas, scores) in enumerate(train_data_loader):
            scores = torch.FloatTensor(scores)
            if opt.use_gpu:
                scores = scores.cuda()
            train_datas = unpack_input(opt, train_datas)
            optimizer.zero_grad()
            output = model(train_datas)
            mse_loss = mse_func(output, scores)
            # Accumulate per-sample totals for the epoch-level averages.
            total_loss += mse_loss.item() * len(scores)
            mae_loss = mae_func(output, scores)
            total_maeloss += mae_loss.item()
            # BUGFIX: use an if/elif chain with an explicit error -- the
            # original plain-if chain left `loss` undefined (NameError)
            # for an unrecognized --loss_method.
            if opt.loss_method == 'mse':
                loss = mse_loss
            elif opt.loss_method == 'rmse':
                loss = torch.sqrt(mse_loss) / 2.0
            elif opt.loss_method == 'mae':
                loss = mae_loss
            elif opt.loss_method == 'smooth_mae':
                loss = smooth_mae_func(output, scores)
            else:
                raise ValueError(f"unknown loss_method: {opt.loss_method}")
            loss.backward()
            optimizer.step()
            if opt.fine_step:
                if idx % opt.print_step == 0 and idx > 0:
                    print("\t{}, {} step finised;".format(now(), idx))
                    predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
                    if predict_loss < min_loss:
                        saveable.save(name=opt.dataset, opt=opt.print_opt)
                        min_loss = predict_loss
                        print("\tmodel save")
                    # BUGFIX: dropped the original
                    #   `if predict_loss > min_loss: best_res = min_loss`
                    # which clobbered best_res (a test-MSE tracker) with a
                    # loss value.
                    model.train()  # predict() may leave the model in eval mode
        # BUGFIX: `scheduler.step(epoch)` (the deprecated epoch argument was
        # removed in recent PyTorch); the parameterless form advances the
        # schedule identically here.
        scheduler.step()
        print("{}; epoch:{}; total_loss:{}".format(now(), epoch, total_loss))
        mse = total_loss * 1.0 / len(train_data)
        mae = total_maeloss * 1.0 / len(train_data)
        print("{};train reslut: mse: {}; rmse: {}; mae: {}".format(now(), mse, math.sqrt(mse), mae))

        predict_loss, test_mse = predict(model, test_data_loader, opt, use_gpu=opt.use_gpu)
        if predict_loss < min_loss:
            saveable.save(name=opt.dataset, opt=opt.print_opt)
            min_loss = predict_loss
            print("model save")
        if test_mse < best_res:
            best_res = test_mse

    print("----"*20)
    print(f"{now()} {opt.dataset} {opt.print_opt} best_res: {best_res}")
    print("----"*20)
def train(**kwargs):
    """Train a review-helpfulness classifier (BCE-with-logits objective).

    Keyword Args:
        dataset (str, optional): prefix of the config class to use; defaults
            to 'Gourmet_Food_data'. Other keys are forwarded to ``opt.parse``.

    Side effects: configures root logging to ``logs/<opt>.log`` (the ``logs/``
    directory is assumed to exist -- TODO confirm) and checkpoints via
    ``Model.save`` whenever validation AUC improves. Prints/logs the best
    AUC and correlation at the end.
    """
    if 'dataset' not in kwargs:
        opt = getattr(config, 'Gourmet_Food_data_Config')()
    else:
        opt = getattr(config, kwargs['dataset'] + '_Config')()
    opt.parse(kwargs)

    logging.basicConfig(
        filename=f"logs/{opt}.log",
        filemode="w",
        format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
        datefmt="%d-%m-%Y %H:%M:%S",
        level=logging.DEBUG)

    # Seed every RNG source for reproducible training.
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.use_gpu:
        torch.cuda.manual_seed_all(opt.seed)
    if len(opt.gpu_ids) == 0 and opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = Model(opt, getattr(models, opt.model))
    if opt.use_gpu:
        model.cuda()

    # 3 data
    train_data = ReviewData(opt.data_root, mode="Train")
    train_data_loader = DataLoader(train_data, batch_size=opt.batch_size,
                                   shuffle=True, collate_fn=collate_fn)
    val_data = ReviewData(opt.data_root, mode="Val")
    # BUGFIX: the validation loader was built with shuffle=True; validation
    # order should be deterministic (metrics are unaffected, but runs become
    # reproducible and comparable).
    val_data_loader = DataLoader(val_data, batch_size=opt.batch_size,
                                 shuffle=False, collate_fn=collate_fn)
    logging.info('{}: train data: {}; val data: {}'.format(
        now(), len(train_data), len(val_data)))

    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

    # training
    logging.info("start training....")
    min_loss = 1e+20   # best (lowest) validation loss so far
    best_auc = -1.     # best validation AUC so far
    best_per = -1.     # correlation at the best-AUC epoch
    best_epoch = 0
    cre_loss = nn.BCEWithLogitsLoss()

    for epoch in range(opt.num_epochs):
        total_loss = 0.0
        model.train()
        for idx, datas in enumerate(train_data_loader):
            train_datas, is_helpful, helpful_score = unpack_input(opt, datas)
            optimizer.zero_grad()
            output = model(train_datas)
            # is_helpful is the binary target; BCEWithLogitsLoss expects float.
            loss = cre_loss(output, is_helpful.float())
            cur_loss = loss.item()
            total_loss += cur_loss
            loss.backward()
            optimizer.step()
        # BUGFIX: `scheduler.step(epoch)` (the deprecated epoch argument was
        # removed in recent PyTorch); the parameterless form advances the
        # schedule identically here.
        scheduler.step()
        logging.info(f"{now()}: epoch {epoch}: total_loss: {total_loss}")
        print(f"epoch: {epoch}")

        auc, corr, predict_loss = predict(model, val_data_loader, opt, logging)
        if predict_loss < min_loss:
            min_loss = predict_loss
        # Checkpoint on AUC improvement (model selection metric).
        if auc > best_auc:
            model.save(name=opt.dataset, epoch=epoch, opt=f"{opt}")
            best_epoch = epoch
            best_auc = auc
            best_per = corr
            logging.info("model save")

    logging.info("----" * 20)
    logging.info(
        f"{now()}:{opt.model}:{opt} \n\t\t best_auc:{best_auc}, best_per:{best_per}"
    )
    logging.info("----" * 20)
    print("----" * 20)
    print(
        f"{now()}:{opt.model}:{opt} \n\t epoch:{best_epoch}: best_auc:{best_auc}, best_per:{best_per}"
    )
    print("----" * 20)