# Example #1
def train_gowalla(args):
    """Train a BPR/LR-GCCF model on the Gowalla (gowalla_x0) split.

    Loads the preprocessed train/val/test splits from disk, trains for
    ``args.epoch`` epochs on GPU, logs per-epoch train/val/test losses to
    ``./log/gowalla/newloss<run_id>/results.txt`` and checkpoints the model
    after every epoch under ``./newlossModel/gowalla/s<run_id>``.

    Expected ``args`` attributes: ``dataset``, ``embed_size``, ``run_id``,
    ``lr``, ``epoch``.
    """

    def _epoch_mean(losses):
        # The last batch may be smaller than ``batch_size``; drop it so the
        # mean is over equal-sized batches. Fall back to the full list when
        # there is only one batch — the unconditional [:-1] of the original
        # would average an empty slice and produce NaN.
        usable = losses[:-1] if len(losses) > 1 else losses
        return round(float(np.mean(usable)), 4)

    print(f"running {args.dataset}")

    dataset_base_path = '../../data/Gowalla/gowalla_x0'

    # Gowalla split statistics (fixed by the preprocessing pipeline).
    user_num = 29858
    item_num = 40981
    factor_num = args.embed_size
    batch_size = 2048 * 512  # NOTE(review): ~1M pairs per batch — confirm intended
    top_k = 20
    num_negative_test_val = -1  # -1 => evaluate against all items

    run_id = args.run_id
    print(run_id)
    dataset = 'gowalla'

    path_save_base = f'./log/{dataset}/newloss{run_id}'
    if os.path.exists(path_save_base):
        print('has results save path')
    # exist_ok=True avoids the exists()/makedirs() race of a separate check.
    os.makedirs(path_save_base, exist_ok=True)

    path_save_model_base = f'./newlossModel/{dataset}/s{run_id}'
    if os.path.exists(path_save_model_base):
        print('has model save path')
    os.makedirs(path_save_model_base, exist_ok=True)

    # Keep a copy of the training script next to the results for reproducibility.
    copyfile('../../external/LR_gccf/train_gowalla.py',
             f'{path_save_base}/train_gowalla{run_id}.py')

    # Each *.npy holds a pickled tuple: (user->items dict, item->users dict, count).
    training_user_set, training_item_set, training_set_count = np.load(
        dataset_base_path + '/datanpy/training_set.npy', allow_pickle=True)
    testing_user_set, testing_item_set, testing_set_count = np.load(
        dataset_base_path + '/datanpy/testing_set.npy', allow_pickle=True)
    val_user_set, val_item_set, val_set_count = np.load(
        dataset_base_path + '/datanpy/val_set.npy', allow_pickle=True)
    user_rating_set_all = np.load(
        dataset_base_path + '/datanpy/user_rating_set_all.npy',
        allow_pickle=True).item()

    # Per-user / per-item degree lists and the sparse interaction matrices
    # used by the graph-convolution side of the model.
    u_d = readD(training_user_set, user_num)
    i_d = readD(training_item_set, item_num)
    d_i_train = u_d
    d_j_train = i_d
    sparse_u_i = readTrainSparseMatrix(training_user_set, True, u_d, i_d)
    sparse_i_u = readTrainSparseMatrix(training_item_set, False, u_d, i_d)

    # BPR datasets: num_ng negatives are resampled per epoch via ng_sample().
    train_dataset = data_utils.BPRData(
        train_dict=training_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=training_set_count,
        all_rating=user_rating_set_all)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=2)

    testing_dataset_loss = data_utils.BPRData(
        train_dict=testing_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=testing_set_count,
        all_rating=user_rating_set_all)
    testing_loader_loss = DataLoader(testing_dataset_loss,
                                     batch_size=batch_size,
                                     shuffle=False, num_workers=0)

    val_dataset_loss = data_utils.BPRData(
        train_dict=val_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=val_set_count,
        all_rating=user_rating_set_all)
    val_loader_loss = DataLoader(val_dataset_loss, batch_size=batch_size,
                                 shuffle=False, num_workers=0)

    model = BPR(user_num, item_num, factor_num, sparse_u_i, sparse_i_u,
                d_i_train, d_j_train).to('cuda')

    optimizer_bpr = torch.optim.Adam(model.parameters(), lr=args.lr)

    ########################### TRAINING #####################################

    print('--------training processing-------')
    count, best_hr = 0, 0
    # ``with`` guarantees the results file is closed even if training raises;
    # the original opened it and never closed it.
    with open(path_save_base + '/results.txt', 'w+') as result_file:
        for epoch in range(args.epoch):
            model.train()
            start_time = time.time()
            train_loader.dataset.ng_sample()  # fresh negatives each epoch
            print('train data of ng_sample is  end')

            train_loss_sum = []
            train_loss_sum2 = []
            for user, item_i, item_j in train_loader:
                user = user.cuda()
                item_i = item_i.cuda()
                item_j = item_j.cuda()

                model.zero_grad()
                prediction_i, prediction_j, loss, loss2 = model(
                    user, item_i, item_j)
                loss.backward()
                optimizer_bpr.step()
                count += 1
                train_loss_sum.append(loss.item())
                train_loss_sum2.append(loss2.item())

            elapsed_time = time.time() - start_time
            train_loss = _epoch_mean(train_loss_sum)
            train_loss2 = _epoch_mean(train_loss_sum2)
            str_print_train = (f"epoch:{epoch} time:{round(elapsed_time, 1)}"
                               f"\t train loss:{train_loss}={train_loss2}+")
            print('--train--', elapsed_time)

            # Checkpoint after every epoch.
            torch.save(model.state_dict(),
                       f'{path_save_model_base}/epoch{epoch}.pt')

            model.eval()
            # Validation / test loss on freshly resampled negatives.
            val_loader_loss.dataset.ng_sample()
            val_loss = evaluate.metrics_loss(model, val_loader_loss,
                                             batch_size)
            testing_loader_loss.dataset.ng_sample()
            test_loss = evaluate.metrics_loss(model, testing_loader_loss,
                                              batch_size)

            line = (str_print_train + ' val loss:' + str(val_loss) +
                    ' test loss:' + str(test_loss))
            print(line)
            result_file.write(line + '\n')
            result_file.flush()
# Example #2
        # NOTE(review): fragment of a training-loop body (Example #2). The
        # enclosing function and epoch loop are not visible here, so the
        # names below (loss2, start_time, result_file, ...) are presumably
        # defined in the surrounding code — not runnable standalone.
        train_loss_sum2.append(loss2.item())
        # print(count)

    elapsed_time = time.time() - start_time
    train_loss = round(np.mean(train_loss_sum[:-1]),
                       4)  # last batch may be smaller than batch_size; drop it so the mean is over equal-sized batches
    train_loss2 = round(np.mean(train_loss_sum2[:-1]),
                        4)  # last batch may be smaller than batch_size; drop it so the mean is over equal-sized batches
    str_print_train = "epoch:" + str(epoch) + ' time:' + str(
        round(elapsed_time, 1)) + '\t train loss:' + str(
            train_loss) + "=" + str(train_loss2) + "+"
    print('--train--', elapsed_time)

    # Checkpoint the model after every epoch.
    PATH_model = path_save_model_base + '/epoch' + str(epoch) + '.pt'
    torch.save(model.state_dict(), PATH_model)

    model.eval()
    # ######test and val###########
    # Resample negatives before computing the validation loss.
    val_loader_loss.dataset.ng_sample()
    val_loss = evaluate.metrics_loss(model, val_loader_loss, batch_size)
    # str_print_train+=' val loss:'+str(val_loss)

    # Resample negatives before computing the test loss.
    testing_loader_loss.dataset.ng_sample()
    test_loss = evaluate.metrics_loss(model, testing_loader_loss, batch_size)
    print(str_print_train + ' val loss:' + str(val_loss) + ' test loss:' +
          str(test_loss))
    result_file.write(str_print_train + ' val loss:' + str(val_loss) +
                      ' test loss:' + str(test_loss))
    result_file.write('\n')
    result_file.flush()