Example 1
    def __init__(self, train_fname, nb_neg):
        self._load_train_matrix(train_fname)
        self.nb_neg = nb_neg

        mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_TRAIN_NEG_GEN,
                             value=nb_neg)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_SAMPLE_TRAIN_REPLACEMENT)
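
This constructor only records how many negatives to draw per positive; the sampling itself happens per lookup. Below is a minimal sketch (not the repository's exact code) of a matching __getitem__, assuming _load_train_matrix fills self.data with (user, item) pairs, self.mat with a scipy dok_matrix of known positives, and self.nb_items with the item count:

    def __len__(self):
        # one positive plus nb_neg freshly sampled negatives per pair
        return (self.nb_neg + 1) * len(self.data)

    def __getitem__(self, idx):
        if idx % (self.nb_neg + 1) == 0:
            # positive example: label 1
            u, i = self.data[idx // (self.nb_neg + 1)]
            return u, i, np.ones(1, dtype=np.float32)
        # negative example: rejection-sample an item the user never rated
        u = self.data[idx // (self.nb_neg + 1)][0]
        j = np.random.randint(self.nb_items)
        while (u, j) in self.mat:
            j = np.random.randint(self.nb_items)
        return u, j, np.zeros(1, dtype=np.float32)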
Example 2
def val_epoch(model,
              ratings,
              negs,
              K,
              use_cuda=True,
              output=None,
              epoch=None,
              processes=1):
    if epoch is None:
        print("Initial evaluation")
    else:
        print("Epoch {} evaluation".format(epoch))

    mlperf_log.ncf_print(key=mlperf_log.EVAL_START, value=epoch)
    start = datetime.now()
    model.eval()
    if processes > 1:
        context = mp.get_context('spawn')
        _eval_one = partial(eval_one, model=model, K=K, use_cuda=use_cuda)
        with context.Pool(processes=processes) as workers:
            hits_ndcg_numpred = workers.starmap(_eval_one, zip(ratings, negs))
        hits, ndcgs, num_preds = zip(*hits_ndcg_numpred)
    else:
        hits, ndcgs, num_preds = [], [], []
        for rating, items in zip(ratings, negs):
            hit, ndcg, num_pred = eval_one(rating,
                                           items,
                                           model,
                                           K,
                                           use_cuda=use_cuda)
            hits.append(hit)
            ndcgs.append(ndcg)
            num_preds.append(num_pred)

    hits = np.array(hits, dtype=np.float32)
    ndcgs = np.array(ndcgs, dtype=np.float32)

    assert len(set(num_preds)) == 1
    num_neg = num_preds[0] - 1  # one true positive, many negatives
    mlperf_log.ncf_print(key=mlperf_log.EVAL_SIZE,
                         value={
                             "epoch": epoch,
                             "value": len(hits) * (1 + num_neg)
                         })
    mlperf_log.ncf_print(key=mlperf_log.EVAL_HP_NUM_USERS, value=len(hits))
    mlperf_log.ncf_print(key=mlperf_log.EVAL_HP_NUM_NEG, value=num_neg)

    end = datetime.now()
    if output is not None:
        result = OrderedDict()
        result['timestamp'] = datetime.now()
        result['duration'] = end - start
        result['epoch'] = epoch
        result['K'] = K
        result['hit_rate'] = np.mean(hits)
        result['NDCG'] = np.mean(ndcgs)
        utils.save_result(result, output)

    return hits, ndcgs
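
eval_one is referenced above but not shown. A minimal sketch of a compatible implementation, assuming the model returns one logit per (user, item) pair: it scores the true item against its sampled negatives and reports the hit indicator, the NDCG, and the number of scored items.

def eval_one(rating, items, model, K, use_cuda=True):
    # Sketch: rank the true item among its sampled negatives.
    user, true_item = rating[0], rating[1]
    items = list(items) + [true_item]
    users_t = torch.LongTensor([user] * len(items))
    items_t = torch.LongTensor(items)
    if use_cuda:
        users_t, items_t = users_t.cuda(), items_t.cuda()
    with torch.no_grad():
        scores = model(users_t, items_t).view(-1)
    top = torch.topk(scores, K)[1].tolist()
    true_pos = len(items) - 1  # index of the true item
    hit = float(true_pos in top)
    # single relevant item: NDCG reduces to 1 / log2(rank + 2)
    ndcg = 1. / np.log2(top.index(true_pos) + 2) if hit else 0.
    return hit, ndcg, len(items)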
Example 3
    def __init__(self, nb_users, nb_items, mf_dim, mf_reg, mlp_layer_sizes,
                 mlp_layer_regs):
        if len(mlp_layer_sizes) != len(mlp_layer_regs):
            raise RuntimeError('mlp_layer_sizes and mlp_layer_regs '
                               'must have the same length')
        if mlp_layer_sizes[0] % 2 != 0:
            raise RuntimeError('mlp_layer_sizes[0] must be even')
        super(NeuMF, self).__init__()
        nb_mlp_layers = len(mlp_layer_sizes)

        mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_MF_DIM, value=mf_dim)

        # TODO: regularization?
        self.mf_user_embed = nn.Embedding(nb_users, mf_dim)
        self.mf_item_embed = nn.Embedding(nb_items, mf_dim)
        self.mlp_user_embed = nn.Embedding(nb_users, mlp_layer_sizes[0] // 2)
        self.mlp_item_embed = nn.Embedding(nb_items, mlp_layer_sizes[0] // 2)

        mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_MLP_LAYER_SIZES,
                             value=mlp_layer_sizes)
        self.mlp = nn.ModuleList()
        for i in range(1, nb_mlp_layers):
            self.mlp.append(
                nn.Linear(mlp_layer_sizes[i - 1], mlp_layer_sizes[i]))

        self.final = nn.Linear(mlp_layer_sizes[-1] + mf_dim, 1)

        self.mf_user_embed.weight.data.normal_(0., 0.01)
        self.mf_item_embed.weight.data.normal_(0., 0.01)
        self.mlp_user_embed.weight.data.normal_(0., 0.01)
        self.mlp_item_embed.weight.data.normal_(0., 0.01)

        def glorot_uniform(layer):
            fan_in, fan_out = layer.in_features, layer.out_features
            limit = np.sqrt(6. / (fan_in + fan_out))
            layer.weight.data.uniform_(-limit, limit)

        def lecun_uniform(layer):
            fan_in = layer.in_features
            limit = np.sqrt(3. / fan_in)
            layer.weight.data.uniform_(-limit, limit)

        for layer in self.mlp:
            if isinstance(layer, nn.Linear):
                glorot_uniform(layer)
        lecun_uniform(self.final)
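
The constructor implies a forward pass that fuses the GMF and MLP branches before self.final. A sketch of such a forward follows; the ReLU activations and the optional sigmoid are assumptions, not taken from the repository (training in Example 4 uses BCEWithLogitsLoss, so returning a raw logit by default is consistent):

    def forward(self, user, item, sigmoid=False):
        # GMF branch: elementwise product of the MF embeddings
        xmf = self.mf_user_embed(user) * self.mf_item_embed(item)
        # MLP branch: concatenated embeddings through the tower
        xmlp = torch.cat((self.mlp_user_embed(user),
                          self.mlp_item_embed(item)), dim=1)
        for layer in self.mlp:
            xmlp = nn.functional.relu(layer(xmlp))
        # fuse both branches into a single logit
        x = self.final(torch.cat((xmf, xmlp), dim=1))
        return torch.sigmoid(x) if sigmoid else x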
Example 4
def main():
    # Note: The run start is in convert.py

    args = parse_args()
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Save configuration to file
    config = dict(vars(args))
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = "./run/neumf/{}".format(config['timestamp'])
    print("Saving config and results to {}".format(run_dir))
    os.makedirs(run_dir, exist_ok=True)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    print('Loading data')
    train_dataset = CFTrainDataset(
        os.path.join(args.data, TRAIN_RATINGS_FILENAME), args.negative_samples)

    mlperf_log.ncf_print(key=mlperf_log.INPUT_BATCH_SIZE,
                         value=args.batch_size)
    mlperf_log.ncf_print(
        key=mlperf_log.INPUT_ORDER)  # set shuffle=True in DataLoader
    train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))
    nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d' %
          (time.time() - t1, nb_users, nb_items, train_dataset.mat.nnz,
           len(test_ratings)))

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers])
    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    # Add optimizer and loss to graph
    mlperf_log.ncf_print(key=mlperf_log.TRAIN_LEARN_RATE,
                         value=args.learning_rate)
    beta1, beta2, epsilon = 0.9, 0.999, 1e-8
    mlperf_log.ncf_print(key=mlperf_log.OPT_NAME, value="Adam")
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA1, value=beta1)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_BETA2, value=beta2)
    mlperf_log.ncf_print(key=mlperf_log.OPT_HP_ADAM_EPSILON, value=epsilon)
    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(beta1, beta2),
                                 lr=args.learning_rate,
                                 eps=epsilon)

    mlperf_log.ncf_print(key=mlperf_log.MODEL_HP_LOSS_FN, value=mlperf_log.BCE)
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()
        criterion = criterion.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    # Calculate initial Hit Ratio and NDCG
    hits, ndcgs = val_epoch(model,
                            test_ratings,
                            test_negs,
                            args.topk,
                            use_cuda=use_cuda,
                            processes=args.processes)
    print('Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'.format(
        K=args.topk, hit_rate=np.mean(hits), ndcg=np.mean(ndcgs)))

    success = False
    mlperf_log.ncf_print(key=mlperf_log.TRAIN_LOOP)
    for epoch in range(args.epochs):
        mlperf_log.ncf_print(key=mlperf_log.TRAIN_EPOCH, value=epoch)
        model.train()
        losses = utils.AverageMeter()

        mlperf_log.ncf_print(key=mlperf_log.INPUT_HP_NUM_NEG,
                             value=train_dataset.nb_neg)
        mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_TRAIN_NEG_GEN)
        begin = time.time()
        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            if use_cuda:
                user = user.cuda(non_blocking=True)
                item = item.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)

            outputs = model(user, item)
            loss = criterion(outputs, label)
            losses.update(loss.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update the progress bar with current and running-average loss
            description = (
                'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                    epoch, loss=losses))
            loader.set_description(description)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes)
        mlperf_log.ncf_print(key=mlperf_log.EVAL_ACCURACY,
                             value={
                                 "epoch": epoch,
                                 "value": float(np.mean(hits))
                             })
        mlperf_log.ncf_print(key=mlperf_log.EVAL_TARGET, value=args.threshold)
        mlperf_log.ncf_print(key=mlperf_log.EVAL_STOP)
        val_time = time.time() - begin
        print(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f},'
            ' train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=np.mean(hits),
                ndcg=np.mean(ndcgs),
                train_time=train_time,
                val_time=val_time))
        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                print("Hit threshold of {}".format(args.threshold))
                success = True
                break

    mlperf_log.ncf_print(key=mlperf_log.RUN_STOP, value={"success": success})
    mlperf_log.ncf_print(key=mlperf_log.RUN_FINAL)
Example 5
def main():
    args = parse_args()
    np.random.seed(args.seed)

    print("Loading raw data from {}".format(args.path))
    df = implicit_load(args.path, sort=False)

    print("Filtering out users with less than {} ratings".format(MIN_RATINGS))
    grouped = df.groupby(USER_COLUMN)
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_MIN_RATINGS,
                         value=MIN_RATINGS)
    df = grouped.filter(lambda x: len(x) >= MIN_RATINGS)

    print("Mapping original user and item IDs to new sequential IDs")
    original_users = df[USER_COLUMN].unique()
    original_items = df[ITEM_COLUMN].unique()

    user_map = {user: index for index, user in enumerate(original_users)}
    item_map = {item: index for index, item in enumerate(original_items)}

    df[USER_COLUMN] = df[USER_COLUMN].apply(lambda user: user_map[user])
    df[ITEM_COLUMN] = df[ITEM_COLUMN].apply(lambda item: item_map[item])

    assert df[USER_COLUMN].max() == len(original_users) - 1
    assert df[ITEM_COLUMN].max() == len(original_items) - 1

    print("Creating list of items for each user")
    # Need to sort before popping to get last item
    df.sort_values(by='timestamp', inplace=True)
    all_ratings = set(zip(df[USER_COLUMN], df[ITEM_COLUMN]))
    user_to_items = defaultdict(list)
    for row in tqdm(df.itertuples(), desc='Ratings', total=len(df)):
        user_to_items[getattr(row, USER_COLUMN)].append(
            getattr(row, ITEM_COLUMN))  # noqa: E501

    test_ratings = []
    test_negs = []
    all_items = set(range(len(original_items)))

    print("Generating {} negative samples for each user".format(
        args.negatives))
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_NUM_EVAL,
                         value=args.negatives)

    # The default of np.random.choice is replace=True
    mlperf_log.ncf_print(key=mlperf_log.PREPROC_HP_SAMPLE_EVAL_REPLACEMENT,
                         value=True)

    # =========================================================================
    # First random operation triggers the clock start.
    # =========================================================================
    mlperf_log.ncf_print(key=mlperf_log.RUN_START)
    mlperf_log.ncf_print(key=mlperf_log.INPUT_STEP_EVAL_NEG_GEN)

    for user in tqdm(range(len(original_users)),
                     desc='Users',
                     total=len(original_users)):  # noqa: E501
        test_item = user_to_items[user].pop()

        all_ratings.remove((user, test_item))
        all_negs = all_items - set(user_to_items[user])
        all_negs = sorted(all_negs)  # sort for determinism

        test_ratings.append((user, test_item))
        test_negs.append(list(np.random.choice(all_negs, args.negatives)))

    print("Saving train and test CSV files to {}".format(args.output))
    df_train_ratings = pd.DataFrame(list(all_ratings))
    df_train_ratings['fake_rating'] = 1
    df_train_ratings.to_csv(os.path.join(args.output, TRAIN_RATINGS_FILENAME),
                            index=False,
                            header=False,
                            sep='\t')

    mlperf_log.ncf_print(key=mlperf_log.INPUT_SIZE,
                         value=len(df_train_ratings))

    df_test_ratings = pd.DataFrame(test_ratings)
    df_test_ratings['fake_rating'] = 1
    df_test_ratings.to_csv(os.path.join(args.output, TEST_RATINGS_FILENAME),
                           index=False,
                           header=False,
                           sep='\t')

    df_test_negs = pd.DataFrame(test_negs)
    df_test_negs.to_csv(os.path.join(args.output, TEST_NEG_FILENAME),
                        index=False,
                        header=False,
                        sep='\t')
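
The three tab-separated files written above are what load_test_ratings and load_test_negs (used in Example 4 but not shown) read back. A minimal sketch of compatible loaders, assuming only the layouts produced here:

def load_test_ratings(fname):
    # each row: user_id \t item_id \t fake_rating
    ratings = pd.read_csv(fname, sep='\t', header=None)
    return list(zip(ratings[0], ratings[1]))


def load_test_negs(fname):
    # each row: the negative item ids sampled for one user
    return pd.read_csv(fname, sep='\t', header=None).values.tolist()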