import os
import sys

import numpy as np
import torch

# NeuMF and parse_args are assumed to come from the surrounding NCF codebase
# (as in NVIDIA's DeepLearningExamples recommendation scripts); neither is
# defined in this snippet.
from neumf import NeuMF


def main():
    args = parse_args()

    # Load the checkpoint on the CPU so inference works without a GPU.
    state_dict = torch.load(args.model_checkpoint_path,
                            map_location=torch.device("cpu"))

    # Recover the MLP layer sizes from the checkpoint: the first weight
    # matrix gives the input width, and each bias vector gives the width of
    # its layer. Sort the bias keys numerically so "mlp.10" follows "mlp.9".
    layers = [state_dict["mlp.0.weight"].size()[1]]
    layer_keys = sorted(
        [k for k in state_dict.keys()
         if k.startswith("mlp") and k.endswith("bias")],
        key=lambda k: int(k.split(".")[1]))
    layers.extend([state_dict[k].size()[0] for k in layer_keys])

    # The embedding tables give the user and item counts the model was
    # trained with.
    n_items = state_dict["mf_item_embed.weight"].size()[0]
    n_users = state_dict["mf_user_embed.weight"].size()[0]

    model = NeuMF(
        nb_users=n_users,
        nb_items=n_items,
        mf_dim=state_dict["mf_item_embed.weight"].size()[1],
        mlp_layer_sizes=layers,
        dropout=0.5,
    )

    model.load_state_dict(state_dict)

    model.eval()

    # Score every item for a single user (here the last user ID) in one batch.
    users = torch.LongTensor(np.full(n_items, n_users - 1))
    items = torch.LongTensor(np.arange(n_items, dtype=np.int64))

    with torch.no_grad():
        predictions = model(users, items, sigmoid=True)
    predictions = predictions.numpy().squeeze()

    # Create the output directory if a non-default location was requested.
    if args.output_dir != "./":
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError as err:
            print("Failed to create output directory: {}".format(err))
            sys.exit(-1)

    # Build the movie-name list: each CSV row is expected to be
    # "id,title,genres", so strip the leading ID and the trailing genre
    # field, then drop any quoting around the title.
    names = []
    with open(args.movie_db_file) as fh:
        for line in fh:
            mid, _, name = line.partition(",")
            name, _, tail = name.rpartition(",")
            name = name.strip('"')
            names.append(name)

    predictions_file = os.path.join(args.output_dir, "predictions.csv")
    with open(predictions_file, "wt") as fh:
        fh.write("Movie, Predicted Rating\n")
        # Write movies from highest to lowest score, scaling the sigmoid
        # output in [0, 1] to a 0-5 rating.
        argsorts = np.argsort(predictions)[::-1]
        for idx in argsorts:
            fh.write("{}, {:0.2f}\n".format(names[idx], predictions[idx] * 5))

    print("Predictions saved to {}".format(predictions_file))

    return
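
Example #1 assumes a parse_args() with three path-style options; a minimal hypothetical sketch (the flag names simply mirror the attributes the example reads) could be:

import argparse

def parse_args():
    # Hypothetical sketch: only the attributes Example #1 uses are defined.
    parser = argparse.ArgumentParser(description='Generate NeuMF predictions')
    parser.add_argument('--model_checkpoint_path', type=str, required=True,
                        help='path to the trained NeuMF state dict')
    parser.add_argument('--output_dir', type=str, default='./',
                        help='directory that will hold predictions.csv')
    parser.add_argument('--movie_db_file', type=str, required=True,
                        help='CSV file mapping movie IDs to titles')
    return parser.parse_args()
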
Example #2
import time

import dllogger
import numpy as np
import torch
from apex import amp  # NVIDIA Apex, needed for the "O2" mixed-precision path

# parse_args and NeuMF come from the surrounding codebase, as in Example #1.


def main():
    args = parse_args()
    dllogger.init(backends=[
        dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   filename=args.log_path),
        dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
    ])

    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users,
                  nb_items=args.n_items,
                  mf_dim=args.factors,
                  mlp_layer_sizes=args.layers,
                  dropout=args.dropout)

    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)

    # Optionally convert the model with Apex AMP ("O2") for mixed-precision
    # inference.
    if args.opt_level == "O2":
        model = amp.initialize(model,
                               opt_level=args.opt_level,
                               keep_batchnorm_fp32=False,
                               loss_scale='dynamic')
    model.eval()

    # Random user/item IDs are sufficient for measuring inference latency.
    users = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_users)
    items = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        torch.cuda.synchronize()  # fence async GPU work before timing
        start = time.time()
        predictions = model(users, items, sigmoid=True)
        torch.cuda.synchronize()  # wait for the batch to finish
        latencies.append(time.time() - start)

    dllogger.log(data={
        'batch_size': args.batch_size,
        'best_inference_throughput': args.batch_size / min(latencies),
        'best_inference_latency': min(latencies),
        'mean_inference_throughput': args.batch_size / np.mean(latencies),
        'mean_inference_latency': np.mean(latencies),
        'inference_latencies': latencies,
    }, step=tuple())
    dllogger.flush()
    return
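
The synchronize-before-and-after pattern in the loop above is what makes the wall-clock timings meaningful: CUDA kernel launches are asynchronous, so without the fences time.time() would mostly measure launch overhead. A reusable helper along the same lines (hypothetical, not part of the original script):

import time

import torch

def time_batch(model, users, items):
    # Flush pending GPU work so the timer brackets only this batch.
    torch.cuda.synchronize()
    start = time.time()
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        model(users, items, sigmoid=True)
    torch.cuda.synchronize()
    return time.time() - start
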
Example #3
import time

import dllogger
import numpy as np
import torch

# parse_args and NeuMF come from the surrounding codebase, as in Example #1.


def main():
    args = parse_args()
    dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                                       filename=args.log_path),
                            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)])

    dllogger.log(data=vars(args), step='PARAMETER')

    model = NeuMF(nb_users=args.n_users, nb_items=args.n_items, mf_dim=args.factors,
                  mlp_layer_sizes=args.layers, dropout=args.dropout)

    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)

    # Half-precision inference: cast the whole model to fp16 if requested.
    if args.fp16:
        model.half()
    model.eval()

    batch_sizes = [int(s) for s in args.batch_sizes.split(',')]

    result_data = {}
    for batch_size in batch_sizes:
        print(f'benchmarking batch size: {batch_size}')
        users = torch.cuda.LongTensor(batch_size).random_(0, args.n_users)
        items = torch.cuda.LongTensor(batch_size).random_(0, args.n_items)

        latencies = []
        for _ in range(args.num_batches):
            torch.cuda.synchronize()
            start = time.time()
            _ = model(users, items, sigmoid=True)
            torch.cuda.synchronize()
            latencies.append(time.time() - start)

        result_data[f'batch_{batch_size}_mean_throughput'] = batch_size / np.mean(latencies)
        result_data[f'batch_{batch_size}_mean_latency'] = np.mean(latencies)
        # np.percentile expects q on a 0-100 scale, not a fraction in [0, 1].
        result_data[f'batch_{batch_size}_p90_latency'] = np.percentile(latencies, 90)
        result_data[f'batch_{batch_size}_p95_latency'] = np.percentile(latencies, 95)
        result_data[f'batch_{batch_size}_p99_latency'] = np.percentile(latencies, 99)

    dllogger.log(data=result_data, step=tuple())
    dllogger.flush()
    return
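
One detail worth calling out in Example #3: np.percentile takes q on a 0-100 scale, so the fraction-valued calls in the original (0.90, 0.95, 0.99) would have reported values barely above the minimum latency rather than tail latencies. A quick self-contained check:

import numpy as np

latencies = np.array([0.010, 0.011, 0.012, 0.050])
print(np.percentile(latencies, 90))    # ~0.0386: the intended p90 tail latency
print(np.percentile(latencies, 0.90))  # ~0.0100: effectively the minimum
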
Example #4
import time

import torch
from apex import amp  # NVIDIA Apex, as in Example #2

# parse_args and NeuMF come from the surrounding codebase, as in Example #1;
# log_hardware, log_args and LOGGER are that codebase's logging helpers.


def main():
    log_hardware()
    args = parse_args()
    log_args(args)

    model = NeuMF(nb_users=args.n_users,
                  nb_items=args.n_items,
                  mf_dim=args.factors,
                  mlp_layer_sizes=args.layers,
                  dropout=args.dropout)

    model = model.cuda()

    if args.load_checkpoint_path:
        state_dict = torch.load(args.load_checkpoint_path)
        model.load_state_dict(state_dict)

    if args.opt_level == "O2":
        model = amp.initialize(model,
                               opt_level=args.opt_level,
                               keep_batchnorm_fp32=False,
                               loss_scale='dynamic')
    model.eval()

    users = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_users)
    items = torch.cuda.LongTensor(args.batch_size).random_(0, args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        torch.cuda.synchronize()
        start = time.time()
        predictions = model(users, items, sigmoid=True)
        torch.cuda.synchronize()
        latencies.append(time.time() - start)

    LOGGER.log(key='batch_size', value=args.batch_size)
    LOGGER.log(key='best_inference_throughput',
               value=args.batch_size / min(latencies))
    LOGGER.log(key='best_inference_latency', value=min(latencies))
    LOGGER.log(key='inference_latencies', value=latencies)
    return
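
Examples #2-#4 all read benchmark-style options from parse_args(); a hypothetical sketch covering the attributes they use (the flag names are guesses that simply mirror those attributes):

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='NeuMF inference benchmark')
    parser.add_argument('--n_users', type=int, required=True)
    parser.add_argument('--n_items', type=int, required=True)
    parser.add_argument('--factors', type=int, default=64,
                        help='latent dimension of the MF embeddings')
    parser.add_argument('--layers', type=int, nargs='+',
                        default=[256, 256, 128, 64],
                        help='MLP layer sizes; the first is the input width')
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=1048576)
    parser.add_argument('--batch_sizes', type=str, default='1,64,1024',
                        help='comma-separated list (Example #3 only)')
    parser.add_argument('--num_batches', type=int, default=20)
    parser.add_argument('--load_checkpoint_path', type=str, default=None)
    parser.add_argument('--opt_level', type=str, default='O0',
                        choices=['O0', 'O2'])
    parser.add_argument('--fp16', action='store_true',
                        help='half-precision inference (Example #3 only)')
    parser.add_argument('--log_path', type=str, default='log.json')
    return parser.parse_args()
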