def main():
    """Evaluate a (possibly quantized) NeuMF recommender on the test set.

    Builds the model, optionally restores a checkpoint, optionally wraps
    its embedding layers for post-training quantization, then reports
    HR@K and NDCG@K on the held-out evaluation data.
    """
    args = parse_args()

    # Seed torch and numpy so evaluation runs are reproducible.
    if args.seed is not None:
        print("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # Create model. User/item counts are hard-coded for the ml-20m-scale
    # dataset — presumably matching the checkpoint being loaded (verify).
    mlp_sizes = [256, 256, 128, 64]
    model = NeuMF(2197225, 855776,
                  mf_dim=64, mf_reg=0.,
                  mlp_layer_sizes=mlp_sizes,
                  mlp_layer_regs=[0. for _ in mlp_sizes])
    print(model)

    if use_cuda:
        # Move model and loss to GPU
        model = model.cuda()

    if args.load_ckp:
        checkpoint = torch.load(args.load_ckp)
        model.load_state_dict(checkpoint)

    if args.quantize:
        # Collect module names by type. Only the embeddings end up in
        # `layers` below; the other lists are kept so the commented-out
        # toggle can be re-enabled easily.
        embedding_names = [n for n, m in model.named_modules()
                           if isinstance(m, nn.Embedding)]
        linear_names = [n for n, m in model.named_modules()
                        if isinstance(m, nn.Linear)]
        relu_names = [n for n, m in model.named_modules()
                      if isinstance(m, nn.ReLU)]
        relu6_names = [n for n, m in model.named_modules()
                       if isinstance(m, nn.ReLU6)]
        # layers = relu_names + relu6_names + linear_names
        layers = embedding_names
        replacement_factory = {
            nn.ReLU: ActivationModuleWrapperPost,
            nn.ReLU6: ActivationModuleWrapperPost,
            nn.Linear: ParameterModuleWrapperPost,
            nn.Embedding: ActivationModuleWrapperPost,
        }
        # NOTE(review): the quantizer presumably mutates `model` in place;
        # the handle itself is unused afterwards.
        mq = ModelQuantizer(model, args, layers, replacement_factory)
        # mq.log_quantizer_state(ml_logger, -1)

    test_users, test_items, dup_mask, real_indices, K, samples_per_user, num_user = data_loader(
        args.data)
    data = NcfData(test_users, test_items, dup_mask, real_indices, K,
                   samples_per_user, num_user)

    hr, ndcg = val(model, data)
    print('')
    print('')
    print('HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}'.format(
        K=K, hit_rate=hr, ndcg=ndcg))
def main(args, ml_logger):
    """Calibrate clipping scales for a fully quantized NeuMF model.

    Restores a checkpoint, wraps every ReLU/ReLU6/Linear/Embedding module
    for quantization, initializes clipping from a calibration set, tunes
    the clipping scales with a scipy minimizer, and logs HR@K to
    ``ml_logger`` before and after the optimization.
    """
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # Create model. User/item counts are hard-coded for the
    # ml-20m-scale dataset.
    mlp_sizes = [256, 256, 128, 64]
    model = NeuMF(2197225, 855776,
                  mf_dim=64, mf_reg=0.,
                  mlp_layer_sizes=mlp_sizes,
                  mlp_layer_regs=[0. for _ in mlp_sizes])
    print(model)

    if use_cuda:
        # Move model and loss to GPU; remember the device for set_clipping.
        model = model.cuda()
        model.device = torch.device('cuda:{}'.format(0))

    if args.load_ckp:
        checkpoint = torch.load(args.load_ckp)
        model.load_state_dict(checkpoint)

    # Collect every quantizable module by type; all of them get wrapped
    # (unlike the embedding-only variant of this script).
    embedding_names = [n for n, m in model.named_modules()
                       if isinstance(m, nn.Embedding)]
    linear_names = [n for n, m in model.named_modules()
                    if isinstance(m, nn.Linear)]
    relu_names = [n for n, m in model.named_modules()
                  if isinstance(m, nn.ReLU)]
    relu6_names = [n for n, m in model.named_modules()
                   if isinstance(m, nn.ReLU6)]
    layers = relu_names + relu6_names + linear_names + embedding_names
    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Linear: ParameterModuleWrapperPost,
        nn.Embedding: ActivationModuleWrapperPost,
    }
    mq = ModelQuantizer(model, args, layers, replacement_factory)
    # mq.log_quantizer_state(ml_logger, -1)

    test_users, test_items, dup_mask, real_indices, K, samples_per_user, num_user = data_loader(
        args.data)
    data = NcfData(test_users, test_items, dup_mask, real_indices, K,
                   samples_per_user, num_user)

    # NOTE(review): calibration data and criterion are moved to CUDA
    # unconditionally, ignoring `use_cuda` — this path appears to assume
    # a GPU is present; confirm before running CPU-only.
    cal_data = CalibrationSet('ml-20mx16x32/cal_set').cuda()
    cal_data.split(batch_size=10000)
    criterion = nn.BCEWithLogitsLoss(reduction='mean')
    criterion = criterion.cuda()

    print("init_method: {}, qtype {}".format(args.init_method, args.qtype))

    # evaluate to initialize dynamic clipping
    loss = evaluate_calibration(model, cal_data, criterion)
    print("Initial loss: {:.4f}".format(loss))

    # get clipping values
    initial_scales = get_clipping(mq)

    # evaluate the un-tuned model for a baseline metric
    hr, ndcg = validate(model, data)
    ml_logger.log_metric('HR init', hr, step='auto')

    # run optimizer
    minimizer_options = {}
    if args.maxiter is not None:
        minimizer_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        minimizer_options['maxfev'] = args.maxfev

    step_counter = count(0)

    def local_search_callback(x):
        # Per-iteration progress report: re-run calibration at the
        # candidate scales and print the resulting loss.
        it = next(step_counter)
        loss = run_inference_on_calibration(x, model, mq, cal_data,
                                            criterion)
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss))

    result = opt.minimize(
        lambda scales: run_inference_on_calibration(scales, model, mq,
                                                    cal_data, criterion),
        np.array(initial_scales),
        method=args.min_method,
        options=minimizer_options,
        callback=local_search_callback)
    print(result)

    # Apply the optimized clipping scales and re-evaluate.
    set_clipping(mq, result.x, model.device)
    hr, ndcg = validate(model, data)
    ml_logger.log_metric('HR Powell', hr, step='auto')