# Fragment of the "Parallel:main" script flow (tag taken from the log strings).
# It begins inside a construct that binds `i` outside this view (presumably a
# loop -- TODO confirm) and stops partway through the "IPS" branch below.
#
# 1. Smallest probability: currentMin is numpy.amin(loggingPolicy.multinomials[i])
#    when args.temperature > 0.0, otherwise 1.0 / i; smallestProb is lowered to
#    currentMin whenever currentMin is smaller.
# 2. Logs args.temperature and smallestProb to stdout (flushed).
# 3. Metric selection on args.value_metric: "DCG", "NDCG" (which additionally
#    receives args.replacement), "ERR", "MaxRel" -> Metrics.MaxRelevance,
#    "SumRel" -> Metrics.SumRelevance. Any other value prints an [ERR] message
#    and calls sys.exit(0).
#    NOTE(review): that error path exits with status 0, so a calling shell sees
#    success -- confirm whether a non-zero status is intended.
# 4. Estimator selection on args.approach: "OnPolicy" builds Estimators.OnPolicy
#    and immediately calls estimateAll() on it; "IPS" with args.temperature > 0.0
#    builds Estimators.NonUniformIPS. The remaining branches are not visible here.
if args.temperature > 0.0: currentMin = numpy.amin(loggingPolicy.multinomials[i]) else: currentMin = 1.0 / i if currentMin < smallestProb: smallestProb = currentMin print("Parallel:main [LOG] Temperature:", args.temperature, "\t Smallest marginal probability:", smallestProb, flush=True) metric = None if args.value_metric == "DCG": metric = Metrics.DCG(data, args.length_ranking) elif args.value_metric == "NDCG": metric = Metrics.NDCG(data, args.length_ranking, args.replacement) elif args.value_metric == "ERR": metric = Metrics.ERR(data, args.length_ranking) elif args.value_metric == "MaxRel": metric = Metrics.MaxRelevance(data, args.length_ranking) elif args.value_metric == "SumRel": metric = Metrics.SumRelevance(data, args.length_ranking) else: print("Parallel:main [ERR] Metric %s not supported." % args.value_metric, flush=True) sys.exit(0) estimator = None if args.approach == "OnPolicy": estimator = Estimators.OnPolicy(args.length_ranking, loggingPolicy, targetPolicy, metric) estimator.estimateAll() elif args.approach == "IPS": if args.temperature > 0.0: estimator = Estimators.NonUniformIPS(args.length_ranking, loggingPolicy, targetPolicy)
# Build one evaluation metric per data split (train / validation / test), all of
# the kind named by args.value_metric, then construct the supervised baseline.
#
# Each factory takes a dataset and returns the metric object for it. "NDCG" is
# the only kind that is passed a third positional argument (a literal False).
metricFactories = {
    "DCG": lambda dataset: Metrics.DCG(dataset, args.length_ranking),
    "NDCG": lambda dataset: Metrics.NDCG(dataset, args.length_ranking, False),
    "ERR": lambda dataset: Metrics.ERR(dataset, args.length_ranking),
}

trainMetric = None
validationMetric = None
testMetric = None

metricFactory = metricFactories.get(args.value_metric)
if metricFactory is None:
    print("Optimization:main [ERR] Metric %s not supported." % args.value_metric, flush=True)
    # Exit with a non-zero status: this is an error path, and a status of 0
    # would tell the calling shell or job scheduler that the run succeeded.
    sys.exit(1)

# Same construction order as before: train, then validation, then test.
trainMetric = metricFactory(trainDataset)
validationMetric = metricFactory(validationDataset)
testMetric = metricFactory(testDataset)

#Fully supervised baseline
supervisedPolicy = Policy.DeterministicPolicy(trainDataset, args.ranker, hyper_params=hyperParams,
                                              regress_gains=False, weighted_ls=True)