"""
Do some basic analysis on the recommendation datasets.
"""
import logging
import sys

import numpy
import matplotlib
import powerlaw
# Select the GTK3Agg backend before matplotlib.pyplot is imported below.
matplotlib.use("GTK3Agg")
import matplotlib.pyplot as plt
from wallhack.rankingexp.DatasetUtils import DatasetUtils

# Send all log output (DEBUG and above) to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Exactly one dataset loader is active; the others are kept as alternatives.
#X, U, V = DatasetUtils.syntheticDataset1()
#X = DatasetUtils.syntheticDataset2()
X = DatasetUtils.movieLens(quantile=100)
#X = DatasetUtils.flixster(quantile=100)
#X = DatasetUtils.mendeley(quantile=50)
print(X.shape)
m, n = X.shape

# Sums along axis 1 (per row) and axis 0 (per column) of X.
# Presumably rows are users and columns are items -- confirm against DatasetUtils.
userCounts = X.sum(1)
itemCounts = X.sum(0)

# Fit a discrete power law to the per-column sums, capping the fit at
# xmax = n (the number of columns of X).
results = powerlaw.Fit(itemCounts, discrete=True, xmax=n)

print(results.power_law.alpha)
print(results.power_law.xmin)
print(results.power_law.xmax)

u = 5
# argparse is used below but is not imported in the visible part of this file;
# logging and sys are imported here as well so this section is self-contained.
import argparse
import logging
import sys

# Arguments related to the dataset
dataArgs = argparse.Namespace()

# Arguments related to the algorithm
defaultAlgoArgs = argparse.Namespace()

# data args parser #
# add_help=False disables argparse's built-in -h/--help so the flag can be
# handled manually: the help text is printed from a combined parser that also
# includes the algorithm arguments.
dataParser = argparse.ArgumentParser(description="", add_help=False)
dataParser.add_argument("-h", "--help", action="store_true", help="show this help message and exit")
# parse_known_args leaves unrecognised arguments in remainingArgs; the parsed
# values are written onto dataArgs via namespace=.
devNull, remainingArgs = dataParser.parse_known_args(namespace=dataArgs)
if dataArgs.help:
    # NOTE(review): RankingExpHelper is not imported in the visible source --
    # confirm it is brought into scope elsewhere, otherwise --help raises NameError.
    helpParser = argparse.ArgumentParser(description="", add_help=False, parents=[dataParser, RankingExpHelper.newAlgoParser(defaultAlgoArgs)])
    helpParser.print_help()
    # sys.exit() rather than the site-provided exit() helper, which is meant
    # for the interactive shell and is not guaranteed to exist.
    sys.exit()

#Load/create the dataset
X = DatasetUtils.movieLens()
(m, n) = X.shape

# Default algorithm parameters for this dataset.
defaultAlgoArgs.u = 0.1
defaultAlgoArgs.ks = numpy.array([32, 64, 128])

dataArgs.extendedDirName = ""
dataArgs.extendedDirName += "MovieLens"

# print args #
logging.info("Running on " + dataArgs.extendedDirName)
logging.info("Data params:")
# Log every data argument in alphabetical order of its name.
for key in sorted(vars(dataArgs)):
    logging.info(" " + str(key) + ": " + str(getattr(dataArgs, key)))