def main():
    import os
    import logging
    import glob
    from optparse import OptionParser
    from collections import defaultdict

    from mrec import load_sparse_matrix
    from mrec.evaluation.metrics import compute_main_metrics, compute_hit_rate
    from mrec.evaluation import Evaluator
    from mrec.evaluation.metrics import print_report
    from filename_conventions import get_testfile, get_recsfile

    logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s')

    parser = OptionParser()
    parser.add_option('--input_format', dest='input_format', help='format of training dataset(s) tsv | csv | mm (matrixmarket) | fsm (fast_sparse_matrix)')
    parser.add_option('--test_input_format', dest='test_input_format', default='npz', help='format of test dataset(s) tsv | csv | mm (matrixmarket) | npz (numpy binary) (default: %default)')
    parser.add_option('--train', dest='train', help='glob specifying path(s) to training dataset(s) IMPORTANT: must be in quotes if it includes the * wildcard')
    parser.add_option('--recsdir', dest='recsdir', help='directory containing tsv files of precomputed recommendations')
    parser.add_option('--metrics', dest='metrics', default='main', help='which set of metrics to compute, main|hitrate (default: %default)')
    parser.add_option('--description', dest='description', help='description of model which generated the recommendations')

    metrics_funcs = {'main': compute_main_metrics, 'hitrate': compute_hit_rate}

    (opts, args) = parser.parse_args()
    if not opts.input_format or not opts.train or not opts.recsdir \
            or opts.metrics not in metrics_funcs:
        parser.print_help()
        raise SystemExit

    opts.train = os.path.abspath(os.path.expanduser(opts.train))
    opts.recsdir = os.path.abspath(os.path.expanduser(opts.recsdir))

    evaluator = Evaluator(metrics_funcs[opts.metrics], max_items=20)

    trainfiles = glob.glob(opts.train)

    all_metrics = defaultdict(list)
    for trainfile in trainfiles:
        logging.info('processing {0}...'.format(trainfile))
        testfile = get_testfile(trainfile)
        recsfile = get_recsfile(trainfile, opts.recsdir)
        testdata = load_sparse_matrix(opts.test_input_format, testfile).tocsr()
        # evaluate the precomputed recommendations against all test users
        cum_metrics, count = evaluator.process(testdata, recsfile, 0, testdata.shape[0])
        if cum_metrics is not None:
            for m in cum_metrics:
                all_metrics[m].append(float(cum_metrics[m]) / count)

    print_report([opts.description], [all_metrics])
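# Example invocation of the script above (illustrative only: the script name and
# data paths are hypothetical, the flags are the ones defined by its OptionParser):
#
#   python evaluate_recs.py --input_format tsv --test_input_format npz \
#       --train "data/train.*" --recsdir data/recs \
#       --metrics main --description "precomputed item similarity recs"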
def main():
    import os
    import logging
    import glob
    from optparse import OptionParser
    from collections import defaultdict
    from ipyparallel import Client

    from mrec.evaluation.metrics import compute_main_metrics, compute_hit_rate
    from mrec.evaluation import Evaluator
    from mrec.evaluation.metrics import print_report
    from filename_conventions import get_testfile, get_modelfile  # assumed: same helper module used by the evaluation script above

    logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s')

    parser = OptionParser()
    parser.add_option('--mb_per_task', dest='mb_per_task', type='int', default=None, help='approximate memory limit per task in MB, so total memory usage is num_engines * mb_per_task (default: share all available RAM across engines)')
    parser.add_option('--input_format', dest='input_format', help='format of training dataset(s) tsv | csv | mm (matrixmarket) | fsm (fast_sparse_matrix)')
    parser.add_option('--test_input_format', dest='test_input_format', default='npz', help='format of test dataset(s) tsv | csv | mm (matrixmarket) | npz (numpy binary) (default: %default)')
    parser.add_option('--train', dest='train', help='glob specifying path(s) to training dataset(s) IMPORTANT: must be in quotes if it includes the * wildcard')
    parser.add_option('--modeldir', dest='modeldir', help='directory containing trained models')
    parser.add_option('--outdir', dest='outdir', help='directory for output files')
    parser.add_option('--metrics', dest='metrics', default='main', help='which set of metrics to compute, main|hitrate (default: %default)')
    parser.add_option('--item_feature_format', dest='item_feature_format', help='format of item features tsv | csv | mm (matrixmarket) | npz (numpy arrays)')
    parser.add_option('--item_features', dest='item_features', help='path to sparse item features in tsv format (item_id,feature_id,val)')
    parser.add_option('--overwrite', dest='overwrite', action='store_true', default=False, help='overwrite existing files in outdir (default: %default)')
    parser.add_option('--packer', dest='packer', default='json', help='packer for IPython.parallel (default: %default)')
    parser.add_option('--add_module_paths', dest='add_module_paths', help='optional comma-separated list of paths to append to pythonpath (useful if you need to import uninstalled modules to IPython engines on a cluster)')

    metrics_funcs = {'main': compute_main_metrics, 'hitrate': compute_hit_rate}

    (opts, args) = parser.parse_args()
    if not opts.input_format or not opts.train or not opts.outdir \
            or not opts.modeldir or opts.metrics not in metrics_funcs:
        parser.print_help()
        raise SystemExit

    opts.train = os.path.abspath(os.path.expanduser(opts.train))
    opts.modeldir = os.path.abspath(os.path.expanduser(opts.modeldir))
    opts.outdir = os.path.abspath(os.path.expanduser(opts.outdir))

    # create an ipython client
    c = Client(packer=opts.packer)
    view = c.load_balanced_view()

    if opts.mb_per_task is None:
        import psutil
        num_engines = len(view)
        # ONE_MB (bytes in a megabyte, 2**20) is assumed to be defined at module level
        opts.mb_per_task = psutil.virtual_memory().available / ONE_MB / (num_engines + 1)  # don't take *all* the memory

    if opts.add_module_paths:
        c[:].execute('import sys')
        for path in opts.add_module_paths.split(','):
            logging.info('adding {0} to pythonpath on all engines'.format(path))
            c[:].execute("sys.path.append('{0}')".format(path))

    evaluator = Evaluator(metrics_funcs[opts.metrics], max_items=20)

    trainfiles = glob.glob(opts.train)

    descriptions = set()
    all_metrics = defaultdict(list)
    for trainfile in trainfiles:
        logging.info('processing {0}...'.format(trainfile))
        modelfile = get_modelfile(trainfile, opts.modeldir)
        testfile = get_testfile(trainfile)
        # process() is assumed to be defined at module level in this script: it runs the
        # trained model against the test data on the engines and returns its description and metrics
        description, metrics = process(view, opts, modelfile, trainfile, testfile, opts.item_features, opts.outdir, evaluator)
        descriptions.add(description)
        if metrics is not None:
            for m in metrics:
                all_metrics[m].append(metrics[m])

    description = ' AND '.join(descriptions)
    if len(descriptions) > 1:
        logging.warning('You are aggregating metrics from different models! {}'.format(description))

    print_report([description], [all_metrics])
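# Example invocation of the parallel script above (illustrative only: script name and
# paths are hypothetical, the flags are those defined by its OptionParser; it requires
# a running ipyparallel cluster, e.g. started with `ipcluster start`):
#
#   python parallel_evaluate.py --input_format tsv --train "data/train.*" \
#       --modeldir models --outdir results --metrics main \
#       --item_feature_format tsv --item_features data/item_features.tsv \
#       --mb_per_task 512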
def main():
    import os
    import logging
    import glob
    from optparse import OptionParser
    from collections import defaultdict
    from IPython.parallel import Client  # legacy import path; on newer IPython this lives in the separate ipyparallel package

    from mrec.evaluation.metrics import compute_main_metrics, compute_hit_rate
    from mrec.evaluation import Evaluator
    from mrec import load_recommender
    from mrec.evaluation.metrics import print_report
    from filename_conventions import get_testfile, get_modelfile  # assumed: same helper module used by the evaluation script above

    logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s')

    parser = OptionParser()
    parser.add_option('-n', '--num_engines', dest='num_engines', type='int', default=0, help='number of IPython engines to use')
    parser.add_option('--input_format', dest='input_format', help='format of training dataset(s) tsv | csv | mm (matrixmarket) | fsm (fast_sparse_matrix)')
    parser.add_option('--test_input_format', dest='test_input_format', default='npz', help='format of test dataset(s) tsv | csv | mm (matrixmarket) | npz (numpy binary) (default: %default)')
    parser.add_option('--train', dest='train', help='glob specifying path(s) to training dataset(s) IMPORTANT: must be in quotes if it includes the * wildcard')
    parser.add_option('--modeldir', dest='modeldir', help='directory containing trained models')
    parser.add_option('--outdir', dest='outdir', help='directory for output files')
    parser.add_option('--metrics', dest='metrics', default='main', help='which set of metrics to compute, main|hitrate (default: %default)')
    parser.add_option('--overwrite', dest='overwrite', action='store_true', default=False, help='overwrite existing files in outdir (default: %default)')
    parser.add_option('--packer', dest='packer', default='json', help='packer for IPython.parallel (default: %default)')
    parser.add_option('--add_module_paths', dest='add_module_paths', help='optional comma-separated list of paths to append to pythonpath (useful if you need to import uninstalled modules to IPython engines on a cluster)')

    metrics_funcs = {'main': compute_main_metrics, 'hitrate': compute_hit_rate}

    (opts, args) = parser.parse_args()
    if not opts.input_format or not opts.train or not opts.outdir or not opts.num_engines \
            or not opts.modeldir or opts.metrics not in metrics_funcs:
        parser.print_help()
        raise SystemExit

    opts.train = os.path.abspath(os.path.expanduser(opts.train))
    opts.modeldir = os.path.abspath(os.path.expanduser(opts.modeldir))
    opts.outdir = os.path.abspath(os.path.expanduser(opts.outdir))

    # create an ipython client
    c = Client(packer=opts.packer)
    view = c.load_balanced_view()

    if opts.add_module_paths:
        c[:].execute('import sys')
        for path in opts.add_module_paths.split(','):
            logging.info('adding {0} to pythonpath on all engines'.format(path))
            c[:].execute("sys.path.append('{0}')".format(path))

    evaluator = Evaluator(metrics_funcs[opts.metrics], max_items=20)

    trainfiles = glob.glob(opts.train)

    descriptions = set()
    all_metrics = defaultdict(list)
    for trainfile in trainfiles:
        logging.info('processing {0}...'.format(trainfile))
        modelfile = get_modelfile(trainfile, opts.modeldir)
        testfile = get_testfile(trainfile)
        # process() is assumed to be defined at module level in this script: it evaluates
        # the trained model on the engines and returns its description and metrics
        description, metrics = process(view, opts, modelfile, trainfile, testfile, opts.outdir, evaluator)
        descriptions.add(description)
        if metrics is not None:
            for m in metrics:
                all_metrics[m].append(metrics[m])

    description = ' AND '.join(descriptions)
    if len(descriptions) > 1:
        logging.warning('You are aggregating metrics from different models! {}'.format(description))

    print_report([description], [all_metrics])
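# Example invocation of the script above (illustrative only: script name and paths are
# hypothetical, the flags are those defined by its OptionParser; it assumes IPython
# engines have already been started, e.g. with `ipcluster start -n 4`):
#
#   python parallel_evaluate_legacy.py -n 4 --input_format tsv \
#       --train "data/train.*" --modeldir models --outdir results --metrics hitrate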