def create_diststats_jobs(context, distances, streams, maxd):
    # Compmake storage for results
    store = StoreResults()

    for id_distance in distances:
        for delta in range(1, maxd):
            for i, id_stream in enumerate(streams):
                key = dict(id_distance=id_distance, delta=delta, stream=id_stream)
                job_id = '%s-log%s-delta%s' % (id_distance, i, delta)
                store[key] = context.comp_config(compute_dist_stats, id_distance,
                                                 id_stream, delta,
                                                 job_id=job_id)

    for id_distance in distances:
        subset = store.select(id_distance=id_distance)
        stats = context.comp(compute_statistics, subset)
        report = context.comp(report_statistics, id_distance, stats)
        context.add_report(report, 'bydistance', id_distance=id_distance)

    subsets = create_subsets(distances)
    job_report(context, subsets, store)
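A minimal sketch of the StoreResults idiom used above: entries are indexed by attribute dictionaries, and select() narrows the store to matching entries. The field values here are placeholders, not part of the real pipeline.

from reprep.report_utils import StoreResults

store = StoreResults()
for id_distance in ['L2', 'L2w']:
    for delta in [1, 2]:
        # keys are dicts of attributes; values can be anything (here, strings)
        store[dict(id_distance=id_distance, delta=delta)] = 'stats-%s-%s' % (id_distance, delta)

# keyword-style select(), as in the second loop of create_diststats_jobs
only_l2 = store.select(id_distance='L2')  # the two entries with id_distance == 'L2'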
def jobs_tables_by_algo_rows_sample_groups(context, samples_groups, tables):
    source_descs = comp_store(Stats.all_descriptions())

    # Create a new store, adding the key "id_group"
    allstats = StoreResults()
    for id_group, samples in samples_groups.items():
        for key, value in samples.items():
            nkey = dict(id_group=id_group, **key)
            allstats[nkey] = value

    for id_statstable, stats in tables.items():
        for id_algo, samples in allstats.groups_by_field_value('id_algo'):
            job_id = 'byalgo-%s-%s' % (id_algo, id_statstable)
            r = context.comp(table_by_rows,
                             "byalgo-rows-sample-groups-%s-%s" % (id_algo, id_statstable),
                             samples=samples,
                             rows_field='id_group',  # one row per group
                             cols_fields=stats,  # which statistics for each col
                             source_descs=source_descs,
                             job_id=job_id)

            report_attrs = dict(id_statstable=id_statstable)
            report_attrs.update(samples.fields_with_unique_values())
            assert report_attrs['id_algo'] == id_algo

            context.add_report(r, 'byalgo-rows-sample-groups', **report_attrs)
def define_jobs_context(self, context):
    sizes = np.linspace(16, 256, 32).astype("int")
    # sizes = [16, 32, 64, 128, 256, 512]
    nobs = 500
    streams = ["test_gauss_drx1"]
    estimators = [
        "test_ddsest_unc_refine0",
        "test_ddsest_unc_refine0ns",  # don't stop
        "test_ddsest_unc_fast_order",
    ]
    max_displs = [0.1, 0.15, 0.2, 0.25, 0.3]

    def should_do(estimator, shape):
        if estimator in ["test_ddsest_unc_refine0", "test_ddsest_unc_refine0ns"]:
            return True
        if estimator == "test_ddsest_unc_fast_order":
            return shape <= 128
        assert False, estimator

    results = StoreResults()
    comp_stats = StoreResults()

    combs = iterate_context_names_quartet(context, sizes, max_displs, streams, estimators)
    for c, shape, max_displ, stream, estimator in combs:
        if not should_do(estimator, shape):
            continue
        id_stream = stream + "_%s_%s" % (shape, nobs)
        key = dict(
            length=nobs, shape=shape, stream=stream,
            estimator=estimator, id_stream=id_stream, max_displ=max_displ
        )
        learned = c.subtask(DDSLLearn, stream=id_stream, estimator=estimator,
                            max_displ=max_displ)
        results[key] = learned
        comp_stats[key] = compmake_execution_stats(learned)

    source_descs = {}

    # For each sample, show the cpu for each algorithm
    jobs_tables_by_sample(
        context,
        id_table="cpu",
        allstats=comp_stats,
        one_table_for_each="id_stream",
        cols_fields=["cpu_time", "wall_time"],
        rows_field="estimator",
        source_descs=source_descs,
    )

    estimators_subsets = get_estimators_subsets(estimators)
    # one plot for each group
    for id_group, estimators in estimators_subsets.items():
        c = context.child(id_group)
        group_runs = comp_stats.select(lambda k: k["estimator"] in estimators)
        report = c.comp(report_cputime, group_runs)
        c.add_report(report, "cputime", group=id_group)
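Note that select() is used here with a predicate over the key dictionary, in addition to the keyword form seen in create_diststats_jobs. A small sketch assuming both call styles are supported, as the snippets in this file suggest:

from reprep.report_utils import StoreResults

cs = StoreResults()
cs[dict(estimator='e1', shape=16)] = 'stats-a'
cs[dict(estimator='e2', shape=32)] = 'stats-b'

by_kwargs = cs.select(estimator='e1')            # keyword form
by_pred = cs.select(lambda k: k['shape'] <= 16)  # predicate form, as above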
def create_predstats_jobs(config, distances, streams, id_discdds, rm, maxd):
    # Compmake storage for results
    store = StoreResults()

    # Try to instance it
    # dds = config.discdds.instance(id_discdds)

    for delta in range(0, maxd):
        for i, (id_stream, id_dds) in enumerate(itertools.product(streams, id_discdds)):
            key = dict(delta=delta, id_stream=id_stream, id_discdds=id_dds)
            job_id = 'pred-%s-log%s-delta%s' % (id_dds, i, delta)
            store[key] = comp(compute_predstats, config, id_dds,
                              id_stream, delta, distances, job_id=job_id)

    comp(discdds_report, id_discdds, store, rm)

    for id_dds in id_discdds:
        # pdb.set_trace()
        subsets = create_subsets(distances)
        job_report_one(subsets, id_dds, store, rm)
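Note that i enumerates (stream, dds) pairs rather than streams, so the log%s component of the job id numbers combinations. The enumeration order is plain itertools.product, e.g. with illustrative ids:

import itertools

streams = ['s1', 's2']
id_discdds = ['d1', 'd2']
for i, (id_stream, id_dds) in enumerate(itertools.product(streams, id_discdds)):
    print(i, id_stream, id_dds)
# 0 s1 d1
# 1 s1 d2
# 2 s2 d1
# 3 s2 d2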
def __init__(self, context):
    from quickapp.compmake_context import CompmakeContext
    assert isinstance(context, CompmakeContext), context
    from reprep.report_utils import StoreResults
    self.allresources = StoreResults()
    self.providers = defaultdict(list)  # rtype => list of providers
    self.make_prefix = {}  # rtype => function to make prefix
    self._context = context
def create_uncert_stats_jobs(config, id_ddss, id_streams, max_delta, outdir):
    store = StoreResults()
    id_distances = ['L2', 'L2w']
    for id_dds in id_ddss:
        for id_stream in id_streams:
            for delta in range(1, max_delta):
                key = dict(id_discdds=id_dds, id_stream=id_stream, delta=delta)
                store[key] = comp(compute_predstats, config, id_dds,
                                  id_stream, delta, id_distances)
    return store
def sort_by_type(allreports_filename):
    type2reports = {}
    for report_type, xs in allreports_filename.groups_by_field_value('report'):
        fields = xs.remove_field('report')
        # print(fields)
        from reprep.report_utils import StoreResults
        res = StoreResults()
        for k, v in list(fields.items()):
            res[k] = v
        type2reports[report_type] = res
    return type2reports
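A sketch of what sort_by_type produces, assuming the groups_by_field_value / remove_field behavior implied by the snippets above; the keys and filenames are invented:

from reprep.report_utils import StoreResults

allreports_filename = StoreResults()
allreports_filename[dict(report='cputime', group='g1')] = 'out/cputime-g1.html'
allreports_filename[dict(report='cputime', group='g2')] = 'out/cputime-g2.html'
allreports_filename[dict(report='bydistance', id_distance='L2')] = 'out/bydistance-L2.html'

type2reports = sort_by_type(allreports_filename)
# type2reports['cputime'] is a StoreResults keyed by the remaining fields,
# e.g. dict(group='g1') -> 'out/cputime-g1.html'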
def __init__(self, context, outdir, index_filename=None):
    # TODO: remove context
    self.context = context
    self.outdir = outdir
    if index_filename is None:
        index_filename = os.path.join(self.outdir, 'report_index.html')
    self.index_filename = index_filename
    from reprep.report_utils import StoreResults
    self.allreports = StoreResults()
    self.allreports_filename = StoreResults()

    # report_type -> set of keys necessary
    self._report_types_format = {}

    self.html_resources_prefix = ''

    # check if we are called more than once; would be a bug
    self.index_job_created = False

    self.static_dir = os.path.join(self.outdir, 'reprep-static')
def comp_comb(function, *args, **kwargs):
    sr = StoreResults()
    for a, b, chosen in all_args_combinations(*args, **kwargs):

        def s(k):
            # integer keys do not contribute to the job id
            return "" if isinstance(k, int) else str(k)

        pid = '-'.join(['%s%s' % (s(k), v) for k, v in chosen.items()])
        job_id = '%s-%s' % (function.__name__, pid)
        b['job_id'] = job_id
        job = comp(function, *a, **b)
        sr[chosen] = job
    return sr
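A hedged usage sketch for comp_comb: it schedules function once per combination of the list-valued arguments and returns the promises keyed by the chosen combination. This assumes all_args_combinations expands list-valued keywords element-wise, and it must run inside a compmake session since comp() is called.

def evaluate(alpha, beta):
    return alpha * beta

sr = comp_comb(evaluate, alpha=[1, 2], beta=[10, 20])
# sr maps e.g. dict(alpha=1, beta=10) -> Promise of evaluate(1, 10),
# scheduled under a job id like 'evaluate-alpha1-beta10'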
def create_predstats_jobs(context, distances, streams, id_discdds, maxd):
    # Compmake storage for results
    store = StoreResults()

    # Try to instance it
    # dds = config.discdds.instance(id_discdds)

    for delta in range(1, maxd):
        for i, id_stream in enumerate(streams):
            key = dict(delta=delta, id_stream=id_stream, id_discdds=id_discdds)
            job_id = 'pred-%s-log%s-delta%s' % (id_discdds, i, delta)
            store[key] = context.comp_config(compute_predstats, id_discdds,
                                             id_stream, delta, distances,
                                             job_id=job_id)

    subsets = create_subsets(distances)
    job_report_one(context, subsets, id_discdds, store)
def call_comb(function, *args, **kwargs):
    sr = StoreResults()
    for a, b, chosen in all_args_combinations(*args, **kwargs):
        sr[chosen] = function(*a, **b)
    return sr
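call_comb is the eager counterpart of comp_comb: the same combination expansion, but function is called directly instead of being scheduled as a compmake job. Under the same assumption about all_args_combinations:

def area(width, height):
    return width * height

table = call_comb(area, width=[2, 3], height=[4])
# table is a StoreResults mapping e.g. dict(width=2, height=4) -> 8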
def create_bench_jobs(context, batch, alltestcases):
    '''
        :param context:
        :param batch:
        :param alltestcases: list of testcases that are going to be autogenerated.
    '''
    config = get_dp_config()
    algos = config.algos.expand_names(batch.algorithms)

    # if not alltestcases:
    #     testcases = config.testcases.expand_names(batch.testcases)
    # else:
    #     testcases = expand_string(batch.testcases, alltestcases)

    # dict(id_algo, id_tc, id_discdds, plan_length) => PlanningResults
    allplanning = StoreResults()
    # dict(id_algo, id_tc, id_discdds, plan_length) => resultstats
    allruns = StoreResults()
    # dict(id_algo, id_tc) => DiffeoPlanningAlgorithm
    algoinit = StoreResults()

    # Let's instantiate all test cases and sort them by discdds,
    # so that we do only one initialization per algorithm.
    # id_discdds -> {id_tc: tc}
    id_discdds2testcases = defaultdict(lambda: {})
    # alltc = {}  # id -> Promise TestCase
    id2t = batch.get_id_discdds2testcases(alltestcases)
    for idd, idd_tcs in id2t.items():
        for tcc in idd_tcs:
            tc = context.comp(instantiate_testcase, tcc)
            # alltc[tcc]
            id_discdds2testcases[idd][tcc] = tc

    # Load discdds before, they might be automatically generated
    # as well, so we want the generation to happen only once.
    discdds = {}  # id -> Promise DiffeoSystem
    for id_discdds in id_discdds2testcases:
        discdds[id_discdds] = context.comp(instantiate_discdds, id_discdds)

    # for each algorithm
    for id_algo in algos:
        config.algos[id_algo]  # check it is in the configuration

        # for each dynamics
        for id_discdds, dds in discdds.items():
            job_id = 'init-%s-%s' % (id_algo, id_discdds)

            # initialize the algorithm for that dynamics
            algo = context.comp(init_algorithm, id_algo, id_discdds,
                                discdds[id_discdds], job_id=job_id)
            algoinit[dict(id_algo=id_algo, id_discdds=id_discdds)] = algo

            # for each test case in that dynamics
            for id_tc, tc in id_discdds2testcases[id_discdds].items():
                # run the planning
                job_id = 'plan-%s-%s' % (id_algo, id_tc)
                result = context.comp(run_planning, id_algo, id_tc, tc, algo,
                                      job_id=job_id)

                # compute statistics
                result_stats = context.comp(run_planning_stats, result, dds, tc,
                                            job_id=job_id + '-stats')

                attrs = dict(id_algo=id_algo, id_tc=id_tc, id_discdds=id_discdds)
                warnings.warn('this was an important attribute to have...')
                # true_plan_length=len(tc.true_plan))
                allruns[attrs] = result_stats
                allplanning[attrs] = result

    jobs_report_algo_init(context, algoinit)
    jobs_report_tc(context, id_discdds2testcases)
    jobs_report_dds(context, discdds)

    allstats = StoreResults()
    for key, run in allruns.items():
        allstats[key] = context.comp(results2stats_dict, run,
                                     job_id=comp_stage_job_id(run, 'statsdict'))

    jobs_tables(context, allstats)
    jobs_visualization(context, allruns)
def create_bench_jobs(config, algos, testcases, outdir):
    # dict(id_algo, id_tc, id_discdds, plan_length) => PlanningResults
    allplanning = StoreResults()
    # dict(id_algo, id_tc, id_discdds, plan_length) => resultstats
    allruns = StoreResults()
    # dict(id_algo, id_tc) => DiffeoPlanningAlgorithm
    algoinit = StoreResults()

    rm = ReportManager(outdir)

    comp_store(config, job_id='config')

    # Let's instantiate all test cases and sort them by discdds,
    # so that we do only one initialization per algorithm.
    id_discdds2testcases = defaultdict(lambda: {})
    alltc = {}  # id -> Promise TestCase
    for id_tc in testcases:
        alltc[id_tc] = comp(instantiate_testcase, comp_store(config), id_tc)
        # Do it once, now, to get its dds
        tc1 = config.testcases.instance(id_tc)
        id_discdds2testcases[tc1.id_discdds][id_tc] = tc1

    # Load discdds before, they might be automatically generated
    # as well, so we want the generation to happen only once.
    discdds = {}  # id -> Promise DiffeoSystem
    for id_discdds in id_discdds2testcases:
        discdds[id_discdds] = comp(instantiate_discdds, comp_store(config), id_discdds)

    # for each algorithm
    for id_algo in algos:
        config.algos[id_algo]  # check it is in the configuration

        # for each dynamics
        for id_discdds, dds in discdds.items():
            job_id = 'init-%s-%s' % (id_algo, id_discdds)

            # initialize the algorithm for that dynamics
            algo = comp(init_algorithm, comp_store(config), id_algo, id_discdds,
                        discdds[id_discdds], job_id=job_id)
            algoinit[dict(id_algo=id_algo, id_discdds=id_discdds)] = algo

            # for each test case in that dynamics
            for id_tc, tc in id_discdds2testcases[id_discdds].items():
                # run the planning
                job_id = 'plan-%s-%s' % (id_algo, id_tc)
                result = comp(run_planning, id_algo, id_tc, alltc[id_tc], algo,
                              job_id=job_id)

                # compute statistics
                result_stats = comp(run_planning_stats, result, dds, alltc[id_tc],
                                    job_id=job_id + '-stats')

                attrs = dict(id_algo=id_algo, id_tc=id_tc,
                             id_discdds=tc.id_discdds,
                             true_plan_length=len(tc.true_plan))
                allruns[attrs] = result_stats
                allplanning[attrs] = result

    jobs_report_algo_init(config, rm, algoinit)
    jobs_report_tc(config, rm, testcases, alltc)
    jobs_report_dds(config, rm, discdds)

    allstats = StoreResults()
    for key, run in allruns.items():
        allstats[key] = comp(results2stats_dict, run,
                             job_id=comp_stage_job_id(run, 'statsdict'))

    jobs_tables(allstats, rm)
    jobs_visualization(config, allruns, rm)

    rm.create_index_job()
class ReportManager(object):
    # TODO: make it use a context

    def __init__(self, outdir, index_filename=None):
        self.outdir = outdir
        if index_filename is None:
            index_filename = os.path.join(self.outdir, 'report_index.html')
        self.index_filename = index_filename
        self.allreports = StoreResults()
        self.allreports_filename = StoreResults()

        # report_type -> set of keys necessary
        self._report_types_format = {}

        self.html_resources_prefix = ''

        # check if we are called more than once; would be a bug
        self.index_job_created = False

    def set_html_resources_prefix(self, prefix):
        """
            Sets the prefix for the resources filename.

            Example: set_html_resources_prefix('jbds')
        """
        self.html_resources_prefix = prefix + '-'

    def _check_report_format(self, report_type, **kwargs):
        keys = sorted(list(kwargs.keys()))
        # print('report %r %r' % (report_type, keys))
        if not report_type in self._report_types_format:
            self._report_types_format[report_type] = keys
        else:
            keys0 = self._report_types_format[report_type]
            if not keys == keys0:
                msg = 'Report %r %r' % (report_type, keys)
                msg += '\ndoes not match previous format %r' % keys0
                raise ValueError(msg)

    def get(self, report_type, **kwargs):
        key = frozendict2(report=report_type, **kwargs)
        return self.allreports[key]

    @contract(report_type='str')
    def add(self, report, report_type, **kwargs):
        """
            Adds a report to the collection.

            :param report: Promise of a Report object
            :param report_type: A string that describes the "type" of the report
            :param kwargs: str->str,int,float parameters used for grouping
        """
        if not isinstance(report_type, str):
            msg = 'Need a string for report_type, got %r.' % describe_value(report_type)
            raise ValueError(msg)

        from compmake import Promise
        if not isinstance(report, Promise):
            msg = ('ReportManager is meant to be given Promise objects, '
                   'which are the output of comp(). Obtained: %s'
                   % describe_type(report))
            raise ValueError(msg)

        # check the format is ok
        self._check_report_format(report_type, **kwargs)

        key = frozendict2(report=report_type, **kwargs)

        if key in self.allreports:
            msg = 'Already added report for %s' % key
            msg += '\n its value is %s' % self.allreports[key]
            msg += '\n new value would be %s' % report
            raise ValueError(msg)

        self.allreports[key] = report

        report_type_sane = report_type.replace('_', '')
        key_no_report = dict(**key)
        del key_no_report['report']
        basename = self.html_resources_prefix + report_type_sane
        if key_no_report:
            basename += '-' + basename_from_key(key_no_report)

        dirname = os.path.join(self.outdir, report_type_sane)
        filename = os.path.join(dirname, basename)
        self.allreports_filename[key] = filename + '.html'

    def create_index_job(self):
        if self.index_job_created:
            msg = 'create_index_job() was already called once'
            raise ValueError(msg)
        self.index_job_created = True

        if not self.allreports:
            # no reports necessary
            return

        from compmake import comp

        # Do not pass as argument, it will take lots of memory!
        # XXX FIXME: there should be a way to make this update or not,
        # otherwise new reports do not appear.
        optimize_space = False
        if optimize_space and len(self.allreports_filename) > 100:
            allreports_filename = comp_store(self.allreports_filename, 'allfilenames')
        else:
            allreports_filename = self.allreports_filename

        type2reports = {}
        for report_type, xs in self.allreports_filename.groups_by_field_value('report'):
            type2reports[report_type] = StoreResults(**xs.remove_field('report'))

        for key in self.allreports:
            job_report = self.allreports[key]
            filename = self.allreports_filename[key]

            write_job_id = job_report.job_id + '-write'

            # Create the links to reports of the same type
            report_type = key['report']
            other_reports_same_type = type2reports[report_type]
            key = dict(**key)
            del key['report']

            # find the closest report of each different type
            others = []
            for other_type, other_type_reports in type2reports.items():
                if other_type == report_type:
                    continue
                best = get_most_similar(other_type_reports, key)
                if best is not None:
                    # print('Best match:\n- %s %s\n- %s %s' % (report_type, key,
                    #                                          other_type, best))
                    others.append((other_type, best, other_type_reports[best]))

            report_type_sane = report_type.replace('_', '')
            report_nid = self.html_resources_prefix + report_type_sane
            if key:
                report_nid += '-' + basename_from_key(key)

            comp(write_report_and_update,
                 report=job_report, report_nid=report_nid,
                 report_html=filename,
                 all_reports=allreports_filename,
                 index_filename=self.index_filename,
                 write_pickle=False,
                 this_report=key,
                 other_reports_same_type=other_reports_same_type,
                 most_similar_other_type=others,
                 job_id=write_job_id)
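Putting ReportManager together, a hedged end-to-end sketch; make_report is a placeholder for any function returning a reprep Report, and this must run inside a compmake session so that comp() yields the Promise objects that add() requires.

from compmake import comp

rm = ReportManager(outdir='out')
rm.set_html_resources_prefix('demo')

for id_algo in ['a1', 'a2']:
    report = comp(make_report, id_algo, job_id='report-%s' % id_algo)
    rm.add(report, 'byalgo', id_algo=id_algo)

# must be called exactly once, after all reports have been added
rm.create_index_job()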