def learn_synthetic(input_directory, output_directory, runs, sample_size, processes, time_out,
                    learn_options: LearnOptions):
    commands = []
    db = get_synthetic_db(input_directory)
    for name in db.getall():
        entry = db.get(name)

        # Collect exactly `runs` prepared samples of the requested size for this problem
        matching_samples = []
        for sample in entry["samples"]:
            if sample["sample_size"] == sample_size and len(matching_samples) < runs:
                matching_samples.append(sample)
        if len(matching_samples) != runs:
            raise RuntimeError("Insufficient samples available, prepare more samples first")

        # Build one incal-track command per (problem, sample) pair
        for sample in matching_samples:
            detail_learn_options = learn_options.copy()
            detail_learn_options.domain = os.path.join(input_directory, "{}.density".format(name))
            detail_learn_options.data = os.path.join(input_directory, sample["samples_file"])
            detail_learn_options.labels = os.path.join(input_directory, sample["labels_file"])
            export_file = "{}{sep}{}.{}.{}.result".format(
                output_directory, name, sample_size, sample["seed"], sep=os.path.sep)
            log_file = "{}{sep}{}.{}.{}.log".format(
                output_directory, name, sample_size, sample["seed"], sep=os.path.sep)
            if not os.path.exists(os.path.dirname(export_file)):
                os.makedirs(os.path.dirname(export_file))
            commands.append("incal-track {} --export {} --log {}".format(
                detail_learn_options.print_arguments(), export_file, log_file))

    run_commands(commands, processes, time_out)
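
# Hedged usage sketch (not part of the original module): every path and parameter
# value below is an illustrative assumption; learn_synthetic only requires that the
# samples were prepared beforehand with a matching sample_size and run count.
def _example_learn_synthetic():
    options = LearnOptions()
    learn_synthetic(
        input_directory="input/synthetic",    # assumed location of *.density files and samples
        output_directory="output/synthetic",  # per-run .result and .log files land here
        runs=10, sample_size=1000,            # must match the prepared samples exactly
        processes=4, time_out=200,            # parallel workers, per-command timeout in seconds
        learn_options=options,
    )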
def learn_inc(_data, _labels, _i, _k, _h):
    # Closes over `domain` and `directory` from the enclosing scope
    # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
    strategy = RandomViolationsStrategy(10)
    learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
    initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
    learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h),
                                          domain.real_vars[0], domain.real_vars[1], None, False))
    return learner.learn(domain, _data, _labels, initial_indices)
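
# Hedged sketch (mirrors the call above, not original code): initial_random(n)
# returns a selection function which, applied to the index list, picks n random
# starting examples for the incremental learner.
def _example_initial_selection(data):
    return LearnOptions.initial_random(20)(list(range(len(data))))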
def get_experiment(res_path=None):
    def import_handler(parameters_dict, results_dict, config_dict):
        # Remap any stored "res/..." path onto the local resource root
        for key, entry in parameters_dict.items():
            if isinstance(entry, str):
                index = entry.find("res/")
                if index >= 0:
                    parameters_dict[key] = res_path + os.path.sep + entry[index + 4:]

    config = Options()
    config.add_option("export", str)
    return Experiment(LearnOptions(), LearnResults(), config, import_handler if res_path else None)
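
# Hedged usage sketch (not original code): without res_path no import handler is
# installed; with it, any stored path containing "res/" is remapped under the given
# root, e.g. "old/res/x.density" becomes "/data/res" + os.path.sep + "x.density".
def _example_get_experiment():
    plain = get_experiment()                    # no path rewriting
    remapped = get_experiment(res_path="/data/res")  # illustrative root, an assumption
    return plain, remapped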
def learn_inc(_data, _labels, _i, _k, _h):
    # Closes over domain, thresholds, bg_knowledge, negative_bootstrap,
    # symmetry_breaking and seed from the enclosing scope
    strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds,
                                background_knowledge=bg_knowledge)
    if negative_bootstrap > 0:
        _data, _labels = OneClassStrategy.add_negatives(domain, _data, _labels, thresholds,
                                                        negative_bootstrap)
    learner = KCnfSmtLearner(_k, _h, strategy, symmetry_breaking)
    random.seed(seed)
    initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
    return learner.learn(domain, _data, _labels, initial_indices)
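
# Hedged sketch (mirrors the add_negatives call in learn_inc above): pad a
# positive-only data set with generated negative examples. The count of 100 is an
# illustrative value, as is the uniform per-variable threshold mapping.
def _example_negative_bootstrap(domain, data, labels):
    thresholds = {v: 0.1 for v in domain.real_vars}  # assumed threshold format
    return OneClassStrategy.add_negatives(domain, data, labels, thresholds, 100)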
def learn_inc(_data, _labels, _i, _k, _h):
    strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
    # Alternative with background knowledge:
    # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds,
    #                             background_knowledge=(a | b) & (~a | ~b))
    learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
    initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
    # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
    learner.add_observer(PlottingObserver(domain, "test_output/bg",
                                          "run_{}_{}_{}".format(_i, _k, _h),
                                          domain.real_vars[0], domain.real_vars[1], None, False))
    return learner.learn(domain, _data, _labels, initial_indices)
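
# Hedged sketch of the commented-out background-knowledge variant above: with pysmt,
# (a | b) & (~a | ~b) encodes "exactly one of a, b". The Symbol construction and the
# infix-notation switch are assumptions about the surrounding setup, not original code.
def _example_background_knowledge(thresholds):
    import pysmt.environment
    from pysmt.shortcuts import Symbol
    from pysmt.typing import BOOL
    pysmt.environment.get_env().enable_infix_notation = True  # allow |, &, ~ on formulas
    a, b = Symbol("a", BOOL), Symbol("b", BOOL)
    return OneClassStrategy(RandomViolationsStrategy(10), thresholds,
                            background_knowledge=(a | b) & (~a | ~b))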
def main():
    smt_lib_name = "smt-lib-benchmark"
    synthetic_name = "synthetic"
    parser = argparse.ArgumentParser(
        description="Interface with benchmark or synthetic data for experiments"
    )
    parser.add_argument("source")
    parser.add_argument("--sample_size", type=int, default=None)
    parser.add_argument("--runs", type=int, default=None)
    parser.add_argument("--input_dir", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    parser.add_argument("--processes", type=int, default=None)
    parser.add_argument("--time_out", type=int, default=None)

    task_parsers = parser.add_subparsers(dest="task")
    prepare_parser = task_parsers.add_parser("prepare")
    prepare_parser.add_argument("--reset_samples", type=bool, default=False)
    learn_parser = task_parsers.add_parser("learn")
    analyze_parser = task_parsers.add_parser("analyze")
    analyze_parser.add_argument("--dirs", nargs="+", type=str)
    analyze_parser.add_argument("--res_path", type=str, default=None)
    show_parsers = analyze_parser.add_subparsers()
    show_parser = show_parsers.add_parser("show")
    show.add_arguments(show_parser)

    learn_options = LearnOptions()
    learn_options.add_arguments(learn_parser)

    args = parser.parse_args()
    if args.task == "prepare":
        if args.source == smt_lib_name:
            prepare_smt_lib_benchmark()
            prepare_ratios()
            prepare_samples(args.runs, args.sample_size, args.reset_samples)
        elif args.source == synthetic_name:
            prepare_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size)
    elif args.task == "learn":
        learn_options.parse_arguments(args)
        if args.source == smt_lib_name:
            learn_benchmark(args.runs, args.sample_size, args.processes, args.time_out,
                            learn_options)
        elif args.source == synthetic_name:
            learn_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size,
                            args.processes, args.time_out, learn_options)
        elif args.source.startswith("ex"):
            # Source of the form "ex:<example_name>": sample a data set from a known
            # example formula and learn directly, without the command pipeline
            example_name = args.source.split(":", 1)[1]
            domain, formula = examples.get_by_name(example_name)
            np.random.seed(1)
            from pywmi.sample import uniform
            samples = uniform(domain, args.sample_size)
            from pywmi import evaluate
            labels = evaluate(domain, formula, samples)
            learn_options.set_value("domain", domain, False)
            learn_options.set_value("data", samples, False)
            learn_options.set_value("labels", labels, False)
            (formula, k, h), duration = learn_options.call(True)
            print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(
                duration, k, h, pretty_print(formula)))
    elif args.task == "analyze":
        analyze(args.dirs, args.res_path, show.parse_args(args))
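
if __name__ == "__main__":
    # Illustrative invocations (the script name is an assumption; top-level options
    # must precede the task subcommand, learner options follow it):
    #   python experiments.py smt-lib-benchmark --runs 10 --sample_size 1000 prepare
    #   python experiments.py smt-lib-benchmark --runs 10 --sample_size 1000 \
    #       --processes 4 --time_out 200 learn <learner options>
    #   python experiments.py ex:<example_name> --sample_size 1000 learn <learner options>
    main()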
def learn_benchmark(runs, sample_size, processes, time_out, learn_options: LearnOptions):
    # def filter1(entry):
    #     return "real_variables_count" in entry \
    #            and entry["real_variables_count"] + entry["bool_variables_count"] <= 10
    #
    # count = 0
    # boolean = 0
    # for name, entry, density_filename in select_benchmark_files(filter1):
    #     if entry["bool_variables_count"] > 0:
    #         boolean += 1
    #     count += 1
    # print("{} / {}".format(boolean, count))
    #
    # count = 0
    # boolean = 0
    # for name, entry, density_filename in select_benchmark_files(benchmark_filter):
    #     if entry["bool_variables_count"] > 0:
    #         boolean += 1
    #     count += 1
    # print("{} / {}".format(boolean, count))

    def learn_filter(_e):
        return benchmark_filter(_e) and "samples" in _e

    count = 0
    problems_to_learn = []
    for name, entry, density_filename in select_benchmark_files(learn_filter):
        if len(entry["bounds"]) > 0:
            best_ratio = min(rel_ratio(t[1]) for t in entry["bounds"])
            if best_ratio <= 0.3:
                # Keep bounds close to the best ratio and pick the smallest-volume one
                qualifying = [t for t in entry["bounds"]
                              if rel_ratio(t[1]) <= 0.3
                              and abs(rel_ratio(t[1]) - best_ratio) <= best_ratio / 5]
                selected = sorted(qualifying, key=lambda x: get_bound_volume(x[0]))[0]
                print(name, "\n", rel_ratio(selected[1]), best_ratio, selected[0],
                      entry["bool_variables_count"])
                count += 1
                selected_samples = [s for s in entry["samples"]
                                    if s["bounds"] == selected[0]
                                    and s["sample_size"] >= sample_size]
                if len(selected_samples) < runs:
                    raise RuntimeError("Insufficient number of data sets available ({} of {})"
                                       .format(len(selected_samples), runs))
                elif len(selected_samples) > runs:
                    selected_samples = selected_samples[:runs]
                for selected_sample in selected_samples:
                    problems_to_learn.append((name, density_filename, selected_sample))

    # Build one incal-track command per selected (problem, sample) pair
    commands = []
    for name, density_filename, selected_sample in problems_to_learn:
        detail_learn_options = learn_options.copy()
        detail_learn_options.domain = density_filename
        detail_learn_options.data = selected_sample["samples_filename"]
        detail_learn_options.labels = selected_sample["labels_filename"]
        export_file = "{}{sep}{}.{}.{}.result".format(
            get_benchmark_results_dir(), name, selected_sample["sample_size"],
            selected_sample["seed"], sep=os.path.sep)
        log_file = "{}{sep}{}.{}.{}.log".format(
            get_benchmark_results_dir(), name, selected_sample["sample_size"],
            selected_sample["seed"], sep=os.path.sep)
        if not os.path.exists(os.path.dirname(export_file)):
            os.makedirs(os.path.dirname(export_file))
        commands.append("incal-track {} --export {} --log {}".format(
            detail_learn_options.print_arguments(), export_file, log_file))
    run_commands(commands, processes, time_out)
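
# Hedged sketch (mirrors the selection rule in learn_benchmark above): keep bounds
# whose relative ratio is at most 0.3 and within 20% of the best ratio, then pick
# the smallest-volume one. `bounds` is the list of (bound, ratio_info) pairs stored
# in a benchmark entry, an assumption mirrored from the loop above.
def _example_select_bound(bounds):
    best_ratio = min(rel_ratio(t[1]) for t in bounds)
    qualifying = [t for t in bounds
                  if rel_ratio(t[1]) <= 0.3
                  and abs(rel_ratio(t[1]) - best_ratio) <= best_ratio / 5]
    return sorted(qualifying, key=lambda t: get_bound_volume(t[0]))[0]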