def main(argv=None):
    start_time = time.time()
    # Add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    # Read k, tw_width, tw_step and cls_rank_iterator of the insights experiment to use the same settings
    exp_ins_settings = pdp.get_exp_ins_settings(args.pdpfile)
    k = exp_ins_settings.k
    tw_width = exp_ins_settings.tw_width
    tw_step = exp_ins_settings.tw_step
    cls_rank_iterator = exp_ins_settings.cls_rank_iterator
    cls_rank_iterator_kwargs = exp_ins_settings.cls_rank_iterator_kwargs
    reuse = REUSE_CLASSES[args.reuse]["cls"]
    stop_w_soln = args.wsoln
    # Generate file names
    out_file = args.outfile if args.outfile else classify.gen_classifier_output_f_path(
        dataset, args.pdpfile, tw_width, tw_step, k, args.confthold, args.z,
        args.testsize, cls_rank_iterator, reuse, stop_w_soln)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("Classification experiment script launched with args: {}".format(str(vars(args))))
    # Create an interruption experiment engine
    engine = _create_exp_classifier_engine(dataset, args.pdpfile, tw_width, tw_step, k,
                                           args.confthold, args.z, args.testsize,
                                           reuse, stop_w_soln,
                                           cls_rank_iterator=cls_rank_iterator,
                                           cls_rank_iterator_kwargs=cls_rank_iterator_kwargs)
    # engine.run -> collected data
    processed_data = engine.run()
    # Create a result object
    output = classify.ExpClassifierOutput(
        settings=classify.ExpClassifierSettings(dataset, args.pdpfile, tw_width, tw_step, k,
                                                args.confthold, args.z, args.testsize,
                                                reuse, stop_w_soln,
                                                cls_rank_iterator=cls_rank_iterator,
                                                cls_rank_iterator_kwargs=cls_rank_iterator_kwargs),
        data=processed_data)
    logger.info("Classification experiment finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # Save the output
    output.save(out_file=out_file)
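
# Illustrative addition (not part of the original script): the standard entry-point
# guard these run scripts are assumed to use. Because main() accepts argv=None,
# argparse falls back to sys.argv[1:] when the script is executed from the command
# line, while tests or other modules can call main() with an explicit argument list.
if __name__ == "__main__":
    main()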
def main(argv=None):
    start_time = time.time()
    # Add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    # Generate file names
    out_file = args.outfile if args.outfile else exploit.gen_exploit_output_f_path(
        dataset, args.width, args.step, args.k, args.testsize)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("Exploit Candidates Iteration experiment script launched with args: {}".format(
        str(vars(args))))
    # Create the experiment engine
    engine = _create_exp_exploit_engine(dataset, args.width, args.step, args.k, args.testsize)
    # engine.run -> experiment data
    exploit_data = engine.run()
    # Create a result object
    output = exploit.ExpExploitOutput(
        settings=exploit.ExpExploitSettings(dataset, args.width, args.step, args.k,
                                            args.testsize, engine.cb.size()),
        data=exploit_data)
    logger.info("Exploit Candidates Iteration experiment finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # Save the output
    output.save(out_file=out_file)
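
# Illustrative addition (hypothetical, not in the original script): because main()
# takes an argv parameter, the exploit experiment can also be driven programmatically
# without touching sys.argv. The flag spellings below are assumptions inferred from
# the args.* attribute names and may differ from what _parse_args actually defines.
#
#     main(["~/datasets/GunPoint_TRAIN.arff",
#           "--width", "40", "--step", "10", "--k", "9", "--testsize", "0.2"])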
def main(argv=None):
    start_time = time.time()
    # Add and parse arguments
    args = _parse_args(argv)
    pdp_file = args.outfile
    log_file = args.logfile
    # Generate file names
    if not pdp_file:
        pdp_file = pdp.gen_pdp_f_path(args.expfile, args.calcstep, args.qstep)
    if not log_file:
        log_file = common.gen_log_file(pdp_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("PDP generation script launched with args: {}".format(str(vars(args))))
    # Build the PDP
    pdp_data, calc_step_actual = pdp.build_pdp(os.path.expanduser(args.expfile),
                                               calc_step=args.calcstep,
                                               q_step=args.qstep)
    # Create an output object
    pdp_output = pdp.PDPOutput(
        settings=pdp.PDPSettings(experiment=args.expfile,
                                 calc_step_arg=args.calcstep,
                                 calc_step=calc_step_actual,
                                 q_step=args.qstep),
        data=pdp_data)
    logger.info("PDP generation finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # Save the PDP
    pdp_output.save(out_file=pdp_file)
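
# Illustrative sketch (hypothetical, not the repository's actual parser): the argv
# pass-through convention these scripts rely on. parse_args(None) reads sys.argv[1:],
# so main(argv=None) works both from the shell and when called from other code.
# The argument names and defaults below are assumptions for demonstration only.
def _parse_args_sketch(argv=None):
    import argparse
    parser = argparse.ArgumentParser(description="Generate a PDP (sketch)")
    parser.add_argument("expfile", help="insights experiment output file")
    parser.add_argument("--calcstep", type=float, default=None, help="calculation step")
    parser.add_argument("--qstep", type=float, default=None, help="quantile step")
    parser.add_argument("--outfile", default=None, help="output file path")
    parser.add_argument("--logfile", default=None, help="log file path")
    return parser.parse_args(argv)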
def main(argv=None):
    start_time = time.time()
    # Add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    cls_rank_iterator = run_common.RANK_ITER_OPTIONS[args.iter]["cls"]
    cls_rank_iterator_kwargs = args.kwargsiter
    # Generate file names
    out_file = args.outfile if args.outfile else insights.gen_insights_output_f_path(
        dataset, args.width, args.step, args.k, args.testsize, cls_rank_iterator)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("Insights experiment script launched with args: {}".format(str(vars(args))))
    # Create the experiment engine
    engine = _create_exp_insights_engine(dataset, args.width, args.step, args.k,
                                         args.testsize, n_exp=args.runs,
                                         cls_rank_iterator=cls_rank_iterator,
                                         cls_rank_iterator_kwargs=cls_rank_iterator_kwargs)
    # engine.run -> processed data
    processed_insights = engine.run()
    # Create a result object
    output = insights.ExpInsightsOutput(
        settings=insights.ExpInsightsSettings(dataset, args.width, args.step, args.k,
                                              args.testsize, engine.cb.size(),
                                              cls_rank_iterator, cls_rank_iterator_kwargs),
        data=processed_insights)
    logger.info("Insights experiment finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # Save the output
    output.save(out_file=out_file)
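
# Illustrative sketch (hypothetical class and option names): run_common.RANK_ITER_OPTIONS,
# like REUSE_CLASSES in the classification script, is assumed to be a registry dict
# mapping a CLI option string to a dict that holds the implementing class under the
# "cls" key, so a flag value resolves to a rank-iterator class in a single lookup:
#
#     RANK_ITER_OPTIONS = {
#         "td": {"cls": TopDownRankIterator},
#         "ex": {"cls": ExploitCandidatesRankIterator},
#     }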
def main(argv=None):
    start_time = time.time()
    # Add and parse arguments
    args = _parse_args(argv)
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    bins = args.bins
    bin_width = 1. / bins
    # Number of decimal digits used in the bin labels, capped at 2
    decimals_bin_width = len(str(bin_width).split('.')[1])
    if decimals_bin_width > 2:
        decimals_bin_width = 2
    cols_bin = ["{0:.{1}f}".format(i, decimals_bin_width)
                for i in list(np.arange(0, 1. + bin_width, bin_width)[1:])]
    # Create the output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] + cols_bin)
    # Get datasets
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        datasets = [os.path.join(fpath, f) for f in files_]
    else:
        datasets = [os.path.expanduser(e) for e in args.datasets]
    test_size = args.testsize if 0. < args.testsize < 1. else int(args.testsize)
    update = args.update
    dec_digits = args.dec
    float_formatter = lambda x: "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x
    # Set logger
    save_fn = "similarity_distr_(x{n})_w_{w}_s_{s}_t_{t}{u}".format(
        n=len(datasets), w=str(args.width), s=str(args.step), t=str(test_size),
        u="_u_{}".format(update) if update is not None else "")
    save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
    log_file = common.gen_log_file(save_fpath)
    logger = common.initialize_logger(console_level="INFO",
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level="INFO")
    logger.info("Case base similarity distribution script launched with args: {}".format(
        str(vars(args))))
    # Compute the similarity distribution for each case base
    for dataset in datasets:
        dataset_name = os.path.splitext(os.path.basename(os.path.expanduser(dataset)))[0]
        logger.info("Dataset: {}".format(dataset_name))
        # Get the similarity for the dataset
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
        for tw_width in args.width:
            logger.info(".. TW width: {}".format(tw_width))
            for tw_step in args.step:
                logger.info(".. TW step: {}".format(tw_step))
                # Read the dataset -> case base
                cb = ts.gen_cb(dataset=dataset, tw_width=tw_width, tw_step=tw_step)
                distr_hist = sim_hist(cb, similarity, bins, test_size, update)
                # logger.info(".... Distribution:\n{}".format(
                #     pd.DataFrame([distr_hist * 100], columns=cols_bin).to_string(
                #         index=False, float_format=float_formatter)))
                dict_distr = {LBL_DATASET: dataset_name,
                              LBL_FWIDTH: run_common.time_window_width_str(tw_width),
                              LBL_FSTEP: tw_step}
                dict_distr.update(dict(zip(cols_bin, distr_hist * 100.)))
                # Add the distribution to the output dataframe
                # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
                df_output = pd.concat([df_output, pd.DataFrame([dict_distr])],
                                      ignore_index=True)
    # Export the dataframe to LaTeX
    if len(df_output) > 0:
        # Create a multi-index for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        df_output.to_latex(buf=save_fpath, float_format=float_formatter,
                           escape=True, multirow=True, index=True)
        logger.info("Similarity distribution saved as LaTeX table into '{}'.".format(save_fpath))
    else:
        logger.info("No similarity distribution data could be calculated.")
    logger.info("Script finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
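
# Illustrative sketch of what sim_hist is assumed to compute at its core
# (hypothetical implementation, not the repository's): a normalized histogram of
# pairwise case similarities over `bins` equal-width bins spanning [0, 1],
# matching the cols_bin labels built in main() above.
def _sim_hist_sketch(similarities, bins):
    """Return the fraction of similarity values that fall into each bin."""
    hist, _ = np.histogram(np.asarray(similarities), bins=bins, range=(0., 1.))
    return hist / max(hist.sum(), 1)  # normalize; guard against an empty input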