def compare_ground_truth():
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    data = reduce_k_length(10, data)
    target = '1'
    enable_i = True

    # number of runs to average over
    nb_launched = 5
    pool = Pool(processes=3, maxtasksperchild=1)

    iterations_limit = 50
    iteration_step = 1000

    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    # found with exhaustive search
    ground_truth = 0.008893952000000009

    for i in range(10):
        print('Iteration: {}'.format(i))

        for j in range(nb_launched):
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            data_add_generic(
                data_final,
                WRAcc=max(0, average_results(result_ucb_opti.get())) / ground_truth,
                iterations=iterations_limit,
                Algorithm='SeqScout')

        iterations_limit += iteration_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='iterations', ylabel='WRAcc')
    plt.savefig('./ground_truth/gt.png')
    df.to_pickle('./ground_truth/result')

    if SHOW:
        plt.show()
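# NOTE: the experiments in this file rely on small helpers (data_add_generic,
# average_results) defined elsewhere in the project. The sketches below are
# assumptions inferred from the call sites, not the project's actual code; the
# *_sketch names are hypothetical and only illustrate the expected behaviour.


def _data_add_generic_sketch(data_final, **kwargs):
    """Append one value to each column of a dict-of-lists result table."""
    for key, value in kwargs.items():
        data_final[key].append(value)


def _average_results_sketch(results):
    """Average the quality of a top-k result list.

    Assumes each result is a (quality, pattern) pair; returns 0 when the
    algorithm produced no patterns.
    """
    if not results:
        return 0
    return sum(quality for quality, _ in results) / len(results)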
def quality_over_size():
    number_dataset = 6
    data_origin, target, enable_i = datasets[number_dataset]

    pool = Pool(processes=3, maxtasksperchild=1)

    # number of runs to average over
    nb_launched = 5

    size = 15
    size_step = 4

    data_final = {'WRAcc': [], 'size': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))
        data = reduce_k_length(size, data_origin)

        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})
            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            results_mcts = pool.apply_async(launch_mcts, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few SeqScout: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))
            if len(results_mcts) < TOP_K:
                print("Too few MCTSExtent: {}".format(len(results_mcts)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             size=size,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             size=size,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             size=size,
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             size=size,
                             Algorithm='MCTSExtent')

        size += size_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='size', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='Length max', ylabel='WRAcc')
    # ax.set(xlabel='Time(s)', ylabel='Average WRAcc top-10 patterns')
    plt.savefig('./space_size/over_size.png')
    df.to_pickle('./space_size/result')

    if SHOW:
        plt.show()
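# reduce_k_length() is another project helper not shown in this section. The
# sketch below is an assumption based on how it is used above (cap the length
# of every sequence at k so the size of the search space is controlled); the
# record layout (label, sequence) is also an assumption, not the actual code.


def _reduce_k_length_sketch(k, data):
    """Return a copy of the dataset with each sequence cut to its first k elements."""
    return [(label, sequence[:k]) for label, sequence in data]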
def quality_over_theta():
    number_dataset = 1
    data, target, enable_i = datasets[number_dataset]

    pool = Pool(processes=5, maxtasksperchild=1)

    # number of runs to average over
    nb_launched = 5

    theta = 0.1
    data_final = {'WRAcc': [], 'theta': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))

        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target), {
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })
            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })
            results_mcts = pool.apply_async(launch_mcts, (data, target), {
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few SeqScout: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))
            if len(results_mcts) < TOP_K:
                print("Too few MCTSExtent: {}".format(len(results_mcts)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             theta=theta,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             theta=theta,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             theta=theta,
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             theta=theta,
                             Algorithm='MCTSExtent')

        theta += 0.1

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='theta', y='WRAcc', hue='Algorithm')
    plt.savefig('./theta/over_theta.png')
    df.to_pickle('./theta/result')

    if SHOW:
        plt.show()
def barplot_dataset_iterations():
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5
    data_final = {'WRAcc': [], 'dataset': [], 'Algorithm': []}

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for j in range(xp_repeat):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})
            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            results_mcts = pool.apply_async(launch_mcts, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            if len(results_misere) < TOP_K:
                print("Too few examples on misere on dataset {}: {} results".format(
                    datasets_names[i], len(results_misere)))
            if len(results_beam) < TOP_K:
                print("Too few examples on beam_search on dataset {}: {} results".format(
                    datasets_names[i], len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few examples on SeqScout on dataset {}: {} results".format(
                    datasets_names[i], len(result_ucb_opti)))
            if len(results_mcts) < TOP_K:
                print("Too few examples on MCTSExtent on dataset {}: {} results".format(
                    datasets_names[i], len(results_mcts)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             dataset=datasets_names[i],
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             dataset=datasets_names[i],
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             dataset=datasets_names[i],
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             dataset=datasets_names[i],
                             Algorithm='MCTSExtent')

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.barplot(x='dataset', y='WRAcc', hue='Algorithm', data=df)
    plt.savefig('./wracc_datasets/iterations_boxplot.png')
    df.to_pickle('./wracc_datasets/result')

    if SHOW:
        plt.show()
def other_measures():
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5
    nb_iterations = 10000

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for measure in ['Informedness', 'F1']:
            mean_misere = 0
            mean_beam = 0
            mean_seqscout = 0
            mean_mcts = 0

            for j in range(xp_repeat):
                results_misere = pool.apply_async(
                    misere, (data, target), {
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                results_beam = pool.apply_async(
                    beam_search, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                result_ucb_opti = pool.apply_async(
                    seq_scout, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                results_mcts = pool.apply_async(
                    launch_mcts, (data, target), {
                        'time_budget': TIME_BUDGET_XP,
                        'iterations_limit': nb_iterations,
                        'quality_measure': measure
                    })

                results_misere = results_misere.get()
                results_beam = results_beam.get()
                result_ucb_opti = result_ucb_opti.get()
                results_mcts = results_mcts.get()

                if len(results_misere) < TOP_K:
                    print("Too few examples on misere on dataset {}: {} results".format(
                        datasets_names[i], len(results_misere)))
                if len(results_beam) < TOP_K:
                    print("Too few examples on beam_search on dataset {}: {} results".format(
                        datasets_names[i], len(results_beam)))
                if len(result_ucb_opti) < TOP_K:
                    print("Too few examples on SeqScout on dataset {}: {} results".format(
                        datasets_names[i], len(result_ucb_opti)))
                if len(results_mcts) < TOP_K:
                    print("Too few examples on MCTSExtent on dataset {}: {} results".format(
                        datasets_names[i], len(results_mcts)))

                mean_misere += average_results(results_misere)
                mean_beam += average_results(results_beam)
                mean_seqscout += average_results(result_ucb_opti)
                mean_mcts += average_results(results_mcts)

            mean_misere = mean_misere / xp_repeat
            mean_beam = mean_beam / xp_repeat
            mean_seqscout = mean_seqscout / xp_repeat
            mean_mcts = mean_mcts / xp_repeat

            print('For dataset {}, measure {}, algorithm misere, the mean score is: {}'.format(
                datasets_names[i], measure, mean_misere))
            print('For dataset {}, measure {}, algorithm beam_search, the mean score is: {}'.format(
                datasets_names[i], measure, mean_beam))
            print('For dataset {}, measure {}, algorithm SeqScout, the mean score is: {}'.format(
                datasets_names[i], measure, mean_seqscout))
            print('For dataset {}, measure {}, algorithm MCTSExtent, the mean score is: {}'.format(
                datasets_names[i], measure, mean_mcts))
def show_quality_over_iterations_ucb(number_dataset):
    data, target, enable_i = datasets[number_dataset]

    # number of runs to average over
    nb_launched = 5

    pool = Pool(processes=3, maxtasksperchild=1)

    iterations_limit = 50
    iterations_step = 1000

    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    for i in range(12):
        print('Iteration: {}'.format(i))

        for j in range(nb_launched):
            results_misere = pool.apply_async(
                misere, (data, target), {
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            results_beam = pool.apply_async(
                beam_search, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            results_mcts = pool.apply_async(
                launch_mcts, (data, target), {
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere.get())),
                             iterations=iterations_limit,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam.get())),
                             iterations=iterations_limit,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti.get())),
                             iterations=iterations_limit,
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts.get())),
                             iterations=iterations_limit,
                             Algorithm='MCTSExtent')

        iterations_limit += iterations_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')
    plt.savefig('./iterations_ucb/over_iterations{}.png'.format(
        datasets_names[number_dataset]))
    df.to_pickle('./iterations_ucb/result_{}'.format(
        datasets_names[number_dataset]))

    if SHOW:
        plt.show()
def barplot_dataset_improvement_iterations():
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5

    seqscout_vs_beam = {'Improvement': [], 'dataset': []}
    seqscout_vs_misere = {'Improvement': [], 'dataset': []}
    mcts_vs_misere = {'Improvement': [], 'dataset': []}
    mcts_vs_beam = {'Improvement': [], 'dataset': []}
    mcts_vs_seqscout = {'Improvement': [], 'dataset': []}

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for j in range(xp_repeat):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})
            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            results_mcts = pool.apply_async(launch_mcts, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            if len(results_misere) < TOP_K:
                print("Too few examples on misere on dataset {}: {} results".format(
                    datasets_names[i], len(results_misere)))
            if len(results_beam) < TOP_K:
                print("Too few examples on beam_search on dataset {}: {} results".format(
                    datasets_names[i], len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few examples on SeqScout on dataset {}: {} results".format(
                    datasets_names[i], len(result_ucb_opti)))
            if len(results_mcts) < TOP_K:
                print("Too few examples on MCTSExtent on dataset {}: {} results".format(
                    datasets_names[i], len(results_mcts)))

            data_add_generic(seqscout_vs_misere,
                             Improvement=compute_improvement(
                                 average_results(result_ucb_opti),
                                 average_results(results_misere)),
                             dataset=datasets_names[i])
            data_add_generic(seqscout_vs_beam,
                             Improvement=compute_improvement(
                                 average_results(result_ucb_opti),
                                 average_results(results_beam)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_misere,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(results_misere)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_beam,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(results_beam)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_seqscout,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(result_ucb_opti)),
                             dataset=datasets_names[i])

    sns.set(rc={'figure.figsize': (8, 6.5)})

    plt.clf()
    df = pd.DataFrame(data=seqscout_vs_misere)
    ax = sns.barplot(x='dataset', y='Improvement', data=df)
    ax.set(yscale='symlog')
    annotate_bars(ax)
    plt.savefig('./improvement/seqscout_vs_misere.png')
    df.to_pickle('./improvement/seqscout_vs_misere')

    plt.clf()
    df = pd.DataFrame(data=seqscout_vs_beam)
    bx = sns.barplot(x='dataset', y='Improvement', data=df)
    bx.set(yscale='symlog')
    annotate_bars(bx)
    plt.savefig('./improvement/seqscout_vs_beam.png')
    df.to_pickle('./improvement/seqscout_vs_beam')

    plt.clf()
    df = pd.DataFrame(data=mcts_vs_misere)
    cx = sns.barplot(x='dataset', y='Improvement', data=df)
    cx.set(yscale='symlog')
    annotate_bars(cx)
    plt.savefig('./improvement/mcts_vs_misere.png')
    df.to_pickle('./improvement/mcts_vs_misere')

    plt.clf()
    df = pd.DataFrame(data=mcts_vs_beam)
    dx = sns.barplot(x='dataset', y='Improvement', data=df)
    dx.set(yscale='symlog')
    annotate_bars(dx)
    plt.savefig('./improvement/mcts_vs_beam.png')
    df.to_pickle('./improvement/mcts_vs_beam')

    plt.clf()
    df = pd.DataFrame(data=mcts_vs_seqscout)
    ex = sns.barplot(x='dataset', y='Improvement', data=df)
    ex.set(yscale='symlog')
    annotate_bars(ex)
    plt.savefig('./improvement/mcts_vs_seqscout.png')
    df.to_pickle('./improvement/mcts_vs_seqscout')
    plt.clf()

    if SHOW:
        plt.show()
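# compute_improvement() and annotate_bars() are project helpers not shown in
# this section. The sketches below only describe the behaviour assumed by the
# plots above (relative improvement of one average WRAcc over another, and
# labelling each bar with its value); the *_sketch names are hypothetical and
# the real helpers may differ.


def _compute_improvement_sketch(new_score, baseline_score):
    """Relative improvement of new_score over baseline_score (assumed definition)."""
    if baseline_score == 0:
        return 0
    return (new_score - baseline_score) / baseline_score


def _annotate_bars_sketch(ax):
    """Write each bar's height just above the bar of a seaborn/matplotlib barplot."""
    for patch in ax.patches:
        ax.annotate('{:.2f}'.format(patch.get_height()),
                    (patch.get_x() + patch.get_width() / 2, patch.get_height()),
                    ha='center', va='bottom')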
def barplot_increase_local_optima(it_number):
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5

    data_final = {
        'WRAcc': [],
        'WRAcc_opti': [],
        'dataset': [],
        'Algorithm': []
    }

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for j in range(xp_repeat):
            results_misere = pool.apply_async(
                optimizer,
                (data, target, 'misere', TIME_BUDGET_XP, it_number, enable_i))
            results_beam = pool.apply_async(
                optimizer,
                (data, target, 'BeamSearch', TIME_BUDGET_XP, it_number, enable_i))
            results_ucb_opti = pool.apply_async(
                optimizer,
                (data, target, 'SeqScout', TIME_BUDGET_XP, it_number, enable_i))
            results_mcts = pool.apply_async(
                optimizer,
                (data, target, 'MCTSExtent', TIME_BUDGET_XP, it_number, enable_i))

            results_misere, results_misere_opti = results_misere.get()
            results_beam, results_beam_opti = results_beam.get()
            results_seqscout, results_seqscout_opti = results_ucb_opti.get()
            results_mcts, results_mcts_opti = results_mcts.get()

            if len(results_misere) < TOP_K:
                print("Too few examples on misere on dataset {}: {} results".format(
                    datasets_names[i], len(results_misere)))
            if len(results_beam) < TOP_K:
                print("Too few examples on beam_search on dataset {}: {} results".format(
                    datasets_names[i], len(results_beam)))
            if len(results_seqscout) < TOP_K:
                print("Too few examples on SeqScout on dataset {}: {} results".format(
                    datasets_names[i], len(results_seqscout)))
            if len(results_mcts) < TOP_K:
                print("Too few examples on MCTSExtent on dataset {}: {} results".format(
                    datasets_names[i], len(results_mcts)))

            # Each algorithm pairs its own raw WRAcc with its locally optimized WRAcc.
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             dataset=datasets_names[i],
                             Algorithm='misere',
                             WRAcc_opti=max(0, average_results(results_misere_opti)))
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             dataset=datasets_names[i],
                             Algorithm='beam',
                             WRAcc_opti=max(0, average_results(results_beam_opti)))
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_seqscout)),
                             dataset=datasets_names[i],
                             Algorithm='SeqScout',
                             WRAcc_opti=max(0, average_results(results_seqscout_opti)))
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             dataset=datasets_names[i],
                             Algorithm='MCTSExtent',
                             WRAcc_opti=max(0, average_results(results_mcts_opti)))

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    df.to_pickle('./local_opti_increase/result_{}'.format(it_number))

    plt.clf()
    sns.set_color_codes("pastel")
    ax = sns.barplot(x='dataset', y='WRAcc_opti', hue='Algorithm', data=df)
    sns.set_color_codes("muted")
    ax = sns.barplot(x='dataset', y='WRAcc', hue='Algorithm', data=df)
    plt.savefig('./local_opti_increase/barplot_{}.png'.format(it_number))

    if SHOW:
        plt.show()