def compare_ground_truth():
    """Plot seq_scout quality (relative to a known optimum) vs. iteration budget.

    Loads the TZ-45 sequence dataset (first 5000 sequences, itemsets capped at
    length 10), then runs seq_scout `nb_launched` times for each of 10
    increasing iteration budgets.  Each run's average top-k WRAcc is divided by
    a ground-truth WRAcc (found offline by exhaustive search) so the y-axis is
    a fraction of the optimum.  Saves a line plot and a pickled DataFrame under
    ./ground_truth/.
    """
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    data = reduce_k_length(10, data)
    target = '1'
    enable_i = True

    # if we want to average
    nb_launched = 5
    pool = Pool(processes=3)
    iterations_limit = 50
    iteration_step = 1000
    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    # found with exaustive search
    ground_truth = 0.008893952000000009

    for i in range(10):
        print('Iteration: {}'.format(i))
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # iteration counter; it now uses its own variable.
        for j in range(nb_launched):
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            # Normalize by the exhaustive-search optimum; clamp at 0 so a
            # negative average WRAcc does not produce a negative ratio.
            data_add_generic(
                data_final,
                WRAcc=max(0, average_results(result_ucb_opti.get())) /
                ground_truth,
                iterations=iterations_limit,
                Algorithm='seqscout')
        iterations_limit += iteration_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='iterations', ylabel='WRAcc')
    plt.savefig('./ground_truth/gt.png')
    df.to_pickle('./ground_truth/result')
    if SHOW:
        plt.show()
def boxplot_dataset_iterations():
    """Compare misere, beam_search and seq_scout on every benchmark dataset.

    Each algorithm is run `xp_repeat` times per dataset on a worker pool; the
    average top-k WRAcc of every run is recorded, then rendered as a grouped
    bar plot saved under ./wracc_datasets/.
    """
    pool = Pool(processes=5)
    xp_repeat = 5
    data_final = {'WRAcc': [], 'dataset': [], 'Algorithm': []}

    for idx, (data, target, enable_i) in enumerate(datasets):
        dataset_name = datasets_names[idx]
        print("Dataset {}".format(dataset_name))

        for _ in range(xp_repeat):
            # Launch the three competitors concurrently on the pool.
            async_misere = pool.apply_async(misere, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})
            async_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            async_scout = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })

            misere_patterns = async_misere.get()
            beam_patterns = async_beam.get()
            scout_patterns = async_scout.get()

            # Warn when an algorithm produced fewer than TOP_K patterns.
            if len(misere_patterns) < TOP_K:
                print("Too few example on misere on dataset {}: {} results".
                      format(dataset_name, len(misere_patterns)))
            if len(beam_patterns) < TOP_K:
                print(
                    "Too few example on beam_search on dataset {}: {} results".
                    format(dataset_name, len(beam_patterns)))
            if len(scout_patterns) < TOP_K:
                print("Too few example on seqscout on dataset {}: {} results".
                      format(dataset_name, len(scout_patterns)))

            for algo, patterns in (('misere', misere_patterns),
                                   ('beam', beam_patterns),
                                   ('seqscout', scout_patterns)):
                data_add_generic(data_final,
                                 WRAcc=max(0, average_results(patterns)),
                                 dataset=dataset_name,
                                 Algorithm=algo)

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.barplot(x='dataset', y='WRAcc', hue='Algorithm', data=df)
    plt.savefig('./wracc_datasets/iterations_boxplot.png')
    df.to_pickle('./wracc_datasets/result')
    if SHOW:
        plt.show()
def quality_over_size():
    """Plot algorithm quality as the maximum sequence length grows.

    For 10 increasing length caps (starting at 15, step 4), truncates the
    chosen dataset with reduce_k_length, runs misere, beam_search and
    seq_scout `nb_launched` times each, records the average top-k WRAcc, and
    saves a line plot plus a pickled DataFrame under ./space_size/.
    """
    number_dataset = 6
    data_origin, target, enable_i = datasets[number_dataset]
    pool = Pool(processes=3)

    # if we want to average
    nb_launched = 5
    size = 15
    size_step = 4
    data_final = {'WRAcc': [], 'size': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))
        data = reduce_k_length(size, data_origin)
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # iteration counter; it now uses its own variable.
        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})
            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()

            # Warn when an algorithm produced fewer than TOP_K patterns.
            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few seqscout: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             size=size,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             size=size,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             size=size,
                             Algorithm='seqscout')
        size += size_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='size', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='Length max', ylabel='WRAcc')
    # ax.set(xlabel='Time(s)', ylabel='Average WRAcc top-10 patterns')
    plt.savefig('./space_size/over_size.png')
    df.to_pickle('./space_size/result')
    if SHOW:
        plt.show()
def quality_over_theta():
    """Plot algorithm quality as the similarity threshold theta grows.

    Sweeps theta from 0.1 to 1.0 in 0.1 steps; for each value runs misere,
    beam_search and seq_scout `nb_launched` times, records the average top-k
    WRAcc, and saves a line plot plus a pickled DataFrame under ./theta/.
    """
    number_dataset = 1
    data, target, enable_i = datasets[number_dataset]
    pool = Pool(processes=5)

    # if we want to average
    nb_launched = 5
    theta = 0.1
    data_final = {'WRAcc': [], 'theta': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # iteration counter; it now uses its own variable.
        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target), {
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })
            results_beam = pool.apply_async(
                beam_search, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'theta': theta
                })
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'theta': theta
                })

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()

            # Warn when an algorithm produced fewer than TOP_K patterns.
            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few seqscout: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             theta=theta,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             theta=theta,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             theta=theta,
                             Algorithm='seqscout')
        # BUG FIX: plain `theta += 0.1` accumulates binary float error and
        # produces x-axis values like 0.30000000000000004; round to keep the
        # sweep at exact tenths.
        theta = round(theta + 0.1, 10)

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='theta', y='WRAcc', hue='Algorithm')
    plt.savefig('./theta/over_theta.png')
    df.to_pickle('./theta/result')
    if SHOW:
        plt.show()
def other_measures():
    """Print mean quality of misere, beam_search and seq_scout under
    alternative quality measures (Informedness and F1) on every dataset.

    Each (dataset, measure, algorithm) combination is run `xp_repeat` times
    on a worker pool with a fixed iteration budget; the mean of the average
    top-k scores is printed per combination.
    """
    pool = Pool(processes=5)
    xp_repeat = 5
    nb_iterations = 1000

    for idx, (data, target, enable_i) in enumerate(datasets):
        dataset_name = datasets_names[idx]
        print("Dataset {}".format(dataset_name))

        for measure in ['Informedness', 'F1']:
            total_misere = 0
            total_beam = 0
            total_seqscout = 0

            for _ in range(xp_repeat):
                # Launch the three competitors concurrently on the pool.
                async_misere = pool.apply_async(
                    misere, (data, target), {
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                async_beam = pool.apply_async(
                    beam_search, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                async_scout = pool.apply_async(
                    seq_scout, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })

                misere_patterns = async_misere.get()
                beam_patterns = async_beam.get()
                scout_patterns = async_scout.get()

                # Warn when an algorithm produced fewer than TOP_K patterns.
                if len(misere_patterns) < TOP_K:
                    print(
                        "Too few example on misere on dataset {}: {} results".
                        format(dataset_name, len(misere_patterns)))
                if len(beam_patterns) < TOP_K:
                    print(
                        "Too few example on beam_search on dataset {}: {} results"
                        .format(dataset_name, len(beam_patterns)))
                if len(scout_patterns) < TOP_K:
                    print(
                        "Too few example on seqscout on dataset {}: {} results"
                        .format(dataset_name, len(scout_patterns)))

                total_misere += average_results(misere_patterns)
                total_beam += average_results(beam_patterns)
                total_seqscout += average_results(scout_patterns)

            print(
                'For datasets {}, measure {}, algorithm misere the means score is: {}'
                .format(dataset_name, measure, total_misere / xp_repeat))
            print(
                'For datasets {}, measure {}, algorithm beam_search the means score is: {}'
                .format(dataset_name, measure, total_beam / xp_repeat))
            print(
                'For datasets {}, measure {}, algorithm seqscout the means score is: {}'
                .format(dataset_name, measure, total_seqscout / xp_repeat))
def show_quality_over_iterations_ucb(number_dataset):
    """Plot quality of misere, beam_search and seq_scout vs. iteration budget.

    For 12 increasing iteration limits (starting at 50, step 1000) on the
    dataset at index `number_dataset`, runs each algorithm `nb_launched`
    times, records the average top-k WRAcc, and saves a line plot plus a
    pickled DataFrame under ./iterations_ucb/ (file names include the
    dataset's name).
    """
    data, target, enable_i = datasets[number_dataset]

    # if we want to average
    nb_launched = 5
    pool = Pool(processes=3)
    iterations_limit = 50
    iterations_step = 1000
    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    for i in range(12):
        print('Iteration: {}'.format(i))
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # iteration counter; it now uses its own variable.
        for j in range(nb_launched):
            results_misere = pool.apply_async(
                misere, (data, target), {
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            results_beam = pool.apply_async(
                beam_search, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            data_add_generic(data_final,
                             WRAcc=max(0,
                                       average_results(results_misere.get())),
                             iterations=iterations_limit,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam.get())),
                             iterations=iterations_limit,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0,
                                       average_results(result_ucb_opti.get())),
                             iterations=iterations_limit,
                             Algorithm='seqscout')
        iterations_limit += iterations_step

    df = pd.DataFrame(data=data_final)
    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df,
                      x='iterations',
                      y='WRAcc',
                      hue='Algorithm',
                      markers=True)
    plt.savefig('./iterations_ucb/over_iterations{}.png'.format(
        datasets_names[number_dataset]))
    df.to_pickle('./iterations_ucb/result{}'.format(
        datasets_names[number_dataset]))
    if SHOW:
        plt.show()