Example #1
0
def compare_ground_truth():
    """Plot SeqScout quality relative to the exhaustive-search optimum.

    Runs SeqScout on a truncated TZ-45 dataset with a growing iteration
    budget, normalises each averaged WRAcc by the known ground-truth
    optimum, then saves a line plot to ./ground_truth/gt.png and the
    raw DataFrame to ./ground_truth/result.
    """
    data = read_data_sc2('../data/sequences-TZ-45.txt')[:5000]
    data = reduce_k_length(10, data)

    target = '1'
    enable_i = True

    # if we want to average
    nb_launched = 5
    pool = Pool(processes=3, maxtasksperchild=1)

    iterations_limit = 50
    iteration_step = 1000

    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    # found with exhaustive search
    ground_truth = 0.008893952000000009

    for i in range(10):
        print('Iteration: {}'.format(i))

        # Fix: this inner loop previously rebound `i`, shadowing the
        # outer counter; use `j` as the other experiment functions do.
        for j in range(nb_launched):
            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            # WRAcc is clipped at 0, then expressed as a fraction of
            # the ground-truth optimum.
            data_add_generic(
                data_final,
                WRAcc=max(0, average_results(result_ucb_opti.get())) /
                ground_truth,
                iterations=iterations_limit,
                Algorithm='SeqScout')

        iterations_limit += iteration_step

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='iterations', ylabel='WRAcc')
    plt.savefig('./ground_truth/gt.png')
    df.to_pickle('./ground_truth/result')

    if SHOW:
        plt.show()
Example #2
0
def quality_over_size():
    """Plot WRAcc of the four algorithms as max sequence length grows.

    Repeatedly truncates the dataset to an increasing max length
    (`size`), runs misere, beam_search, SeqScout and MCTSExtent in the
    worker pool, records clipped average WRAcc per algorithm, then
    saves a line plot and the pickled DataFrame under ./space_size/.
    """
    number_dataset = 6
    data_origin, target, enable_i = datasets[number_dataset]

    pool = Pool(processes=3, maxtasksperchild=1)

    # if we want to average
    nb_launched = 5

    size = 15
    size_step = 4
    data_final = {'WRAcc': [], 'size': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))
        data = reduce_k_length(size, data_origin)
        # Fix: this inner loop previously rebound `i`, shadowing the
        # outer counter; use `j` as the other experiment functions do.
        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})

            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })

            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })

            results_mcts = pool.apply_async(launch_mcts, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            # Warn when an algorithm returned fewer than TOP_K patterns.
            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few SeqScout: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))
            if len(results_mcts) < TOP_K:
                print("Too few MCTSExtent: {}".format(len(results_mcts)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             size=size,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             size=size,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             size=size,
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             size=size,
                             Algorithm='MCTSExtent')

        size += size_step

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})

    plt.clf()
    ax = sns.lineplot(data=df, x='size', y='WRAcc', hue='Algorithm')
    ax.set(xlabel='Length max', ylabel='WRAcc')

    # ax.set(xlabel='Time(s)', ylabel='Average WRAcc top-10 patterns')

    plt.savefig('./space_size/over_size.png')
    df.to_pickle('./space_size/result')

    if SHOW:
        plt.show()
Example #3
0
def quality_over_theta():
    """Plot WRAcc of the four algorithms as `theta` increases.

    For theta = 0.1, 0.2, ... (10 outer steps), runs misere,
    beam_search, SeqScout and MCTSExtent in the worker pool, records
    clipped average WRAcc per algorithm, then saves a line plot and
    the pickled DataFrame under ./theta/.
    """
    number_dataset = 1
    data, target, enable_i = datasets[number_dataset]

    pool = Pool(processes=5, maxtasksperchild=1)

    # if we want to average
    nb_launched = 5

    theta = 0.1

    data_final = {'WRAcc': [], 'theta': [], 'Algorithm': []}

    for i in range(10):
        print('Iteration: {}'.format(i))
        # Fix: this inner loop previously rebound `i`, shadowing the
        # outer counter; use `j` as the other experiment functions do.
        for j in range(nb_launched):
            results_misere = pool.apply_async(misere, (data, target), {
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })
            results_beam = pool.apply_async(
                beam_search, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'theta': theta
                })

            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'theta': theta
                })

            results_mcts = pool.apply_async(launch_mcts, (data, target), {
                'time_budget': TIME_BUDGET_XP,
                'theta': theta
            })

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            # Warn when an algorithm returned fewer than TOP_K patterns.
            if len(results_beam) < TOP_K:
                print("Too few beam: {}".format(len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few ucb: {}".format(len(result_ucb_opti)))
            if len(results_misere) < TOP_K:
                print("Too few misere: {}".format(len(results_misere)))
            if len(results_mcts) < TOP_K:
                print("Too few mctsextent : {}".format(len(results_mcts)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             theta=theta,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             theta=theta,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(result_ucb_opti)),
                             theta=theta,
                             Algorithm='SeqScout')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             theta=theta,
                             Algorithm='MCTSExtent')

        # NOTE(review): repeated float addition accumulates rounding
        # error (0.1 is not exact in binary); harmless for plotting,
        # but the recorded theta values are not exact tenths.
        theta += 0.1

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})

    plt.clf()
    ax = sns.lineplot(data=df, x='theta', y='WRAcc', hue='Algorithm')
    plt.savefig('./theta/over_theta.png')

    df.to_pickle('./theta/result')

    if SHOW:
        plt.show()
Example #4
0
def barplot_dataset_iterations():
    """Draw a per-dataset WRAcc bar plot for the four algorithms.

    Each dataset is processed xp_repeat times; every repetition runs
    misere, beam_search, SeqScout and MCTSExtent in the worker pool,
    records the clipped average WRAcc per algorithm, then a grouped
    bar plot and the pickled DataFrame are saved under
    ./wracc_datasets/.
    """
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5

    data_final = {'WRAcc': [], 'dataset': [], 'Algorithm': []}

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        budget_only = {'time_budget': TIME_BUDGET_XP}
        budget_and_i = {'enable_i': enable_i, 'time_budget': TIME_BUDGET_XP}

        for j in range(xp_repeat):
            # Launch the four algorithms concurrently, then collect
            # their results in a fixed order.
            pending = (
                pool.apply_async(misere, (data, target), budget_only),
                pool.apply_async(beam_search, (data, target), budget_and_i),
                pool.apply_async(seq_scout, (data, target), budget_and_i),
                pool.apply_async(launch_mcts, (data, target), budget_only),
            )
            outcomes = [task.get() for task in pending]

            # Warn when an algorithm returned fewer than TOP_K patterns.
            warnings = (
                "Too few example on misere on dataset {}: {} results",
                "Too few example on beam_search on dataset {}: {} results",
                "Too few example on seqscout on dataset {}: {} results",
                "Too few example on mctsextend on dataset {}: {} results",
            )
            for template, found in zip(warnings, outcomes):
                if len(found) < TOP_K:
                    print(template.format(datasets_names[i], len(found)))

            for label, found in zip(
                ('misere', 'beam', 'SeqScout', 'MCTSExtent'), outcomes):
                data_add_generic(data_final,
                                 WRAcc=max(0, average_results(found)),
                                 dataset=datasets_names[i],
                                 Algorithm=label)

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})

    plt.clf()
    ax = sns.barplot(x='dataset', y='WRAcc', hue='Algorithm', data=df)

    plt.savefig('./wracc_datasets/iterations_boxplot.png')
    df.to_pickle('./wracc_datasets/result')

    if SHOW:
        plt.show()
Example #5
0
def other_measures():
    """Print averaged scores for alternative quality measures.

    For every dataset and each measure in ('Informedness', 'F1'), runs
    misere, beam_search, SeqScout and MCTSExtent xp_repeat times in the
    worker pool and prints the mean of the averaged top-k scores.
    Unlike the other experiments, results are only printed — nothing is
    plotted or pickled.
    """
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5
    nb_iterations = 10000

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for measure in ['Informedness', 'F1']:
            # Accumulators for the per-repeat averaged scores.
            mean_misere = 0
            mean_beam = 0
            mean_seqscout = 0
            mean_mcts = 0

            for j in range(xp_repeat):
                # Launch the four algorithms concurrently with the
                # current quality measure and an iteration cap.
                results_misere = pool.apply_async(
                    misere, (data, target), {
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })
                results_beam = pool.apply_async(
                    beam_search, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })

                result_ucb_opti = pool.apply_async(
                    seq_scout, (data, target), {
                        'enable_i': enable_i,
                        'time_budget': TIME_BUDGET_XP,
                        'quality_measure': measure,
                        'iterations_limit': nb_iterations
                    })

                results_mcts = pool.apply_async(
                    launch_mcts, (data, target), {
                        'time_budget': TIME_BUDGET_XP,
                        'iterations_limit': nb_iterations,
                        'quality_measure': measure
                    })

                results_misere = results_misere.get()
                results_beam = results_beam.get()
                result_ucb_opti = result_ucb_opti.get()
                results_mcts = results_mcts.get()

                # Warn when an algorithm returned fewer than TOP_K patterns.
                if len(results_misere) < TOP_K:
                    print(
                        "Too few example on misere on dataset {}: {} results".
                        format(datasets_names[i], len(results_misere)))
                if len(results_beam) < TOP_K:
                    print(
                        "Too few example on beam_search on dataset {}: {} results"
                        .format(datasets_names[i], len(results_beam)))
                if len(result_ucb_opti) < TOP_K:
                    print("Too few example on ucb on dataset {}: {} results".
                          format(datasets_names[i], len(result_ucb_opti)))
                if len(results_mcts) < TOP_K:
                    print(
                        "Too few example on mctsextend on dataset {}: {} results"
                        .format(datasets_names[i], len(results_mcts)))

                mean_misere += average_results(results_misere)
                mean_beam += average_results(results_beam)
                mean_seqscout += average_results(result_ucb_opti)
                mean_mcts += average_results(results_mcts)

            # Turn the accumulated sums into means over the repeats.
            mean_misere = mean_misere / xp_repeat
            mean_beam = mean_beam / xp_repeat
            mean_seqscout = mean_seqscout / xp_repeat
            mean_mcts = mean_mcts / xp_repeat

            print(
                'For datasets {}, measure {}, algorithm misere the means score is: {}'
                .format(datasets_names[i], measure, mean_misere))
            print(
                'For datasets {}, measure {}, algorithm beam_search the means score is: {}'
                .format(datasets_names[i], measure, mean_beam))
            print(
                'For datasets {}, measure {}, algorithm ucb the means score is: {}'
                .format(datasets_names[i], measure, mean_seqscout))
            print(
                'For datasets {}, measure {}, algorithm mctsextend the means score is: {}'
                .format(datasets_names[i], measure, mean_mcts))
Example #6
0
def show_quality_over_iterations_ucb(number_dataset):
    """Plot WRAcc of the four algorithms against the iteration budget.

    For 12 budgets (50, then +1000 per step), runs misere, beam_search,
    SeqScout and MCTSExtent on dataset `number_dataset`, records the
    clipped average WRAcc per algorithm, then saves a line plot and a
    pickled DataFrame under ./iterations_ucb/.

    :param number_dataset: index into the module-level `datasets` list.
    """
    data, target, enable_i = datasets[number_dataset]

    # if we want to average
    nb_launched = 5
    pool = Pool(processes=3, maxtasksperchild=1)

    iterations_limit = 50
    iterations_step = 1000

    data_final = {'WRAcc': [], 'iterations': [], 'Algorithm': []}

    for i in range(12):
        print('Iteration: {}'.format(i))

        # Fix: this inner loop previously rebound `i`, shadowing the
        # outer counter; use `j` as the other experiment functions do.
        for j in range(nb_launched):
            results_misere = pool.apply_async(
                misere, (data, target), {
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })
            results_beam = pool.apply_async(
                beam_search, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            result_ucb_opti = pool.apply_async(
                seq_scout, (data, target), {
                    'enable_i': enable_i,
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            results_mcts = pool.apply_async(
                launch_mcts, (data, target), {
                    'time_budget': TIME_BUDGET_XP,
                    'iterations_limit': iterations_limit
                })

            data_add_generic(data_final,
                             WRAcc=max(0,
                                       average_results(results_misere.get())),
                             iterations=iterations_limit,
                             Algorithm='misere')
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam.get())),
                             iterations=iterations_limit,
                             Algorithm='beam')
            data_add_generic(data_final,
                             WRAcc=max(0,
                                       average_results(result_ucb_opti.get())),
                             iterations=iterations_limit,
                             Algorithm='SeqScout')

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts.get())),
                             iterations=iterations_limit,
                             Algorithm='MCTSExtent')

        iterations_limit += iterations_step

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})

    plt.clf()
    ax = sns.lineplot(data=df, x='iterations', y='WRAcc', hue='Algorithm')

    plt.savefig('./iterations_ucb/over_iterations{}.png'.format(
        datasets_names[number_dataset]))
    df.to_pickle('./iterations_ucb/result_{}'.format(
        datasets_names[number_dataset]))

    if SHOW:
        plt.show()
Example #7
0
def barplot_dataset_improvement_iterations():
    """Plot pairwise WRAcc improvement ratios between algorithms.

    For each dataset, runs misere, beam_search, SeqScout and MCTSExtent
    xp_repeat times, computes pairwise improvements via
    `compute_improvement`, and saves one symlog bar plot plus a pickled
    DataFrame per comparison under ./improvement/.
    """
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5

    seqscout_vs_beam = {'Improvement': [], 'dataset': []}
    seqscout_vs_misere = {'Improvement': [], 'dataset': []}
    mcts_vs_misere = {'Improvement': [], 'dataset': []}
    mcts_vs_beam = {'Improvement': [], 'dataset': []}
    mcts_vs_seqscout = {'Improvement': [], 'dataset': []}

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for j in range(xp_repeat):
            results_misere = pool.apply_async(misere, (data, target),
                                              {'time_budget': TIME_BUDGET_XP})

            results_beam = pool.apply_async(beam_search, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            result_ucb_opti = pool.apply_async(seq_scout, (data, target), {
                'enable_i': enable_i,
                'time_budget': TIME_BUDGET_XP
            })
            results_mcts = pool.apply_async(launch_mcts, (data, target),
                                            {'time_budget': TIME_BUDGET_XP})

            results_misere = results_misere.get()
            results_beam = results_beam.get()
            result_ucb_opti = result_ucb_opti.get()
            results_mcts = results_mcts.get()

            # Warn when an algorithm returned fewer than TOP_K patterns.
            if len(results_misere) < TOP_K:
                print("Too few example on misere on dataset {}: {} results".
                      format(datasets_names[i], len(results_misere)))
            if len(results_beam) < TOP_K:
                print(
                    "Too few example on beam_search on dataset {}: {} results".
                    format(datasets_names[i], len(results_beam)))
            if len(result_ucb_opti) < TOP_K:
                print("Too few example on seqscout on dataset {}: {} results".
                      format(datasets_names[i], len(result_ucb_opti)))
            if len(results_mcts) < TOP_K:
                print(
                    "Too few example on mctsextend on dataset {}: {} results".
                    format(datasets_names[i], len(results_mcts)))

            data_add_generic(seqscout_vs_misere,
                             Improvement=compute_improvement(
                                 average_results(result_ucb_opti),
                                 average_results(results_misere)),
                             dataset=datasets_names[i])

            data_add_generic(seqscout_vs_beam,
                             Improvement=compute_improvement(
                                 average_results(result_ucb_opti),
                                 average_results(results_beam)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_misere,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(results_misere)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_beam,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(results_beam)),
                             dataset=datasets_names[i])
            data_add_generic(mcts_vs_seqscout,
                             Improvement=compute_improvement(
                                 average_results(results_mcts),
                                 average_results(result_ucb_opti)),
                             dataset=datasets_names[i])

    sns.set(rc={'figure.figsize': (8, 6.5)})
    plt.clf()

    # One figure + pickle per comparison.
    # BUG FIX: the mcts-vs-seqscout pickle was previously written to the
    # misspelled path './improvement/mcts_bs_seqscout'; it now matches
    # the PNG name, like every other comparison.
    comparisons = (
        (seqscout_vs_misere, 'seqscout_vs_misere'),
        (seqscout_vs_beam, 'seqscout_vs_beam'),
        (mcts_vs_misere, 'mcts_vs_misere'),
        (mcts_vs_beam, 'mcts_vs_beam'),
        (mcts_vs_seqscout, 'mcts_vs_seqscout'),
    )
    for comparison, stem in comparisons:
        df = pd.DataFrame(data=comparison)
        ax = sns.barplot(x='dataset', y='Improvement', data=df)
        # symlog handles improvement ratios spanning orders of magnitude
        # (and negative values).
        ax.set(yscale='symlog')
        annotate_bars(ax)
        plt.savefig('./improvement/{}.png'.format(stem))
        df.to_pickle('./improvement/{}'.format(stem))
        plt.clf()

    if SHOW:
        plt.show()
Example #8
0
def barplot_increase_local_optima(it_number):
    """Compare raw vs locally-optimised WRAcc for each algorithm.

    For every dataset, runs the `optimizer` wrapper around misere,
    BeamSearch, SeqScout and MCTSExtent, recording both the plain
    top-k WRAcc and the WRAcc after local optimisation, then draws an
    overlaid bar plot (pastel = optimised, muted = raw) and pickles
    the DataFrame under ./local_opti_increase/.

    :param it_number: iteration budget forwarded to `optimizer` and
        used in the output file names.
    """
    pool = Pool(processes=5, maxtasksperchild=1)
    xp_repeat = 5

    data_final = {
        'WRAcc': [],
        'WRAcc_opti': [],
        'dataset': [],
        'Algorithm': []
    }

    for i, (data, target, enable_i) in enumerate(datasets):
        print("Dataset {}".format(datasets_names[i]))

        for j in range(xp_repeat):
            results_misere = pool.apply_async(
                optimizer,
                (data, target, 'misere', TIME_BUDGET_XP, it_number, enable_i))
            results_beam = pool.apply_async(
                optimizer, (data, target, 'BeamSearch', TIME_BUDGET_XP,
                            it_number, enable_i))
            results_seqscout = pool.apply_async(
                optimizer, (data, target, 'SeqScout', TIME_BUDGET_XP,
                            it_number, enable_i))
            results_mcts = pool.apply_async(
                optimizer, (data, target, 'MCTSExtent', TIME_BUDGET_XP,
                            it_number, enable_i))

            # Each optimizer call returns (raw_results, optimised_results).
            results_misere, results_misere_opti = results_misere.get()
            results_beam, results_beam_opti = results_beam.get()
            results_seqscout, results_seqscout_opti = results_seqscout.get()
            results_mcts, results_mcts_opti = results_mcts.get()

            # Warn when an algorithm returned fewer than TOP_K patterns.
            if len(results_misere) < TOP_K:
                print("Too few example on misere on dataset {}: {} results".
                      format(datasets_names[i], len(results_misere)))
            if len(results_beam) < TOP_K:
                print(
                    "Too few example on beam_search on dataset {}: {} results".
                    format(datasets_names[i], len(results_beam)))
            if len(results_seqscout) < TOP_K:
                print("Too few example on seqscout on dataset {}: {} results".
                      format(datasets_names[i], len(results_seqscout)))
            if len(results_mcts) < TOP_K:
                print(
                    "Too few example on mctsextend on dataset {}: {} results".
                    format(datasets_names[i], len(results_mcts)))

            # BUG FIX: the raw WRAcc previously reused results_misere for
            # every algorithm; each row now reports its own algorithm's
            # raw results alongside its optimised results.
            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_misere)),
                             dataset=datasets_names[i],
                             Algorithm='misere',
                             WRAcc_opti=max(
                                 0, average_results(results_misere_opti)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_beam)),
                             dataset=datasets_names[i],
                             Algorithm='beam',
                             WRAcc_opti=max(
                                 0, average_results(results_beam_opti)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_seqscout)),
                             dataset=datasets_names[i],
                             Algorithm='SeqScout',
                             WRAcc_opti=max(
                                 0, average_results(results_seqscout_opti)))

            data_add_generic(data_final,
                             WRAcc=max(0, average_results(results_mcts)),
                             dataset=datasets_names[i],
                             Algorithm='MCTSExtent',
                             WRAcc_opti=max(
                                 0, average_results(results_mcts_opti)))

    df = pd.DataFrame(data=data_final)

    sns.set(rc={'figure.figsize': (8, 6.5)})

    df.to_pickle('./local_opti_increase/result_{}'.format(it_number))

    plt.clf()
    # Optimised WRAcc in pastel behind, raw WRAcc in muted in front.
    sns.set_color_codes("pastel")
    ax = sns.barplot(x='dataset', y='WRAcc_opti', hue='Algorithm', data=df)

    sns.set_color_codes("muted")
    ax = sns.barplot(x='dataset', y='WRAcc', hue='Algorithm', data=df)

    plt.savefig('./local_opti_increase/barplot_{}.png'.format(it_number))

    if SHOW:
        plt.show()