Example 1
def plot_l1_with_code_switches(results_dir='../../simulations/messageless/messageless_balanced',
                               languages=('en', 'es')):
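    """Plot per-language L1 performance, including code-switch rates, from performance_per_lang.csv."""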
    df = pd.read_csv(f'{results_dir}/performance_per_lang.csv',
                     index_col=None, header=0, skipinitialspace=True, dtype={'epoch': int})
    plt = Plotter(results_dir=results_dir)
    for l in languages:
        plt.performance(df[df.switch_from == l], fname=f'l1_performance_{l}',
                        include_code_switches=True, max_epochs=40)
Example 2
def cognate_simulations(results_dir, models, only_last_epoch, create_files, create_csv,
                        plot_items, per_switch_direction):
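    """Create the per-model CSV files for the cognate simulations (if create_files) and plot last-epoch cognate effects per model (if plot_items)."""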
    if create_files:
        create_all_model_csv_files(results_dir, models=models, create_csv=create_csv,
                                   per_switch_direction=per_switch_direction, only_last_epoch=only_last_epoch)

    if plot_items:
        plt = Plotter(results_dir=results_dir)
        # plt.plot_cognate_last_epoch()
        all_models = []
        for m in models:
            all_models.extend([f'{m}1', f'{m}2'])

        # plt.performance_all_models(models=all_models)

        # plt.plot_cognate_effect_over_time(df_name=f'count_{fname}', ci=68)
        # for i in range(10, 31, 10):
        #    plt.plot_cognate_effect_over_time(df_name=f'count_{i}cog_models_merged.csv', ci=68, ignore_baseline=True,
        #                                      info_to_plot=('code_switched',))
        #    plt.print_switches_around_switch_point(df_name=f'count_{i}cog_models_merged.csv')
        for m in models:
            m = m.replace("/", "_")
            print(m, results_dir)
            if 'eos' in results_dir or 'bos' in results_dir:
                info_to_plot = ('code_switched', 'switched_before', 'switched_at', 'switched_right_after',
                                'switched_second_after', 'switched_after_anywhere')
            else:
                info_to_plot = ('code_switched',)
            plt.plot_cognate_last_epoch(df_name=f'{"per_lang" if per_switch_direction else ""}'
                                                f'count_{m}_models_merged.csv', ci=68,
                                        info_to_plot=info_to_plot, lineplot=False)
Example 3
def pairwise_cross_model_comparison(create_files, fname='cog_models_merged', only_last_epoch=True,
                                    results_dir='../../simulations/cog_paper/cognate_percentage/pairwise_training/'
                                                'evaluation',
                                    models=('10/cog', '15/cog', '20/cog', '25/cog', '30/cog')):
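    """Merge per-model results for the pairwise cognate-percentage models and plot last-epoch code-switching on the shared sentences."""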
    if create_files:
        create_all_model_csv_files(results_dir, models=models, create_csv=True,
                                   only_last_epoch=only_last_epoch)

    all_df = []
    for m in models:
        m = m.replace('/', '_')
        print(m, f'{results_dir}/{m}_models_merged.csv')
        df = pd.read_csv(f'{results_dir}/{m}_models_merged.csv')
        df['model_name'] = m.split('_')[0]
        all_df.append(df)
    sentences_to_test = pd.concat(all_df, sort=False)

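    # Keep only sentence_idx values that occur once per model and condition (cognate / non-cognate),
    # so every model is compared on the same set of sentences.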
    only_common_sentences = True
    if only_common_sentences:
        count_correct = sentences_to_test.groupby('sentence_idx').count()
        num_per_unique_sentence = len(models) * 2  # models * (cognate + non_cognate sentences)
        sentences_to_test = sentences_to_test.loc[sentences_to_test['sentence_idx'].isin(
            count_correct[count_correct['model_name'] == num_per_unique_sentence].index)]

    sentences_to_test.to_csv(f'{results_dir}/{fname}.csv')
    group_models(results_dir, f'{fname}.csv')

    plt = Plotter(results_dir=results_dir)
    plt.plot_cognate_last_epoch(df_name=f'count_{fname}.csv', ci=68, xaxis_step=5,
                                info_to_plot=('code_switched',), lineplot=True)
Example 4
def non_pairwise_cross_model_comparison(results_dir='../../simulations/cog_paper/cognate_percentage/'
                                                    'non_pairwise_training/',
                                        create_csv=True, num_simulations=70, remove_incorrect_meaning=True):
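    """Merge last-epoch results from the non-pairwise cognate and non-cognate evaluation sets and plot code-switched percentages per test set."""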
    if create_csv:
        df_per_test_set = []
        for fname in ['non_cognate_evaluation', 'cognate_evaluation']:
            models = [i for i in range(0 if 'non' in fname else 10, num_simulations + 1, 10)]
            num_per_unique_sentence = len(models)
            all_dfs = []
            for m in models:
                print(m)
                df = pd.read_csv(f'{results_dir}/{fname}/{m}/all_results.csv')
                df = df[df.epoch == df.epoch.max()].copy()
                if remove_incorrect_meaning:
                    df.drop(df.loc[df.meaning == 0].index, inplace=True)
                if fname == 'cognate_evaluation':
                    assert len(df) == len(df[df.target_has_cognate == True])
                df['model_name'] = m
                df['sentence_idx'] = df.apply(lambda x: hash_string(f'{x.message}{x.network_num}'), axis=1)
                df['test_set'] = fname
                all_dfs.append(df)
            non_pairwise_merged = pd.concat(all_dfs, sort=False)

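            # Keep only sentences that appear in the results of every model, so the test sets share a common item set.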
            only_common_sentences = True
            if only_common_sentences:
                count_correct = non_pairwise_merged.groupby('sentence_idx').count()
                print(num_per_unique_sentence, 'total:', len(non_pairwise_merged))
                non_pairwise_merged = non_pairwise_merged.loc[non_pairwise_merged['sentence_idx'].isin(
                    count_correct[count_correct['model_name'] == num_per_unique_sentence].index)]
                print('clean:', len(non_pairwise_merged))

            df_per_test_set.append(non_pairwise_merged)

        test_sets_merged = pd.concat(df_per_test_set, sort=False)
        test_sets_merged.to_csv(f'{results_dir}/non_pairwise_cognate_non_cognate.csv')

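        # Aggregate per epoch, network, model and test set: number and percentage of code-switched sentences.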
        gb = test_sets_merged.groupby(['epoch', 'network_num', 'model_name', 'test_set']).apply(
            lambda dft: pd.Series({'code_switched': dft.is_code_switched.sum(),
                                   'total_sentences': len(dft.meaning),
                                   'code_switched_percentage': dft.is_code_switched.sum() * 100 / len(dft.meaning)
                                   }))
        gb.to_csv(f'{results_dir}/non_pairwise_test_sets_grouped'
                  f'{"_include_incorrect" if not remove_incorrect_meaning else ""}.csv')

    plt = Plotter(results_dir=results_dir)
    plt.plot_cognate_last_epoch(df_name='non_pairwise_test_sets_grouped.csv', hue='test_set',
                                include_annotations=False, lineplot=True, ci=68, xrow='model_name')
Example 5
def code_switching_patterns_model_comparison():
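    """Compare code-switching patterns across the early, esen and enes models: optionally rebuild the CSVs, then plot L1 and L2 performance."""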
    results_dir = '../../simulations/patterns'
    plot_code_switches = True
    create_csv_files = False

    if create_csv_files:
        num_sim = 40
        for m in ['early', 'esen', 'enes']:
            create_dataframes_for_plots(results_dir=f'{results_dir}/{m}', epoch_from=0, epoch_to=40,
                                        simulation_range=range(1, num_sim + 1))

    plt = Plotter(results_dir=results_dir)
    if plot_code_switches:
        plt.l1_performance_all_models()
        plt.l2_performance_all_models()
Example 6
def plot_l1_l2_performance_cognate_models(results_dir='../../simulations/cog_paper/'):
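    """Plot L1 performance (and L2 performance for the enes/esen models) with code switches for the cognate models (cog1, cog2)."""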
    plt = Plotter(results_dir=results_dir)
    for testset in ['generic']:  # ['bos', 'eos', 'generic']:
        for model in ['balanced', 'enes', 'esen']:
            for m in ['cog1', 'cog2']:
                print(f'{results_dir}/{testset}/{model}/{m}/performance_per_lang.csv')
                df = pd.read_csv(f'{results_dir}/{testset}/{model}/{m}/performance_per_lang.csv',
                                 index_col=None, header=0, skipinitialspace=True, dtype={'epoch': int})
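                # The model name encodes the language pair, e.g. 'enes' -> L1 'en', L2 'es';
                # the balanced model plots both languages as L1.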
                l2_lang = model[-2:] if model != 'balanced' else None
                lang = [model[-4:-2]] if model != 'balanced' else ['en', 'es']
                if model != 'balanced':
                    df['l2_epoch'] = 10
                print(lang, l2_lang)
                for l in lang:
                    plt.performance(df[df.switch_from == l], fname=f'l1_{testset}_{model}_{m}_{l}',
                                    include_code_switches=True)
                if l2_lang:
                    plt.l2_performance(df, l2_lang=l2_lang, fname=f'l2_{testset}_{model}_{m}_{l2_lang}',
                                       include_code_switches=True)
Example 7
elif platform.system() == 'Darwin':
    messagebox.showinfo("Alert", "Platform is macOS - Auto load dataset")
    try:
        omega = LoadOmega.Load_Omega(
            "../../data/Dataset 2/run2/run2.omega.pasco.csv")
        print("Loaded omega run 2 from dataset 2")
    except Exception:
        print("Unable to load dataset. Please contact maintainer HA.")
else:
    messagebox.showinfo("ALERT", "Select an omega dataset from data/dataset/")
    try:
        omega = LoadOmega.Load_Omega()
    except Exception:
        print("No files were chosen. Exiting with code 69-420-247...")

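# Build simulated accelerometer data from the rotary (omega) data and rotate it, using random integer parameters.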
accel = Simulate.AccelData_Rotate(
    Simulate.AccelData_CreateFromRotary(omega, random.randint(0, 10)),
    random.randint(0, 2))

plt1 = Plotter.MultiPlotter([accel], [omega])

a = []

for i in range(100):
    a.append(
        [random.randint(0, 20),
         random.randint(0, 20),
         random.randint(0, 20)])

plt1.display()
Example 8
            eval_sets = set()
            if args.eval_test:
                eval_sets.add('test')
            if args.eval_training:
                eval_sets.add('training')
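            # Compute mean and standard deviation across the valid simulations for each evaluated set.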
            results_mean_and_std = compute_mean_and_std(
                valid_results, evaluated_sets=eval_sets, epochs=args.epochs)

            with lz4.open(f"{results_dir}/summary_results.pickled",
                          'wb') as pckl:
                pickle.dump(results_mean_and_std, pckl, protocol=-1)

            plot = Plotter(results_dir=results_dir,
                           summary_sim=num_valid_simulations,
                           title=args.title,
                           epochs=args.epochs,
                           num_training=num_training,
                           num_test=num_test)
            plot.plot_results(
                results_mean_and_std,
                cognate_experiment=args.cognate_experiment,
                test_sentences_with_pronoun=test_sentences_with_pronoun,
                auxiliary_experiment=args.auxiliary_experiment,
                evaluated_datasets=eval_sets)
            if not isinstance(
                    results_mean_and_std['correct_code_switches']['test'],
                    int):
                with open(f"{results_dir}/results.log", 'w') as f:
                    f.write(
                        f"Code-switched percentage (test set): "
                        f"{Plotter.percentage(results_mean_and_std['correct_code_switches']['test'], num_test)}"
Example 9
def plot_regression_analysis_results(results_dir='../../mixed_effects_regression_analysis'):
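    """Plot the merged cognate CSVs produced by the mixed-effects regression analysis."""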
    plt = Plotter(results_dir=results_dir)
    for fname, ylim in [('cog_enes_esen_sim_per_L1', 3)]:
        # Other options: ('cog_enes_esen_sim_per_L2', 30), ('cog_balanced_enes_esen_sim', 26),
        # 'cog_enes_all_esen_all_sim_per_L1' (also available per L2)
        plt.plot_merged_cognate_csv(df_name=fname, ylim=ylim)