Code example #1
File: neural_network8.py Project: dfd/CS7641_hw02
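The excerpt below omits the file's import block. A plausible minimal set, assuming the project uses mlrose-hiive for LogisticRegression and scikit-learn for preprocessing and metrics, would be the following; STEM, log_filename, load_data and get_param_grids are helpers defined elsewhere in the project:

import logging
import pickle
import time

from mlrose_hiive import LogisticRegression
from sklearn.metrics import balanced_accuracy_score, f1_score
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler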
def run_experiments(problems):
    lfname = log_filename(problems, STEM)
    logging.basicConfig(filename='./nn_logs/' + lfname + '.txt',
                        level=logging.DEBUG)
    algo_runs = {
        'gradient_descent': 3,
        'random_hill_climb': 3,
        'simulated_annealing': 3,
        'genetic_alg': 3
    }
    param_grids = get_param_grids()
    results = {}
    for problem in problems:
        results[problem] = {}
        X_train, y_train = load_data()
        ss = StandardScaler()
        X_train_scaled = ss.fit_transform(X_train)
        for algo in algo_runs.keys():
            results[problem][algo] = {}
            pg = ParameterGrid(param_grids[algo])
            for i, kwargs in enumerate(pg):
                logging.info('problem ' + problem + ' algo ' + algo + ' i ' +
                             str(i) + str(kwargs))
                results[problem][algo][i] = {}
                print(algo)
                for run in range(algo_runs[algo]):
                    results[problem][algo][i][run] = {}
                    mdl = LogisticRegression(
                        algorithm=algo,
                        bias=True,
                        early_stopping=True,
                        max_attempts=500,
                        random_state=0,
                        curve=True,
                        **kwargs)

                    start_time = time.time()
                    logging.info('start time ' + str(start_time))
                    mdl.fit(X_train_scaled, y_train)
                    end_time = time.time()
                    duration = end_time - start_time
                    logging.info('duration ' + str(duration))
                    y_pred_train = mdl.predict(X_train_scaled)
                    train_acc = balanced_accuracy_score(y_train, y_pred_train)
                    train_f1 = f1_score(y_train, y_pred_train)
                    results[problem][algo][i][run]['fit_time'] = duration
                    results[problem][algo][i][run]['curve'] = mdl.fitness_curve
                    results[problem][algo][i][run][
                        'balanced_accuracy'] = train_acc
                    results[problem][algo][i][run]['F1_score'] = train_f1
                    logging.info('train ba ' + str(train_acc))

    with open('./nn_output/' + STEM + '_nn_results_dict.pkl', 'wb') as f:
        pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
Code example #2
File: neural_network9.py Project: dfd/CS7641_hw02
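As with example #1, the import block is not shown. Because this variant also plots the data, a plausible set (again assuming mlrose-hiive and scikit-learn) adds numpy and matplotlib:

import logging
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np

from mlrose_hiive import LogisticRegression
from sklearn.metrics import balanced_accuracy_score, f1_score
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler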
def run_experiments(problems):
    lfname = log_filename(problems, STEM)
    logging.basicConfig(filename='./nn_logs/' + lfname + '.txt',
                        level=logging.DEBUG)
    algo_runs = {
        'gradient_descent': 3,
        'random_hill_climb': 3,
        'simulated_annealing': 3,
        'genetic_alg': 3
    }
    param_grids = get_param_grids()
    results = {}
    for problem in problems:
        results[problem] = {}
        X_train, y_train = load_data()
        n_classes = np.unique(y_train).size
        markers = 'xo'
        colors = ['purple', 'yellow']
        fig, axes = plt.subplots(1, 1, figsize=(6, 6))
        for k, label in enumerate(np.unique(y_train)):
            plot_mask = (y_train == label)
            plot_mask = plot_mask.astype(bool)
            plt.scatter(
                X_train[plot_mask, 0],
                X_train[plot_mask, 1],
                marker=markers[k],
                c=colors[k],
                edgecolor='k',
                alpha=0.6)
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.suptitle('The Two Deterministic Features of the Synthetic Data')
        plt.savefig('./nn_plots/' + STEM + '_data.png')

        ss = StandardScaler()
        X_train_scaled = ss.fit_transform(X_train)
        for algo in algo_runs.keys():
            results[problem][algo] = {}
            pg = ParameterGrid(param_grids[algo])
            for i, kwargs in enumerate(pg):
                logging.info('problem ' + problem + ' algo ' + algo + ' i ' +
                             str(i) + str(kwargs))
                results[problem][algo][i] = {}
                print(algo)
                for run in range(algo_runs[algo]):
                    results[problem][algo][i][run] = {}
                    mdl = LogisticRegression(
                        algorithm=algo,
                        bias=True,
                        early_stopping=True,
                        max_attempts=500,
                        random_state=0,
                        curve=True,
                        **kwargs)

                    start_time = time.time()
                    logging.info('start time ' + str(start_time))
                    mdl.fit(X_train_scaled, y_train)
                    end_time = time.time()
                    duration = end_time - start_time
                    logging.info('duration ' + str(duration))
                    y_pred_train = mdl.predict(X_train_scaled)
                    train_acc = balanced_accuracy_score(y_train, y_pred_train)
                    train_f1 = f1_score(y_train, y_pred_train)
                    results[problem][algo][i][run]['fit_time'] = duration
                    results[problem][algo][i][run]['curve'] = mdl.fitness_curve
                    results[problem][algo][i][run][
                        'weights'] = mdl.fitted_weights
                    results[problem][algo][i][run][
                        'balanced_accuracy'] = train_acc
                    results[problem][algo][i][run]['F1_score'] = train_f1
                    logging.info('train ba ' + str(train_acc))

    with open('./nn_output/' + STEM + '_nn_results_dict.pkl', 'wb') as f:
        pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
Code example #3
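No file name survived for this example. It depends on project helpers that are not shown (log_filename, get_problems, get_optimizers, get_runs_per_problem, get_func_kwargs_per_problem, kwarg_str, run_opt, file_join, and the STEM constant); judging from the calls it makes, a plausible import block would be:

import logging
import multiprocessing as mp
import time

import matplotlib.pyplot as plt
import pandas as pd

idx = pd.IndexSlice  # used below for MultiIndex slicing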
def run_experiments(problems):
    lfname = log_filename(problems, STEM)
    logging.basicConfig(filename='./logs/' + lfname + '.txt',
                        level=logging.DEBUG)
    problems_dict = get_problems()
    optimizers = get_optimizers()
    runs_per_problem = get_runs_per_problem()
    func_kwargs_per_problem = get_func_kwargs_per_problem()
    best_state_collection = {}
    best_fitness_collection = {}
    curves = {}
    eval_curves = {}
    times = {}
    iters = {}

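    # Results are accumulated in nested dicts keyed as
    # [problem][exp_id][opt_name][f_kwargs_id][run]; they are flattened
    # into MultiIndex DataFrames once the experiments finish.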
    for problem in problems:
        print('problem', problem)
        best_state_collection[problem] = {}
        best_fitness_collection[problem] = {}
        curves[problem] = {}
        eval_curves[problem] = {}
        times[problem] = {}
        iters[problem] = {}
        prob_dict = problems_dict[problem]
        for exp_id, experiment in enumerate(prob_dict['experiments']):
            print('exp_id', exp_id)
            kwargs = experiment['kwargs']
            problem_kwargs = kwarg_str(kwargs)
            logging.info('problem ' + problem + ' experiment id ' +
                         str(exp_id) + ' for ' + problem_kwargs)
            best_state_collection[problem][exp_id] = {}
            best_fitness_collection[problem][exp_id] = {}
            curves[problem][exp_id] = {}
            eval_curves[problem][exp_id] = {}
            times[problem][exp_id] = {}
            iters[problem][exp_id] = {}
            fitness = prob_dict['class']
            opt_prob = prob_dict['opt_prob']
            for opt_name, optimizer in optimizers.items():
                best_state_collection[problem][exp_id][opt_name] = {}
                best_fitness_collection[problem][exp_id][opt_name] = {}
                curves[problem][exp_id][opt_name] = {}
                eval_curves[problem][exp_id][opt_name] = {}
                times[problem][exp_id][opt_name] = {}
                iters[problem][exp_id][opt_name] = {}
                func = optimizer['function']
                f_kwargs_id = func_kwargs_per_problem[problem][opt_name]
                func_kwargs = optimizer['kwargs'][f_kwargs_id]
                print('fitness', fitness.__name__)
                print('kwargs', kwargs)
                print('func', func)
                print('func_kwargs', func_kwargs)
                func_kwargs_str = kwarg_str(func_kwargs)
                logging.info('problem ' + problem + ' experiment id ' +
                             str(exp_id) + ' fkwargs_id ' + str(f_kwargs_id) +
                             ' is ' + func_kwargs_str)
                fit_func = fitness(**kwargs)
                problem_fit = opt_prob(experiment['length'], fit_func)
                print('problem_fit', problem_fit)
                best_state_collection[problem][exp_id][opt_name][
                    f_kwargs_id] = {}
                best_fitness_collection[problem][exp_id][opt_name][
                    f_kwargs_id] = {}
                curves[problem][exp_id][opt_name][f_kwargs_id] = {}
                eval_curves[problem][exp_id][opt_name][f_kwargs_id] = {}
                times[problem][exp_id][opt_name][f_kwargs_id] = {}
                iters[problem][exp_id][opt_name][f_kwargs_id] = {}

                param_list = []

                for run in range(runs_per_problem[problem]):
                    params = {
                        'problem_fit': problem_fit,
                        'max_attempts': 200,
                        'run': run,
                        'func_kwargs': func_kwargs,
                        'func': func
                    }
                    param_list.append(params)

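                # Dispatch every seeded run of this configuration in
                # parallel; run_opt (defined elsewhere in the project)
                # returns, per run, a dict with keys 'best_state',
                # 'best_fitness', 'curve', 'eval_curve' and 'duration',
                # which is unpacked below.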
                logging.info('time ' + str(time.time()))
                with mp.Pool() as pool:
                    outcome = pool.map(run_opt, param_list)

                for run in range(runs_per_problem[problem]):
                    best_state_collection[problem][exp_id][opt_name][
                        f_kwargs_id][run] = outcome[run]['best_state']
                    best_fitness_collection[problem][exp_id][opt_name][
                        f_kwargs_id][run] = outcome[run]['best_fitness']
                    curves[problem][exp_id][opt_name][f_kwargs_id][
                        run] = outcome[run]['curve']
                    eval_curves[problem][exp_id][opt_name][f_kwargs_id][
                        run] = outcome[run]['eval_curve']
                    times[problem][exp_id][opt_name][f_kwargs_id][
                        run] = outcome[run]['duration']
                    iters[problem][exp_id][opt_name][f_kwargs_id][run] = len(
                        outcome[run]['curve'])


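    # Flatten the five-level nested dict into a DataFrame indexed by
    # (problem, experiment, algo, params, run).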
    best_fitness_df = pd.DataFrame.from_dict(
        {(i, j, k, l, m): best_fitness_collection[i][j][k][l][m]
         for i in best_fitness_collection.keys()
         for j in best_fitness_collection[i].keys()
         for k in best_fitness_collection[i][j].keys()
         for l in best_fitness_collection[i][j][k].keys()
         for m in best_fitness_collection[i][j][k][l].keys()},
        orient='index')
    best_fitness_df.columns = ['fitness']
    best_fitness_df.index = pd.MultiIndex.from_tuples(
        best_fitness_df.index,
        names=('problem', 'experiment', 'algo', 'params', 'run'))
    for problem in problems:
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):

            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.2)
            summary = best_fitness_df.loc[idx[(
                problem, exp_id)], :].unstack().droplevel(1)
            summary.plot(kind='bar',
                         rot=45,
                         ax=ax,
                         legend=False,
                         color="C0",
                         edgecolor='black')
            plt.xlabel('Each Run Grouped By Algorithm')
            plt.ylabel('fitness')
            plt.suptitle('Fitness Result on ' + problem + '\nfor ' +
                         str(runs_per_problem[problem]) +
                         ' runs of each algorithm')
            plt.savefig('./plots/' + STEM + '_end_fitness_by_run_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()

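    # Runs stop after different numbers of iterations, so forward-fill
    # pads the shorter fitness curves before averaging across runs.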
    curves_df = pd.DataFrame.from_dict(
        {(i, j, k, l, m): curves[i][j][k][l][m]
         for i in curves.keys() for j in curves[i].keys()
         for k in curves[i][j].keys() for l in curves[i][j][k].keys()
         for m in curves[i][j][k][l].keys()},
        orient='index')
    curves_df = curves_df.ffill(axis=1)
    curves_df.index = pd.MultiIndex.from_tuples(curves_df.index,
                                                names=('problem', 'experiment',
                                                       'algo', 'params',
                                                       'run'))
    avg_curves = curves_df.groupby(level=list(range(4))).mean()
    for problem in problems:
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):

            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.15)
            plot_df = avg_curves.loc[idx[problem, exp_id, :, ], :].droplevel(
                level=[0, 1, 3])
            plot_df.T.plot(ax=ax)
            plt.xlabel('iteration')
            plt.ylabel('fitness')
            plt.suptitle('Fitness on ' + problem + ' by iteration')
            plt.legend(loc='lower right')
            plt.savefig('./plots/' + STEM + '_fitness_curves_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()

    eval_curves_df = pd.DataFrame.from_dict(
        {(i, j, k, l, m): eval_curves[i][j][k][l][m]
         for i in eval_curves.keys() for j in eval_curves[i].keys()
         for k in eval_curves[i][j].keys()
         for l in eval_curves[i][j][k].keys()
         for m in eval_curves[i][j][k][l].keys()},
        orient='index')
    eval_curves_df.index = pd.MultiIndex.from_tuples(
        eval_curves_df.index,
        names=('problem', 'experiment', 'algo', 'params', 'run'))
    avg_eval_curves = eval_curves_df.groupby(level=list(range(4))).mean()
    for problem in problems:
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):

            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.15)
            plot_df = avg_eval_curves.loc[idx[problem,
                                              exp_id, :, ], :].droplevel(
                                                  level=[0, 1, 3])
            plot_df.T.plot(ax=ax)
            plt.xlabel('iteration')
            plt.ylabel('evaluations')
            plt.yscale('log')
            plt.suptitle('Evaluations on ' + problem + ' by Iteration')
            plt.legend(loc='lower right')
            plt.savefig('./plots/' + STEM + '_eval_curves_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()
    colors = ['C0', 'C1', 'C2', 'C3']


    for problem in problems:
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):
            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.15)
            for i, (opt_name, optimizer) in enumerate(optimizers.items()):
                color = colors[i]
                f_kwargs_id = func_kwargs_per_problem[problem][opt_name]
                for run in range(runs_per_problem[problem]):
                    fitness = curves_df.loc[idx[problem, exp_id, opt_name,
                                                f_kwargs_id, run], :]
                    evals = eval_curves_df.loc[idx[problem, exp_id, opt_name,
                                                   f_kwargs_id, run], :]
                    if run == 0:
                        ax.plot(evals,
                                fitness,
                                color=color,
                                linestyle=':',
                                label=opt_name)
                    else:
                        ax.plot(evals,
                                fitness,
                                color=color,
                                linestyle=':',
                                label='_nolegend_')
            ax.set_xlabel('evaluations')
            ax.set_ylabel('fitness')
            ax.set_xscale('log')
            ax.set_title('Fitness by Evaluation for ' + problem)
            ax.legend()
            plt.savefig('./plots/' + STEM + '_fitness_by_eval_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()

    times_df = pd.DataFrame.from_dict(
        {(i, j, k, l, m): times[i][j][k][l][m]
         for i in times.keys() for j in times[i].keys()
         for k in times[i][j].keys() for l in times[i][j][k].keys()
         for m in times[i][j][k][l].keys()},
        orient='index')
    times_df.columns = ['time']
    times_df.index = pd.MultiIndex.from_tuples(times_df.index,
                                               names=('problem', 'experiment',
                                                      'algo', 'params', 'run'))
    iters_df = pd.DataFrame.from_dict(
        {(i, j, k, l, m): iters[i][j][k][l][m]
         for i in iters.keys() for j in iters[i].keys()
         for k in iters[i][j].keys() for l in iters[i][j][k].keys()
         for m in iters[i][j][k][l].keys()},
        orient='index')
    iters_df.columns = ['iterations']
    iters_df.index = pd.MultiIndex.from_tuples(iters_df.index,
                                               names=('problem', 'experiment',
                                                      'algo', 'params', 'run'))
    combined = pd.merge(left=times_df,
                        right=iters_df,
                        how='left',
                        left_index=True,
                        right_index=True)

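    # Joining wall-clock times with iteration counts lets the plots below
    # report an average seconds-per-iteration figure for each algorithm.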
    avgtimes = {}
    for problem in problems:
        avgtimes[problem] = {}
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):
            avgtimes[problem][exp_id] = {}
            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.15)
            plot_df = combined.loc[idx[(problem, exp_id)], :]
            for opt_name, optimizer in optimizers.items():
                df = plot_df.loc[idx[opt_name], :]
                avgtime = df['time'].sum() / df['iterations'].sum()
                avgtimes[problem][exp_id][opt_name] = avgtime

                ax.scatter(df['iterations'],
                           df['time'],
                           label=f'{opt_name}: {avgtime:.2e} s/iter')
                ax.set_yscale('log')
            plt.xlabel('iterations')
            plt.ylabel('completion time (seconds)')
            plt.suptitle('Time vs Iterations for each algorithm on ' + problem)
            plt.legend()
            plt.savefig('./plots/' + STEM + '_time_iterations_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()

    combined = pd.merge(left=times_df,
                        right=best_fitness_df,
                        how='left',
                        left_index=True,
                        right_index=True)

    for problem in problems:
        for exp_id, experiment in enumerate(
                problems_dict[problem]['experiments']):
            fig, ax = plt.subplots(figsize=(6, 4))
            plt.subplots_adjust(bottom=.15)
            plot_df = combined.loc[idx[(problem, exp_id)], :]
            for opt_name, optimizer in optimizers.items():
                avgtime = avgtimes[problem][exp_id][opt_name]
                df = plot_df.loc[idx[opt_name], :]
                ax.scatter(df['time'],
                           df['fitness'],
                           label=f'{opt_name}: {avgtime:.2e} s/iter')
                ax.set_xscale('log')
            plt.xlabel('time (seconds)')
            plt.ylabel('Fitness')
            plt.suptitle('Fitness vs Time for each algorithm on ' + problem)
            plt.legend()
            plt.savefig('./plots/' + STEM + '_fitness_time_' +
                        file_join([problem, exp_id]) + '.png')
            plt.clf()

    best_fitness_df.to_csv('./results/' + lfname + '_best_fitness.csv')
    curves_df.to_csv('./results/' + lfname + '_curves.csv')
    eval_curves_df.to_csv('./results/' + lfname + '_eval_curves.csv')
    times_df.to_csv('./results/' + lfname + '_times.csv')
    iters_df.to_csv('./results/' + lfname + '_iterations.csv')

    logging.info(str(best_fitness_collection))
    logging.info(str(curves))
    logging.info(str(times))