def main(args):
    setup = experiment_setups.parse(args.setup)
    dirname = fileutil.run_dir(setup.dest_dir, setup.name,
                               setup.max_quantifier_length, setup.model_size,
                               setup.pareto_name)
    file_util = FileUtil(dirname)

    expressions = language_loader.load_all_evaluated_expressions(file_util)
    languages_0 = language_generator.generate_sampled(
        expressions, args.lang_size, args.sample_size // args.lang_size)
    universe = generator.generate_simplified_models(setup.model_size)

    measure_complexity = SumComplexityMeasurer(args.lang_size, 1)
    measure_informativeness = SimMaxInformativenessMeasurer(universe)
    pool = ProcessPool(nodes=setup.processes)
    languages = languages_0  # languages are iteratively updated in the loop below

    for gen in range(args.generations):
        print('GENERATION {0}'.format(gen))
        print('measuring')
        complexity = pool.map(measure_complexity, languages)
        informativeness = pool.map(measure_informativeness, languages)

        measurements = [(1 - inf, comp)
                        for inf, comp in zip(informativeness, complexity)]

        print('calculating dominating')
        dominating_indices = pygmo.non_dominated_front_2d(measurements)
        dominating_languages = [languages[i] for i in dominating_indices]

        print('mutating')
        languages = sample_mutated(dominating_languages, args.sample_size,
                                   expressions)

    # these indices and measurements refer to the final generation's front
    language_indices = [[e.index for e in lang]
                        for lang in dominating_languages]
    dominating_complexity = [complexity[i] for i in dominating_indices]
    dominating_informativeness = [
        informativeness[i] for i in dominating_indices
    ]

    file_util.dump_dill(dominating_complexity,
                        'complexity_wordcomplexity.dill')
    file_util.dump_dill(dominating_informativeness,
                        'informativeness_simmax.dill')
    file_util.dump_dill(language_indices, 'language_indices.dill')
    file_util.save_stringlist([list(map(str, lang)) for lang in languages],
                              'languages.txt')

    print("generate_evolutionary.py finished.")
def plot_from_csv(filepath, title, perf_index, comp_index, add_ndf):
    '''
    Read fitness data from a CSV file and pass it to plot_front.
    '''

    fitness = []
    ndf = []

    with open(filepath) as csv_file:
        csv_data = csv.reader(csv_file, delimiter=',')
        for row in csv_data:
            fitness.append([float(row[perf_index]), float(row[comp_index])])

    if add_ndf: ndf = pyg.non_dominated_front_2d(fitness)

    plot_front(title, fitness, ndf)
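
A hypothetical invocation of the function above; the CSV path and column
indices are assumptions for illustration, with one fitness vector per row:

plot_from_csv('results/fitness.csv', 'Pareto front',
              perf_index=0, comp_index=1, add_ndf=True)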
def write_ndf_csv(self, name):
    '''Write a CSV file containing the non-dominated vectors of the optimisation.'''
    fitness_keys = list(self.fitness_dict.keys())
    fitness_values = list(self.fitness_dict.values())
    _, _, dc, ndr = pyg.fast_non_dominated_sorting(fitness_values)
    ndf = pyg.non_dominated_front_2d(fitness_values)

    logger.info(name + "\nNon-dominated vectors: " + str(len(ndf)) +
                "\nDomination count: " + str(dc) +
                "\nNon-domination ranks: " + str(ndr))
    pl.plot_front(name + " all fits", fitness_values, ndf)

    # Save NDF results to file; newline='' avoids blank rows on Windows
    with open(cfg.RESULTS_PATH + cfg.timestamp + '/NDF-' + name + '.csv',
              mode='w', newline='') as data_file:
        data_writer = csv.writer(data_file, delimiter=',', quotechar='"',
                                 quoting=csv.QUOTE_MINIMAL)
        for i in ndf:
            data = np.concatenate((fitness_keys[i], fitness_values[i]), axis=None)
            data = np.concatenate((data, self.complete_results[fitness_keys[i]]), axis=None)
            data_writer.writerow(data)
pareto_data = pd.DataFrame({
    'complexity': evo_comp,
    'comm_cost': [1 - x for x in evo_inf]
})

main_data = pd.read_csv(
    '../../../results/Final_length=12_size=10/tables/pandas_27_may_natural_degrees.csv'
)

comm_cost = list(pareto_data['comm_cost'].values) + list(
    main_data['comm_cost'].values)
complexity = list(pareto_data['complexity'].values) + list(
    main_data['complexity'].values)

dominating_indices = pygmo.non_dominated_front_2d(
    list(zip(complexity, comm_cost)))
dominating_complexity = [complexity[i] for i in dominating_indices]
dominating_comm_cost = [comm_cost[i] for i in dominating_indices]
values = list(zip(dominating_comm_cost, dominating_complexity))

# sort by communicative cost ascending, breaking ties by complexity descending
values.sort(key=lambda val: (val[0], -val[1]))
values = [np.array(value) for value in values]

x = []
y = []
interval = .001

for (left, right) in zip(values[:-1], values[1:]):
    diff = right - left
    if norm(diff) == 0:
Example #5
            item.reset_index(inplace=True, drop=True)
            languages = pd.concat([languages, item])
            languages.reset_index(inplace=True, drop=True)

    # Run the evolutionary algorithm for 100 generations, each with 2000 languages
    num_of_gen = 100
    sample_size = 2000
    max_mut = 3

    for i in range(1, num_of_gen + 1):
        temp = copy(languages)
        temp = prep(temp)
        tempcc = costcomplexity(temp)
        tempcc['new_col'] = list(
            zip(tempcc.costoflanguages, tempcc.complexityoflanguages))
        dominating_indices = pygmo.non_dominated_front_2d(
            list(tempcc['new_col']))
        dominating_languages_list = [
            tempcc.LANG[k] for k in dominating_indices
        ]  # k, not i: the outer loop variable i must not be shadowed
        dominating_languages_df = languages[languages['LANG'].isin(
            dominating_languages_list)]
        languages = sample_mutated(dominating_languages_df, sample_size, i,
                                   max_mut)

    languages = prep(languages)
    costcomlang = costcomplexity(languages)
    costcomlang.to_csv(Folder + 'finalgencostcom.csv', index=False)

    # Find the dominant languages from the final generation + languages of Experiment1
    allfinal = pd.read_csv(
        Folder +
Example #6
        new_l.append([l1[i],l2[i]])
    return new_l

def create_configs(keys):
    configs = []
    l1 = list(itertools.permutations(keys, 2))
    for each in l1:
        for key in keys:
            if key not in each:
                # skip if a config with the first two labels swapped was already added
                if [each[0], key, each[1]] not in configs:
                    configs.append([key, each[0], each[1]])
    return configs


ndf, dl, dc, ndr = pg.fast_non_dominated_sorting(points = [[0,1],[-1,3],[2.3,-0.2],[1.1,-0.12],[1.1, 2.12],[-70,-100]])
ndf = pg.non_dominated_front_2d(points = [[0,1],[-1,3],[2.3,-0.2],[1.1,-0.12],[1.1, 2.12],[-70,-100]])

print(ndf)
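
In this toy data set, [-70, -100] is smaller than every other point in both
objectives, so it dominates all of them and the printed front is [5].
fast_non_dominated_sorting additionally returns the per-point domination
lists (dl), domination counts (dc) and non-domination ranks (ndr).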

df = pd.read_csv('output/data64.csv', sep = "\t")
datasets = list(set(df["dataset"]))

#eval_labels={"silhouette_score":1,"calinski_harabasz_score":1,"davies_bouldin_score":-1,"SSE":-1,"nSSE":-1}
#eval_labels={"silhouette_score":1,"calinski_harabasz_score":1,"davies_bouldin_score":-1}
#eval_labels={"Baker_Hubert_Gamma":-1,"Banfeld_Raferty":-1,"Davies_Bouldin":-1,"Dunns_index":1,"McClain_Rao":-1,"PBM_index":1,"Ratkowsky_Lance":1,"Ray_Turi":-1,"Scott_Symons":-1,"Wemmert_Gancarski":1,"Xie_Beni":-1,"c_index":-1,"g_plus_index":-1,"i_index":1,"modified_hubert_t":1,"point_biserial":1,"s_dbw":-1,"silhouette":1,"tau_index":1,"IIndex":1,"SDBW":-1,"calinski_harabasz_score":1}
eval_labels={"Banfeld_Raferty":-1,"Davies_Bouldin":-1,"Dunns_index":1,"McClain_Rao":-1,"PBM_index":1,"Ratkowsky_Lance":1,"Ray_Turi":-1,"Scott_Symons":-1,"Xie_Beni":-1,"c_index":-1,"i_index":1,"modified_hubert_t":1,"point_biserial":1,"s_dbw":-1,"silhouette":1,"IIndex":1,"SDBW":-1,"calinski_harabasz_score":1}

configs = create_configs(list(eval_labels.keys()))
print(len(configs))
print(len(list(itertools.permutations(list(eval_labels.keys()), 3))))
infos = []
Example #7
"""
Pareto Front
"""
PF_total = []
for pf in range(len(Sigma_pred.T)):
    r = Sigma_pred[:, pf]
    un = Sigma_unpop[:, pf]

    PF = []
    points = []
    for ii in range(len(r)):
        points.append([-r[ii], -un[ii]])
    if len(np.unique(points)) == 2:
        # degenerate case (e.g. all points identical): fall back to point 0
        PF.append(np.array([0]))
    else:
        PF.append(pg.non_dominated_front_2d(points))
    PF_total.append([PF])

# plot PFs for user 0
pf = 0
r = Sigma_pred[:, pf]
un = Sigma_unpop[:, pf]
print("PFs for user {} is \n{}: ".format(pf, PF_total[pf][0][0]))

n = list(np.arange(1, NP + 1))
fig, ax = plt.subplots()
ax.scatter(r, un)
for i, txt in enumerate(n):
    ax.annotate(txt, (r[i], un[i]))

plt.scatter(r[PF_total[pf][0][0]],
Example #8
import pygmo
import analysisutil

analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')

(args, setup, file_util) = analysisutil.init()

languages = file_util.load_dill('languages.dill')
informativeness = file_util.load_dill('informativeness_{0}.dill'.format(
    args.informativeness_strategy))
complexity = file_util.load_dill('complexity_{0}.dill'.format(
    args.complexity_strategy))

measurements = [(1 - inf, comp)
                for inf, comp in zip(informativeness, complexity)]

dominating_indices = pygmo.non_dominated_front_2d(measurements)

dominating_languages = [(languages[i], complexity[i], informativeness[i])
                        for i in dominating_indices]

filename = '{0}/dominating_languages_{1}_{2}.txt'.format(
    file_util.folderName, args.complexity_strategy,
    args.informativeness_strategy)

with open(filename, 'w') as f:
    for lang, comp, inf in dominating_languages:
        f.write("{0}\nComplexity      : {1}\nInformativeness : {2}\n".format(
            list(map(str, lang)), comp, inf))
def NSGA2_pygmo(model, fevals, lb, ub, cf=None):
    """Finds the estimated Pareto front of a GPy model using NSGA2 [1]_.

    Parameters
    ----------
    model : GPy.models.gp_regression.GPRegression
        GPy regression model on which to find the Pareto front of its mean
        prediction and standard deviation.
    fevals : int
        Maximum number of times to evaluate a location using the model.
    lb : (D, ) numpy.ndarray
        Lower bound box constraint on the D-dimensional decision space.
    ub : (D, ) numpy.ndarray
        Upper bound box constraint on the D-dimensional decision space.
    cf : callable, optional
        Constraint function that returns True if it is called with a
        valid decision vector, else False.

    Returns
    -------
    X_front : (F, D) numpy.ndarray
        The F D-dimensional locations on the estimated Pareto front.
    musigma_front : (F, 2) numpy.ndarray
        The corresponding mean response and standard deviation of the locations
        on the front such that a point X_front[i, :] has a mean prediction
        musigma_front[i, 0] and standard deviation musigma_front[i, 1].

    Notes
    -----
    NSGA2 [1]_ discards locations on the Pareto front if the size of the front
    is greater than the population size. We counteract this by storing every
    evaluated location with its corresponding mean and standard deviation and
    calculating the Pareto front over the full archive, thereby making the
    most of every GP model evaluation.

    References
    ----------
    .. [1] Kalyanmoy Deb, Amrit Pratap, Sameer Agarwal, and T. Meyarivan.
       A fast and elitist multiobjective genetic algorithm: NSGA-II.
       IEEE Transactions on Evolutionary Computation 6, 2 (2002), 182–197.
    """
    # internal class for the pygmo optimiser
    class GPY_WRAPPER(object):
        def __init__(self, model, lb, ub, cf, evals):
            # model = GPy model
            # lb = np.array of lower bounds on X
            # ub = np.array of upper bounds on X
            # cf = callable constraint function
            # evals = total evaluations to be carried out
            self.model = model
            self.lb = lb
            self.ub = ub
            self.nd = lb.size
            self.got_cf = cf is not None
            self.cf = cf
            self.i = 0  # evaluation pointer

        def get_bounds(self):
            return (self.lb, self.ub)

        def get_nobj(self):
            return 2

        def fitness(self, X):
            X = np.atleast_2d(X)
            f = model_fitness(X, self.model, self.cf, self.got_cf,
                              self.i, self.i + X.shape[0])
            self.i += X.shape[0]
            return f

    # fitness function for the optimiser
    def model_fitness(X, model, cf, got_cf, start_slice, end_slice):
        valid = True

        # if we select a location that violates the constraint,
        # ensure it cannot dominate anything by having its fitness values
        # maximally bad (i.e. set to infinity)
        if got_cf:
            if not cf(X):
                f = [np.inf, np.inf]
                valid = False

        if valid:
            mu, sigmaSQR = model.predict(X, full_cov=False)
            # negate the standard deviation: NSGA2 minimises both objectives,
            # so minimising -sqrt(sigmaSQR) maximises predictive uncertainty
            f = [mu.flat[0], -np.sqrt(sigmaSQR).flat[0]]

        # store every point ever evaluated
        model_fitness.X[start_slice:end_slice, :] = X
        model_fitness.Y[start_slice:end_slice, :] = f

        return f

    # get the problem dimensionality
    D = lb.size

    # NSGA-II settings
    POPSIZE = D * 100
    N_GENS = int(np.ceil(fevals / POPSIZE))
    TOTAL_EVALUATIONS = POPSIZE * N_GENS

    nsga2 = pg.algorithm(pg.nsga2(gen=1,
                                  cr=0.8,       # cross-over probability.
                                  eta_c=20.0,   # distribution index (cr)
                                  m=1 / D,        # mutation rate
                                  eta_m=20.0))  # distribution index (m)

    # preallocate the storage of every location and fitness to be evaluated
    model_fitness.X = np.zeros((TOTAL_EVALUATIONS, D))
    model_fitness.Y = np.zeros((TOTAL_EVALUATIONS, 2))

    # problem instance
    gpy_problem = GPY_WRAPPER(model, lb, ub, cf, TOTAL_EVALUATIONS)
    problem = pg.problem(gpy_problem)

    # initialise the population
    population = pg.population(problem, size=POPSIZE)

    # evolve the population
    for _ in range(N_GENS):
        population = nsga2.evolve(population)

    # indices of the non-dominated points across the entire NSGA-II run
    front_inds = pg.non_dominated_front_2d(model_fitness.Y)

    X_front = model_fitness.X[front_inds, :]
    musigma_front = model_fitness.Y[front_inds, :]

    # convert the standard deviations back to positive values; nsga2 minimises
    # the negative standard deviation (i.e. maximises the standard deviation)
    musigma_front[:, 1] *= -1

    return X_front, musigma_front
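
A minimal usage sketch for NSGA2_pygmo, assuming GPy and pygmo are installed;
the training data and bounds below are synthetic and purely illustrative:

import numpy as np
import GPy

rng = np.random.default_rng(0)
X_train = rng.uniform(-2.0, 2.0, size=(30, 2))
y_train = np.sin(X_train[:, :1]) + 0.1 * rng.standard_normal((30, 1))

# fit a GP to the synthetic data and optimise its hyperparameters
model = GPy.models.GPRegression(X_train, y_train)
model.optimize()

lb, ub = np.array([-2.0, -2.0]), np.array([2.0, 2.0])
X_front, musigma_front = NSGA2_pygmo(model, fevals=2000, lb=lb, ub=ub)
print(X_front.shape, musigma_front.shape)  # (F, 2) and (F, 2)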