# Model with dimensionality reduction through Isomap (Isometric Mapping).
# Not for regression.
# ----------------------------------------------------------------------------------
iso = manifold.Isomap()
# CV's result is interpolated into the three fields of the report below
# (number of features, error, accuracy) and stored as one benchmark entry.
best = CV(iso, x, y)
benchmark_names.append('Isomap')
benchmark.append(best)
isomap_report = 'Isomap \nNumber of features %d \nError %.3f \nAccuracy %.3f\n'
print(isomap_report % best)

# -------------------------------------------------------
# Model with feature selection based on a genetic algorithm
# -------------------------------------------------------
gen = Genetic(features=x.columns.size, parents=5, children=20,
              mutation_scale=0.05)
gen.fit(x, y, 20, modelCV)

# Build the (number of features, error, accuracy) summary once and reuse it
# for both the benchmark table and the printed report. The feature count is
# the number of non-zero entries in gen.tab[0]; error and accuracy are the
# first two values of the last entry of gen.best.
gen_last = gen.best[-1]
gen_summary = (np.count_nonzero(gen.tab[0]), gen_last[0], gen_last[1])
benchmark.append(gen_summary)
benchmark_names.append('Genetic algorithm')
print(
    'Genetic algorithm \nNumber of features %d \nError %.3f \nAccuracy %.3f\n'
    % gen_summary)

# Plotting results normalized to the result without feature selection or
# dimensionality reduction (the first benchmark entry).
# Force a float array so the true division below is well defined even when
# every collected value happens to be an integer.
benchmark = np.array(benchmark, dtype=float)
# Divide out-of-place: `benchmark /= benchmark[0]` takes its divisor from a
# view of the very array it overwrites, and NumPy releases before 1.13 gave
# no guarantee about the result of such overlapping in-place operations.
benchmark = benchmark / benchmark[0]
benchmark = pd.DataFrame(benchmark,
                         columns=['number of features', 'loss', 'accuracy'])
# Label each row with the name of the algorithm that produced it.
benchmark['algorithm'] = benchmark_names
benchmark = benchmark.set_index('algorithm')
# Example #2 (start of a separate code snippet)
# 0
columns.append('seed_pseudorapidity')
# Log-transform the columns labelled 1 and 2 before naming the columns.
for log_col in (1, 2):
    data[log_col] = np.log(data[log_col])
data.columns = columns

# Scale the features. The scaler output is wrapped in a fresh DataFrame
# without a `columns` argument, so the names assigned above are not carried
# over to the scaled frame.
sc = preprocessing.StandardScaler()
scaled = sc.fit_transform(data)
data = pd.DataFrame(scaled)

# x: copy of all scaled rows; y: first column of the original data
# (presumably the target -- confirm against where data_org is loaded).
x = pd.DataFrame(np.array(data[:]))
target = np.array(data_org)[:, 0]
y = pd.DataFrame(target)

########################################################
# Model with feature selection based on a genetic algorithm
########################################################
gen = Genetic(features=x.columns.size,
              parents=5,
              children=30,
              mutation_scale=0.05)
gen.fit(x, y, 10, modelCV)

# Report the final result: the count of non-zero entries in gen.tab[0] plus
# the error and accuracy stored in the last entry of gen.best.
final_best = gen.best[-1]
print('\n\nGenetic algorithm\nNumber of features %d \nError %.3f \nAccuracy %.3f\n'
      % (np.count_nonzero(gen.tab[0]), final_best[0], final_best[1]))

# Dump the last generation's feature sets and their losses; one item per
# line, exactly as separate print() calls would emit them.
print('\n============================\n',
      'Set of features from last generation',
      gen.tab,
      '\n-----------\n',
      'Results of features from last generation (loss)',
      gen.results,
      sep='\n')

# Plotting the progress of the genetic algorithm: the first column of
# gen.best and gen.mean_ (the value reported as "Error" above), drawn in
# blue and red respectively against the generation index.
# NOTE(review): assumes gen.best / gen.mean_ hold one row per generation --
# confirm against the Genetic class.
plt.plot(gen.best.T[0], 'b-')
plt.plot(gen.mean_.T[0], 'r-')
plt.legend(['best', 'mean'])
# Title fixed: it previously read 'Generic algorithm', a typo for the
# 'Genetic algorithm' used everywhere else in this script.
plt.title('Genetic algorithm')
plt.xlabel('generation')