def feature_selection_extra_trees(self):
    """Rank features with an ExtraTrees-based selector and plot the top 50.

    Loads ``(samples, responses)`` from the pickled ``models.obj`` file,
    fits the forest, prints the 50 most important features and shows a
    bar chart with the inter-tree standard deviation as error bars.
    """
    # Load Data.
    # BUG FIX: pickle streams are binary — open with 'rb' (text mode
    # corrupts them on Python 3 / Windows); `with` closes the handle.
    with open("models.obj", 'rb') as file:
        models = pickle.load(file)
    samples = models[0]
    responses = models[1]

    # Using ExtraTreesClassifier
    forest = FeatureSelectionScikit(n_estimators=10, criterion="gini")
    forest.fit(samples=samples, response=responses)
    importances = forest.importance()
    # Spread of each feature's importance across the individual trees,
    # used as the error bars below.
    std = np.std([tree.feature_importances_ for tree in forest.model.estimators_],
                 axis=0)
    indices = np.argsort(importances)[::-1]  # feature ids, most important first

    # Print the feature ranking
    print("Feature ranking:")
    to_plot = []
    to_indices = []
    for f in range(50):
        to_plot.append(importances[indices[f]])
        to_indices.append(indices[f])
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

    # Plot the feature importances of the forest
    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(50), to_plot, color="b", yerr=std[to_indices], align="center")
    # BUG FIX: label the bars with the 50 features actually plotted
    # (indices[:50]); the original passed indices[50:], i.e. the
    # features that were NOT shown.
    locs, labels = plt.xticks(range(50), indices[:50])
    plt.setp(labels, rotation=90)
    plt.xlim([-1, 50])
    plt.show()
def upper(self):
    """Compare a RandomForest trained on all features against one trained
    only on the features whose importance exceeds the minimum importance.

    Loads ``(samples, responses)`` from the pickled ``models.obj`` file,
    plots the importance curves, trains/tests both forests, and prints
    the timing ratio and off-diagonal confusion-matrix error counts.
    """
    model = FeatureSelectionScikit()
    rdmForestPre = RandomForest_scikit()
    rdmForest = RandomForest_scikit()

    # BUG FIX: pickle streams are binary — open with 'rb' (text mode
    # corrupts them on Python 3 / Windows); `with` closes the handle.
    with open("models.obj", 'rb') as file:
        models = pickle.load(file)
    samples = models[0]
    responses = models[1]

    # Scaled data
    # samplesScaled = preprocessing.scale(samples)
    samplesScaled = samples

    model.fit(samplesScaled, responses)
    variablesImportance = model.importance()

    fig1 = plt.figure(1, figsize=(4, 3))
    ax1 = fig1.add_subplot(111)
    ax1.plot(variablesImportance, linewidth=2)

    # Keep every feature strictly more important than the least one.
    basicPre = []
    indices = []
    minimo = min(variablesImportance)
    for i, value in enumerate(variablesImportance):
        if value > minimo:
            basicPre.append(value)
            indices.append(i)
    print('Escogi %d' % (len(basicPre)))

    fig2 = plt.figure(2, figsize=(4, 3))
    ax2 = fig2.add_subplot(111)
    ax2.plot(basicPre, linewidth=2)

    # BUG FIX: project each sample onto the selected columns.  The
    # original `izip(indices, fila)` paired column *positions* with the
    # first len(indices) row values (misaligned) and used the position
    # itself as a boolean, silently dropping column 0.
    newSample = [[fila[i] for i in indices] for fila in samplesScaled]

    t0 = time()
    rdmForestPre.train(newSample, responses)
    a, confusionPre = rdmForestPre.test(newSample, responses, True)
    preTiempo = time() - t0
    print("With Preprocessing %0.3fs" % (preTiempo))
    # Off-diagonal entries of the confusion matrix = misclassifications.
    sumPre = 0
    for idx, fila in enumerate(confusionPre):
        for jdx, entrada in enumerate(fila):
            if idx != jdx:
                sumPre += entrada

    t0 = time()
    rdmForest.train(samples, responses)
    a, confusion = rdmForest.test(samples, responses, True)
    Tiempo = time() - t0
    print("Without Preprocessing %0.3fs" % (Tiempo))
    print("Preprocessing/Without = %0.3fs" % (1.0 * preTiempo / Tiempo))

    # Renamed from `sum` — do not shadow the builtin.
    errorTotal = 0
    for idx, fila in enumerate(confusion):
        for jdx, entrada in enumerate(fila):
            if idx != jdx:
                errorTotal += entrada
    # Guard against a perfect classifier (no off-diagonal errors),
    # which previously raised ZeroDivisionError.
    ratio = float(1.0 * sumPre / errorTotal) if errorTotal else float('inf')
    print(str(sumPre), str(errorTotal), ratio)
    plt.show()