Example #1
0
def _save_scree_plot(values, title, ylabel, path):
    """Plot *values* against their index on a fresh figure and save it to *path*."""
    # A new figure per plot (instead of the fixed figure numbers 1 and 2)
    # guarantees that repeated calls never draw on top of an earlier curve.
    fig, ax = plt.subplots()
    try:
        ax.plot(values)
        ax.set_title(title)
        ax.set_xlabel('principal components')
        ax.set_ylabel(ylabel)
        fig.savefig(path)
    finally:
        # Release the figure even if saving fails, so figures do not pile up.
        plt.close(fig)


def print_pca_variance():
    """Fit a PCA on the training data and save two scree plots.

    The PCA keeps as many components as there are feature columns in the
    test set. Despite the name, nothing is printed; two image files are
    written using relative paths:

    * ``proportion_variance.jpg`` -- explained-variance ratio of each
      component.
    * ``proportion_Variance_com.jpg`` -- cumulative sum of those ratios.

    Returns:
        None.
    """
    # Only the feature matrices are used below; labels and label encoders
    # are fetched because the accessor returns everything together.
    X_train, y_train, train_le, X_test, y_test, test_le = data_accessor_util.get_all_data_sets()

    # Converting data to numpy
    (X_train, y_train, X_test, y_test) = data_accessor_util.convert_data_sets_to_numpy(X_train, y_train, X_test, y_test)

    n_features = X_test.shape[1]
    pca = PCA(n_components=n_features)
    pca.fit(X_train)
    var = pca.explained_variance_ratio_
    cumulative_var = np.cumsum(var)

    # To enable creating figures on the server
    # NOTE(review): pyplot is already imported when this runs; on old
    # Matplotlib releases the backend had to be selected before that
    # import -- confirm against the version deployed.
    matplotlib.use('Agg')

    _save_scree_plot(var,
                     'individual scree plot',
                     'proportion of variance explained',
                     "proportion_variance.jpg")
    _save_scree_plot(cumulative_var,
                     'cumulative scree plot',
                     'cumulative proportion of variance explained',
                     "proportion_Variance_com.jpg")
Example #2
0
def read_data_perform_pca(var_percentage=0.95):
    """Load the data sets and project the features onto a reduced PCA basis.

    A first PCA with one component per feature column is fitted on the
    training features to obtain the cumulative explained-variance ratio.
    The smallest number of leading components whose cumulative ratio is
    strictly greater than ``var_percentage`` is then used for a second
    PCA, which is fitted on the training features and applied to both the
    training and the test features.

    Args:
        var_percentage: Fraction of the total variance that the retained
            components must exceed. If the cumulative ratio never exceeds
            it (for example ``1.0``), all components are kept.

    Returns:
        The tuple ``(X_train, y_train, train_le, X_test, y_test, test_le)``
        where ``X_train`` and ``X_test`` are the PCA-transformed features
        and the remaining items are passed through from the data accessor
        (labels after numpy conversion, encoders unchanged).
    """
    # reading the data from sql table using the get_all_data method
    X_train, y_train, train_le, X_test, y_test, test_le = data_accessor_util.get_all_data_sets()

    # Converting data to numpy
    (X_train, y_train, X_test, y_test) = data_accessor_util.convert_data_sets_to_numpy(X_train, y_train, X_test, y_test)

    n_features = X_test.shape[1]
    full_pca = PCA(n_components=n_features)
    full_pca.fit(X_train)
    cumulative_var = np.cumsum(full_pca.explained_variance_ratio_)

    reached = cumulative_var > var_percentage
    if reached.any():
        # argmax gives the index of the first True; add 1 because indices
        # start at 0 while a component count starts at 1.
        n_reduced = int(np.argmax(reached)) + 1
    else:
        # argmax of an all-False mask is 0, which would silently keep a
        # single component; keep every component instead.
        n_reduced = len(cumulative_var)

    pca = PCA(n_components=n_reduced)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    return X_train, y_train, train_le, X_test, y_test, test_le
Example #3
0
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA

from sklearn.metrics import classification_report

# Internal
sys.path.append(os.path.realpath("%s/.." % os.path.dirname(__file__)))
from util import data_accessor_util

#-------------------------
# Globals
#-------------------------

# Get data: train/test features and targets plus their label encoders.
(train_X, train_Y, train_le, test_X, test_Y,
 test_le) = data_accessor_util.get_all_data_sets()

classes = list(test_le.classes_)
# Show the original labels behind the encoded class ids 0..5.
# The parenthesised form prints the same text on Python 2 and is also
# valid syntax on Python 3.
print(test_le.inverse_transform([0, 1, 2, 3, 4, 5]))

# Convert to numpy
(train_X, train_Y, test_X,
 test_Y) = data_accessor_util.convert_data_sets_to_numpy(
     train_X, train_Y, test_X, test_Y)

# Candidate values for a parameter named "C": from 1e-5 up to (but not
# including) 1e-1 in steps of 0.005.
# NOTE(review): presumably the parameter grid for GridSearchCV -- the use
# site is not visible here; confirm.
Cs = {"C": np.arange(10**-5, 10**-1, 0.005)}

# PCA: keep as many components as there are training feature columns and
# replace the training features with their projection.
print("PCA")
pca = PCA(n_components=train_X.shape[1])
train_X = pca.fit_transform(train_X)
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.gaussian_process import GaussianProcessClassifier
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics.classification import accuracy_score, log_loss
from sklearn.gaussian_process.kernels import RBF

# Debug flag for this script (its use is not visible in this section).
DEBUG = 1

# Fetch features, targets and label encoders for both splits through the
# shared accessor (per the original note, the data comes from a SQL table).
(X_train, y_train, train_le,
 X_test, y_test, test_le) = data_accessor_util.get_all_data_sets()

# Earlier in-line preprocessing, kept for reference only:
# X = df.drop(['genre'], axis=1)
# y = df['genre']
# le = preprocessing.LabelEncoder()
# y = le.fit_transform(y)

# Remove the 'songID' column from both feature sets.
X_train = X_train.drop('songID', axis=1)
X_test = X_test.drop('songID', axis=1)

# step size in the mesh
h = .02

# Display names of the methods used; per the original note they correspond,
# entry by entry, to the classifiers list.
methods = [
    "Logistic Regression",
    "Decision Tree",
    "Random Forest",
    "Linear SVM",
    "RBF SVM",
    "Neural Net, MLP",
    "Gaussian Process",
    "Gaussian Naive Bayes",
    "QDA",
    "AdaBoost",
]
# TODO: add these two methods as well:
#   - Xgboost
#   - Gradient boosting