Exemple #1
0
def separate_label_and_data():
	"""Split the module-level ``train`` data into features and labels.

	Returns:
		A tuple ``(X, y)`` where ``y`` is a list of the values stored under
		'ACTION' and ``X`` is a copy of ``train`` without that entry.
	"""
	util.print_debug_msg('separating label and data')
	y = list(train['ACTION'])
	# Work on a real copy: a plain `X = train` only aliases the same object,
	# so deleting 'ACTION' from it would also strip the column from `train`
	# and make a second call fail with KeyError.
	X = train.copy()
	del X['ACTION']
	return X, y
Exemple #2
0
def group_data(data, degree=3, hash=hash):
	"""Build one hashed column per combination of `degree` input columns.

	For every combination of `degree` column indices of the 2-D `data`,
	each row's values in those columns are packed into a tuple and passed
	through `hash`. The result has one row per input row and one column
	per combination.
	"""
	util.print_debug_msg('Grouping Data')
	_, n_columns = data.shape
	hashed_groups = [
		[hash(tuple(row_values)) for row_values in data[:, column_ids]]
		for column_ids in combinations(range(n_columns), degree)
	]
	# Groups were collected row-wise (one list per combination); transpose
	# so that each combination becomes a column.
	return array(hashed_groups).T
def grid_search(X_train, y_train):
	util.print_debug_msg('Starting grid search')
	parameters = {'C':[0.5,1,1.5,2,2.5,3,3.5,4],'penalty':['l1','l2']}
	clf = GridSearchCV(lg, parameters, scoring="roc_auc", n_jobs=2)
	util.print_debug_msg('Now fitting in grid search')
	clf.fit(X_train, y_train)
	print 'cv_scores: ', clf.cv_scores_
	print 'best_estimator: ', clf.best_estimator_
	print 'best_params: ', clf.best_params_
def normalize_features(X, y=None):
	"""One-hot encode `X`, optionally together with a second matrix `y`.

	Args:
		X: feature matrix accepted by OneHotEncoder.
		y: optional second matrix. It is stacked under `X` with np.vstack
			before fitting, so it must have the same number of columns
			(presumably the test set -- confirm against the caller).

	Returns:
		The encoded `X` when `y` is None; otherwise the tuple `(X, y)` with
		both matrices encoded by a single encoder fitted on their stack.
	"""
	util.print_debug_msg('Selected Features. Now One Hot Encoding')
	enc = OneHotEncoder()
	# Identity check on purpose: `y != None` is evaluated element-wise for
	# numpy arrays, and using that result in `if` raises "truth value of an
	# array is ambiguous".
	if y is None:
		return enc.fit_transform(X)
	# Fit on both matrices so categories that appear in only one of them
	# are still known to the encoder.
	enc.fit(np.vstack((X, y)))
	return enc.transform(X), enc.transform(y)
Exemple #5
0
def pre_process_data(X):
	"""Expand `X` with hashed column pairs/triples and one-hot encode it.

	Args:
		X: tabular data convertible to a 2-D array (a pandas DataFrame in
			the original usage).

	Returns:
		A dense array: the one-hot encoding of the original columns plus
		the degree-2 and degree-3 grouped columns from `group_data`.
	"""
	util.print_debug_msg('Pre processing Data')
	# `X.ix[:]` only selected the whole frame; converting X directly is
	# equivalent and avoids the deprecated `.ix` indexer.
	x1 = array(X)
	x2 = group_data(x1, degree=2)
	x3 = group_data(x1, degree=3)
	x_all = np.hstack((x1, x2, x3))
	# The encoder yields a sparse matrix; callers get a dense array.
	return OneHotEncoder().fit_transform(x_all).toarray()
def pre_process_data(X):
	"""Expand `X` with hashed column pairs and triples.

	Args:
		X: tabular data convertible to a 2-D array (a pandas DataFrame in
			the original usage).

	Returns:
		The original columns followed by the degree-2 and degree-3 grouped
		columns from `group_data`, stacked horizontally. No feature
		selection or encoding is applied.
	"""
	util.print_debug_msg('Pre processing Data')
	# `X.ix[:]` only selected the whole frame; converting X directly is
	# equivalent and avoids the deprecated `.ix` indexer.
	x1 = array(X)
	x2 = group_data(x1, degree=2)
	x3 = group_data(x1, degree=3)
	x_all = np.hstack((x1, x2, x3))

	# Column selection and one-hot encoding are disabled here, so every
	# column is returned. Column subsets tried in earlier experiments:
	#features = [0,8,9,10,36,37,38,41,41,43,47,53,60,61,63,64,67,69,71,75,85]
	#features = [0, 8, 9, 10, 19, 34, 36, 37, 38, 41, 42, 43, 47, 53, 55, 60, 61, 63, 64, 67, 69, 71, 75, 81, 82, 85]
	return x_all
Exemple #7
0
def train_predict(X_train, y_train, X_test):
	"""Fit a linear-kernel `svm.SVR` on the training data and predict `X_test`.

	Returns whatever the fitted model's `predict` gives for `X_test`.
	"""
	util.print_debug_msg('Training SVM Classifier')
	# Note: despite the log wording, the estimator is the regressor SVR.
	svr = svm.SVR(kernel='linear', verbose=True)
	svr.fit(X_train, y_train)
	util.print_debug_msg('Predicting SVM Classifier')
	predictions = svr.predict(X_test)
	return predictions
Exemple #8
0
def train_predict(X_train, y_train, X_test):
	"""Fit a BernoulliNB model on the training data and predict `X_test`.

	Returns the output of the fitted model's `predict` for `X_test`.
	"""
	util.print_debug_msg('Training NB Classifier')
	classifier = BernoulliNB()
	classifier.fit(X_train, y_train)
	util.print_debug_msg('Predicting NB Classifier')
	predictions = classifier.predict(X_test)
	return predictions
Exemple #9
0
def train_predict(X_train, y_train, X_test):
	"""Fit a 10-neighbour kNN classifier and score `X_test`.

	Returns column 1 of the fitted model's `predict_proba` output for
	`X_test`.
	"""
	util.print_debug_msg('Training kNN Classifier')
	classifier = KNeighborsClassifier(n_neighbors=10)
	classifier.fit(X_train, y_train)
	util.print_debug_msg('Predicting kNN Classifier')
	probabilities = classifier.predict_proba(X_test)
	return probabilities[:, 1]
def train_predict(X_train, y_train, X_test):
	"""Fit the module-level `lg` estimator and score `X_test`.

	`lg` is fitted in place on the training data. Returns column 1 of its
	`predict_proba` output for `X_test`.
	"""
	util.print_debug_msg('Training LG Classifier')
	lg.fit(X_train, y_train)
	util.print_debug_msg('Predicting LG Classifier')
	probabilities = lg.predict_proba(X_test)
	return probabilities[:, 1]