def setUp(self):
    """Create the SGD model under test and write the dummy sparse fixture file.

    The fixture is written to a uniquely named temporary file rather than a
    fixed path under /tmp, so concurrent test runs cannot overwrite each
    other's data and the test does not depend on /tmp existing. The path is
    stored in ``self.sparse_file``.
    """
    # Imported locally because the file-level import block is not visible
    # from this method.
    import tempfile

    self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

    # delete=False keeps the file on disk after the handle is closed so the
    # test body can reopen it by name.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.sps',
                                     delete=False) as f:
        self.sparse_file = f.name
        f.write(DUMMY_SPARSE_STR)
class TestSGD(unittest.TestCase):
    """Unit tests for reading sparse-format files with the SGD model."""

    def setUp(self):
        """Create the SGD model under test and write the dummy sparse fixture.

        The fixture is written to a uniquely named temporary file rather than
        a fixed path under /tmp, so concurrent test runs cannot overwrite each
        other's data. The path is stored in ``self.sparse_file``.
        """
        # Imported locally because the file-level import block is not visible
        # from this class.
        import tempfile

        self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

        # delete=False keeps the file on disk after the handle is closed so
        # read_sparse() can reopen it by name; tearDown() removes it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.sps',
                                         delete=False) as f:
            self.sparse_file = f.name
            f.write(DUMMY_SPARSE_STR)

    def tearDown(self):
        """Remove the dummy sparse file if it still exists."""
        if os.path.isfile(self.sparse_file):
            os.remove(self.sparse_file)

    def test_read_sparse(self):
        """Check the targets and per-row feature counts from read_sparse().

        Each item yielded by ``read_sparse`` is unpacked as ``(x, y)``; the
        collected ``y`` values must equal ``DUMMY_Y`` and the collected
        ``len(x)`` values must equal ``DUMMY_LEN_X``.
        """
        len_xs = []
        ys = []

        for x, y in self.model.read_sparse(self.sparse_file):
            # No duplicate entries in x: a repeated feature index would
            # indicate a hash collision.
            self.assertEqual(len(set(x)), len(x))

            ys.append(y)
            len_xs.append(len(x))

        # Target values are read back correctly.
        self.assertEqual(ys, DUMMY_Y)

        # The number of feature indices per row is correct.
        self.assertEqual(len_xs, DUMMY_LEN_X)
# Training-script fragment (Python 2 `print` statements).
# Visible steps: index `train`/`label` with `train_indices` and `train` with
# `test_indices`, wrap X and X_test in `sparse.csr_matrix`, build an `SGD`
# classifier with the listed hyperparameters, and call `clf.fit(X, y)`.
# The `#clf = ...` entries are alternative classifiers left commented out.
# NOTE(review): this line appears whitespace-collapsed and is cut off inside
# the triple-quoted block that follows "#Convert the predicted array";
# confirm the line structure and the block's end against the original file.
X = train[train_indices] y = label[train_indices] X_test = train[test_indices] X = sparse.csr_matrix(X) X_test = sparse.csr_matrix(X_test) #clf = RandomForestClassifier(n_estimators=500,n_jobs=-1,verbose = 1) #clf = KNeighborsClassifier(n_neighbors=15, weights='distance', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None) #clf = GaussianNB() #clf = OneVsRestClassifier(SVC(kernel='linear'),n_jobs = 2) #clf = MultinomialNB() #clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=30),n_estimators=600,learning_rate=1.5,algorithm="SAMME.R") #clf = VBGMM(n_components=8, covariance_type='diag', alpha=1.0, random_state=None, thresh=None, tol=0.001, verbose=1, min_covar=None, n_iter=500, params='wmc', init_params='wmc') print 'clf fit' clf = SGD(a=.01, # learning rate l1=1e-6, # L1 regularization parameter l2=1e-6, # L2 regularization parameter n=983, # number of hashed features epoch=10, # number of epochs interaction=True) # use feature interaction or not clf.fit(X,y) print 'Classifier Trained' #Convert the predicted array ''' Y_prob = clf.predict_proba(X_test) Y_pred = [] for i in range(len(Y_prob)): Y_pred.append([]) for j in range(len(Y_prob[i])): if len(Y_prob[i][j]) == 2: Y_pred[i].append(Y_prob[i][j][1]) #positive class prob else:
# Training-script fragment (Python 2 `print` statements).
# Visible steps: index `label` with `train_indices` and `train` with
# `test_indices`, wrap X and X_test in `sparse.csr_matrix`, build an `SGD`
# classifier with the listed hyperparameters, and call `clf.fit(X, y)`.
# The `#clf = ...` entries are alternative classifiers left commented out.
# NOTE(review): the assignment that first defines X is not visible here, and
# the line is cut off inside the triple-quoted block that follows
# "#Convert the predicted array"; confirm both against the original file.
y = label[train_indices] X_test = train[test_indices] X = sparse.csr_matrix(X) X_test = sparse.csr_matrix(X_test) #clf = RandomForestClassifier(n_estimators=500,n_jobs=-1,verbose = 1) #clf = KNeighborsClassifier(n_neighbors=15, weights='distance', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None) #clf = GaussianNB() #clf = OneVsRestClassifier(SVC(kernel='linear'),n_jobs = 2) #clf = MultinomialNB() #clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=30),n_estimators=600,learning_rate=1.5,algorithm="SAMME.R") #clf = VBGMM(n_components=8, covariance_type='diag', alpha=1.0, random_state=None, thresh=None, tol=0.001, verbose=1, min_covar=None, n_iter=500, params='wmc', init_params='wmc') print 'clf fit' clf = SGD( a=.01, # learning rate l1=1e-6, # L1 regularization parameter l2=1e-6, # L2 regularization parameter n=983, # number of hashed features epoch=10, # number of epochs interaction=True) # use feature interaction or not clf.fit(X, y) print 'Classifier Trained' #Convert the predicted array ''' Y_prob = clf.predict_proba(X_test) Y_pred = [] for i in range(len(Y_prob)): Y_pred.append([]) for j in range(len(Y_prob[i])): if len(Y_prob[i][j]) == 2: Y_pred[i].append(Y_prob[i][j][1]) #positive class prob