Ejemplo n.º 1
0
    def setUp(self):
        """Prepare the fixture: an SGD model and a dummy sparse input file."""
        # Model under test, built with feature interaction enabled.
        self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

        # Location of the dummy sparse file used by the tests.
        self.sparse_file = '/tmp/dummy.sps'

        # Create the dummy sparse file from the module-level fixture string.
        with open(self.sparse_file, 'w') as sparse_out:
            sparse_out.write(DUMMY_SPARSE_STR)
Ejemplo n.º 2
0
class TestSGD(unittest.TestCase):
    """Tests for SGD.read_sparse using a small dummy sparse file."""

    def setUp(self):
        """Create the model under test and a private dummy sparse file.

        The file content comes from the module-level DUMMY_SPARSE_STR and its
        path is stored in ``self.sparse_file`` for the tests and tearDown.
        """
        # Local import so this class does not depend on a file-level import.
        import tempfile

        self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

        # A unique temporary file (instead of a fixed '/tmp/dummy.sps') keeps
        # concurrent test runs from overwriting or deleting each other's
        # fixture and also works on platforms without a /tmp directory.
        fd, self.sparse_file = tempfile.mkstemp(suffix='.sps')
        with os.fdopen(fd, 'w') as f:
            f.write(DUMMY_SPARSE_STR)

    def tearDown(self):
        """Remove the dummy sparse file created by setUp, if it still exists."""
        if os.path.isfile(self.sparse_file):
            os.remove(self.sparse_file)

    def test_read_sparse(self):
        """Check the targets and feature counts produced by read_sparse."""
        len_xs = []
        ys = []
        for x, y in self.model.read_sparse(self.sparse_file):
            # Feature indices within one row must be unique, i.e. no hash
            # collision between the features of that row.
            self.assertEqual(len(set(x)), len(x))

            ys.append(y)
            len_xs.append(len(x))

        # Target values must match the expected fixture values.
        self.assertEqual(ys, DUMMY_Y)

        # The number of feature indices per row must match as well.
        self.assertEqual(len_xs, DUMMY_LEN_X)
Ejemplo n.º 3
0
    def setUp(self):
        """Prepare the fixture: an SGD model and a dummy sparse input file."""
        # Model under test, built with feature interaction enabled.
        self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

        # Location of the dummy sparse file used by the tests.
        self.sparse_file = '/tmp/dummy.sps'

        # Create the dummy sparse file from the module-level fixture string.
        with open(self.sparse_file, 'w') as sparse_out:
            sparse_out.write(DUMMY_SPARSE_STR)
Ejemplo n.º 4
0
class TestSGD(unittest.TestCase):
    """Tests for SGD.read_sparse using a small dummy sparse file."""

    def setUp(self):
        """Create the model under test and a private dummy sparse file.

        The file content comes from the module-level DUMMY_SPARSE_STR and its
        path is stored in ``self.sparse_file`` for the tests and tearDown.
        """
        # Local import so this class does not depend on a file-level import.
        import tempfile

        self.model = SGD(n=2**10, a=0.1, l1=1, l2=1, interaction=True)

        # A unique temporary file (instead of a fixed '/tmp/dummy.sps') keeps
        # concurrent test runs from overwriting or deleting each other's
        # fixture and also works on platforms without a /tmp directory.
        fd, self.sparse_file = tempfile.mkstemp(suffix='.sps')
        with os.fdopen(fd, 'w') as f:
            f.write(DUMMY_SPARSE_STR)

    def tearDown(self):
        """Remove the dummy sparse file created by setUp, if it still exists."""
        if os.path.isfile(self.sparse_file):
            os.remove(self.sparse_file)

    def test_read_sparse(self):
        """Check the targets and feature counts produced by read_sparse."""
        len_xs = []
        ys = []
        for x, y in self.model.read_sparse(self.sparse_file):
            # Feature indices within one row must be unique, i.e. no hash
            # collision between the features of that row.
            self.assertEqual(len(set(x)), len(x))

            ys.append(y)
            len_xs.append(len(x))

        # Target values must match the expected fixture values.
        self.assertEqual(ys, DUMMY_Y)

        # The number of feature indices per row must match as well.
        self.assertEqual(len_xs, DUMMY_LEN_X)
Ejemplo n.º 5
0
        X = train[train_indices]
        y = label[train_indices]
        X_test = train[test_indices]
        X = sparse.csr_matrix(X)
        X_test = sparse.csr_matrix(X_test)
        #clf = RandomForestClassifier(n_estimators=500,n_jobs=-1,verbose = 1)
        #clf = KNeighborsClassifier(n_neighbors=15, weights='distance', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None)
        #clf = GaussianNB()
        #clf = OneVsRestClassifier(SVC(kernel='linear'),n_jobs = 2)
        #clf = MultinomialNB()
        #clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=30),n_estimators=600,learning_rate=1.5,algorithm="SAMME.R")
        #clf = VBGMM(n_components=8, covariance_type='diag', alpha=1.0, random_state=None, thresh=None, tol=0.001, verbose=1, min_covar=None, n_iter=500, params='wmc', init_params='wmc')
        print 'clf fit'
        clf = SGD(a=.01,                # learning rate
              l1=1e-6,              # L1 regularization parameter
              l2=1e-6,              # L2 regularization parameter
              n=983,              # number of hashed features
              epoch=10,             # number of epochs
              interaction=True)     # use feature interaction or not

        clf.fit(X,y)
        print 'Classifier Trained'
        #Convert the predicted array
        '''
        Y_prob = clf.predict_proba(X_test)
        Y_pred = []
        for i in range(len(Y_prob)):
                Y_pred.append([])
                for j in range(len(Y_prob[i])):
                        if len(Y_prob[i][j]) == 2:
                                Y_pred[i].append(Y_prob[i][j][1]) #positive class prob
                        else:
Ejemplo n.º 6
0
        y = label[train_indices]
        X_test = train[test_indices]
        X = sparse.csr_matrix(X)
        X_test = sparse.csr_matrix(X_test)
        #clf = RandomForestClassifier(n_estimators=500,n_jobs=-1,verbose = 1)
        #clf = KNeighborsClassifier(n_neighbors=15, weights='distance', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None)
        #clf = GaussianNB()
        #clf = OneVsRestClassifier(SVC(kernel='linear'),n_jobs = 2)
        #clf = MultinomialNB()
        #clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=30),n_estimators=600,learning_rate=1.5,algorithm="SAMME.R")
        #clf = VBGMM(n_components=8, covariance_type='diag', alpha=1.0, random_state=None, thresh=None, tol=0.001, verbose=1, min_covar=None, n_iter=500, params='wmc', init_params='wmc')
        print 'clf fit'
        clf = SGD(
            a=.01,  # learning rate
            l1=1e-6,  # L1 regularization parameter
            l2=1e-6,  # L2 regularization parameter
            n=983,  # number of hashed features
            epoch=10,  # number of epochs
            interaction=True)  # use feature interaction or not

        clf.fit(X, y)
        print 'Classifier Trained'
        #Convert the predicted array
        '''
        Y_prob = clf.predict_proba(X_test)
        Y_pred = []
        for i in range(len(Y_prob)):
                Y_pred.append([])
                for j in range(len(Y_prob[i])):
                        if len(Y_prob[i][j]) == 2:
                                Y_pred[i].append(Y_prob[i][j][1]) #positive class prob