Ejemplo n.º 1
0
    def update(self):
        """Train a MultinomialNB classifier and store it as a pickled blob.

        Steps, in order:

        1. Validate the parameter metadata and the model data (including
           the views listed under this object's "db_views" metadata).
        2. Read "observation_vectors" and "truth_vectors" from the model
           data; they are passed to ``fit`` as samples and targets.
        3. Set "db_views" on the parameters to an empty list.
        4. Configure a ``MultinomialNB`` from the "alpha", "fit_prior" and
           (when set) "class_prior" metadata, then fit it.
        5. Store ``pickle.dumps`` of the fitted estimator as the
           "mnb_model" binary parameter and call ``self.finalize()``.
        """
        # Handle to storage for model parameters
        params = self._parameters

        # Make sure all my meta data is ready to go
        params.validateMeta()

        # Make sure my model data is ready to go
        self._model_data.validate()
        self._model_data.validateViews(self.getMetaData("db_views"))

        # Training samples and their target labels
        observation_vectors = self._model_data.getMetaData(
            "observation_vectors")
        truth_vectors = self._model_data.getMetaData("truth_vectors")

        params.setMetaData("db_views", [])

        # Houston we are go
        mnb = MultinomialNB()

        mnb.alpha = self.getMetaData("alpha")
        mnb.fit_prior = self.getMetaData("fit_prior")
        class_prior = self.getMetaData("class_prior")
        # Identity test on purpose: "!= None" is evaluated element-wise when
        # the stored prior is a numpy array, which makes the `if` raise
        # ValueError instead of simply detecting "a prior was provided".
        if class_prior is not None:
            mnb.class_prior = class_prior

        mnb.fit(observation_vectors, truth_vectors)
        params.setBinaryData("mnb_model", "application/pickle",
                             pickle.dumps(mnb))

        self.finalize()
Ejemplo n.º 2
0
                        dual=True,
                        tol=0.0001,
                        C=0.5,
                        multi_class='ovr',
                        fit_intercept=True,
                        intercept_scaling=1,
                        class_weight=None,
                        verbose=0,
                        random_state=None,
                        max_iter=1000)
# Multinomial naive Bayes classifier configured with alpha=0.04.
clf_nb = MultinomialNB(alpha=0.04, fit_prior=True, class_prior=None)

# Alternative classifier kept for reference (disabled):
#clf_svm = neighbors.KNeighborsClassifier(1)

# Overwrite C on clf_svm after construction. clf_svm is created by the
# statement that ends just above this fragment.
# NOTE(review): that call appears to pass C=0.5, which this replaces - confirm.
clf_svm.C = 0.3
# Same value as already passed to the MultinomialNB constructor above.
clf_nb.alpha = 0.04

# Start out empty; nothing is appended to them in the visible part of the
# script. NOTE(review): presumably per-fold scores for the SVM, the NB
# model and a blend of both - confirm against the rest of the loop.
l_svm_score = []
l_nb_score = []
l_blend_score = []

# Module-level floats, unrelated to clf_nb.alpha; not used in the visible
# part of the script. NOTE(review): presumably blending weights - confirm.
alpha = 1.
beta = 20.

# Alternative splitter kept for reference (disabled):
#sss = StratifiedShuffleSplit(Y, 5, test_size=0.2, random_state=0)
# Shuffled 5-fold splitter over len(Y) samples; iterated directly below,
# yielding (train_idx, val_idx) pairs.
sss = KFold(len(Y), n_folds=5, shuffle=True)
# chi2-scored feature selector keeping k=300000 features.
kbest = SelectKBest(chi2, k=300000)
for train_idx, val_idx in sss:
    # Slice features and labels into this fold's training / validation parts.
    x_train, y_train, x_val, y_val = X[train_idx], Y[train_idx], X[val_idx], Y[
        val_idx]
    # Fit the selector on the training part only and reduce its features.
    x_train = kbest.fit_transform(x_train, y_train)
Ejemplo n.º 3
0
# Progress message (French for "hstack finished"). The parenthesised
# single-argument form prints the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print("Hstack fini")

# Fit the feature pipeline on the training frame, then reuse the fitted
# pipeline to transform the test frame.
X = pipeline.fit_transform(df_train)

X_test = pipeline.transform(df_test)
# print(X.shape)

# TRAINING

clf_svm = svm.LinearSVC(
    penalty='l2',
    loss='squared_hinge',
    dual=True,
    tol=0.0001,
    C=0.5,
    multi_class='ovr',
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    verbose=0,
    random_state=None,
    max_iter=1000)
clf_nb = MultinomialNB(alpha=0.04, fit_prior=True, class_prior=None)

# Alternative classifier kept for reference (disabled):
#clf_svm = neighbors.KNeighborsClassifier(1)

# Overrides the C=0.5 passed to the LinearSVC constructor above.
clf_svm.C = 0.3
# Same value as already passed to the MultinomialNB constructor above.
clf_nb.alpha = 0.04

# Start out empty; nothing is appended to them in the visible part of the
# script. NOTE(review): presumably per-fold scores for the SVM, the NB
# model and a blend of both - confirm against the rest of the loop.
l_svm_score = []
l_nb_score = []
l_blend_score = []

# Module-level floats, unrelated to clf_nb.alpha; not used in the visible
# part of the script. NOTE(review): presumably blending weights - confirm.
alpha = 1.
beta = 20.

# Alternative splitter kept for reference (disabled):
#sss = StratifiedShuffleSplit(Y, 5, test_size=0.2, random_state=0)
# Shuffled 5-fold splitter over len(Y) samples; iterated directly below,
# yielding (train_idx, val_idx) pairs.
sss = KFold(len(Y), n_folds=5, shuffle=True)
# chi2-scored feature selector keeping k=300000 features.
kbest = SelectKBest(chi2, k=300000)
for train_idx, val_idx in sss:
    # Slice features and labels into this fold's training / validation parts.
    x_train, y_train = X[train_idx], Y[train_idx]
    x_val, y_val = X[val_idx], Y[val_idx]
    # Fit the selector on the training part only, then apply that same
    # fitted selection to the validation part.
    x_train = kbest.fit_transform(x_train, y_train)
    x_val = kbest.transform(x_val)