def update(self):
    """Fit a ``MultinomialNB`` model on the model data and store it pickled.

    Steps, in order:

    1. Validate the parameter metadata and the model data, including the
       views named by this object's ``"db_views"`` metadata.
    2. Read ``"observation_vectors"`` and ``"truth_vectors"`` from the
       model data's metadata.
    3. Set the ``"db_views"`` metadata on the parameter store to ``[]``.
    4. Configure the classifier from the ``"alpha"``, ``"fit_prior"`` and
       (when present) ``"class_prior"`` metadata, then fit it.
    5. Store the pickled classifier as the ``"mnb_model"`` binary entry and
       call ``self.finalize()``.

    Any exception raised by the validation calls or by ``fit`` propagates
    to the caller; nothing is stored in that case.
    """
    # Handle to storage for model parameters
    params = self._parameters

    # Make sure all my meta data is ready to go
    params.validateMeta()

    # Make sure my model data is ready to go
    self._model_data.validate()
    self._model_data.validateViews(self.getMetaData("db_views"))

    # Pull the training inputs out of the model data
    observation_vectors = self._model_data.getMetaData("observation_vectors")
    truth_vectors = self._model_data.getMetaData("truth_vectors")

    # The views were validated above; the stored list is reset before
    # training.
    # NOTE(review): presumably self.getMetaData reads from the same store
    # as params -- confirm against the class definition.
    params.setMetaData("db_views", [])

    # Houston we are go
    mnb = MultinomialNB()
    mnb.alpha = self.getMetaData("alpha")
    mnb.fit_prior = self.getMetaData("fit_prior")

    # Identity check, not ``!= None``: an array-valued prior would make the
    # equality comparison element-wise and the ``if`` ambiguous.
    class_prior = self.getMetaData("class_prior")
    if class_prior is not None:
        mnb.class_prior = class_prior

    mnb.fit(observation_vectors, truth_vectors)

    params.setBinaryData("mnb_model", "application/pickle", pickle.dumps(mnb))
    self.finalize()
dual=True, tol=0.0001, C=0.5, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000) clf_nb = MultinomialNB(alpha=0.04, fit_prior=True, class_prior=None) #clf_svm = neighbors.KNeighborsClassifier(1) clf_svm.C = 0.3 clf_nb.alpha = 0.04 l_svm_score = [] l_nb_score = [] l_blend_score = [] alpha = 1. beta = 20. #sss = StratifiedShuffleSplit(Y, 5, test_size=0.2, random_state=0) sss = KFold(len(Y), n_folds=5, shuffle=True) kbest = SelectKBest(chi2, k=300000) for train_idx, val_idx in sss: x_train, y_train, x_val, y_val = X[train_idx], Y[train_idx], X[val_idx], Y[ val_idx] x_train = kbest.fit_transform(x_train, y_train)
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("Hstack fini")

# Fit the feature pipeline on the training frame, then apply the fitted
# pipeline unchanged to the test frame.
X = pipeline.fit_transform(df_train)
X_test = pipeline.transform(df_test)

# TRAINING
# Alternative classifier (disabled): neighbors.KNeighborsClassifier(1)
clf_svm = svm.LinearSVC(
    penalty='l2',
    loss='squared_hinge',
    dual=True,
    tol=0.0001,
    # The constructor used to receive C=0.5 and was immediately overwritten
    # with 0.3; the effective value is passed directly instead.
    C=0.3,
    multi_class='ovr',
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    verbose=0,
    random_state=None,
    max_iter=1000,
)
clf_nb = MultinomialNB(alpha=0.04, fit_prior=True, class_prior=None)

# Per-fold score accumulators; nothing in this section appends to them.
l_svm_score = []
l_nb_score = []
l_blend_score = []

# NOTE(review): presumably blending weights for the two classifiers --
# their use is not visible in this section; confirm.
alpha = 1.
beta = 20.

# 5-fold shuffled split over the len(Y) samples.
# NOTE(review): this call form (sample count first, n_folds=..., object
# iterated directly) matches the legacy sklearn.cross_validation.KFold --
# confirm against the file's imports.
# Alternative splitter (disabled):
#   StratifiedShuffleSplit(Y, 5, test_size=0.2, random_state=0)
sss = KFold(len(Y), n_folds=5, shuffle=True)
kbest = SelectKBest(chi2, k=300000)

for train_idx, val_idx in sss:
    x_train, y_train = X[train_idx], Y[train_idx]
    x_val, y_val = X[val_idx], Y[val_idx]

    # Feature selection is fitted on the training part of the fold only and
    # then applied to the validation part.
    x_train = kbest.fit_transform(x_train, y_train)
    x_val = kbest.transform(x_val)