import pickle

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.utils import class_weight

# Project-local modules referenced below.
import utilities
import classify_sleep


def update(self):
    # Handle to storage for model parameters
    params = self._parameters
    print("Starting to update Logistic Regression!")

    # Make sure all my metadata is ready to go
    params.validateMeta()

    # Make sure my model data is ready to go
    self._model_data.validate()
    self._model_data.validateViews(self.getMetaData("db_views"))

    # Check my model data and pull out the training inputs
    observation_vectors = self._model_data.getMetaData("observation_vectors")
    truth_vectors = self._model_data.getMetaData("truth_vectors")
    params.setMetaData("db_views", [])

    # Houston, we are go
    lr = LogisticRegression()
    lr.penalty = params.getMetaData("penalty")
    lr.dual = params.getMetaData("dual")
    lr.C = params.getMetaData("C")
    lr.fit_intercept = params.getMetaData("fit_intercept")
    lr.intercept_scaling = params.getMetaData("intercept_scaling")
    class_weight_param = params.getMetaData("class_weight")
    if class_weight_param is not None:
        lr.class_weight = class_weight_param
    lr.max_iter = params.getMetaData("max_iter")
    lr.random_state = params.getMetaData("random_state")
    lr.solver = params.getMetaData("solver")
    tol = params.getMetaData("tol")
    if tol is not None:
        lr.tol = tol
    lr.multi_class = params.getMetaData("multi_class")
    lr.verbose = params.getMetaData("verbose")

    # Train the model and persist the fitted estimator as a pickled artifact
    lr.fit(observation_vectors, truth_vectors)
    params.setBinaryData("lr_model", "application/pickle", pickle.dumps(lr))
    self.finalize()
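
# -----------------------------------------------------------------------------
# A minimal, self-contained sketch of the pickle round-trip that update()
# relies on: the fitted estimator is serialized with pickle.dumps() and can be
# restored later with pickle.loads() for evaluation. The function name and the
# toy data below are purely illustrative assumptions, not part of the real
# model data or storage API.
# -----------------------------------------------------------------------------
def _pickle_round_trip_demo():
    # Tiny synthetic training set: two separable classes.
    observations = np.array([[0.0], [0.1], [0.9], [1.0]])
    truths = np.array([0, 0, 1, 1])

    lr = LogisticRegression()
    lr.fit(observations, truths)

    # Serialize exactly as update() does, then restore and reuse the model.
    blob = pickle.dumps(lr)
    restored = pickle.loads(blob)
    return restored.predict(observations)  # expected: array([0, 0, 1, 1])
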
def find_best(method_key, feature_set, training_subjects):
    # Load up all the data
    data_dict = utilities.build_data_dictionary(feature_set)

    # Initialize holders
    training_set_features = np.array([])
    training_set_labels = np.array([])

    # Build vectors for training subjects
    for subject in training_subjects:
        score_features, full_features = utilities.get_features(subject, data_dict)
        if np.shape(training_set_features)[0] == 0:
            training_set_features = full_features
            training_set_labels = score_features
        else:
            training_set_features = np.vstack((training_set_features, full_features))
            training_set_labels = np.vstack((training_set_labels, score_features))

    # Convert raw scores from 0-5 to binary, or to 0-2
    training_set_labels = utilities.process_raw_scores(training_set_labels,
                                                       classify_sleep.run_flag)

    # Pick a classifier and its hyperparameter grid
    if method_key == 'Logistic Regression':
        parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                      'penalty': ['l1', 'l2']}
        classifier = LogisticRegression()
    elif method_key == 'KNeighbors':
        parameters = {'n_neighbors': [500, 1000, 2000]}
        classifier = KNeighborsClassifier()
    elif method_key == 'MLP':
        parameters = {'solver': ['lbfgs'],
                      'max_iter': [1000],
                      'alpha': 10.0 ** -np.arange(1, 4),
                      'hidden_layer_sizes': [(30, 30, 30)]}
        classifier = MLPClassifier()
    elif method_key == 'Random Forest':
        parameters = {'n_estimators': [50],
                      'max_features': [None],
                      'max_depth': [10, 50, 100],
                      'min_samples_split': [10],
                      'min_samples_leaf': [32],
                      'bootstrap': [True]}
        classifier = RandomForestClassifier()
    else:
        raise ValueError('Unknown method key: ' + method_key)

    # Weight classes inversely to their frequency in the training labels
    class_weights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(training_set_labels), y=training_set_labels)
    class_weight_dict = {0: class_weights[0], 1: class_weights[1]}
    if len(class_weights) > 2:
        class_weight_dict = {0: class_weights[0],
                             1: class_weights[1],
                             2: class_weights[2]}
    classifier.class_weight = class_weight_dict

    # REM scoring is evaluated with log loss; everything else with ROC AUC
    if classify_sleep.run_flag == utilities.RUN_REM:
        scoring = 'neg_log_loss'
    else:
        scoring = 'roc_auc'

    # Grid-search the parameter space and keep the best combination
    clf = GridSearchCV(classifier, parameters, scoring=scoring)
    clf.fit(training_set_features, training_set_labels)

    if verbose:  # assumes a module-level verbose flag defined elsewhere
        print('Best parameters for set:')
        print(clf.best_params_)
        print('Score on training data: ' +
              str(clf.score(training_set_features, training_set_labels)))

    save_name = ('parameters/' + method_key +
                 utilities.string_from_features(feature_set) + '.npy')
    np.save(save_name, clf.best_params_)
    return clf.best_params_
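
# -----------------------------------------------------------------------------
# A sketch of how find_best() might be invoked (an assumption, not taken from
# the source): the method key must match one of the four branches above, and
# the feature set and subject IDs below are hypothetical placeholders for
# whatever the utilities module actually provides.
#
#     best = find_best('Random Forest',
#                      feature_set=['motion', 'hr'],            # hypothetical
#                      training_subjects=['subj01', 'subj02'])  # hypothetical
#     print(best)  # e.g. {'bootstrap': True, 'max_depth': 50, ...}
# -----------------------------------------------------------------------------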