def combined_symmetry_train_classifier(
        data=None,
        data_backup_filename='ShapeAndTextureScores',
        filename_to_save_model='ShapeAndTextureModel'):
    """Fit a random forest on the merged shape and texture symmetry scores.

    The targets are read from the `Labels` column (expert diagnosis about
    symmetry, PH2 Dataset). The first 50 rows are held out for validation and
    every remaining row is used for training.

    # Arguments :
        data:                   Table of scores with a `Labels` column, as
                                returned by the texture_symmetry_scores
                                function (optional). Its first column is
                                excluded from the features.
        data_backup_filename:   Only used if `data` is None: name (without the
                                `.csv` extension) of the file to load from the
                                package `data/patchesDataSet` directory.
        filename_to_save_model: String or None. When truthy, the fitted
                                classifier is handed to `save_model` under
                                this name.

    # Outputs :
        clf: The fitted classifier.
        acc: The accuracy score of the classifier on the 50 held-out rows.
    """
    loaded_from_backup = data is None
    if loaded_from_backup:
        data = pd.read_csv(
            f"{package_path()}/data/patchesDataSet/{data_backup_filename}.csv",
            index_col=False)

    feature_names = list(data)
    if loaded_from_backup:
        # The backup file carries one extra leading column ahead of the labels
        # (presumably the index written when the CSV was saved -- confirm).
        feature_names.pop(0)
    # NOTE(review): the label drop is assumed to apply to both the CSV and the
    # in-memory case, mirroring classifierTrainer -- confirm.
    feature_names.pop(0)  # Delete labels too

    held_out = 50
    inputs = data[feature_names]
    targets = data.Labels

    forest = RandomForestClassifier(n_estimators=10, max_leaf_nodes=3,
                                    random_state=2)
    forest.fit(inputs[held_out:], targets[held_out:])

    predictions = forest.predict(inputs[:held_out])
    accuracy = accuracy_score(targets[:held_out], predictions)

    if filename_to_save_model:
        save_model(forest, filename_to_save_model)

    return forest, accuracy
def classifierTrainer(maxLeafNodes, data=None,
                      data_backup_file='patchesDataSet/Features',
                      filename_to_save_model='PatchClassifierModel'):
    """Fit a random forest on the patchesDataSet features.

    The targets are read from the `Result` column, and the last column of the
    table is excluded from the features. The first 500 rows are held out for
    validation and every remaining row is used for training.

    # Arguments :
        maxLeafNodes:           Int or None. Grow trees with max_leaf_nodes in
                                best-first fashion. Best nodes are defined as
                                relative reduction in impurity. If None then
                                unlimited number of leaf nodes (scikit-learn
                                RandomForestClassifier() documentation).
        data:                   Table of features with a `Result` column
                                (optional). When None, the table is loaded
                                from the backup CSV instead.
        data_backup_file:       Only used if `data` is None: name (without the
                                `.csv` extension) of the file to load,
                                relative to the package `data/patchesDataSet`
                                directory.
        filename_to_save_model: String or None. When truthy, the fitted
                                classifier is handed to `save_model` under
                                this name.

    # Outputs :
        clf: The fitted classifier.
        acc: The accuracy score of the classifier on the 500 held-out rows.
    """
    loaded_from_backup = data is None
    if loaded_from_backup:
        # NOTE(review): with the default `data_backup_file` this resolves to
        # `<package>/data/patchesDataSet/patchesDataSet/Features.csv` --
        # confirm that the repeated directory name is intended.
        data = pd.read_csv(
            f"{package_path()}/data/patchesDataSet/{data_backup_file}.csv",
            index_col=False)

    feature_names = list(data)
    if loaded_from_backup:
        # The backup file carries one extra leading column (presumably the
        # index written when the CSV was saved -- confirm).
        feature_names.pop(0)
    feature_names.pop()  # Remove `Result` colname.

    held_out = 500
    inputs = data[feature_names]
    targets = data.Result

    forest = RandomForestClassifier(max_leaf_nodes=maxLeafNodes,
                                    random_state=2)
    forest.fit(inputs[held_out:], targets[held_out:])

    # The model is saved before it is scored on the held-out rows.
    if filename_to_save_model:
        save_model(forest, filename_to_save_model)

    predictions = forest.predict(inputs[:held_out])
    accuracy = accuracy_score(targets[:held_out], predictions)

    return forest, accuracy
def example():
    """Usage example of the main functionalities within this file.

    Runs the training-data extraction, trains the patch classifier with at
    most 200 leaf nodes per tree, prints its accuracy score and saves the
    model as "SimilarPatchClassifier".
    """
    # The return value is not used here; classifierTrainer is called without
    # `data`, so it loads its table from the backup CSV.
    dataExtractorForTraining(10, 199, 4)

    # With its default arguments classifierTrainer already passes the model to
    # save_model once; the call below saves it again under a second name.
    trained_model, accuracy = classifierTrainer(200)
    print(f'Accuracy score: {accuracy}')

    save_model(trained_model, "SimilarPatchClassifier")