"""lda plan for a random forest classifier model.""" from dbispipeline.evaluators import GridEvaluator import dbispipeline.result_handlers as result_handlers from sklearn.ensemble import RandomForestClassifier from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from nlp4musa2020.dataloaders import vectorizer from nlp4musa2020.dataloaders.alf200k import ALF200KLoader from nlp4musa2020.dataloaders.alf200k import genre_target_labels import nlp4musa2020.evaluators as evaluators dataloader = ALF200KLoader( path='data/processed/dataset-lfm-genres.pickle', load_feature_groups=[], text_vectorizers=vectorizer.lda(), target=genre_target_labels(), ) pipeline = Pipeline([ ('scaler', StandardScaler()), ('model', RandomForestClassifier(n_jobs=-1)), ]) evaluator = GridEvaluator( parameters={ 'model__n_estimators': [10, 100, 300], }, grid_parameters=evaluators.grid_parameters_genres(), )
from sklearn.preprocessing import StandardScaler from nlp4musa2020.dataloaders.vectorizer import lda, tfidf from nlp4musa2020.dataloaders.alf200k import ALF200KLoader, genre_target_labels import nlp4musa2020.evaluators as evaluators from sklearn.svm import LinearSVC from sklearn.multioutput import MultiOutputClassifier dataloader = ALF200KLoader(path='data/processed/dataset-lfm-genres.pickle', load_feature_groups=[ 'rhymes', 'statistical', 'statistical_time', 'explicitness', ], text_vectorizers=lda() + tfidf(), target=genre_target_labels()) pipeline = Pipeline([ ('scaler', StandardScaler()), ('model', MultiOutputClassifier(LinearSVC())), ]) evaluator = GridEvaluator( parameters={ 'model__estimator__C': [ 0.1, 0.5, 1.0, 2.0, 5.0,
"""Plan for a knn model, lda.""" from dbispipeline.evaluators import GridEvaluator import dbispipeline.result_handlers as result_handlers from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from nlp4musa2020.dataloaders.alf200k import ALF200KLoader from nlp4musa2020.dataloaders.vectorizer import lda import nlp4musa2020.evaluators as evaluators dataloader = ALF200KLoader(path='data/processed/dataset-lfm-genres.pickle', load_feature_groups=[], text_vectorizers=lda(), target=[ 'alternative', 'blues', 'country', 'dance', 'electronic', 'funk', 'hip hop', 'indie', 'jazz', 'metal', 'pop', 'punk', 'rap', 'rnb', 'rock', 'soul' ]) pipeline = Pipeline([ ('scaler', StandardScaler()), ('model', KNeighborsClassifier(n_jobs=-1, algorithm='ball_tree')), ]) evaluator = GridEvaluator( parameters={ 'model__n_neighbors': [3, 4, 5, 10], 'model__weights': ['distance'], 'model__p': [1, 2],
from nlp4musa2020.dataloaders import vectorizer
from nlp4musa2020.dataloaders.alf200k import ALF200KLoader
from nlp4musa2020.dataloaders.alf200k import genre_target_labels
import nlp4musa2020.evaluators as evaluators

# Data source: five named feature groups are loaded, and the text
# vectorizers are the concatenation of vectorizer.lda() and vectorizer.tfidf().
dataloader = ALF200KLoader(
    path='data/processed/dataset-lfm-genres.pickle',
    load_feature_groups=[
        'rhymes',
        'statistical',
        'statistical_time',
        'explicitness',
        'audio',
    ],
    text_vectorizers=vectorizer.lda() + vectorizer.tfidf(),
    target=genre_target_labels(),
)

# Two-step model: standardize the features, then fit the extra-trees ensemble.
# n_jobs=-1 is passed straight through to ExtraTreesClassifier.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', ExtraTreesClassifier(n_jobs=-1)),
])

# Grid search over the ensemble size only; the remaining grid-search settings
# come from the project's evaluators.grid_parameters_genres() helper.
evaluator = GridEvaluator(
    parameters={
        'model__n_estimators': [10, 100, 300],
    },
    grid_parameters=evaluators.grid_parameters_genres(),
)