def model_iterator(config):
    """Build the full model search space described by *config*.

    Expands the configured sklearn class list into a parameter grid
    (capped at ``config['grid_size']`` candidates) and pairs every
    candidate with every feature percentile.

    Returns a list of (model, percentile) tuples.
    """
    model_grid = grid_generator.grid_from_classes(
        config['sklearn_models'], size=config['grid_size'])
    percentile_options = config['feature_percentiles']
    # Cartesian product: every model candidate at every feature percentile.
    return [pair for pair in product(model_grid, percentile_options)]
def model_iterator(config):
    """Return a parameter grid over a fixed pair of ensemble classifiers.

    Note: *config* is accepted for interface compatibility but is not
    consulted — the class list is hard-coded in this variant.
    """
    class_paths = [
        'sklearn.ensemble.RandomForestClassifier',
        'sklearn.ensemble.AdaBoostClassifier',
    ]
    return grid_generator.grid_from_classes(class_paths)
"""Fit a grid of RandomForest candidates on iris and log one record per model."""
from dstools.config import main
from dstools.lab import Experiment
from dstools.lab.util import top_k
from dstools.sklearn import grid_generator
from dstools.sklearn.util import model_name
from sklearn.datasets import load_iris
from sklearn.metrics import precision_score
from sklearn.cross_validation import train_test_split

# Expand the single configured class into a grid of parametrized candidates.
classes = ["sklearn.ensemble.RandomForestClassifier"]
models = grid_generator.grid_from_classes(classes)

# 70/30 train/test split of the iris dataset.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.30)

# create a new experiment
ex = Experiment(main["logger"])

for model in models:
    # create a new record
    record = ex.record()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # Log the score plus enough metadata to reproduce this candidate.
    record["precision"] = precision_score(y_test, predictions)
    record["parameters"] = model.get_params()
    record["model"] = model_name(model)

# select top_k
def model_iterator(config):
    """Return a parameter grid over RandomForest and LogisticRegression.

    Note: *config* is accepted for interface compatibility but is not
    consulted — the class list is hard-coded in this variant.
    """
    class_paths = [
        'sklearn.ensemble.RandomForestClassifier',
        'sklearn.linear_model.LogisticRegression',
    ]
    return grid_generator.grid_from_classes(class_paths)
# Sketch: compares two candidate APIs for logging experiment records.
# NOTE(review): this appears to be illustrative/design code, not runnable —
# several names used below (some_metric, model_name, some_other_metric,
# feature_set, model_parameters) are never defined in this snippet.
from dstools import ExperimentLogger
from dstools.sklearn import grid_generator
from experiment import Experiment
import pandas as pd

# create logger instance
exp_logger = ExperimentLogger()

# assumes data/train.csv has a 'survived' target column (Titanic-style) —
# TODO confirm
train_data = pd.read_csv('data/train.csv')
training_x = train_data.drop('survived', axis=1).values
training_y = train_data.survived.values

models = grid_generator.grid_from_classes(['sklearn.ensemble.RandomForestClassifier'])

# ugly mode
for model in models:
    model = model.fit(training_x, training_y)
    rec = exp_logger.new_record()
    # NOTE(review): the arguments below are undefined — placeholders showing
    # the kinds of values a record would capture per fitted model.
    rec.add(some_metric)
    rec.add(model_name)
    rec.add(some_other_metric)
    rec.add(feature_set)
    rec.add(model_parameters)

# cool kids mode
# using the decorator will log everything that ends with '_log'
# note that using this mode restricts us to save records only from one function
# what if I want to save one record but the data is splitted among several
# functions?