Example #1
from sklearn import tree
from sklearn.metrics import mean_squared_error

modelPack = {}  # module-level registry of fitted models (defined elsewhere in the source project)


def trees(x_train, x_test, y_train, y_test):

    res = []

    m = tree.DecisionTreeRegressor()
    m.fit(x_train, y_train)

    predictions = m.predict(x_test)
    acc = mean_squared_error(y_test, predictions)  # "acc" is really the MSE, so lower is better

    modelPack['DecisionTreeRegressor'] = m

    res.append((acc, "DecisionTreeRegressor"))

    m = tree.ExtraTreeRegressor()
    m.fit(x_train, y_train)
    predictions = m.predict(x_test)
    acc = mean_squared_error(y_test, predictions)

    modelPack['ExtraTreeRegressor'] = m

    res.append((acc, "ExtraTreeRegressor"))

    print(res)

    return res
Example #2
File: algos.py Project: m-gal/gscreen
def get_xtr():
    """An extremely randomized tree regressor.

    * criterion: {"mse", "friedman_mse", "mae"}, default="mse"
        The function to measure the quality of a split.
    * splitter: {"random", "best"}, default="random"
        The strategy used to choose the split at each node.
    * max_depth: int, default=None
        The maximum depth of the tree.
        If None, then nodes are expanded until all leaves are pure
        or until all leaves contain less than min_samples_split samples.
    * min_samples_split: int or float, default=2
        The minimum number of samples required to split an internal node.
    * min_samples_leaf: int or float, default=1
        The minimum number of samples required to be at a leaf node.
    * min_weight_fraction_leaf: float, default=0.0
        The minimum weighted fraction of the sum total of weights
        (of all the input samples) required to be at a leaf node.
        Samples have equal weight when sample_weight is not provided.
    * max_features: int, float, {"auto", "sqrt", "log2"} or None, default="auto"
        The number of features to consider when looking for the best split.
    """
    return tree.ExtraTreeRegressor(
        criterion="mse",
        splitter="random",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=None,
        random_state=rnd_state,  # rnd_state: module-level seed defined elsewhere in algos.py
    )
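A minimal usage sketch for the factory above (hypothetical data; `rnd_state` stands in for the module-level seed). Note that scikit-learn 1.0 renamed the "mse" criterion to "squared_error" and removed the old name in 1.2, so this exact configuration assumes an older version:

from sklearn import tree
from sklearn.datasets import make_regression

rnd_state = 42  # assumed stand-in for the module-level seed

X, y = make_regression(n_samples=200, n_features=5, random_state=rnd_state)
model = get_xtr()  # returns an unfitted ExtraTreeRegressor
model.fit(X, y)
print(model.predict(X[:3]))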
Example #3
def test_sk_ExtraTreeRegressor():
    print("Testing sklearn, ExtraTreeRegressor...")
    mod = tree.ExtraTreeRegressor()
    X, y = iris_data  # iris_data: (features, target) fixture prepared by the test module
    mod.fit(X, y)
    docs = {'name': "ExtraTreeRegressor test"}
    fv = X[0, :]  # a single feature vector to exercise prediction
    upload(mod, fv, docs)  # upload: helper from the surrounding test suite
Example #4
def test_regression_toy():
    """Check regression on a toy dataset."""
    # X, y, T and true_result are module-level toy fixtures
    # Decision trees
    clf = tree.DecisionTreeRegressor()
    clf.fit(X, y)
    assert_almost_equal(clf.predict(T), true_result)

    clf = tree.DecisionTreeRegressor(max_features=1, random_state=1)
    clf.fit(X, y)
    assert_almost_equal(clf.predict(T), true_result)

    # Extra-trees
    clf = tree.ExtraTreeRegressor()
    clf.fit(X, y)
    assert_almost_equal(clf.predict(T), true_result)

    clf = tree.ExtraTreeRegressor(max_features=1, random_state=1)
    clf.fit(X, y)
    assert_almost_equal(clf.predict(T), true_result)
Example #5
    def shotgun_models(x, y):

        kernel = (gaussian_process.kernels.DotProduct()
                  + gaussian_process.kernels.WhiteKernel())
        models = [
            gaussian_process.GaussianProcessRegressor(kernel=kernel,
                                                      random_state=1337).fit(
                                                          x, y),
            linear_model.LinearRegression(n_jobs=2).fit(x, y),
            tree.DecisionTreeClassifier().fit(x, y),  # note: a classifier among the regressors
            tree.DecisionTreeRegressor().fit(x, y),
            tree.ExtraTreeRegressor().fit(x, y),
            naive_bayes.GaussianNB().fit(x, y),
            neural_network.MLPRegressor(hidden_layer_sizes=(10, ),
                                        activation='relu',
                                        solver='adam',
                                        alpha=0.001,
                                        batch_size='auto',
                                        learning_rate='constant',
                                        learning_rate_init=0.01,
                                        power_t=0.5,
                                        max_iter=1000,
                                        shuffle=True,
                                        random_state=9,
                                        tol=0.0001,
                                        verbose=False,
                                        warm_start=False,
                                        momentum=0.9,
                                        nesterovs_momentum=True,
                                        early_stopping=False,
                                        validation_fraction=0.1,
                                        beta_1=0.9,
                                        beta_2=0.999,
                                        epsilon=1e-08).fit(x, y),
            linear_model.Lasso(alpha=0.1,
                               copy_X=True,
                               fit_intercept=True,
                               max_iter=1000,
                               normalize=False,
                               positive=False,
                               precompute=False,
                               random_state=None,
                               selection='cyclic',
                               tol=0.0001,
                               warm_start=False).fit(x, y),
            linear_model.ElasticNet().fit(x, y),
            linear_model.SGDRegressor().fit(x, y),
            linear_model.Ridge().fit(x, y),
            linear_model.PassiveAggressiveRegressor().fit(x, y)
        ]

        return models
Example #6
 def default_models_(self):
     return {
         'Tree': {'clf': tree.DecisionTreeRegressor(),
                  'param': {'max_depth': [3, 5, 7, 10, 20]
                            }},
         'GBDT': {'clf': ensemble.GradientBoostingRegressor(random_state=1),
                  'param': {
                      'n_estimators': [50, 100, 150, 200],
                      'learning_rate': [0.1],
                      'max_depth': [4, 6, 8],
                      'alpha': [0.7, 0.8, 0.9],
                      'max_leaf_nodes': [10, 20],
                      'min_samples_split': [2, 4, 7]
                  }},
         'Lin': {'clf': linear_model.LinearRegression(),
                 'param': {
                     'fit_intercept': [True, False],
                     'normalize': [True, False]
                 }},
         'Ridge': {'clf': linear_model.Ridge(),
                   'param': {}},
         'Lasso': {'clf': linear_model.Lasso(),
                   'param': {}},
         'ElasN': {'clf': linear_model.ElasticNet(),
                   'param': {}},
         'Lars': {'clf': linear_model.Lars(),
                  'param': {}},
         'Bayers': {'clf': linear_model.BayesianRidge(),
                    'param': {}},
         'Poly2': {'clf': Pipeline([('poly', PolynomialFeatures(degree=2)),
                                    ('std_scaler', StandardScaler()),
                                    ('line_reg', linear_model.LinearRegression())
                                    ]),
                   'param': {}},
         'SGD': {'clf': linear_model.SGDRegressor(),
                 'param': {}},
         'SVM': {'clf': svm.SVR(kernel='rbf', C=1.0, epsilon=1),
                 'param': {
                     'C': [1, 10, 100, 1000, 10000]
                 }},
         'Knn': {'clf': neighbors.KNeighborsRegressor(),
                 'param': {}},
         'RF': {'clf': ensemble.RandomForestRegressor(random_state=1),
                'param':
                    {'n_estimators': [10, 30, 50, 100, 150], }},
         'ADA': {'clf': ensemble.AdaBoostRegressor(n_estimators=100),
                 'param': {}},
         'BAG': {'clf': BaggingRegressor(bootstrap=True),
                 'param': {'n_estimators': [50, 100, 200]}},
         'ET': {'clf': tree.ExtraTreeRegressor(),
                'param': {}},
     }
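The dictionary pairs each estimator with a parameter grid, which suggests a grid-search loop such as this sketch (illustrative data and entries; an empty grid simply evaluates the estimator's defaults, and the `normalize` options for LinearRegression only exist in scikit-learn < 1.2):

from sklearn import linear_model, tree
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV

X, y = make_regression(n_samples=200, n_features=8, random_state=1)

# two entries mirroring the structure returned by default_models_()
models = {
    'Tree': {'clf': tree.DecisionTreeRegressor(),
             'param': {'max_depth': [3, 5, 7]}},
    'Ridge': {'clf': linear_model.Ridge(), 'param': {}},
}

for name, spec in models.items():
    search = GridSearchCV(spec['clf'], spec['param'], cv=5,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)
    print(name, search.best_score_, search.best_params_)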
Example #7
def get_list_of_basic_models():
    print(f"\nCreate list of basic models will pass through...")
    #! BE CAREFUL ! May take more time then expect or freeze process
    # @ Retun NaN in our case
    basic_models = [
        DummyRegressor(),  # baseline; rnd_state below is the project's global seed
        # # ^ ----------------------------------------- Classical linear regressors
        # linear_model.LinearRegression(),
        # linear_model.Ridge(alpha=0.5, random_state=rnd_state),
        # # linear_model.SGDRegressor(random_state=rnd_state),
        # # ^ ---------------------------------- Regressors with variable selection
        # linear_model.Lasso(alpha=0.1, random_state=rnd_state),
        # linear_model.ElasticNet(random_state=rnd_state),
        # # @ linear_model.LassoLars(alpha=0.1, random_state=rnd_state),
        # # ^ ------------------------------------------------- Bayesian regressors
        # # @ linear_model.BayesianRidge(),
        # # @ linear_model.ARDRegression(),
        # # ^ ------------------------------------------- Outlier-robust regressors
        # # @ linear_model.HuberRegressor(),
        # linear_model.RANSACRegressor(random_state=rnd_state),
        # # ^ -----------------------Generalized linear models (GLM) for regression
        # linear_model.TweedieRegressor(power=0, alpha=0.5, link="auto"),
        # # linear_model.PoissonRegressor(),
        # linear_model.GammaRegressor(),
        # # ^ ------------------------------------------------------- Miscellaneous
        # linear_model.PassiveAggressiveRegressor(random_state=rnd_state),
        # # @ KernelRidge(),
        # ## --------------------------------------------- Support Vector Machines
        # # svm.LinearSVR(random_state=rnd_state),
        # #! svm.NuSVR(), #! CAN FREEZE
        # #! svm.SVR(),  #! CAN FREEZE
        # ^ ------------------------------------------------------ Decision Trees
        tree.DecisionTreeRegressor(random_state=rnd_state),
        tree.ExtraTreeRegressor(random_state=rnd_state),
        # ^ ---------------------------------------------------- Ensemble Methods
        # @ ensemble.HistGradientBoostingRegressor(random_state=rnd_state),
        # ensemble.AdaBoostRegressor(n_estimators=50, random_state=rnd_state),
        # ensemble.BaggingRegressor(n_estimators=50, random_state=rnd_state),
        # ensemble.ExtraTreesRegressor(n_estimators=100, random_state=rnd_state),  #! CAN BE LOOONG
        # ensemble.RandomForestRegressor(n_estimators=100, random_state=rnd_state),  #! CAN BE LOOONG
        # ensemble.GradientBoostingRegressor(n_estimators=100, random_state=rnd_state),
        # xgb.XGBRegressor(n_estimators=1000, random_state=rnd_state),
        # ^ --------------------------------------------------- Nearest Neighbors
        # @ neighbors.KNeighborsRegressor(),
        # ^ ----------------------------------------------- Neural network models
        # neural_network.MLPRegressor(hidden_layer_sizes=100, random_state=rnd_state),
    ]

    return basic_models
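A sketch of how such a list is typically consumed: fit every enabled model on the same split and compare a single metric (illustrative data; the estimator imports and `rnd_state` inside the function come from the source module):

from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rnd_state = 42  # assumed stand-in for the module-level seed
X, y = make_regression(n_samples=300, n_features=10, random_state=rnd_state)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rnd_state)

for model in get_list_of_basic_models():
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    print(f"{model.__class__.__name__}: MSE = {mse:.3f}")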
Example #8
import pandas as pd
from sklearn import tree


def dec_tree_reg(df, test):
    dt = tree.ExtraTreeRegressor()
    # set target, train and test; train and test must have the same number of features
    target = df['count']
    feature_cols = ['time', 'holiday', 'season', 'temp', 'atemp', 'windspeed', 'weather', 'humidity']
    train = df[feature_cols]
    test = test[feature_cols]  # assumes the `test` parameter holds the raw test frame
    dt.fit(train, target)


    predicted_probs = dt.predict(test)  # point predictions, not probabilities, despite the name
    predicted_probs = pd.Series(predicted_probs).astype(int)

    keep = pd.read_csv('data/test.csv')
    keep = keep['datetime']
    #save to file
    submit = pd.concat([keep,predicted_probs],axis=1)
    # print(forest.feature_importances_)
    submit.columns=['datetime','count']
    submit.to_csv('data/submissiondtree.csv',index=False)
Example #9
import joblib
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split


def train_lotto(num_var):
    # csv_filename and model_filename are module-level paths defined elsewhere in the script
    lotto_csv = pd.read_csv(csv_filename, names=["year", "month", "day", "midday_evening", "num_1", "num_2", "num_3"])
    lotto_csv = lotto_csv.dropna()
    
    X = lotto_csv.drop(["num_1", "num_2", "num_3"], axis=1)
    y = lotto_csv[f"num_{num_var}"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=123)

    # tree_model = tree.DecisionTreeRegressor()
    tree_model = tree.ExtraTreeRegressor()
    tree_model.fit(X_train, y_train)

    with open(model_filename, "wb") as model_file:
        joblib.dump(tree_model, model_file)
        print(f"Done Training num_{num_var}")

    with open(model_filename, "rb") as model_file:
        tree_model = joblib.load(model_filename)
        result = tree_model.score(X_test, y_test)
        print(result)
Example #10
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import tree


def dec_tree_reg(df, test):
    dt = tree.ExtraTreeRegressor()
    # set target, train and test; train and test must have the same number of features
    target = df['count']
    feature_cols = [
        'time', 'holiday', 'season', 'temp', 'atemp', 'windspeed', 'weather',
        'humidity'
    ]
    train = df[feature_cols]
    test = test[feature_cols]  # assumes the `test` parameter holds the raw test frame
    dt.fit(train, target)

    predicted_probs = dt.predict(test)  # point predictions, not probabilities, despite the name
    predicted_probs = pd.Series(predicted_probs).astype(int)

    keep = pd.read_csv('data/test.csv')
    keep = keep['datetime']
    #save to file
    submit = pd.concat([keep, predicted_probs], axis=1)
    # print(forest.feature_importances_)
    submit.columns = ['datetime', 'count']
    submit.to_csv('data/submissiondtree.csv', index=False)

    plt.figure()
    # pl.scatter(tr, y, c="k", label="data")
    plt.plot(train['time'], target, c="g", label="training target", linewidth=2)
    plt.plot(test['time'],
             predicted_probs,
             c="r",
             label="predictions",
             linewidth=2)
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression")
    plt.legend()
    plt.show()
Example #11
def trees(x_train, x_test, y_train, y_test):

    res = []
    print("hello reg trees")
    m = tree.DecisionTreeRegressor()
    m.fit(x_train, y_train)
    print("fiting")
    predictions = m.predict(x_test)
    acc = mean_squared_error(y_test, predictions)

    res.append((acc, "DecisionTreeRegressor"))

    m = tree.ExtraTreeRegressor()
    m.fit(x_train, y_train)
    predictions = m.predict(x_test)
    acc = mean_squared_error(y_test, predictions)

    res.append((acc, "ExtraTreeRegressor"))

    print(res)

    return res
Example #12
File: test_e2e.py Project: goldv/m2cgen
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsCV()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.PoissonRegressor()),
        regression(
            linear_model.RANSACRegressor(
                base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
                random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TweedieRegressor(power=0.0)),
        regression(linear_model.TweedieRegressor(power=1.0)),
        regression(linear_model.TweedieRegressor(power=1.5)),
        regression(linear_model.TweedieRegressor(power=2.0)),
        regression(linear_model.TweedieRegressor(power=3.0)),

        # Statsmodels Linear Regression
        classification_binary(
            utils.StatsmodelsSklearnLikeWrapper(
                sm.GLM,
Example #13
class ScikitLearnModelConverterTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      (tree.DecisionTreeRegressor(random_state=42),),
      (tree.ExtraTreeRegressor(random_state=42),),
      (ensemble.RandomForestRegressor(random_state=42),),
      (ensemble.ExtraTreesRegressor(random_state=42),),
      (ensemble.GradientBoostingRegressor(random_state=42,),),
      (ensemble.GradientBoostingRegressor(random_state=42, init="zero"),),
      (ensemble.GradientBoostingRegressor(
          random_state=42,
          init=tree.DecisionTreeRegressor(random_state=42),
      ),),
  )
  def test_convert_reproduces_regression_model(
      self,
      sklearn_tree,
  ):
    features, labels = datasets.make_regression(
        n_samples=100,
        n_features=10,
        random_state=42,
    )
    sklearn_tree.fit(features, labels)
    tf_features = tf.constant(features, dtype=tf.float32)

    with self.subTest(msg="inference_is_reproduced_before_save"):
      tf_tree = scikit_learn_model_converter.convert(sklearn_tree)
      tf_labels = tf_tree(tf_features).numpy().ravel()
      sklearn_labels = sklearn_tree.predict(features).astype(np.float32)
      self.assertAllClose(sklearn_labels, tf_labels, rtol=1e-5)

    with self.subTest(msg="inference_is_reproduced_after_save"):
      path = pathlib.Path(self.get_temp_dir())
      tf_tree = scikit_learn_model_converter.convert(
          sklearn_tree,
          intermediate_write_path=path / "intermediate_path",
      )
      tf.saved_model.save(obj=tf_tree, export_dir=path)
      loaded_tf_tree = tf.saved_model.load(path)
      self.assertAllEqual(tf_tree(tf_features), loaded_tf_tree(tf_features))

  @parameterized.parameters((tree.DecisionTreeClassifier(random_state=42),),
                            (tree.ExtraTreeClassifier(random_state=42),),
                            (ensemble.RandomForestClassifier(random_state=42),),
                            (ensemble.ExtraTreesClassifier(random_state=42),))
  def test_convert_reproduces_classification_model(
      self,
      sklearn_tree,
  ):
    features, labels = datasets.make_classification(
        n_samples=100,
        n_features=10,
        n_classes=4,
        n_clusters_per_class=1,
        random_state=42,
    )
    sklearn_tree.fit(features, labels)
    tf_features = tf.constant(features, dtype=tf.float32)

    with self.subTest(msg="inference_is_reproduced_before_save"):
      tf_tree = scikit_learn_model_converter.convert(sklearn_tree)
      tf_labels = tf_tree(tf_features).numpy()
      sklearn_labels = sklearn_tree.predict_proba(features).astype(np.float32)
      self.assertAllClose(sklearn_labels, tf_labels, rtol=1e-5)

    with self.subTest(msg="inference_is_reproduced_after_save"):
      path = pathlib.Path(self.get_temp_dir())
      tf_tree = scikit_learn_model_converter.convert(
          sklearn_tree,
          intermediate_write_path=path / "intermediate_path",
      )
      tf.saved_model.save(obj=tf_tree, export_dir=path)
      loaded_tf_tree = tf.saved_model.load(path)
      self.assertAllEqual(tf_tree(tf_features), loaded_tf_tree(tf_features))

  def test_convert_raises_when_unrecognised_model_provided(self):
    features, labels = datasets.make_regression(
        n_samples=100,
        n_features=10,
        random_state=42,
    )
    sklearn_model = linear_model.LinearRegression().fit(features, labels)
    with self.assertRaises(NotImplementedError):
      scikit_learn_model_converter.convert(sklearn_model)

  def test_convert_raises_when_sklearn_model_is_not_fit(self):
    with self.assertRaises(
        ValueError,
        msg="Scikit-learn model must be fit to data before converting to TF.",
    ):
      _ = scikit_learn_model_converter.convert(tree.DecisionTreeRegressor())

  def test_convert_raises_when_regression_target_is_multivariate(self):
    features, labels = datasets.make_regression(
        n_samples=100,
        n_features=10,
        # This produces a two-dimensional target variable.
        n_targets=2,
        random_state=42,
    )
    sklearn_tree = tree.DecisionTreeRegressor().fit(features, labels)
    with self.assertRaisesRegex(
        ValueError,
        "Only scalar regression and single-label classification are supported.",
    ):
      _ = scikit_learn_model_converter.convert(sklearn_tree)

  def test_convert_raises_when_classification_target_is_multilabel(self):
    features, labels = datasets.make_multilabel_classification(
        n_samples=100,
        n_features=10,
        # This assigns two class labels per example.
        n_labels=2,
        random_state=42,
    )
    sklearn_tree = tree.DecisionTreeClassifier().fit(features, labels)
    with self.assertRaisesRegex(
        ValueError,
        "Only scalar regression and single-label classification are supported.",
    ):
      _ = scikit_learn_model_converter.convert(sklearn_tree)

  def test_convert_uses_intermediate_model_path_if_provided(self):
    features, labels = datasets.make_classification(
        n_samples=100,
        n_features=10,
        n_classes=4,
        n_clusters_per_class=1,
        random_state=42,
    )
    sklearn_tree = tree.DecisionTreeClassifier().fit(features, labels)
    write_path = self.create_tempdir()
    _ = scikit_learn_model_converter.convert(
        sklearn_tree,
        intermediate_write_path=write_path,
    )
    # We should be able to load the intermediate TFDF model from the given path.
    tfdf_tree = tf.keras.models.load_model(write_path)
    self.assertIsInstance(tfdf_tree, tf.keras.Model)

  def test_convert_sklearn_tree_to_tfdf_pytree_raises_if_weight_provided_for_classification_tree(
      self):
    features, labels = datasets.make_classification(random_state=42)
    sklearn_tree = tree.DecisionTreeClassifier(random_state=42).fit(
        features,
        labels,
    )
    with self.assertRaisesRegex(
        ValueError,
        "weight should not be passed for classification trees.",
    ):
      _ = scikit_learn_model_converter.convert_sklearn_tree_to_tfdf_pytree(
          sklearn_tree,
          weight=0.5,
      )

  def test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant(
      self):
    features, labels = datasets.make_regression(
        n_samples=100,
        n_features=10,
        random_state=42,
    )
    init_estimator = linear_model.LinearRegression()
    sklearn_model = ensemble.GradientBoostingRegressor(init=init_estimator)
    sklearn_model.fit(features, labels)
    with self.assertRaises(ValueError):
      _ = scikit_learn_model_converter.convert(sklearn_model)
Example #14
#%% Import models's libraries
from sklearn import tree  # Canonical Decision tree & Extremely randomized tree
from sklearn import ensemble  # RF, Gradient Boosting, AdaBoost
from skopt.space import Real, Categorical, Integer

import xgboost as xgb

#%% Toggles to go through
random_state = 42

#%% base_estimator = tree.ExtraTreeRegressor
base_xt_reg = tree.ExtraTreeRegressor(
    criterion="mse",  # {"mse", "friedman_mse", "mae"} default="mse"
    splitter="random",  # {"random", "best"} default="random"
    max_depth=None,  # int, default=None
    min_samples_split=2,  # int or float, default=2
    min_samples_leaf=1,  # int or float, default=1
    min_weight_fraction_leaf=0.0,  # float, default=0.0
    max_features=None,  # int, float or {"auto", "sqrt", "log2"}, default=None
    random_state=random_state,
)

#%% base_estimator = tree.DecisionTreeRegressor
base_dt_reg = tree.DecisionTreeRegressor(
    criterion="mse",  # {"mse", "friedman_mse", ""mae"} default="mse"
    splitter="best",  # {"random", "best"} default="best"
    max_depth=None,  # int, default=None
    min_samples_split=2,  # int or float, default=2
    min_samples_leaf=1,  # int or float, default=1
    min_weight_fraction_leaf=0.0,  # float, default=0.0
    max_features=None,  # int, float or {“auto”, “sqrt”, “log2”}, default=None
    random_state=random_state,
Example #15
File: OASIS.py Project: mattonics/IVAM
def run(perc):
    base_string = r'D:\MriData\Data'  # raw strings so the backslashes are kept literally
    excel_path = r'D:\oasis_cross-sectional.csv'
    test = 13
    dataprovider = CrossSectionalData.CrossSectionalDataProvider(
        base_string, excel_path)

    a = dataprovider.get_data_with_CDR()

    step = 5
    step_1 = 25
    step_2 = 25

    training_stop = int(len(a) * perc)
    allfeatures = []
    ally = []
    cut = 55
    randomint = random.Random(7)
    # xrange is Python 2; use range on Python 3
    for xx in [randomint.randint(0, len(a) - 1) for r in xrange(training_stop)]:
        x = a[xx]
        cdr = dataprovider.get_CDR(x)
        ll = (dataprovider.retrieve_full_data(x))
        if cdr is None or cdr > 1:
            continue
        feat = AlzheimerFeatures.surrounding_points_discrete_with_pos(
            ll, step, step_1, [dataprovider.get_gender(x)])
        allfeatures += feat
        ally = np.append(ally, np.repeat(cdr, len(feat)))

    AlzheimerFeatures.shuffle_in_unison_scary(allfeatures, ally)

    regressor = sk.ExtraTreeRegressor(random_state=0)  # sk: presumably sklearn.tree imported as sk
    regressor.fit(allfeatures, ally)

    allfeatures1 = []
    ally1 = []
    indices = []
    for xx in [randomint.randint(0, len(a) - 1)
               for r in xrange(training_stop) if r != test]:
        x = a[xx]
        indices.append(xx)
        cdr = dataprovider.get_CDR(x)
        ll = dataprovider.retrieve_full_data(x)
        if cdr is None or cdr > 1:
            continue
        feat = AlzheimerFeatures.surrounding_points_discrete_with_pos(
            ll, step, step_2, [dataprovider.get_gender(x)])
        allfeatures1.append(regressor.predict(feat)[0:cut])
        ally1.append(cdr)

    rbf_svc = neighbors.KNeighborsClassifier(n_neighbors=7)
    rbf_svc.fit(allfeatures1, ally1)

    errorb = 0
    error = 0
    index = 0
    for xx in xrange(len(a)):
        x = a[xx]
        cdr = dataprovider.get_CDR(x)
        if cdr is None or cdr > 1 or xx in indices:
            continue
        ll = dataprovider.retrieve_full_data(x)
        feat = AlzheimerFeatures.surrounding_points_discrete_with_pos(
            ll, step, step_2, [dataprovider.get_gender(x)])
        predictq = regressor.predict(feat)[:cut]
        suma = (rbf_svc.predict(predictq))
        if not (suma > 0 and cdr > 0) or suma == cdr:
            errorb += 1
        error += np.abs(suma - cdr)
        index += 1
    ter = 1 - (error / index)
    terb = 1 - (errorb / index)
    print(str(ter) + " , " + str(terb))
    return ter, terb
Example #16
print(f"X_fit: {X_fit.shape}, {type(X_fit)}\
    \nX_train: {X_train.shape}, {type(X_train)}\
        \nX_val: {X_val.shape}, {type(X_val)}\n")

print(f"y_fit: {y_fit.shape}, {type(y_fit)}\
    \ny_train: {y_train.shape}, {type(y_train)}\
    \ny_val: {y_val.shape}, {type(y_val)}\n")

#%% Define model parameters for starting tuning
model_params = {
    "base_estimator": tree.ExtraTreeRegressor(
        criterion="mse",  # {"mse", "friedman_mse", "mae"} default="mse"
        splitter="random",  # {"random", "best"} default="random"
        max_depth=None,  # default=None
        min_samples_split=2,  # default=2
        min_samples_leaf=1,  # default=1
        random_state=random_state,
    ),
    "n_estimators": args.n_estimators,
    "max_samples": args.max_samples,
    "max_features": args.max_features,
    "bootstrap": args.bootstrap,
    "bootstrap_features": args.bootstrap_features,
    "oob_score": False,
}
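The keys of `model_params` match the `ensemble.BaggingRegressor` constructor, so the dictionary is presumably unpacked straight into the ensemble; a sketch under that assumption (note `base_estimator` was deprecated in favor of `estimator` in scikit-learn 1.2):

from sklearn import ensemble

# X_fit / y_fit and X_val / y_val are the arrays printed at the top of the snippet
bag_reg = ensemble.BaggingRegressor(**model_params)
bag_reg.fit(X_fit, y_fit)
print(bag_reg.score(X_val, y_val))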
Example #17
# Each assignment below overwrites `model`; keep only the estimator you want
model = linear_model.RidgeCV(alphas=alphas)
model = linear_model.LassoLarsCV()
model = linear_model.LassoLars()
model = linear_model.ElasticNetCV(l1_ratio=0.8, alphas=alphas)

model = linear_model.BayesianRidge()
model = linear_model.Perceptron()

from sklearn import svm
model = svm.SVR(kernel='linear')
model = svm.SVR(kernel='poly')
model = svm.SVR(kernel='rbf')

from sklearn import tree
model = tree.DecisionTreeRegressor()
model = tree.ExtraTreeRegressor()

from sklearn import ensemble
model = ensemble.RandomForestRegressor(n_estimators=100,
                                       max_depth=None,
                                       min_samples_split=1,  # must be >= 2 (or a float) on modern scikit-learn
                                       random_state=0)

model = ensemble.ExtraTreesRegressor(n_estimators=20,
                                     max_depth=None,
                                     min_samples_split=1,  # must be >= 2 (or a float) on modern scikit-learn
                                     random_state=0)

model = ensemble.AdaBoostRegressor(n_estimators=100)

model = ensemble.GradientBoostingRegressor(n_estimators=100,
Example #18
logger = logging.getLogger("sedesol.pipeline")

###############################################################################
# Constants, specifying possible models and metrics
###############################################################################

MODELS_MAPPING = {
    "elnet": lm.ElasticNet(),
    "sgd_class": lm.SGDClassifier(),
    "sgd_reg": lm.SGDRegressor(),
    "ridge": lm.Ridge(),
    "gp": GaussianProcess(),
    "tree_reg": tree.DecisionTreeRegressor(),
    "tree_class": tree.DecisionTreeClassifier(),
    "extra_class": ensemble.ExtraTreesClassifier(),
    "extra_reg": tree.ExtraTreeRegressor(),
    "nn_class": KNeighborsClassifier(),
    "rf_reg": ensemble.RandomForestRegressor(),
    "rf_class": ensemble.RandomForestClassifier(),
    "svc": svm.SVC(),
    "linear_svc": svm.LinearSVC(),
    "logistic_reg": lm.LogisticRegression(),
    "multitask_lasso": lm.MultiTaskLasso(),
    "linear_reg": lm.LinearRegression()
}

MULTITASK_MODELS = ["multitask_lasso"]

###############################################################################
# Helper functions
###############################################################################
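A registry like MODELS_MAPPING is usually consumed by cloning the stored estimator so the shared instances stay unfitted; a minimal sketch (illustrative key and data; on scikit-learn >= 0.20 the removed GaussianProcess entry would need replacing first):

from sklearn.base import clone
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=150, n_features=5, random_state=0)
model = clone(MODELS_MAPPING["tree_reg"])  # fresh, unfitted copy
model.fit(X, y)
print(model.score(X, y))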
Example #19
        x = a[xx]
        cdr = dataprovider.get_CDR(x)
        print(cdr)
        ll = (dataprovider.retrieve_full_data(x))
        #AlzheimerFeatures.view_histogram(ll)
        #CrossSectionalData.show_slices([ll[:,:,50]])
        if cdr is None or cdr > 1:
            continue
        feat = AlzheimerFeatures.surrounding_points_discrete_with_pos(
            ll, step, step_1, [dataprovider.get_gender(x)])
        allfeatures += feat
        ally = np.append(ally, np.repeat(cdr, len(feat)))

    AlzheimerFeatures.shuffle_in_unison_scary(allfeatures, ally)

    regressor = sk.ExtraTreeRegressor(random_state=0)
    regressor.fit(allfeatures, ally)

    net = GaussianNB()
    net.fit(np.array(allfeatures), np.array(ally))

    def f(x):
        if x == 0.5:
            return 0
        if x == 0:
            return -1
        return 1

    ttt = AlzheimerFeatures.target_brain_regions_2d_z(
        dataprovider.retrieve_full_data(test), step,
        [dataprovider.get_gender(test)], lambda x: f(net.predict(x)), 50)
Example #20
def model_comparison():
    data, target = load_train()

    pipeline = create_pipeline()

    data = pipeline.fit_transform(data)

    MLA = [
        #Ensemble Methods
        ensemble.AdaBoostRegressor(),
        ensemble.BaggingRegressor(),
        ensemble.ExtraTreesRegressor(),
        ensemble.GradientBoostingRegressor(),
        ensemble.RandomForestRegressor(),

        #Gaussian Processes
        gaussian_process.GaussianProcessRegressor(),

        #GLM
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Ridge(),
        linear_model.Lasso(),
        linear_model.ElasticNet(),
        linear_model.SGDRegressor(),

        #Nearest Neighbor
        neighbors.KNeighborsRegressor(),

        #SVM
        svm.SVR(),
        svm.NuSVR(),
        svm.LinearSVR(),

        #Trees
        tree.DecisionTreeRegressor(),
        tree.ExtraTreeRegressor(),

        #xgboost: http://xgboost.readthedocs.io/en/latest/model.html
        XGBRegressor(),
        lgb.LGBMRegressor()
    ]



    #split dataset in cross-validation with this splitter class: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ShuffleSplit.html#sklearn.model_selection.ShuffleSplit
    #note: this is an alternative to train_test_split
    cv_split = model_selection.ShuffleSplit(n_splits = 10, test_size = .3, train_size = .6, random_state = 0 ) # run model 10x with 60/30 split intentionally leaving out 10%

    #create table to compare MLA metrics
    MLA_columns = ['MLA Name', 'MLA Parameters', 'MLA Train Accuracy Mean', 'MLA Test Accuracy Mean']  # the "Accuracy" columns actually hold RMSE values
    MLA_compare = pd.DataFrame(columns = MLA_columns)

    #index through MLA and save performance to table
    row_index = 0
    for alg in MLA:

        #set name and parameters
        MLA_name = alg.__class__.__name__
        MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
        MLA_compare.loc[row_index, 'MLA Parameters'] = str(alg.get_params())

        #score model with cross validation: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_validate.html#sklearn.model_selection.cross_validate
        rmse_scorer = make_scorer(rmse)  # `rmse` is not defined in this snippet; see the sketch after this example
        cv_results = model_selection.cross_validate(alg, data, target, cv=cv_split, scoring=rmse_scorer)

        MLA_compare.loc[row_index, 'MLA Time'] = cv_results['fit_time'].mean()
        MLA_compare.loc[row_index, 'MLA Train Accuracy Mean'] = cv_results['train_score'].mean()
        MLA_compare.loc[row_index, 'MLA Test Accuracy Mean'] = cv_results['test_score'].mean()
        #if this is an unbiased random sample, then +/-3 standard deviations (std) from the mean should statistically capture 99.7% of the subsets
        MLA_compare.loc[row_index, 'MLA Test Accuracy 3*STD'] = cv_results['test_score'].std()*3   #let's know the worst that can happen!

        row_index+=1


    #print and sort table: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sort_values.html
    MLA_compare.sort_values(by = ['MLA Test Accuracy Mean'], inplace = True)
    MLA_compare.to_csv('mla_comparison.csv', index=True)
    print(MLA_compare)
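The `rmse` helper handed to `make_scorer` is not shown in the snippet; a compatible definition would presumably be the square root of the MSE. Since a plain `make_scorer(rmse)` treats higher values as better, the ascending sort above is what puts the lowest RMSE first:

import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    # root-mean-squared error; lower is better
    return np.sqrt(mean_squared_error(y_true, y_pred))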
Example #21
from sklearn.model_selection import cross_val_score

#%% Decision Tree
model = tree.DecisionTreeRegressor()

scores = cross_val_score(model, X, Y, cv=10)

print("*R2:")
print(scores.mean())
print("*Standard deviation:")
print(scores.std())

#%% Decision Tree - MSE criterion
model = tree.DecisionTreeRegressor(criterion="mse")  # "squared_error" in scikit-learn >= 1.0

scores = cross_val_score(model, X, Y, cv=10)

print("\nmse - R2:")
print(scores.mean())
print("mse - Standard deviation:")
print(scores.std())

#%% Extra Tree
model = tree.ExtraTreeRegressor(criterion="mse")

scores = cross_val_score(model, X, Y, cv=10)

print("\nExtra/mse - R2:")
print(scores.mean())
print("Extra/mse - Standard deviation:")
print(scores.std())
Example #22
from sklearn import tree

clf = tree.ExtraTreeRegressor()

# __all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor",
#            "ExtraTreeClassifier", "ExtraTreeRegressor", "export_graphviz"]

# [height, weight, shoe_size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39],
     [177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]]

# Y = ['male', 'male', 'female', 'female', 'male', 'male', 'female', 'female',
#      'female', 'male', 'male']

Y = [1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

clf = clf.fit(X,Y)
prediction = clf.predict([[190,70,42]])
print(prediction[0])
Example #23
#models.append( {"name": "1.9.1. GaussianNB", \
#				"model": naive_bayes.GaussianNB()} )

# doesn't work for this dataset?
#models.append( {"name": "1.9.2. MultinomialNB", \
#				"model": naive_bayes.MultinomialNB()} )

# doesn't work for this dataset?
#models.append( {"name": "1.9.3. BernoulliNB", \
#				"model": naive_bayes.BernoulliNB()} )

## 1.10. Decision Trees
models.append( {"name": "1.10. DecisionTreeRegressor", \
    "model": tree.DecisionTreeRegressor(random_state=0)} )
models.append( {"name": "1.10. ExtraTreeRegressor", \
    "model": tree.ExtraTreeRegressor(random_state=0)} )

## 1.11. Ensemble methods
# averaging methods
models.append( {"name": "1.11.1. Bagging meta-estimator", \
    "model": ensemble.BaggingRegressor(neighbors.KNeighborsRegressor())} )
models.append( {"name": "1.11.2.1. Random Forests", \
    "model": ensemble.RandomForestRegressor()} )
models.append( {"name": "1.11.2.2. Extremely Randomized Trees", \
    "model": ensemble.ExtraTreesRegressor()} )
models.append( {"name": "1.11.3. AdaBoost", \
    "model": ensemble.AdaBoostRegressor()} )
models.append( {"name": "1.11.4. Gradient Tree Boosting", \
    "model": ensemble.GradientBoostingRegressor()} )

## 1.12. Multiclass and multilabel algorithms
Example #24
        classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification(linear_model.RidgeClassifierCV()),
        classification(linear_model.SGDClassifier(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegression(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)),
        classification_binary(
            linear_model.RidgeClassifier(random_state=RANDOM_SEED)),
        classification_binary(linear_model.RidgeClassifierCV()),
        classification_binary(
            linear_model.SGDClassifier(random_state=RANDOM_SEED)),

        # Decision trees
        regression(tree.DecisionTreeRegressor(**TREE_PARAMS)),
        regression(tree.ExtraTreeRegressor(**TREE_PARAMS)),
        classification(tree.DecisionTreeClassifier(**TREE_PARAMS)),
        classification(tree.ExtraTreeClassifier(**TREE_PARAMS)),
        classification_binary(tree.DecisionTreeClassifier(**TREE_PARAMS)),
        classification_binary(tree.ExtraTreeClassifier(**TREE_PARAMS)),

        # Random forest
        regression(ensemble.RandomForestRegressor(**FOREST_PARAMS)),
        regression(ensemble.ExtraTreesRegressor(**FOREST_PARAMS)),
        classification(ensemble.RandomForestClassifier(**FOREST_PARAMS)),
        classification(ensemble.ExtraTreesClassifier(**FOREST_PARAMS)),
        classification_binary(
            ensemble.RandomForestClassifier(**FOREST_PARAMS)),
        classification_binary(ensemble.ExtraTreesClassifier(**FOREST_PARAMS)),
    ],
Example #25
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(X=X_train, y=y_train)
    predicted = classifier.predict(X_test)
    DecisionTreeClassifier_accuracy.append(accuracy_score(y_test, predicted))

    classifier = tree.DecisionTreeRegressor()
    classifier.fit(X=X_train, y=y_train)
    predicted = classifier.predict(X_test)
    # accuracy_score expects discrete labels, so this only works when the
    # regressor's predictions happen to be exact class values
    DecisionTreeRegressor_accuracy.append(accuracy_score(y_test, predicted))

    classifier = tree.ExtraTreeClassifier()
    classifier.fit(X=X_train, y=y_train)
    predicted = classifier.predict(X_test)
    ExtraTreeClassifier_accuracy.append(accuracy_score(y_test, predicted))

    classifier = tree.ExtraTreeRegressor()
    classifier.fit(X=X_train, y=y_train)
    predicted = classifier.predict(X_test)
    ExtraTreeRegressor_accuracy.append(accuracy_score(y_test, predicted))
'''    
percentages = np.arange(0.05, 0.95, 0.05)
BernoulliNB_accuracy = []
#CategoricalNB_accuracy = []
ComplementNB_accuracy = []
GaussianNB_accuracy = []
MultinomialNB_accuracy = []

DecisionTreeClassifier_accuracy = []
DecisionTreeRegressor_accuracy = []
ExtraTreeClassifier_accuracy = []
ExtraTreeRegressor_accuracy = []
Example #26
print("KNN:%f" % mse)

model = ensemble.RandomForestRegressor(n_estimators=20, random_state=1)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("随机森林:%f" % mse)

model = ensemble.GradientBoostingRegressor(n_estimators=100, random_state=1)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("GBRT:%f" % mse)

model = ensemble.BaggingRegressor(random_state=1)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("Bagging:%f" % mse)

model = tree.ExtraTreeRegressor(random_state=1)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("ExtraTree:%f" % mse)

model = ensemble.AdaBoostRegressor(n_estimators=50, random_state=random_state)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("Adaboost:%f" % mse)

model = svm.SVR(C=10)
predict_y = model.fit(train_X, train_gpa_y).predict(test_X)
mse = mean_squared_error(test_gpa_y, predict_y)
print("SVC:%f" % mse)
Example #27
    #        'pls': cross_decomposition.PLSRegression(),  # raises an error
    'gradient boosting': ensemble.GradientBoostingRegressor(),
    #        'gaussian': gaussian_process.GaussianProcessRegressor(),  # raises an error
    #        'isotonic': isotonic.IsotonicRegression(),  # raises an error
    'kernelridge': kernel_ridge.KernelRidge(),
    'ARD': linear_model.ARDRegression(),
    'bayesianridge': linear_model.BayesianRidge(),
    #        'elasticnet': linear_model.ElasticNet(),  # raises an error
    'HuberRegressor': linear_model.HuberRegressor(),
    'LinearRegression': linear_model.LinearRegression(),
    #        'logistic': linear_model.LogisticRegression(),  # raises an error
    #        'linear_model.RidgeClassifier': linear_model.RidgeClassifier(),  # raises an error
    'k-neighbor': neighbors.KNeighborsRegressor(),
    'SVR': svm.LinearSVR(),
    'NUSVR': svm.NuSVR(),
    'extra tree': tree.ExtraTreeRegressor(),
    'decision tree': tree.DecisionTreeRegressor(),
    #        'random logistic': linear_model.RandomizedLogisticRegression(),  # raises an error
    #        'dummy': dummy.DummyRegressor()  # raises an error
}

# Regression analysis
cv = StratifiedKFold(n_splits=5)
i = 0
X = train_data
y = probs
z = labels[:, 5]
from sklearn.ensemble import ExtraTreesClassifier
clf = ExtraTreesClassifier()
for name, rgs in Regressors.items():
    regressor = rgs
Example #28
    ('bag', BaggingRegressor()),
    ('etr', ExtraTreesRegressor()),
    ('gbr', GradientBoostingRegressor()),
    ('xgbr', xgb.XGBRegressor(max_depth=3)),  # alternatively xgb.XGBRegressor() with defaults
    ('rfr', RandomForestRegressor(n_estimators=50)),

    #Nearest Neighbor
    ('knr', neighbors.KNeighborsRegressor(n_neighbors=3)),

    #SVM
    ('svr', svm.SVR(kernel='rbf', gamma=0.1)),
    ('lsvr', svm.LinearSVR()),

    #Trees
    ('dtr', tree.DecisionTreeRegressor()),
    ('etr2', tree.ExtraTreeRegressor()),
]
ESTS_PARAM_GRID = {
    'lasso': [{
        'alpha': [0.0005],
        'random_state': [1]
    }],  # first model is used for meta model in StackingAveragedModels
    'xgbr': [{
        'colsample_bytree': [0.4603],
        'gamma': [0.0468],
        'learning_rate': [0.05],
        'max_depth': [3],
        'min_child_weight': [1.7817],
        'n_estimators': [2200],
        'reg_alpha': [0.4640],
        'reg_lambda': [0.8571],
Example #29
from sklearn import neighbors
from sklearn import ensemble
import xgboost as xgb  #Xgboost Regressor
model_DecisionTreeRegressor = tree.DecisionTreeRegressor()  # Decision Tree Regressor
model_SVR = svm.SVR(gamma='auto')  # SVM Regressor
model_KNeighborsRegressor = neighbors.KNeighborsRegressor()  # K Neighbors Regressor
model_RandomForestRegressor = ensemble.RandomForestRegressor(n_estimators=20)  # Random Forest Regressor
model_AdaBoostRegressor = ensemble.AdaBoostRegressor(n_estimators=50)  # AdaBoost Regressor
model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(n_estimators=100)  # Gradient Boosting Regressor
model_BaggingRegressor = ensemble.BaggingRegressor()  # Bagging Regressor
model_ExtraTreeRegressor = tree.ExtraTreeRegressor()  # ExtraTree Regressor


def linear_model(X_train, y_train):  # note: shadows sklearn's `linear_model` module if imported under that name
    regr = LinearRegression()
    regr.fit(X_train, y_train)
    y_pred = regr.predict(X_train)
    y_test = y_train
    print("linear score on training set: ",
          mean_absolute_error(y_test, y_pred))


    #plt.figure(figsize=(14,4))
    #plt.scatter(X_train, y_train, color='g')
    #plt.plot(X_train, y_pred, color='r')
    #plt.xlabel('time(0-24)')
Example #30
    def generate_prediction(cls, race):
        """Generate a prediction for the specified race"""

        prediction = {
            'race_id': race['_id'],
            'earliest_date': cls.get_earliest_date(),
            'prediction_version': cls.PREDICTION_VERSION,
            'seed_version': Seed.SEED_VERSION,
            'results': None,
            'score': None,
            'train_seeds': None,
            'test_seeds': None,
            'estimator': None
        }

        predictor = None
        generate_predictor = False

        segment = tuple(race['entry_conditions']) + tuple(
            [race['track_condition']])
        with cls.predictor_cache_lock:
            if segment in cls.predictor_cache:
                predictor = cls.predictor_cache[segment]
            else:
                cls.predictor_cache[segment] = None
                generate_predictor = True

        if generate_predictor:

            similar_races = pyracing.Race.find({
                'entry_conditions':
                race['entry_conditions'],
                'track_condition':
                race['track_condition'],
                'start_time': {
                    '$lt': race.meet['date']
                }
            })
            if len(similar_races) >= (1 / cls.TEST_SIZE):

                try:

                    train_races, test_races = cross_validation.train_test_split(
                        similar_races, test_size=cls.TEST_SIZE)  # cross_validation is the pre-0.20 name of sklearn.model_selection

                    train_X = []
                    train_y = []
                    for train_race in train_races:
                        for seed in train_race.seeds:
                            if seed['result'] is not None:
                                train_X.append(seed.normalized_data)
                                train_y.append(seed['result'])

                    test_X = []
                    test_y = []
                    for test_race in test_races:
                        for seed in test_race.seeds:
                            if seed['result'] is not None:
                                test_X.append(seed.normalized_data)
                                test_y.append(seed['result'])

                    predictor = {
                        'classifier': None,
                        'score': None,
                        'train_seeds': len(train_y),
                        'test_seeds': len(test_y),
                        'estimator': None
                    }
                    dual = len(train_X) < len(train_X[0])
                    kernel = 'linear'
                    loss = 'epsilon_insensitive'
                    if not dual:
                        loss = 'squared_epsilon_insensitive'
                    for estimator in (
                            linear_model.BayesianRidge(),
                            linear_model.ElasticNet(),
                            linear_model.LinearRegression(),
                            linear_model.LogisticRegression(),
                            linear_model.OrthogonalMatchingPursuit(),
                            linear_model.PassiveAggressiveRegressor(),
                            linear_model.Perceptron(), linear_model.Ridge(),
                            linear_model.SGDRegressor(),
                            svm.SVR(kernel=kernel),
                            svm.LinearSVR(dual=dual,
                                          loss=loss), svm.NuSVR(kernel=kernel),
                            tree.DecisionTreeRegressor(),
                            tree.ExtraTreeRegressor()):
                        logging.debug(
                            'Trying {estimator} for {segment}'.format(
                                estimator=estimator.__class__.__name__,
                                segment=segment))

                        try:
                            classifier = pipeline.Pipeline([
                                ('feature_selection',
                                 feature_selection.SelectFromModel(
                                     estimator, 'mean')),
                                ('regression', estimator)
                            ])
                            classifier.fit(train_X, train_y)
                            score = classifier.score(test_X, test_y)

                            if predictor['classifier'] is None or predictor[
                                    'score'] is None or score > predictor[
                                        'score']:
                                logging.debug(
                                    'Using {estimator} ({score}) for {segment}'
                                    .format(
                                        estimator=estimator.__class__.__name__,
                                        score=score,
                                        segment=segment))
                                predictor['classifier'] = classifier
                                predictor['score'] = score
                                predictor[
                                    'estimator'] = estimator.__class__.__name__

                        except BaseException as e:
                            logging.debug(
                                'Caught exception while trying {estimator} for {segment}: {exception}'
                                .format(estimator=estimator.__class__.__name__,
                                        segment=segment,
                                        exception=e))
                            continue

                    cls.predictor_cache[segment] = predictor

                except:

                    del cls.predictor_cache[segment]
                    raise

            else:

                del cls.predictor_cache[segment]

        else:

            while predictor is None:
                try:
                    predictor = cls.predictor_cache[segment]
                    time.sleep(10)
                except KeyError:
                    break

        if predictor is not None:

            reverse = False
            if 'score' in predictor and predictor['score'] is not None:
                reverse = predictor['score'] < 0
                prediction['score'] = abs(predictor['score'])

            if 'classifier' in predictor and predictor[
                    'classifier'] is not None:
                raw_results = {}
                for seed in race.seeds:
                    raw_result = predictor['classifier'].predict(
                        numpy.array(seed.normalized_data).reshape(1, -1))[0]
                    if raw_result is not None:
                        if raw_result not in raw_results:
                            raw_results[raw_result] = []
                        raw_results[raw_result].append(seed.runner['number'])
                for key in sorted(raw_results.keys(), reverse=reverse):
                    if prediction['results'] is None:
                        prediction['results'] = []
                    prediction['results'].append(sorted(raw_results[key]))

            if 'train_seeds' in predictor:
                prediction['train_seeds'] = predictor['train_seeds']

            if 'test_seeds' in predictor:
                prediction['test_seeds'] = predictor['test_seeds']

            if 'estimator' in predictor:
                prediction['estimator'] = predictor['estimator']

        return prediction