Example #1
def test_real():
    a = Real(1, 25)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 25)
    random_values = a.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    for i in range(50):
        random_val = log_uniform.rvs(random_state=i)
        yield (check_limits, random_val, 10**-5, 10**5)
    random_values = log_uniform.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10))
    transformed_vals = log_uniform.transform(random_values)
    assert_array_equal(transformed_vals, np.log10(random_values))
    assert_array_equal(
        log_uniform.inverse_transform(transformed_vals), random_values)
Example #2
NODES =       [numVars,numVars*10]
PATTERN =     ['static', 'dynamic']
BATCH_POW =   [8,11] # used as 2 ^ BATCH_POW
LRATE =       [1e-5,1e-2]
REGULATOR =   ['none', 'dropout', 'normalization', 'both']
ACTIVATION =  ['relu','softplus','elu']

### Optimization parameters
NCALLS =      50
NSTARTS =     30

space = [
  Integer(HIDDEN[0],       HIDDEN[1],                     name = "hidden_layers"),
  Integer(NODES[0],        NODES[1],                      name = "initial_nodes"),
  Integer(BATCH_POW[0],    BATCH_POW[1],                  name = "batch_power"),
  Real(LRATE[0],           LRATE[1],       "log-uniform", name = "learning_rate"),
  Categorical(PATTERN,                                    name = "node_pattern"),
  Categorical(REGULATOR,                                  name = "regulator"),
  Categorical(ACTIVATION,                                 name = "activation_function")
]

######################################################
######################################################
######                                          ######
######         M O R E   M E T H O D S          ######
######                                          ######
######################################################
######################################################

@use_named_args(space)
def objective(**X):
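
The snippet above breaks off at the objective definition. As a minimal, hedged sketch of how such a space, together with the NCALLS/NSTARTS settings, is typically wired into skopt (train_and_score is a hypothetical stand-in for the original model-building code, not part of the source):

# Illustrative sketch only -- not the original objective body.
from skopt import gp_minimize

@use_named_args(space)
def objective(**X):
    # X maps the dimension names ("hidden_layers", "initial_nodes", ...) to sampled values.
    return -train_and_score(**X)  # gp_minimize minimizes, so negate a score

result = gp_minimize(objective, space,
                     n_calls=NCALLS,           # total objective evaluations
                     n_random_starts=NSTARTS,  # random points before the surrogate model kicks in
                     random_state=0)
print("Best parameters:", result.x)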
Example #3
import os
import pathlib
import signal
import sys

from skopt.space import Real

from gaussian_process import GaussianProcessSearch

file_dir = str(pathlib.Path(__file__).resolve().parents[0])


def signal_handler(sig, frame):
    print('\nSIGINT signal received: killing instances...')
    for instance in instances:
        instance.kill()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)

search_space = [
    Real(low=0.01, high=1., name='reproduction_reward'),
    Real(low=1e-7, high=1e-4, name='time_step_modifier'),
    Real(low=0., high=1e-5, name='pop_reward_modifier'),
]

if __name__ == "__main__":
    num_instances = 4

    # Paths and files
    gpro_input_file = None  # Use None to start from zero
    env_dir = "envs/"
    env_path = os.path.join(env_dir, "optimization_test.x86_64")
    log_files_dir = os.path.join(file_dir, "logs/")
    output_files_dir = "out_files/"
    config_file = "config/chickens.yaml"
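
The snippet drives a project-specific GaussianProcessSearch class whose interface is not shown here. Purely as a hedged illustration, the same search_space can also be consumed by plain skopt; run_episode below is a hypothetical helper that would launch the environment and return a score to minimize:

# Illustrative sketch only -- not the project's GaussianProcessSearch API.
from skopt import gp_minimize
from skopt.utils import use_named_args

@use_named_args(search_space)
def objective(**params):
    # params: reproduction_reward, time_step_modifier, pop_reward_modifier
    return -run_episode(**params)

result = gp_minimize(objective, search_space, n_calls=20, random_state=0)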
Example #4
from skopt.space import Categorical, Integer, Real
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_evaluations, plot_objective, plot_convergence
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from models import Model
from run_model import make_model

with open('opt_results.txt', 'w') as f:
    f.write('Hyper OPT Results')

dim_batch_size = Categorical(categories=[4, 8, 12, 24, 32], name='batch_size')
dim_lookback = Integer(low=5, high=20, prior='uniform', name='lookback')
dim_learning_rate = Real(low=1e-7, high=1e-3, prior='uniform', name='lr')
dim_lstm_units = Categorical(categories=[16, 32, 64, 128], name='lstm_units')
dim_act1_f = Categorical(
    categories=['relu', 'tanh', 'elu', 'LeakyRelu', 'none'], name='lstm1_act')
dim_act2_f = Categorical(
    categories=['relu', 'tanh', 'elu', 'LeakyRelu', 'none'], name='lstm2_act')

default_values = [12, 10, 0.00001, 32, 'none', 'none']

dimensions = [
    dim_batch_size, dim_lookback, dim_learning_rate, dim_lstm_units,
    dim_act1_f, dim_act2_f
]


def objective_fn(**kwargs):
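
The body of objective_fn is truncated above. A hedged sketch of how such a dimensions list, the default_values, and the imported plotting helpers usually fit together (build_and_evaluate is a hypothetical stand-in for make_model plus training, not part of the source):

# Illustrative sketch only.
@use_named_args(dimensions=dimensions)
def objective_fn(**kwargs):
    # kwargs: batch_size, lookback, lr, lstm_units, lstm1_act, lstm2_act
    return build_and_evaluate(**kwargs)  # e.g. a validation loss to minimize

res = gp_minimize(objective_fn, dimensions,
                  x0=default_values,  # start from the defaults listed above
                  n_calls=30, random_state=1)
plot_convergence(res)
plt.savefig('opt_convergence.png')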
Example #5
def test_real():
    a = Real(1, 25)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 25)
    random_values = a.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    assert_not_equal(log_uniform, Real(10**-5, 10**5))
    for i in range(50):
        random_val = log_uniform.rvs(random_state=i)
        yield (check_limits, random_val, 10**-5, 10**5)
    random_values = log_uniform.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10))
    transformed_vals = log_uniform.transform(random_values)
    assert_array_equal(transformed_vals, np.log10(random_values))
    assert_array_equal(log_uniform.inverse_transform(transformed_vals),
                       random_values)
Example #6
###############################################################################
if __name__ == "__main__":
    ###################################
    #   Select Optimization Options   #
    ###################################
    #=== Number of Iterations ===#
    n_calls = 10

    #=== Select Hyperparameters of Interest ===#
    hyperp_of_interest_dict = {}
    hyperp_of_interest_dict['num_hidden_layers_encoder'] = Integer(5, 10,
            name='num_hidden_layers_encoder')
    hyperp_of_interest_dict['num_hidden_nodes_encoder'] = Integer(100, 1000,
            name='num_hidden_nodes_encoder')
    hyperp_of_interest_dict['activation'] = Categorical(['relu', 'elu', 'sigmoid', 'tanh'], name='activation')
    hyperp_of_interest_dict['penalty_js'] = Real(0, 1, name='penalty_js')
    #hyperp_of_interest_dict['batch_size'] = Integer(100, 500, name='batch_size')

    #####################
    #   Initial Setup   #
    #####################
    #=== Generate skopt 'space' list ===#
    space = []
    for key, val in hyperp_of_interest_dict.items():
        space.append(val)

    #=== Hyperparameters ===#
    with open('../config_files/hyperparameters_vae_full.yaml') as f:
        hyperp = yaml.safe_load(f)
    hyperp = AttrDict(hyperp)
Example #7
 def sell_indicator_space() -> List[Dimension]:
     return [
         Integer(30, 90, name='sell-adx'),
         Real(0, 1.0, name='sell-fisher')
     ]
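
As a hedged aside (not part of the Freqtrade hyperopt flow), a Dimension list like the one returned above can be sampled directly with skopt to inspect candidate sell parameters:

# Illustrative only: draw a few candidate points from the dimensions above.
from skopt.space import Space, Integer, Real

dims = [Integer(30, 90, name='sell-adx'), Real(0, 1.0, name='sell-fisher')]
for point in Space(dims).rvs(n_samples=3, random_state=1):
    print(dict(zip([d.name for d in dims], point)))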
Example #8
             (
                 "post_process",
                 OptionedPostProcessTransformer(dict_pipelines_post_process),
             ),
             ("resample", SMOTE()),
         ]
     ),
     "search_space": [
         Categorical([True, False], name="undersampling_majority_class"),
         Integer(5, 6, name="max_k_undersampling"),
         Categorical(["minority", "all"], name="resample__sampling_strategy"),
         Categorical(
             ["option_1", "option_2", "option_3"], name="post_process__option"
         ),
         Integer(5, 15, name="model__max_depth"),
         Real(0.05, 0.31, prior="log-uniform", name="model__learning_rate"),
         Integer(1, 10, name="model__min_child_weight"),
         Real(0.8, 1, prior="log-uniform", name="model__subsample"),
         Real(0.13, 0.8, prior="log-uniform", name="model__colsample_bytree"),
         Real(0.1, 10, prior="log-uniform", name="model__scale_pos_weight"),
         Categorical(["binary:logistic"], name="model__objective"),
     ],
 },
 "random_forest": {
     "model": RandomForestClassifier(),
     "pipeline_post_process": None,
     "search_space": [
         Categorical([True, False], name="undersampling_majority_class"),
         Integer(0, 1, name="model__bootstrap"),
         Integer(10, 100, name="model__n_estimators"),
         Integer(2, 10, name="model__max_depth"),
Example #9
                        activation='relu'))
    model.add(K.layers.Flatten())
    model.add(K.layers.Dense(n_classes, activation='softmax'))
    optimizer = K.optimizers.Adam(lr=learning_rate)
    model.compile(loss='mean_absolute_error',
                  optimizer=optimizer,
                  metrics=['mae'])
    return model


# Set up dimensions --> Edited to integers
dim_filter1 = Integer(low=16, high=200, name='filter1', dtype=int)
dim_filter2 = Integer(low=16, high=200, name='filter2', dtype=int)
dim_filter3 = Integer(low=16, high=200, name='filter3', dtype=int)
dim_learning_rate = Real(low=1e-4,
                         high=1e-2,
                         prior='log-uniform',
                         name='learning_rate')
dim_batch_size = Integer(
    low=10, high=64, name='batch_size',
    dtype=int)  # NOTE: Must be int so it works for KerasRegressor copy

dimensions = [
    dim_filter1, dim_filter2, dim_filter3, dim_learning_rate, dim_batch_size
]

print("Number of dimensions:", len(dimensions))

default_parameters = [64, 64, 64, 1e-4, 16]


@use_named_args(dimensions=dimensions)
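
The decorated fitness function itself is cut off above. A hedged sketch of what typically follows, with train_cnn as a hypothetical helper that builds and fits the Keras model defined earlier and returns a validation error:

# Illustrative sketch only.
from skopt import gp_minimize

@use_named_args(dimensions=dimensions)
def fitness(filter1, filter2, filter3, learning_rate, batch_size):
    return train_cnn(filter1, filter2, filter3, learning_rate, batch_size)

search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            x0=default_parameters,  # [64, 64, 64, 1e-4, 16]
                            n_calls=25,
                            random_state=7)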
Example #10
def test_searchcv_sklearn_compatibility():
    """
    Test whether the WeightedBayesSearchCV is compatible with base sklearn methods
    such as clone, set_params, get_params.
    """

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # used to try different model classes
    pipe = Pipeline([
        ('model', SVC())
    ])

    # single categorical value of 'model' parameter sets the model class
    lin_search = {
        'model': Categorical([LinearSVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    }

    dtc_search = {
        'model': Categorical([DecisionTreeClassifier()]),
        'model__max_depth': Integer(1, 32),
        'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
    }

    svc_search = {
        'model': Categorical([SVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
        'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
        'model__degree': Integer(1, 8),
        'model__kernel': Categorical(['linear', 'poly', 'rbf']),
    }

    opt = WeightedBayesSearchCV(
        pipe,
        [(lin_search, 1), svc_search],
        n_iter=2
    )

    opt_clone = clone(opt)

    params, params_clone = opt.get_params(), opt_clone.get_params()
    assert params.keys() == params_clone.keys()

    for param, param_clone in zip(params.items(), params_clone.items()):
        assert param[0] == param_clone[0]
        assert isinstance(param[1], type(param_clone[1]))

    opt.set_params(search_spaces=[(dtc_search, 1)])

    opt.fit(X_train, y_train)
    opt_clone.fit(X_train, y_train)

    total_evaluations = len(opt.cv_results_['mean_test_score'])
    total_evaluations_clone = len(opt_clone.cv_results_['mean_test_score'])

    # test if expected number of subspaces is explored
    assert total_evaluations == 1
    assert total_evaluations_clone == 1 + 2
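
For comparison, a hedged sketch of the equivalent setup with the stock skopt.BayesSearchCV: it accepts the same kind of subspace list, but the integer in a (space, int) tuple is the number of iterations allotted to that subspace rather than a weight.

# Illustrative sketch only -- plain skopt, not WeightedBayesSearchCV.
from skopt import BayesSearchCV

opt = BayesSearchCV(
    pipe,
    [(lin_search, 1), (svc_search, 2)],  # 1 + 2 = 3 parameter settings in total
    cv=3,
)
opt.fit(X_train, y_train)
print(opt.best_params_)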
Example #11
 def get_dimensions():
     return [Real(low=0.01, high=1.0, name='sigma')]
from sklearn.externals import joblib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn_pandas import DataFrameMapper

from utility import HyperParameters, Runner
from model import load_clean_data_frame, ordinal_data_mapper

sample = None
iterations = 24

hyper_parameters = HyperParameters(
    search_space={
        'xgb__n_estimators': Integer(100, 500),
        'xgb__learning_rate': Real(0.1, 0.3),
        'xgb__gamma': Real(0.0001, 100.0, prior='log-uniform'),
        'xgb__max_depth': Integer(3, 7),
        'xgb__colsample_bytree': Real(0.4, 0.8),
        'xgb__colsample_bylevel': Real(0.4, 0.8),
        'xgb__colsample_bynode': Real(0.4, 0.8)
    })

# Features were selected based on feature importance from experiments.
data_mapper = DataFrameMapper([(['iucr'], [MinMaxScaler()]),
                               (['location'], [MinMaxScaler()]),
                               (['latitude'], [StandardScaler()]),
                               (['hour'], [MinMaxScaler()]),
                               (['longitude'], [StandardScaler()]),
                               (['type'], [MinMaxScaler()]),
                               (['month'], [MinMaxScaler()]),
def main():

    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values

    # log-uniform: understand as search over p = exp(x) by varying x
    opt = BayesSearchCV(
        estimator=xgb.XGBClassifier(),

        # ref: https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/xgradient_boosting.py
        search_spaces={
            'learning_rate': Real(0.001, 1.0, 'log-uniform'),
            'min_child_weight': Integer(0, 20),
            'max_depth': Integer(0, 50),
            'max_delta_step': Integer(0, 20),
            'subsample': Real(0.01, 1.0, 'uniform'),
            'colsample_bytree': Real(0.01, 1.0, 'uniform'),
            'colsample_bylevel': Real(0.01, 1.0, 'uniform'),
            'reg_lambda': Real(1e-10, 1e-1, 'log-uniform'),
            'reg_alpha': Real(1e-10, 1e-1, 'log-uniform'),
            'gamma': Real(1e-9, 0.5, 'log-uniform'),
            'n_estimators': Integer(50, 512),
            'scale_pos_weight': Real(1e-6, 500, 'log-uniform'),
            'booster': ["gbtree", "dart"],
            'sample_type': ['uniform', 'weighted'],
            'normalize_type': ['tree', 'forest'],
            'rate_drop': Real(1e-10, 1 - (1e-10), 'uniform')
        },
        cv=StratifiedKFold(n_splits=10, shuffle=True),
        n_jobs=3,
        n_iter=100,
        verbose=0,
        refit=True,
        random_state=42)

    def status_print(_):
        """Status callback durring bayesian hyperparameter search"""

        # Get all the models tested so far in DataFrame format
        all_models = pd.DataFrame(opt.cv_results_)

        best_parap_copy = copy.deepcopy(opt.best_params_)
        for k, v in opt.best_params_.items():
            best_parap_copy[k] = v if isinstance(v, str) or isinstance(
                v, float) else v.item()
        param_list = []
        for each in json.dumps(best_parap_copy)[1:-1].split(', '):
            param_list.append('='.join(each[1:].split('": ')))

        if hasattr(opt.estimator, 'verbose'):
            param_list.append('verbose=True')

        param = opt.estimator.__class__.__name__ + \
            '(' + ', '.join(param_list) + ')'

        # Get current parameters and the best parameters
        print('Model #{}\nBest roc_auc: {}\nBest params: {}\n'.format(
            len(all_models), np.round(opt.best_score_, 4), param))

    opt.fit(X_train, y_train, callback=status_print)

    print("val. score: %s" % opt.best_score_)
    print("test score: %s" % opt.score(X_test, y_test))
    def roi_space() -> List[Dimension]:
        """
        Create a ROI space.

        Defines values to search for each ROI step.

        This method implements an adaptive ROI hyperspace with parameter
        ranges that automatically adapt to the ticker interval used.

        It's used by Freqtrade by default if no custom roi_space method is defined.
        """

        # Default scaling coefficients for the roi hyperspace. Can be changed
        # to adjust resulting ranges of the ROI tables.
        # Increase if you need wider ranges in the roi hyperspace, decrease if shorter
        # ranges are needed.
        roi_t_alpha = 1.0
        roi_p_alpha = 1.0

        timeframe_min = timeframe_to_minutes(IHyperOpt.ticker_interval)

        # We define here limits for the ROI space parameters automagically adapted to the
        # timeframe used by the bot:
        #
        # * 'roi_t' (limits for the time intervals in the ROI tables) components
        #   are scaled linearly.
        # * 'roi_p' (limits for the ROI value steps) components are scaled logarithmically.
        #
        # The scaling is designed so that it maps exactly to the legacy Freqtrade roi_space()
        # method for the 5m ticker interval.
        roi_t_scale = timeframe_min / 5
        roi_p_scale = math.log1p(timeframe_min) / math.log1p(5)
        roi_limits = {
            'roi_t1_min': int(10 * roi_t_scale * roi_t_alpha),
            'roi_t1_max': int(120 * roi_t_scale * roi_t_alpha),
            'roi_t2_min': int(10 * roi_t_scale * roi_t_alpha),
            'roi_t2_max': int(60 * roi_t_scale * roi_t_alpha),
            'roi_t3_min': int(10 * roi_t_scale * roi_t_alpha),
            'roi_t3_max': int(40 * roi_t_scale * roi_t_alpha),
            'roi_p1_min': 0.01 * roi_p_scale * roi_p_alpha,
            'roi_p1_max': 0.04 * roi_p_scale * roi_p_alpha,
            'roi_p2_min': 0.01 * roi_p_scale * roi_p_alpha,
            'roi_p2_max': 0.07 * roi_p_scale * roi_p_alpha,
            'roi_p3_min': 0.01 * roi_p_scale * roi_p_alpha,
            'roi_p3_max': 0.20 * roi_p_scale * roi_p_alpha,
        }
        logger.debug(f"Using roi space limits: {roi_limits}")
        p = {
            'roi_t1': roi_limits['roi_t1_min'],
            'roi_t2': roi_limits['roi_t2_min'],
            'roi_t3': roi_limits['roi_t3_min'],
            'roi_p1': roi_limits['roi_p1_min'],
            'roi_p2': roi_limits['roi_p2_min'],
            'roi_p3': roi_limits['roi_p3_min'],
        }
        logger.info(
            f"Min roi table: {round_dict(IHyperOpt.generate_roi_table(p), 5)}")
        p = {
            'roi_t1': roi_limits['roi_t1_max'],
            'roi_t2': roi_limits['roi_t2_max'],
            'roi_t3': roi_limits['roi_t3_max'],
            'roi_p1': roi_limits['roi_p1_max'],
            'roi_p2': roi_limits['roi_p2_max'],
            'roi_p3': roi_limits['roi_p3_max'],
        }
        logger.info(
            f"Max roi table: {round_dict(IHyperOpt.generate_roi_table(p), 5)}")

        return [
            Integer(roi_limits['roi_t1_min'],
                    roi_limits['roi_t1_max'],
                    name='roi_t1'),
            Integer(roi_limits['roi_t2_min'],
                    roi_limits['roi_t2_max'],
                    name='roi_t2'),
            Integer(roi_limits['roi_t3_min'],
                    roi_limits['roi_t3_max'],
                    name='roi_t3'),
            Real(roi_limits['roi_p1_min'],
                 roi_limits['roi_p1_max'],
                 name='roi_p1'),
            Real(roi_limits['roi_p2_min'],
                 roi_limits['roi_p2_max'],
                 name='roi_p2'),
            Real(roi_limits['roi_p3_min'],
                 roi_limits['roi_p3_max'],
                 name='roi_p3'),
        ]
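
To make the scaling comments above concrete, a quick worked check of the two scale factors for a hypothetical 60-minute timeframe (the numbers follow directly from the formulas in roi_space):

import math

timeframe_min = 60
roi_t_scale = timeframe_min / 5                           # 12.0, linear in time
roi_p_scale = math.log1p(timeframe_min) / math.log1p(5)   # ~2.29, logarithmic

print(int(10 * roi_t_scale))   # roi_t1_min becomes 120 instead of 10
print(0.04 * roi_p_scale)      # roi_p1_max becomes ~0.092 instead of 0.04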
class BaseModelConfig(metaclass=ABCMeta):
    """Base class for each account model.

    Implements generic default configuration and pipeline steps that can be overridden and extended in sub-classes.

    Runtime state should be managed via :class:`~forecasting_platform.services.Services` and not as instance attributes.

    Any :func:`~abc.abstractmethod` must be defined in the account model config classes,
    therefore these abstract methods are the minimum required configuration for a new account model.

    Args:
        runtime_config: All configuration of the current run.
        data_loader: Service for loading data.

    """
    def __init__(self, runtime_config: RuntimeConfig, data_loader: DataLoader):
        self._runtime_config = runtime_config
        self._data_loader = data_loader

        self.forecast_path = absolute_path(
            runtime_config.output_path / "08 Predictions" /
            f"Forecast {self.MODEL_NAME}" /
            f"{runtime_config.prediction_month.strftime(PREDICTION_MONTH_FORMAT)}"
            / f"{runtime_config.run_timestamp}"
            f"_{runtime_config.prediction_month.strftime(PREDICTION_MONTH_FORMAT)}"
            f"_T{runtime_config.test_periods}"
            f"_P{runtime_config.predict_periods}"
            f"_{'_'.join(self.GROUPING)}")

    def __str__(self) -> str:
        return f"{self.MODEL_NAME}: {self.forecast_path}"

    @property
    @abstractmethod
    def MODEL_NAME(self) -> str:
        """Model identifier used to reference this model in created files and log messages."""
        ...

    @property
    @abstractmethod
    def CONTRACTS(self) -> List[str]:
        """List of contracts to be included in the model."""
        ...

    @property
    def model_data_query(self) -> Any:
        """Database query used to split training data for this model."""
        return CleanedData.c.Contract_ID.in_(self.CONTRACTS)

    @property
    @abstractmethod
    def TRAINING_START(self) -> pd.Timestamp:
        """Account-specific :class:`pandas.Timestamp` to include training data newer than this month."""
        ...

    @property
    def GROUPING(self) -> List[str]:
        """Define the granularity of aggregations and the identifier/index level for this model."""
        return ["Contract_ID", "Item_ID"]

    @property
    @abstractmethod
    def POSTPROCESS_DEPTH(self) -> int:
        """Consider these number of months when performing certain postprocessing steps."""
        ...

    @property
    @abstractmethod
    def DEFAULT_FEATURES(self) -> List[Feature]:
        """Create the list of account-specific features with :func:`~owforecasting.features.default_features`."""
        ...

    HYPER_SPACE: List[Dimension] = [
        Integer(5, 30, name="max_depth"),
        Real(1e-5, 1e-1, name="learn_rate", prior="log-uniform"),
        Real(0.9, 0.9999, name="learn_rate_annealing"),
        Integer(10, 25, name="min_rows"),
        Integer(50, 200, name="ntrees"),
        Real(1e-4, 1e-2, name="stopping_tolerance"),
        Integer(2, 10, name="stopping_rounds"),
        Real(1e-1, 1, name="sample_rate"),
        Real(0.99, 1, name="col_sample_rate"),
        Integer(100, 500, name="nbins"),
        Real(1e-10, 1e-3, name="min_split_improvement"),
        Integer(10, 11, name="nfolds"),
    ]

    #: Apply the weighting feature to training data. Disable this feature when set to 0.
    WEIGHTING: Optional[int] = None

    PREPROCESS_OUTLIERS = False

    PREPROCESS_UNIT_COST_AGGREGATION = "mean"

    EXCLUDE_PROJECTS: List[str] = []
    ONLY_INCLUDE_PROJECTS: List[str] = []

    EXCLUDE_ITEMS: List[int] = []
    ONLY_INCLUDE_ITEMS: List[int] = []

    SALES_MIN_PERIOD = 5

    DEFAULT_HYPER_PARAMS = {
        "weights_column": "W_Weight",
        "score_each_iteration": True,  # Avoid non-deterministic behavior
        "score_tree_interval": 1,  # Avoid non-deterministic behavior,
        # see http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/algo-params/score_tree_interval.html
    }

    #: Number of points to evaluate by :func:`~owforecasting.models.optimize_bayes`.
    OPTIMIZE_HYPER_PARAMETERS_N_CALLS = 20

    _OPTIMIZE_BAYES_GP_ARGS = None  # Only exists so it can be changed to speed up integration tests

    @property
    def OVERRIDE_HYPER_PARAMS(self) -> Dict[str, object]:
        """Set this to extend or overwrite values from DEFAULT_HYPER_PARAMS."""
        return {}

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Override this to configure account-specific internal or exogenous features, e.g. build-rates.

        Args:
            cleaned_data_run_id: ID of the run that created the cleaned data set in the internal database.
        """
        return {}, {}

    def calculate_weights(self) -> Weights:
        """Override this to configure account-specific weighting functions."""
        assert self.WEIGHTING is not None, f"Must define WEIGHTING for default calculate_weights function on {self}"
        return calculate_default_weights(self.TRAINING_START,
                                         self._runtime_config.forecast_end,
                                         self.WEIGHTING)

    def preprocess_account_data(
            self, sales_raw: pd.DataFrame, grouping: List[str],
            internal_features: InternalFeatures) -> pd.DataFrame:
        """Preprocess account data by filtering and aggregating the raw sales data and adding internal features.

        Args:
            sales_raw: Raw sales data to pre-process.
            grouping: Grouping columns to use.
            internal_features: Internal features to add.

        Returns:
            :class:`~pandas.DataFrame` with pre-processing applied.

        """
        sales = filter_sales(
            sales_raw,
            exclude_projects=self.EXCLUDE_PROJECTS,
            only_include_projects=self.ONLY_INCLUDE_PROJECTS,
            exclude_items=self.EXCLUDE_ITEMS,
            only_include_items=self.ONLY_INCLUDE_ITEMS,
        )
        sales = add_sales_features(sales, internal_features)
        return group_sales(
            sales,
            grouping,
            unit_cost_aggregation=self.PREPROCESS_UNIT_COST_AGGREGATION)

    def prepare_training_data(
        self,
        sales: pd.DataFrame,
        grouping: List[str],
        exo_features: ExogenousFeatures,
    ) -> TimeSeries:
        """Prepare sales data for training by configuring :class:`~owforecasting.timeseries.TimeSeries`.

        This includes selection of training data from all sales and weighting of provided exogenous features.

        Args:
            sales: Sales data to prepare for training.
            grouping: Grouping columns to use for the time series.
            exo_features: Exogenous features to add to the data.

        Returns:
            Prepared training data.

        """
        sales_train = preprocess_grouped_sales(
            sales,
            self._runtime_config.forecast_start,
            self._runtime_config.test_periods,
            self.TRAINING_START,
            grouping,
            sales_min_period=self.SALES_MIN_PERIOD,
        )

        if sales_train.empty:
            raise ConfigurationException(
                f"Not enough training data for {self}. "
                "Please check configuration of --forecast-periods and TRAINING_START."
            )

        if self.PREPROCESS_OUTLIERS:
            ts_tmp = TimeSeries(
                sales_train,
                datetime_col="Date",
                response_col="Order_Quantity",
                grouping_cols=grouping,
                granularity=Granularity.MONTH,
                test_periods=self._runtime_config.test_periods,
                predict_periods=self._runtime_config.predict_periods,
                standardize_response=False,
            )
            sales_train = preprocess_outliers(
                ts_tmp._data, self.GROUPING,
                self._runtime_config.forecast_start).reset_index(drop=True)

        # Create TimeSeries
        ts = TimeSeries(
            sales_train,
            datetime_col="Date",
            response_col="Order_Quantity",
            grouping_cols=grouping,
            granularity=Granularity.MONTH,
            test_periods=self._runtime_config.test_periods,
            predict_periods=self._runtime_config.predict_periods,
        ).add_features(self.DEFAULT_FEATURES)

        # Add exogenous features
        for i, (name, (exo, default)) in enumerate(exo_features.items()):
            ts.add_exogenous_feature(f"EXO{i}", exo, default)

        # Add weighting
        weights = self.calculate_weights()
        ts.add_exogenous_feature("W", weights)

        return ts

    def postprocess_forecast(self, ts: TimeSeries, ts_pred: TimeSeries,
                             sales: pd.DataFrame,
                             grouping: List[str]) -> pd.DataFrame:
        """Postprocess account forecast, including filtering and grouping.

        Args:
            ts: :class:`~owforecasting.timeseries.TimeSeries` containing actual values.
            ts_pred: :class:`~owforecasting.timeseries.TimeSeries` containing predicted values.
            sales: :class:`~pandas.DataFrame` with actual sales data.
            grouping: Grouping columns to use.

        Returns:
            Post-processed forecast.

        """
        return postprocess_forecast_results(
            ts_pred.result_data, grouping, self._runtime_config.forecast_start,
            self.POSTPROCESS_DEPTH)
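
As a hedged illustration of how the abstract members above are typically filled in, here is a minimal hypothetical subclass; every name and value below is a placeholder, not a real account configuration:

# Hypothetical example only -- shows which members a concrete config must define.
class ExampleAccountConfig(BaseModelConfig):
    MODEL_NAME = "EXAMPLE"
    CONTRACTS = ["CONTRACT_A", "CONTRACT_B"]
    TRAINING_START = pd.Timestamp("2018-01-01")
    POSTPROCESS_DEPTH = 3
    DEFAULT_FEATURES: List[Feature] = []  # usually built via default_features(...)

    # Optional overrides of base-class defaults
    WEIGHTING = 12
    PREPROCESS_OUTLIERS = True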
Example #16
    Categorical([1, 10, 20, None], name='max_depth'),
    Categorical(['balanced', 'balanced_subsample'], name='class_weight'),
    Categorical([200, 400, 800, 1600, 3200], name='n_estimators'),
    Categorical(['auto', 'log2'], name='max_features'),
    Categorical([True, False], name='bootstrap'),
    Integer(2, 10, name='min_samples_split'),
    Integer(1, 5, name='min_samples_leaf')
]

lgbm_space = [
    Categorical([1, 5, 10, 15, 20, -1], name='max_depth'),
    Integer(5, 50, name='num_leaves'),
    Categorical(['gbdt', 'dart'], name='boosting_type'),
    Categorical([True, False], name='is_unbalance'),
    Categorical([200, 400, 800, 1600, 3200], name='n_estimators'),
    Real(0.01, 0.5, name='learning_rate'),
    Integer(25, 500, name='max_bin'),
    Integer(10, 50, name='min_data_in_leaf')
]
svm_space = [
    Categorical(['optimal', 'invscaling'], name='learning_rate'),
    Integer(500, 100000, name='max_iter'),
    Categorical([
        'hinge',
    ], name='loss'),
    Categorical(['l2', 'elasticnet'], name='penalty'),
    Real(0.00001, 0.001, name='eta0'),
    Real(0.00001, 0.001, name='alpha'),
    Categorical(['balanced', None], name='class_weight'),
    Categorical([False, 5, 10, 20, True], name='average')
]
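
A hedged aside: spaces like svm_space above can also drive skopt's ask/tell interface directly; evaluate_svm below is a hypothetical scoring helper, not part of the source.

# Illustrative sketch only.
from skopt import Optimizer

opt = Optimizer(svm_space, random_state=0)
for _ in range(10):
    point = opt.ask()
    params = dict(zip([d.name for d in svm_space], point))
    opt.tell(point, evaluate_svm(**params))  # report the score to minimize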
    EncodingDecodingNeuronLayer.get_simple_constructor(
        epsilons=epsilons, lambdas=lambdas, quantizer=quantizer))

N_PARAMETER_SEARCH_EPOCHS = 1

# ======================================================================================================================
# Try best parameters from demo_mnist_quantized_find_best_params

X = X_1hid_quantized_scheduled.add_config_root_variant(
    'poly_schedule',
    epsilons=lambda eps_init, eps_exp: f'{eps_init}/t**{eps_exp}',
    lambdas=lambda lambda_init, lambda_exp: f'{lambda_init}/t**{lambda_exp}')
X.add_root_variant(n_epochs=N_PARAMETER_SEARCH_EPOCHS,
                   epoch_checkpoint_period=None,
                   quantizer='sigma_delta').add_parameter_search(
                       space=dict(eps_init=Real(0, 1, 'uniform'),
                                  eps_exp=Real(0, 1, 'uniform'),
                                  lambda_init=Real(0, 1, 'uniform'),
                                  lambda_exp=Real(0, 1, 'uniform')),
                       scalar_func=lambda result: result[-1, 'val_error'],
                       n_calls=500)
X = X_3hid_quantized_scheduled.add_config_root_variant(
    'poly_schedule',
    epsilons=lambda eps_init, eps_exp: f'{eps_init}/t**{eps_exp}',
    lambdas=lambda lambda_init, lambda_exp: f'{lambda_init}/t**{lambda_exp}')
X_3hid_paramsearch_base = X.add_root_variant(
    n_epochs=N_PARAMETER_SEARCH_EPOCHS,
    epoch_checkpoint_period=None,
    quantizer='sigma_delta')
for X in (X_3hid_paramsearch_base,
          X_3hid_paramsearch_base.add_root_variant(n_negative_steps=100,
settings['out_path_process'] = "/mnt/Datos/BML_CNCRS/Spoc/ECoG_STN/"

settings['frequencyranges'] = [[4, 8], [8, 12], [13, 20], [20, 35], [13, 35],
                               [60, 80], [90, 200], [60, 200]]
settings['seglengths'] = [1, 2, 2, 3, 3, 3, 10, 10, 10]
# settings['num_patients']=['000', '004', '005', '007', '008', '009', '010', '013', '014']
settings['num_patients'] = [
    '000', '001', '004', '005', '006', '007', '008', '009', '010', '013', '014'
]

settings['BIDS_path'] = settings['BIDS_path'].replace("\\", "/")
settings['out_path'] = settings['out_path'].replace("\\", "/")

#%%
space_LM = [
    Real(1e-6, 1, "uniform", name='alpha'),
    Real(1e-6, 1, "uniform", name='l1_ratio')
]


#%%
def func(y, time_stamps=5):
    y_ = y.copy()
    # y_[:time_stamps]=np.zeros((time_stamps,1))

    return y_[time_stamps:]


def inverse_func(x, time_stamps=5):
    x_ = x.copy()
    x_ = np.vstack((np.zeros((time_stamps, 1)), x))
"""
Code for a gender-balanced explicit ALS algorithm.
"""

from skopt.space import Integer, Real
from lenskit.algorithms.als import BiasedMF

from bookgender.rerank.GenderCalibratedRecommender import GenderCalibratedRecommender
from bookgender.rerank.fastForceGenderBalanceRecommender import FastForceGenderBalanceRecommender
from bookgender.rerank.rerankUtil import getBookGender
from bookgender.rerank.slowForceGenderBalanceRecommender import SlowForceGenderBalanceRecommender
from bookgender.rerank.slowForceGenderTargetRecommender import SlowForceGenderTargetRecommender

dimensions = [Integer(5, 300), Real(1.0e-6, 5), Real(1.0e-6, 5), Real(0, 25)]


def instantiate(opts, implicit):
    feats, ureg, ireg, damp = opts
    return GenderCalibratedRecommender(
        BiasedMF(feats, reg=(ureg, ireg), damping=damp), getBookGender(), 0.5)
    #return SlowForceGenderTargetRecommender(BiasedMF(feats, reg=(ureg, ireg), damping=damp), getBookGender())


update = None

sweep_points = [BiasedMF(nf) for nf in range(25, 250, 25)]
sweep_attrs = ['features']
Example #20
class LightGBMRegressor(Estimator):
    """LightGBM Regressor"""
    name = "LightGBM Regressor"
    hyperparameter_ranges = {
        "learning_rate": Real(0.000001, 1),
        "boosting_type": ["gbdt", "dart", "goss", "rf"],
        "n_estimators": Integer(10, 100),
        "max_depth": Integer(0, 10),
        "num_leaves": Integer(2, 100),
        "min_child_samples": Integer(1, 100),
        "bagging_fraction": Real(0.000001, 1),
        "bagging_freq": Integer(0, 1)
    }
    model_family = ModelFamily.LIGHTGBM
    supported_problem_types = [ProblemTypes.REGRESSION]

    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self,
                 boosting_type="gbdt",
                 learning_rate=0.1,
                 n_estimators=20,
                 max_depth=0,
                 num_leaves=31,
                 min_child_samples=20,
                 n_jobs=-1,
                 random_state=None,
                 random_seed=0,
                 bagging_fraction=0.9,
                 bagging_freq=0,
                 **kwargs):
        random_seed = deprecate_arg("random_state", "random_seed",
                                    random_state, random_seed)

        parameters = {
            "boosting_type": boosting_type,
            "learning_rate": learning_rate,
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "num_leaves": num_leaves,
            "min_child_samples": min_child_samples,
            "n_jobs": n_jobs,
            "bagging_freq": bagging_freq,
            "bagging_fraction": bagging_fraction
        }
        parameters.update(kwargs)
        lg_parameters = copy.copy(parameters)
        # when boosting type is random forest (rf), LightGBM requires bagging_freq == 1 and  0 < bagging_fraction < 1.0
        if boosting_type == "rf":
            lg_parameters['bagging_freq'] = 1
        # when boosting type is goss, LightGBM requires bagging_fraction == 1
        elif boosting_type == "goss":
            lg_parameters['bagging_fraction'] = 1
        # avoid lightgbm warnings having to do with parameter aliases
        if lg_parameters['bagging_freq'] is not None or lg_parameters[
                'bagging_fraction'] is not None:
            lg_parameters.update({'subsample': None, 'subsample_freq': None})

        lgbm_error_msg = "LightGBM is not installed. Please install using `pip install lightgbm`."
        lgbm = import_or_raise("lightgbm", error_msg=lgbm_error_msg)
        self._ordinal_encoder = None

        lgbm_regressor = lgbm.sklearn.LGBMRegressor(random_state=random_seed,
                                                    **lg_parameters)

        super().__init__(parameters=parameters,
                         component_obj=lgbm_regressor,
                         random_seed=random_seed)

    def _encode_categories(self, X, fit=False):
        """Encodes each categorical feature using ordinal encoding."""
        X = infer_feature_types(X)
        cat_cols = list(X.select('category').columns)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        if fit:
            self.input_feature_names = list(X.columns)
        X_encoded = _rename_column_names_to_numeric(X)
        rename_cols_dict = dict(zip(X.columns, X_encoded.columns))
        cat_cols = [rename_cols_dict[col] for col in cat_cols]

        if len(cat_cols) == 0:
            return X_encoded
        if fit:
            self._ordinal_encoder = OrdinalEncoder()
            encoder_output = self._ordinal_encoder.fit_transform(
                X_encoded[cat_cols])
        else:
            encoder_output = self._ordinal_encoder.transform(
                X_encoded[cat_cols])
        X_encoded[cat_cols] = pd.DataFrame(encoder_output)
        X_encoded[cat_cols] = X_encoded[cat_cols].astype('category')
        return X_encoded

    def fit(self, X, y=None):
        X_encoded = self._encode_categories(X, fit=True)
        if y is not None:
            y = infer_feature_types(y)
            y = _convert_woodwork_types_wrapper(y.to_series())
        self._component_obj.fit(X_encoded, y)
        return self

    def predict(self, X):
        X_encoded = self._encode_categories(X)
        return super().predict(X_encoded)
Example #21
import pytest
from skopt.space import Real, Integer, Categorical

from src.utilities import conf_to_params, bayesianMLP_param_space


@pytest.mark.parametrize(
    "c, expected", [({
        "name": "learning_rate",
        "type": "Real",
        "low": 0.1,
        "high": 0.2
    }, Real(low=0.1, high=0.2, prior='uniform', transform='identity')),
                    ({
                        "name": "num_hidden_layer",
                        "type": "Integer",
                        "low": 0,
                        "high": 3
                    }, Integer(low=0, high=3)),
                    ({
                        "name": "categorical",
                        "type": "Categorical",
                        "categories": [0, 1, 2]
                    }, Categorical([0, 1, 2], name="categorical"))])
def test_conf_to_params(c, expected):
    assert conf_to_params(c) == expected
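
The parametrized cases above fully pin down the expected mapping. Purely as a hedged sketch (not the project's actual src.utilities code), an implementation consistent with them could look like this:

from skopt.space import Categorical, Integer, Real

def conf_to_params(c):
    # Hedged sketch consistent with the test cases above; the real helper may differ.
    if c["type"] == "Real":
        return Real(c["low"], c["high"], prior=c.get("prior", "uniform"),
                    name=c.get("name"))
    if c["type"] == "Integer":
        return Integer(c["low"], c["high"], name=c.get("name"))
    if c["type"] == "Categorical":
        return Categorical(c["categories"], name=c.get("name"))
    raise ValueError(f"Unknown dimension type: {c['type']}")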


# def test_bayesianMLP_param_space():
#     BOconfig = {
#         "nBayesianOptCall": 10,
Example #22
def test_normalize():
    a = Real(2.0, 30.0, transform="normalize")
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 2, 30)

    rng = np.random.RandomState(0)
    X = rng.randn(100)
    X = 28 * (X - X.min()) / (X.max() - X.min()) + 2

    # Check transformed values are in [0, 1]
    assert_true(np.all(a.transform(X) <= np.ones_like(X)))
    assert_true(np.all(np.zeros_like(X) <= a.transform(X)))

    # Check inverse transform
    assert_array_almost_equal(a.inverse_transform(a.transform(X)), X)

    # log-uniform prior
    a = Real(10**2.0, 10**4.0, prior="log-uniform", transform="normalize")
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 10**2, 10**4)

    rng = np.random.RandomState(0)
    X = np.clip(10**3 * rng.randn(100), 10**2.0, 10**4.0)

    # Check transform
    assert_true(np.all(a.transform(X) <= np.ones_like(X)))
    assert_true(np.all(np.zeros_like(X) <= a.transform(X)))

    # Check inverse transform
    assert_array_almost_equal(a.inverse_transform(a.transform(X)), X)

    a = Integer(2, 30, transform="normalize")
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 2, 30)
    assert_array_equal(a.transformed_bounds, (0, 1))

    X = rng.randint(2, 31)
    # Check transformed values are in [0, 1]
    assert_true(np.all(a.transform(X) <= np.ones_like(X)))
    assert_true(np.all(np.zeros_like(X) <= a.transform(X)))

    # Check inverse transform
    X_orig = a.inverse_transform(a.transform(X))
    assert_equal(X_orig.dtype, "int64")
    assert_array_equal(X_orig, X)
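
A quick illustration of the "normalize" transform exercised by the test above: values are mapped linearly onto [0, 1] and recovered exactly by the inverse transform.

from skopt.space import Real

dim = Real(2.0, 30.0, transform="normalize")
print(dim.transform([2.0, 16.0, 30.0]))        # -> [0.0, 0.5, 1.0]
print(dim.inverse_transform([0.0, 0.5, 1.0]))  # -> [2.0, 16.0, 30.0]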
Example #23
# parametric tuning for info-detection
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

from util import TPR_TNR
from util import Lymphography
from info_detection import InfoOutlierDetector

SPACE = [Real(0.01, 0.2, prior='uniform', name='gamma')]
data, labels = Lymphography()


@use_named_args(SPACE)
def objective(**params):
    global data, labels
    ic = InfoOutlierDetector(affinity='laplacian', **params)
    y_predict = ic.fit_predict(data)
    tpr, tnr = TPR_TNR(labels, y_predict)
    if (tpr < 0.9):
        return 10
    return 0.9 - tpr - tnr


if __name__ == '__main__':
    res_gp = gp_minimize(objective, SPACE, n_calls=40, random_state=0)
    print(res_gp)
Example #24
    def param_search_and_cross_validation(
            num_estimators=5000,
            early_stopping_rounds=15,
            data_train=data_train,
            data_train__target=data_train__target,
            data_test=data_test,
            data_test__target=data_test__target,
            data_eval=data_eval,
            data_eval__target=data_eval__target,
            num_cv_folds=num_cv_folds):
        """
      Trains a sequence of weak, boosted learners on training data
      Stops after error no measured imcustomervements on prediction accuracy 
      after n stopping rounds

      Best params identified using log loss function via SK Opt library
 
      Function returns a series of best params on which to train a usable model on
    """
        """
      Additional params to consider if models overfit
    
      ## Learning Params
      early_stopping_rounds : will stop training if one metric of one validation data doesn’t imcustomerve in last early_stopping_round rounds

      ## IO Params
        max_bin : (default is 255, decrease the number to mitigate over fitting.  Risks drop in accuracy)
    """
        hyperparameters = [
            Integer(4, 96,
                    name='num_leaves'),  # max number of leaves in one tree
            Integer(3, 5,
                    name='max_depth'),  # max depth of an individual stump
            Real(
                2**-8, 2**-2, 'log-uniform', name='learning_rate'
            ),  # booster's learning rate, sampled log-uniformly from [ 1/(2^8), 1/(2^2) ]
            Integer(
                2, 96,
                name='min_data_in_leaf'),  # minimal number of data in one leaf
            Real(.5, 1.0, 'uniform', name='bagging_fraction'
                 ),  # will randomly select part of data without resampling,
            Real(
                0.5, 1.0, 'uniform', name='feature_fraction'
            ),  # will select n% of features w/o resampling before training each stump
            Real(0.3, 1.0, "uniform", name='colsample_bytree'
                 ),  # subsample ratio of columns when constructing each tree
            Integer(
                25, 150, name='max_bin'
            )  # max number of bins that feature values will be bucketed in
        ]

        clf = lgb.LGBMClassifier(
            n_estimators=num_estimators,  # number of boosted trees to build
            objective='binary',
            silent=False,
            importance_type=
            'gain',  # gain in _some metric_ when a feature is included
            seed=
            12759081,  # setting this, but underlying C++ seeds may overwrite
            num_threads=
            4,  # number of real CPUs available on the playground machine
            class_weight=
            'balanced'  # uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data
        )

        # Per skopt docs https://scikit-optimize.github.io/#skopt.gp_minimize :
        # hyper params can be passed to a pre-defined fitted model's objective function via the following decorator
        @use_named_args(hyperparameters)
        def objective_fxn(**params):
            """
      Trains a series of models across 7 (hardcoded) folds of the training data
       & over the various pre-defined hyper params
      Leveraging SKLearn's `cross_val_score()` method for this
      """
            print('Training a model using the following params:')
            print(params)
            clf.set_params(**params)

            # Using cross val score() for now, but consider using cross_validate() for more info in a later iteration
            # https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation
            # Scoring docs also available at link above
            cv_score_scores__mean = -1.0 * cross_val_score(
                clf,  # model
                data_train,  # data to fit a model to
                data_train__target,  # target variable
                cv=num_cv_folds,  # number of folds to iterate over
                scoring='neg_log_loss',
                fit_params={
                    'early_stopping_rounds': early_stopping_rounds,
                    'eval_set': (data_eval, data_eval__target),
                    'verbose': True
                }).mean()
            print(
                'Cross Validation Complete on Hyperparam Permutation\nMean Log Loss: {0}'
                .format(cv_score_scores__mean))
            print('Logging results')

            num_leaves.append(params['num_leaves'])
            max_depth.append(params['max_depth'])
            learning_rate.append(params['learning_rate'])
            min_data_in_leaf.append(params['min_data_in_leaf'])
            bagging_fraction.append(params['bagging_fraction'])
            feature_fraction.append(params['feature_fraction'])
            colsample_bytree.append(params['colsample_bytree'])
            max_bins.append(params['max_bin'])
            cross_val_score__mean.append(cv_score_scores__mean)
            table_record_udpate_timestamp.append(datetime.now())

            return (cv_score_scores__mean)

        # Leveraging SK Opt's Gaussian process Bayesian Optimization `gp_minimize()` method to approximate the 'best params' to
        # use in a final model
        # https://scikit-optimize.github.io/#skopt.gp_minimize
        # Method returns an OptimizeResult object.  See link above for full docs on all the data returned
        print(
            'Starting cross validation via Bayesian optimization, with early stopping in place'
        )

        gaussian_process_results_array = gp_minimize(
            objective_fxn,
            hyperparameters,  # list of search space dimensions
            n_calls=30,  # number of calls to make against the objective function
            random_state=
            215235  # seeding the optimizer for reproducible results
        )

        return (gaussian_process_results_array)
Example #25
    def stoploss_space() -> List[Dimension]:

        return [
            Real(-0.05, -0.01, name='stoploss'),
        ]
Example #26
nr_fold = 5
random_state = 42

train = pd.read_csv('../input/train_input_2yaM34J.csv', parse_dates=['Date'])
y = pd.read_csv('../input/train_output_2kCtjpF.csv')['Score']

train = get_dates(train)

train = train.drop(['ID', 'Date'], axis=1)

space = [
    Integer(3, 200, name='max_depth'),
    Integer(2, 2056, name='num_leaves'),
    Integer(3, 200, name='min_child_samples'),
    Real(0.2, 0.90, name='subsample'),
    Real(0.2, 0.90, name='colsample_bytree'),
    Real(0.001, 0.2, name='learning_rate'),
    Real(0.0001, 100, name='reg_alpha'),
    Real(0.0001, 100, name='reg_lambda'),
    Integer(2, 1000, name='min_child_weight'),
    Real(0.001, 1, name='min_split_gain'),
    Categorical(['gbdt', 'dart', 'goss'], name='boosting_type')
]


def objective(values):
    params = {
        'device': 'cpu',
        'objective': 'multiclass',
        'num_class': 5,
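
The objective above is truncated. Since it takes a plain list of values rather than named arguments, the usual pattern is to zip the values back onto the dimension names defined in space; this is a hedged sketch, and cv_log_loss is a hypothetical cross-validation helper:

# Illustrative sketch only -- not the original objective body.
param_names = [dim.name for dim in space]

def objective(values):
    params = dict(zip(param_names, values))
    params.update({'device': 'cpu', 'objective': 'multiclass', 'num_class': 5})
    return cv_log_loss(params, train, y, nr_fold, random_state)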
Example #27
# -3.0 -> 0.001
def pow10map(x):
    return 10.0**x


def pow2intmap(x):
    return int(2.0**x)


def nop(x):
    return x


nnparams = {
    # up to 1024 neurons
    'hidden_layer_sizes': (Real(1.0, 10.0), pow2intmap),
    'activation': (Categorical(['identity', 'logistic', 'tanh', 'relu']), nop),
    'solver': (Categorical(['lbfgs', 'sgd', 'adam']), nop),
    'alpha': (Real(-5.0, -1), pow10map),
    'batch_size': (Real(5.0, 10.0), pow2intmap),
    'learning_rate': (Categorical(['constant', 'invscaling',
                                   'adaptive']), nop),
    'max_iter': (Real(5.0, 8.0), pow2intmap),
    'learning_rate_init': (Real(-5.0, -1), pow10map),
    'power_t': (Real(0.01, 0.99), nop),
    'momentum': (Real(0.1, 0.98), nop),
    'nesterovs_momentum': (Categorical([True, False]), nop),
    'beta_1': (Real(0.1, 0.98), nop),
    'beta_2': (Real(0.1, 0.9999999), nop),
}
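
A hedged illustration of how the (dimension, mapping) pairs above are meant to be consumed: optimize over the raw dimensions, then apply each mapping function to obtain the actual estimator keyword argument (the names match scikit-learn's MLP estimators).

# Illustrative only: sample one raw point and map it to estimator kwargs.
from skopt.space import Space

raw_dims = [dim for dim, _ in nnparams.values()]
raw_point = Space(raw_dims).rvs(n_samples=1, random_state=0)[0]

kwargs = {name: mapper(value)
          for (name, (_, mapper)), value in zip(nnparams.items(), raw_point)}
print(kwargs)  # e.g. hidden_layer_sizes becomes an int via pow2intmap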
Example #28

@pytest.mark.fast_test
@pytest.mark.parametrize("dimensions, normalizations", [
    (((1, 3), (1., 3.)), ('normalize', 'normalize')),
    (((1, 3), ('a', 'b', 'c')), ('normalize', 'onehot')),
])
def test_normalize_dimensions(dimensions, normalizations):
    space = normalize_dimensions(dimensions)
    for dimension, normalization in zip(space, normalizations):
        assert dimension.transform_ == normalization


@pytest.mark.fast_test
@pytest.mark.parametrize(
    "dimension, name", [(Real(1, 2, name="learning rate"), "learning rate"),
                        (Integer(1, 100, name="no of trees"), "no of trees"),
                        (Categorical(["red, blue"], name="colors"), "colors")])
def test_normalize_dimensions_name(dimension, name):
    space = normalize_dimensions([dimension])
    assert space.dimensions[0].name == name


@pytest.mark.fast_test
def test_use_named_args():
    """
    Test the function wrapper @use_named_args which is used
    for wrapping an objective function with named args so it
    can be called by the optimizers which only pass a single
    list as the arg.
Example #29
                        verbose=0,
                        class_weight=classWeight,
                        callbacks=[early_stopping, model_checkpoint],
                        validation_split=0.25)
    Y_predict = model.predict(X_test)
    fpr, tpr, thresholds = roc_curve(Y_test, Y_predict)
    roc_auc = auc(fpr, tpr)
    return roc_auc
    #best_acc = max(history.history['val_acc'])
    #return best_acc


space = [
    Integer(2, 4, name='hidden_layers'),
    Integer(32, 256, name='initial_nodes'),
    Real(10**-5, 10**-1, "log-uniform", name='l2_lambda'),
    Real(0.15, 0.5, name='dropout'),
    Integer(256, 4096, name='batch_size'),
    Real(10**-5, 10**-1, "log-uniform", name='learning_rate'),
]


@use_named_args(space)
def objective(**X):
    global best_auc
    global best_config
    print("New configuration: {}".format(X))

    model = build_custom_model(num_hiddens=X['hidden_layers'],
                               initial_node=X['initial_nodes'],
                               dropout=X['dropout'],
Example #30
"""
Code for optimizing the implicit ALS algorithm.
"""

from skopt.space import Integer, Real
from lenskit.algorithms.als import ImplicitMF

dimensions = [Integer(5, 500), Real(1.0e-6, 5), Real(1.0e-6, 5), Real(1, 50)]


def instantiate(opts, implicit):
    feats, ureg, ireg, weight = opts
    return ImplicitMF(feats, reg=(ureg, ireg), weight=weight)


def default(implicit):
    return ImplicitMF(50)


update = None

sweep_points = [ImplicitMF(nf) for nf in range(25, 250, 25)]
sweep_attrs = ['features']
Example #31
def test_real():
    a = Real(1, 25)
    for i in range(50):
        r = a.rvs(random_state=i)
        check_limits(r, 1, 25)
        assert r in a

    random_values = a.rvs(random_state=0, n_samples=10)
    assert len(random_values) == 10
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    assert log_uniform != Real(10**-5, 10**5)
    for i in range(50):
        random_val = log_uniform.rvs(random_state=i)
        check_limits(random_val, 10**-5, 10**5)
    random_values = log_uniform.rvs(random_state=0, n_samples=10)
    assert len(random_values) == 10
    transformed_vals = log_uniform.transform(random_values)
    assert_array_equal(transformed_vals, np.log10(random_values))
    assert_array_equal(log_uniform.inverse_transform(transformed_vals),
                       random_values)