Example No. 1
    def __init__(self, low, high, prior="uniform", transform=None):
        """Search space dimension that can take on any real value.

        Parameters
        ----------
        * `low` [float]:
            Lower bound (inclusive).

        * `high` [float]:
            Upper bound (exclusive).

        * `prior` ["uniform" or "log-uniform", default="uniform"]:
            Distribution to use when sampling random points for this dimension.
            - If `"uniform"`, points are sampled uniformly between the lower
              and upper bounds.
            - If `"log-uniform"`, points are sampled uniformly between
              `log10(lower)` and `log10(upper)`.

        * `transform` ["identity", "normalize", optional]:
            The following transformations are supported.

            - "identity", (default) the transformed space is the same as the
              original space.
            - "normalize", the transformed space is scaled to be between
              0 and 1.
        """
        self.low = low
        self.high = high
        self.prior = prior

        if transform is None:
            transform = "identity"

        self.transform_ = transform

        if self.transform_ not in ["normalize", "identity"]:
            raise ValueError(
                "transform should be 'normalize' or 'identity' got %s" %
                self.transform_)

        # Define _rvs and transformer spaces.
        # XXX: The _rvs is for sampling in the transformed space.
        # The rvs on Dimension calls inverse_transform on the points sampled
        # using _rvs
        if self.transform_ == "normalize":
            self._rvs = uniform(0, 1)
            if self.prior == "uniform":
                self.transformer = Pipeline([Identity(), Normalize(low, high)])
            else:
                self.transformer = Pipeline(
                    [Log10(),
                     Normalize(np.log10(low), np.log10(high))])
        else:
            if self.prior == "uniform":
                self._rvs = uniform(self.low, self.high - self.low)
                self.transformer = Identity()
            else:
                self._rvs = uniform(np.log10(self.low),
                                    np.log10(self.high) - np.log10(self.low))
                self.transformer = Log10()
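A note that applies to this and most examples below: scipy.stats.uniform(loc, scale) is parameterized by the lower bound and the interval width, not by (low, high). A minimal standalone sketch of the convention (names are illustrative):

from scipy.stats import uniform

low, high = 2.0, 5.0
# scale is the width of the interval, so pass high - low, not high
dist = uniform(loc=low, scale=high - low)
samples = dist.rvs(size=1000, random_state=0)
assert low <= samples.min() and samples.max() <= high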
Example No. 2
    def __init__(self, low, high, prior="uniform"):
        """Search space dimension that can take on any real value.

        Parameters
        ----------
        * `low` [float]:
            Lower bound (inclusive).

        * `high` [float]:
            Upper bound (exclusive).

        * `prior` ["uniform" or "log-uniform", default="uniform"]:
            Distribution to use when sampling random points for this dimension.
            - If `"uniform"`, points are sampled uniformly between the lower
              and upper bounds.
            - If `"log-uniform"`, points are sampled uniformly between
              `log10(lower)` and `log10(upper)`.
        """
        self._low = low
        self._high = high
        self.prior = prior

        if prior == "uniform":
            self._rvs = uniform(self._low, self._high - self._low)
            self.transformer = _Identity()

        elif prior == "log-uniform":
            self._rvs = uniform(np.log10(self._low),
                                np.log10(self._high) - np.log10(self._low))
            self.transformer = _Log10()

        else:
            raise ValueError(
                "Prior should be either 'uniform' or 'log-uniform', "
                "got '%s'." % self._rvs)
Example No. 3
def random_search():
    from scipy.stats.distributions import uniform
    from sklearn.model_selection import ParameterSampler
    param_grid = {
        'alpha': uniform(0.1,
                         1.5),  # np.linspace(0.1, 1, 10), range(1, 10, 2), #
        'beta': uniform(0.1, 1.5),  # np.linspace(0.05, 0.25, 5),[0.05], #
    }
    param_list = list(ParameterSampler(param_grid, n_iter=20))
    return [dict((k, round(v, 1)) for (k, v) in d.items()) for d in param_list]
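ParameterSampler draws n_iter parameter dicts, calling .rvs() on scipy distributions and choosing uniformly from lists. A self-contained sketch of the same pattern as above:

from scipy.stats.distributions import uniform
from sklearn.model_selection import ParameterSampler

param_grid = {'alpha': uniform(0.1, 1.5), 'beta': uniform(0.1, 1.5)}
for params in ParameterSampler(param_grid, n_iter=3, random_state=0):
    print(params)  # one dict per iteration, e.g. {'alpha': ..., 'beta': ...}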
Example No. 4
def bm25_parameter_space(n_trials):
    rng = np.random.RandomState(42)

    return ParameterSampler(
        dict(tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
             idf_method=["smooth", "probabilistic"],
             drop_stopwords=[True, False],
             drop_suffix=[True, False],
             drop_punct=[True, False],
             lowercase=[True, False],
             k1=uniform(1.2, 2.0),
             b=uniform(0.5, 0.8),
             delta=uniform(0, 2)),
        n_iter=n_trials, random_state=rng)
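Note that under scipy's (loc, scale) convention, k1=uniform(1.2, 2.0) spans [1.2, 3.2] and b=uniform(0.5, 0.8) spans [0.5, 1.3]. The returned sampler is iterated like any other; a usage sketch, assuming the function above is importable:

for trial, params in enumerate(bm25_parameter_space(n_trials=2)):
    print(trial, params['k1'], params['b'])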
Example No. 5
def test_uniform():
    """
    Testing uniform distribution
    """
    np.random.seed(12)
    a = np.sqrt(3) * sigma
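    # note: under scipy's (loc, scale) convention uniform(-a, a) spans [-a, 0];
    # a true [-a, a] interval would be uniform(-a, 2 * a). The test remains
    # self-consistent because fapprox and ftrue use the same frozen dist.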
    data_uniform = uniform(-a, a).rvs(N)
    x = np.linspace(-vmax, vmax, 10000)
    fapprox = data_to_pdf(data_uniform, x)
    ftrue = uniform(-a, a).pdf(x)
    error = relative_L2_error(fapprox, ftrue, x)
    assert_almost_equal(error, 0, decimal=1)
Example No. 7
def random_search():
    param_grid = {
        'noise_factor_cafe': uniform(3, 1),
        'noise_factor_car': uniform(15, 2),
        'noise_factor_white': uniform(0.05, 0.02),
        'noise_file': [0, 1, 2],
        'speed_factor': uniform(0.8, 0.4),
    }
    param_list = list(ParameterSampler(param_grid, n_iter=10))
    return [
        dict((k, round(v, 4) if not isinstance(v, int) else v)
             for (k, v) in d.items()) for d in param_list
    ]
Example No. 8
    def __init__(self, low, high, prior="uniform", transform=None):
        """Search space dimension that can take on any real value.

        Parameters
        ----------
        * `low` [float]:
            Lower bound (inclusive).

        * `high` [float]:
            Upper bound (exclusive).

        * `prior` ["uniform" or "log-uniform", default="uniform"]:
            Distribution to use when sampling random points for this dimension.
            - If `"uniform"`, points are sampled uniformly between the lower
              and upper bounds.
            - If `"log-uniform"`, points are sampled uniformly between
              `log10(lower)` and `log10(upper)`.

        * `transform` [None or "normalize", optional]:
            If `transform=normalize`, calling `transform` on X scales X to
            [0, 1]
        """
        self.low = low
        self.high = high
        self.prior = prior
        self.transform_ = transform

        if self.transform_ and self.transform_ != "normalize":
            raise ValueError("transform should be normalize, got %s" %
                             self.transform_)

        # Define _rvs and transformer spaces.
        # XXX: The _rvs is for sampling in the transformed space.
        # The rvs on Dimension calls inverse_transform on the points sampled
        # using _rvs
        if self.transform_ == "normalize":
            self._rvs = uniform(0, 1)
            if self.prior == "uniform":
                self.transformer = Pipeline([Identity(), Normalize(low, high)])
            else:
                self.transformer = Pipeline(
                    [Log10(),
                     Normalize(np.log10(low), np.log10(high))])
        else:
            if self.prior == "uniform":
                self._rvs = uniform(self.low, self.high - self.low)
                self.transformer = Identity()
            else:
                self._rvs = uniform(np.log10(self.low),
                                    np.log10(self.high) - np.log10(self.low))
                self.transformer = Log10()
Example No. 9
def _xgboost_hyperband_model(task, numeric_features, categoric_features,
                             learning_rate):
    param_space = {
        'max_depth': randint(2, 11),
        'min_child_weight': randint(1, 11),
        'subsample': uniform(0.5, 0.5),
        'colsample_bytree': uniform(0.5, 0.5),
        'colsample_bylevel': uniform(0.5, 0.5),
        'gamma': uniform(0, 1),
        'reg_alpha': uniform(0, 1),
        'reg_lambda': uniform(0, 10),
        'base_score': uniform(0.1, 0.9),
        'scale_pos_weight': uniform(0.1, 9.9)
    }

    model = XGBClassifier(learning_rate=learning_rate) \
        if task == 'classification' else XGBRegressor(learning_rate=learning_rate)

    return make_pipeline(
        make_union(
            make_pipeline(ColumnsSelector(categoric_features), FillNaN('nan'),
                          ColumnApplier(TolerantLabelEncoder())),
            make_pipeline(ColumnsSelector(numeric_features),
                          Imputer(strategy='mean'), StandardScaler())),
        Hyperband(model, feat_space=param_space, task=task))
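scipy's randint(low, high) samples integers from [low, high), so randint(2, 11) yields max_depth values 2 through 10. A quick check:

from scipy.stats import randint
print(sorted(set(randint(2, 11).rvs(size=1000, random_state=0))))  # integers 2 through 10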
Example No. 10
    def add_set_value_random_uniform(self, variable, means, scale ):
        """ Add a 'Set Value' macro command where the value is chosen from a
        random uniform distribution.

        Parameters
        ----------
        variable: string
            An AnyScript variable or a list of AnyScript variables.
        means: int,float, numpy.ndarray
            The mean value of the random number
        scale: int, float, numpy.ndarray
            The width of the sampling interval [means - scale/2, means + scale/2]


        Examples
        --------
            Set variable across different macros

        >>> seed(1)
        >>> mg = MonteCarloMacroGenerator(number_of_macros=5)
        >>> mg.add_set_value_random_uniform('Main.Study.myvar', means = 2, scale = 0.1)
        >>> for line in mg.generate_macros(): pprint(line)
        ['classoperation Main.Study.myvar "Set Value" --value="2"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.99170220047"']
        ['classoperation Main.Study.myvar "Set Value" --value="2.02203244934"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.95001143748"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.98023325726"']

        """
        dist = distributions.uniform(means - scale / 2.0, scale)
        self.add_set_value_random(variable, dist)
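The (means, scale) pair above is converted to scipy's (loc, scale) so values span [means - scale/2, means + scale/2]. A quick check with the numbers from the docstring example:

from scipy.stats import distributions

dist = distributions.uniform(2 - 0.1 / 2.0, 0.1)
print(dist.ppf([0.0, 0.5, 1.0]))  # [1.95 2.   2.05]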
Example No. 11
def _compute_thresh(this_data, ch_type, cv=10):
    """ Compute the rejection threshold for one channel.

    Parameters
    ----------
    this_data: array (n_epochs, n_times)
        Data for one channel.
    ch_type: str
        'mag', 'grad' or 'eeg'.
    cv : iterator
        Iterator for cross-validation.
    """
    est = ChannelAutoReject()

    Limits = namedtuple('Limits', 'low high')
    limits = dict(eeg=Limits(low=20e-7, high=400e-6),
                  grad=Limits(low=400e-13, high=20000e-13),
                  mag=Limits(low=400e-15, high=20000e-15))

    # scipy's uniform takes (loc, scale), so the scale must be high - low
    param_dist = dict(thresh=uniform(limits[ch_type].low,
                                     limits[ch_type].high -
                                     limits[ch_type].low))
    rs = RandomizedSearchCV(est,  # XXX : is random really better than grid?
                            param_distributions=param_dist,
                            n_iter=20, cv=cv)
    rs.fit(this_data)
    best_thresh = rs.best_estimator_.thresh

    return best_thresh
Example No. 12
 def sampleLHS(self):
     sampledParams = lhs(2, samples=self.n_pf)
     lb = np.array([0, 0])
     width = np.array([self.gloEnv.maxRate, self.gloEnv.maxRate])
     for i in range(2):
         sampledParams[:, i] = uniform(loc=lb[i], scale=width[i]).ppf(sampledParams[:, i])
     return sampledParams
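lhs(...) here (from pyDOE) returns stratified samples in the unit hypercube; pushing them through a distribution's ppf (inverse CDF) converts them into stratified samples from that distribution. A minimal sketch of the pattern, assuming pyDOE is installed:

import numpy as np
from pyDOE import lhs
from scipy.stats.distributions import uniform

unit = lhs(2, samples=5)                     # stratified points in [0, 1]^2
scaled = uniform(loc=0, scale=10).ppf(unit)  # map each column to [0, 10]
print(scaled)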
Example No. 13
    def test_with_randomizedsearchcv(self):
        import numpy as np
        from scipy.stats.distributions import uniform
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import RandomizedSearchCV

        lr = LogisticRegression()
        ranges, cat_idx = lr.get_param_ranges()
        # specify parameters and distributions to sample from
        # the loguniform distribution needs to be taken care of properly
        param_dist = {
            "solver": ranges["solver"],
            "C": uniform(0.03125, np.log(32768))
        }
        # run randomized search
        n_iter_search = 5
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            random_search = RandomizedSearchCV(
                lr,
                param_distributions=param_dist,
                n_iter=n_iter_search,
                cv=5,
                scoring=make_scorer(accuracy_score),
            )
            iris = load_iris()
            random_search.fit(iris.data, iris.target)
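As the inline comment concedes, uniform(0.03125, np.log(32768)) is linear in C rather than log-uniform. SciPy >= 1.4 ships scipy.stats.loguniform, which samples a range log-uniformly; a hedged alternative for the "C" entry:

from scipy.stats import loguniform
param_dist["C"] = loguniform(0.03125, 32768)  # log-uniform over [0.03125, 32768]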
Example No. 14
    def __init__(self, low: int, high: int, transform=None, name=None):
        if (type(low) != int) or (type(high) != int):
            raise TypeError("low, high have to be int")

        if low % 2 != 0:
            raise ValueError("low has to be even int")

        if high % 2 != 0:
            raise ValueError("high has to be even int")

        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))
        super().__init__(low, high, transform=transform, name=name)

        if transform is None:
            transform = "identity"

        self.transform_ = transform

        if transform == "normalize":
            self._rvs = uniform(0, 1)
            self.transformer = EvenNormalize(low, high, is_int=True)
        else:
            self._rvs = randint(self.low // 2, self.high // 2 + 1)
            self.transformer = EvenIdentity()
Example No. 15
    def add_set_value_random_uniform(self, variable, means, scale):
        """ Add a 'Set Value' macro command where the value is chosen from a
        random uniform distribution.

        Parameters
        ----------
        variable: string
            An AnyScript variable or a list of AnyScript variables.
        means: int,float, numpy.ndarray
            The mean value of the random number
        scale: int, float, numpy.ndarray
            The width of the sampling interval [means - scale/2, means + scale/2]


        Examples
        --------
            Set variable across different macros

        >>> seed(1)
        >>> mg = MonteCarloMacroGenerator(number_of_macros=5)
        >>> mg.add_set_value_random_uniform('Main.Study.myvar', means = 2, scale = 0.1)
        >>> for line in mg.generate_macros(): pprint(line)
        ['classoperation Main.Study.myvar "Set Value" --value="2"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.99170220047"']
        ['classoperation Main.Study.myvar "Set Value" --value="2.02203244934"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.95001143748"']
        ['classoperation Main.Study.myvar "Set Value" --value="1.98023325726"']

        """
        dist = distributions.uniform(means - scale / 2.0, scale)
        self.add_set_value_random(variable, dist)
Example No. 16
def _compute_thresh(this_data, ch_type, cv=10):
    """ Compute the rejection threshold for one channel.

    Parameters
    ----------
    this_data: array (n_epochs, n_times)
        Data for one channel.
    ch_type: str
        'mag', 'grad' or 'eeg'.
    cv : iterator
        Iterator for cross-validation.
    """
    est = ChannelAutoReject()

    Limits = namedtuple('Limits', 'low high')
    limits = dict(eeg=Limits(low=20e-7, high=400e-6),
                  grad=Limits(low=400e-13, high=20000e-13),
                  mag=Limits(low=400e-15, high=20000e-15))

    # scipy's uniform takes (loc, scale), so the scale must be high - low
    param_dist = dict(
        thresh=uniform(limits[ch_type].low,
                       limits[ch_type].high - limits[ch_type].low))
    rs = RandomizedSearchCV(
        est,  # XXX : is random really better than grid?
        param_distributions=param_dist,
        n_iter=20,
        cv=cv)
    rs.fit(this_data)
    best_thresh = rs.best_estimator_.thresh

    return best_thresh
Example No. 17
 def test_with_randomizedsearchcv(self):
     from sklearn.model_selection import RandomizedSearchCV
     from sklearn.datasets import load_iris
     from sklearn.metrics import accuracy_score, make_scorer
     from scipy.stats.distributions import uniform
     import numpy as np
     lr = LogisticRegression()
     parameters = {'solver': ('liblinear', 'lbfgs'), 'penalty': ['l2']}
     ranges, cat_idx = lr.get_param_ranges()
     min_C, max_C, default_C = ranges['C']
     # specify parameters and distributions to sample from
     #the loguniform distribution needs to be taken care of properly
     param_dist = {
         "solver": ranges['solver'],
         "C": uniform(min_C, np.log(max_C))
     }
     # run randomized search
     n_iter_search = 5
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         random_search = RandomizedSearchCV(
             lr,
             param_distributions=param_dist,
             n_iter=n_iter_search,
             cv=5,
             scoring=make_scorer(accuracy_score))
         iris = load_iris()
         random_search.fit(iris.data, iris.target)
Example No. 18
def LHS(n, loc, upc, dist):
    """
    Latin hypercube sampling.

    Parameters:
        n:    integer; size of desired sampling
        loc:  scalar; lower bound of desired distribution
        upc:  scalar; upper bound of desired distribution
        dist: string; either 'uniform' or 'normal'
    Returns:
        lhs: 1D array
    """

    lower_limits = np.arange(0, n) / n
    higher_limits = np.arange(1, n + 1) / n

    points = np.random.uniform(low=lower_limits, high=higher_limits, size=n)
    np.random.shuffle(points)

    scale = upc - loc
    if dist == 'uniform':
        rv = distributions.uniform(loc=loc, scale=scale)
    elif dist == 'normal':
        rv = distributions.norm(loc=loc, scale=scale)
    else:
        raise ValueError("dist must be 'uniform' or 'normal', got %r" % dist)

    lhs = rv.ppf(points)

    return lhs
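A usage sketch for LHS above (np and distributions as imported by the surrounding module):

samples = LHS(n=10, loc=0.0, upc=5.0, dist='uniform')
print(np.sort(samples))  # 10 stratified draws covering [0, 5]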
Example No. 19
def init_distributions(pkey, kind='dpm', nrvs=25, tb=.65, force_normal=False):
    """ sample random parameter sets to explore global minima (called by
    Optimizer method __hop_around__())
    """
    loc, scale = get_theta_params(pkey, kind=kind)
    bounds = get_bounds(kind=kind)[pkey]
    lower = np.min(bounds)
    upper = np.max(bounds)
    normal_params = ['a', 'tr', 'v', 'vd', 'ssv', 'sso', 'xb', 'z', 'Beta']
    uniform_params = ['vi', 'BX', 'AX', 'PX', 'si']

    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    # elif pkey in gamma_params:
    #     dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)
    else:
        raise ValueError("unknown parameter key: %s" % pkey)
    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < lower:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > upper:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey =='tr':
        rvinits = np.abs(rvinits)
    rvinits[rvinits<lower] = lower
    rvinits[rvinits>upper] = upper
    return rvinits
Example No. 20
def UniformPrior(low=0., high=1.):
    """
    Constant prior over a finite range.

    low, high : min, max of range

    """
    return Prior(distributions.uniform(loc=low, scale=(high-low)))
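loc/scale are chosen so the support is exactly [low, high]; the underlying frozen distribution behaves as expected:

from scipy.stats import distributions

d = distributions.uniform(loc=-1.0, scale=2.0)  # support [-1, 1]
print(d.pdf([-1.5, 0.0, 1.5]))                  # [0.  0.5 0. ]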
Example No. 21
def gen_sample(loc, scale, sample, distribution_type):
    if distribution_type == NORMAL_DISTRIBUTION_TYPE:
        return norm(loc=loc, scale=scale).ppf(sample)
    elif distribution_type == UNIFORM_DISTRIBUTION_TYPE:
        return uniform(loc=loc, scale=scale).ppf(sample)
    else:
        raise ValueError(
            "Invalid distribution type: {}".format(distribution_type))
Example No. 22
 def rvs(self, size=1, random_state=None):
     uniform_values = uniform(loc=self.loc, scale=self.scale)
     exp_values = np.power(
         self.base, uniform_values.rvs(size=size,
                                       random_state=random_state))
     if len(exp_values) == 1:
         return exp_values[0]
     else:
         return exp_values
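This implements a log-uniform draw: sample an exponent uniformly on [loc, loc + scale], then raise base to it. A standalone sketch with illustrative values:

import numpy as np
from scipy.stats.distributions import uniform

base, lo_exp, hi_exp = 10, -4, 0                 # values span [1e-4, 1]
exps = uniform(lo_exp, hi_exp - lo_exp).rvs(size=5, random_state=0)
print(np.power(base, exps))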
Example No. 23
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"], "C": distributions.uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions, n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)
Example No. 24
def main():
    print('Program starting ...')

    args = arg_parser()
    torch.manual_seed(args.seed)
    train_loader, test_loader = mnist_loader(root=args.data,
                                             train_batch_size=args.batch_size,
                                             valid_batch_size=args.batch_size,
                                             train_shuffle=True,
                                             valid_shuffle=False)

    args.use_cuda = args.use_cuda and torch.cuda.is_available()

    model = Net(num_classes=10)
    checkpointer = CheckPoints(model, './data/checkpoint')
    checkpointer.load_checkpoint_from_filename('model-best.chkpt')
    if args.use_cuda:
        model.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                weight_decay=args.weight_decay,
                                momentum=args.momentum)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=0.1)
    trainer = Trainer(model, optimizer, train_loader, test_loader, args=args)

    # param_grid = {'lr': [0.01, 0.001, 0.0025, 0.005, 0.0075, 0.0001, 0.00001],
    #               'momentum': uniform(0.5, 0.45), 'weight_decay': [1e-3, 1e-5, 1e-7]}
    param_grid = {'lr': [1e-4, 1e-5, 1e-6],
                  'momentum': uniform(0., 0.2),
                  'weight_decay': [1e-5, 1e-7, 1e-8]}
    n_iters = 500
    param_list = list(ParameterSampler(param_grid, n_iter=n_iters))

    train_loss_epochs = np.zeros((n_iters,))
    train_acc_epochs = np.zeros((n_iters,))
    test_loss_epochs = np.zeros((n_iters,))
    test_acc_epochs = np.zeros((n_iters,))
    for i, param in enumerate(param_list):
        args.lr = param['lr']
        args.momentum = param['momentum']
        args.weight_decay = param['weight_decay']
        # build the optimizer before the trainer so the trainer actually uses it
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    weight_decay=args.weight_decay,
                                    momentum=args.momentum)
        trainer = Trainer(model, optimizer, train_loader, test_loader, args=args)
        # scheduler.step()
        train_loss, train_acc = trainer.train(i)
        train_loss_epochs[i] = train_loss
        train_acc_epochs[i] = train_acc
        trainer.print_msg('train', i, train_loss, train_acc)
        test_loss, test_acc = trainer.validate(i)
        test_loss_epochs[i] = test_loss
        test_acc_epochs[i] = test_acc
        trainer.print_msg('test', i, test_loss, test_acc)
        is_best = checkpointer.save_checkpoint(i, train_loss, train_acc,
                                               test_loss, test_acc,
                                               save_best=True)
        if not is_best:
            checkpointer.load_checkpoint_from_filename('model-best.chkpt')

    data = {'train_loss': train_loss_epochs, 'train_acc': train_acc_epochs,
            'test_loss': test_loss_epochs, 'test_acc': test_acc_epochs}
    torch.save(data, './data/Template/data.pt')
    print('Finished...')
Example No. 25
def _compute_thresh(this_data,
                    thresh_range,
                    method='bayesian_optimization',
                    cv=10,
                    random_state=None):
    """ Compute the rejection threshold for one channel.

    Parameters
    ----------
    this_data: array (n_epochs, n_times)
        Data for one channel.
    thresh_range : tuple
        The range (low, high) of thresholds over which to optimize.
    method : str
        'bayesian_optimization' or 'random_search'
    cv : iterator
        Iterator for cross-validation.
    random_state : int seed, RandomState instance, or None (default)
        The seed of the pseudo random number generator to use.

    Returns
    -------
    rs : instance of RandomizedSearchCV
        The RandomizedSearchCV object.

    Notes
    -----
    For method='random_search', the random_state parameter gives deterministic
    results only for scipy versions >= 0.16. This is why we recommend using
    autoreject with scipy version 0.16 or greater.
    """
    est = _ChannelAutoReject()

    if method == 'random_search':
        # scipy's uniform takes (loc, scale), so the scale must be high - low
        param_dist = dict(thresh=uniform(thresh_range[0],
                                         thresh_range[1] - thresh_range[0]))
        rs = RandomizedSearchCV(est,
                                param_distributions=param_dist,
                                n_iter=20,
                                cv=cv,
                                random_state=random_state)
        rs.fit(this_data)
    elif method == 'bayesian_optimization':
        from skopt import gp_minimize
        from sklearn.cross_validation import cross_val_score

        def objective(thresh):
            est.set_params(thresh=thresh)
            return -np.mean(cross_val_score(est, this_data, cv=cv))

        space = [(thresh_range[0], thresh_range[1])]
        rs = gp_minimize(objective,
                         space,
                         n_calls=50,
                         random_state=random_state)

    return rs
Example No. 26
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": distributions.uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)
Example No. 27
def _xgboost_gridsearch_model(
    task,
    numeric_features,
    categoric_features,
    learning_rate,
    use_dask,
    n_iter,
    scoring,
):
    param_space = {
        'clf__max_depth': randint(2, 11),
        'clf__min_child_weight': randint(1, 11),
        'clf__subsample': uniform(0.5, 0.5),
        'clf__colsample_bytree': uniform(0.5, 0.5),
        'clf__colsample_bylevel': uniform(0.5, 0.5),
        'clf__gamma': uniform(0, 1),
        'clf__reg_alpha': uniform(0, 1),
        'clf__reg_lambda': uniform(0, 10),
        'clf__base_score': uniform(0.1, 0.9),
        'clf__scale_pos_weight': uniform(0.1, 9.9),
    }

    model = (xgbsk.XGBClassifier(learning_rate=learning_rate)
             if task == 'classification' else xgbsk.XGBRegressor(
                 learning_rate=learning_rate))

    pipe = Pipeline([
        (
            'preprocessing',
            simple_proc_for_tree_algoritms(numeric_features,
                                           categoric_features),
        ),
        ('clf', model),
    ])

    if use_dask:
        from dask_ml.model_selection import RandomizedSearchCV

        return RandomizedSearchCV(pipe,
                                  param_space,
                                  n_iter=n_iter,
                                  scoring=scoring,
                                  cv=5)
    else:
        from sklearn.model_selection import RandomizedSearchCV

        return RandomizedSearchCV(pipe,
                                  param_space,
                                  n_iter=n_iter,
                                  scoring=scoring,
                                  cv=5)
Example No. 28
    def _make_distribution(self) -> rv_generic:
        """Build a distribution to randomly sample points within the space

        Returns
        -------
        rv_generic
            `uniform` distribution between 0 and 1 if :attr:`transform_` == "normalize". Else, a
            `randint` distribution between :attr:`low` and (:attr:`high` + 1)"""
        if self.transform_ == "normalize":
            return uniform(0, 1)
        else:
            return randint(self.low, self.high + 1)
Example No. 29
def _mlp_gridsearch_model(
    task,
    numeric_features,
    categoric_features,
    learning_rate,
    use_dask,
    n_iter,
    scoring,
):
    param_space = {
        'clf__hidden_layer_sizes': [
            (24, ),
            (12, 12),
            (6, 6, 6, 6),
            (4, 4, 4, 4, 4, 4),
            (12, 6, 3, 3),
        ],
        'clf__activation': ['relu', 'logistic', 'tanh'],
        'clf__batch_size': [16, 32, 64, 128, 256, 512],
        'clf__alpha': uniform(0.0001, 0.9),
        'clf__learning_rate': ['constant', 'adaptive'],
    }

    model = (MLPClassifier(learning_rate_init=learning_rate)
             if task == 'classification' else MLPRegressor(
                 learning_rate_init=learning_rate))

    pipe = Pipeline([
        (
            'preprocessing',
            simple_proc_for_linear_algoritms(numeric_features,
                                             categoric_features),
        ),
        ('clf', model),
    ])

    if use_dask:
        from dask_ml.model_selection import RandomizedSearchCV

        return RandomizedSearchCV(pipe,
                                  param_space,
                                  n_iter=n_iter,
                                  scoring=scoring,
                                  cv=5)
    else:
        from sklearn.model_selection import RandomizedSearchCV

        return RandomizedSearchCV(pipe,
                                  param_space,
                                  n_iter=n_iter,
                                  scoring=scoring,
                                  cv=5)
Example No. 30
def _lgbm_hyperband_model(task,
                          numeric_features,
                          categoric_features,
                          learning_rate=0.08):
    param_space = {
        'num_leaves': randint(3, 99),
        'max_depth': randint(2, 11),
        'subsample': uniform(0.5, 0.5),
        'colsample_bytree': uniform(0.5, 0.5),
        'reg_alpha': uniform(0, 1),
        'reg_lambda': uniform(0, 10),
        'max_bin': randint(100, 400),
        'min_child_weight': randint(1, 10),
        'min_child_samples': randint(1, 11)
    }

    model = ContinuableLGBMClassifier(learning_rate=learning_rate) \
        if task == 'classification' else ContinuableLGBMRegressor(learning_rate=learning_rate)

    return make_pipeline(
        simple_proc_for_tree_algoritms(numeric_features, categoric_features),
        Hyperband(model, feat_space=param_space, task=task))
Example No. 31
def lexrank_parameter_space(n_trials):
    rng = np.random.RandomState(42)

    return ParameterSampler(dict(
        tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
        idf_method=["smooth", "probabilistic"],
        drop_stopwords=[True, False],
        drop_suffix=[True, False],
        drop_punct=[True, False],
        lowercase=[True, False],
        threshold=uniform(0, 1)),
                            n_iter=n_trials,
                            random_state=rng)
Example No. 32
    def __init__(self, low, high, prior="uniform"):
        """Search space dimension that can take on any real value.

        Parameters
        ----------
        * `low` [float]:
            Lower bound (inclusive).

        * `high` [float]:
            Upper bound (exclusive).

        * `prior` ["uniform" or "log-uniform", default="uniform"]:
            Distribution to use when sampling random points for this dimension.
            - If `"uniform"`, points are sampled uniformly between the lower
              and upper bounds.
            - If `"log-uniform"`, points are sampled uniformly between
              `log10(lower)` and `log10(upper)`.
        """
        self._low = low
        self._high = high
        self.prior = prior

        if prior == "uniform":
            self._rvs = uniform(self._low, self._high - self._low)
            self.transformer = _Identity()

        elif prior == "log-uniform":
            self._rvs = uniform(
                np.log10(self._low),
                np.log10(self._high) - np.log10(self._low))
            self.transformer = _Log10()

        else:
            raise ValueError(
                "Prior should be either 'uniform' or 'log-uniform', "
                "got '%s'." % self._rvs)
Example No. 33
    def add_set_value_LHS_uniform(self, variable, loc, scale):
        """ Add a 'Set Value' macro command where the values are uniformly
        chosen from the interval [loc, loc + scale]
        using a Latin Hyper Cube sampler.

        Parameters
        ----------
        variable: string
            An AnyScript variable or a list of AnyScript variables.
        loc: int,float, numpy.ndarray
            The start of the interval for uniform sampling.
        scale: int, float, numpy.ndarray
            The width of the sample interval


        Examples
        --------
        Set variable across different macros

        >>> seed(1)
        >>> mg = LatinHyperCubeMacroGenerator(number_of_macros=8)
        >>> mg.add_set_value_LHS_uniform('Main.myvar1',1,2)
        >>> mg.add_set_value_LHS_uniform('Main.myvar2',10,10)
        >>> pprint( mg.generate_macros() )
        [['classoperation Main.myvar1 "Set Value" --value="2"',
          'classoperation Main.myvar2 "Set Value" --value="15"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.09919186856"',
          'classoperation Main.myvar2 "Set Value" --value="12.6154232435"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.79656505284"',
          'classoperation Main.myvar2 "Set Value" --value="15.6735209175"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.3547986286"',
          'classoperation Main.myvar2 "Set Value" --value="14.1819509088"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.5366889727"',
          'classoperation Main.myvar2 "Set Value" --value="10.9004056168"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.10425550118"',
          'classoperation Main.myvar2 "Set Value" --value="18.5976467955"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.55111306243"',
          'classoperation Main.myvar2 "Set Value" --value="19.5880843877"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.2500285937"',
          'classoperation Main.myvar2 "Set Value" --value="17.1065243755"']]

        """
        if isinstance(loc, list):
            loc = np.array(loc)
        if isinstance(scale, list):
            scale = np.array(scale)
        dist = distributions.uniform(loc, scale)
        self.add_set_value_LHS(variable, dist)
Example No. 34
    def add_set_value_LHS_uniform(self, variable, loc, scale):
        """ Add a 'Set Value' macro command where the values are uniformly
        chosen from the interval [loc, loc + scale]
        using a Latin Hyper Cube sampler.

        Parameters
        ----------
        variable: string
            An AnyScript variable or a list of AnyScript variables.
        loc: int,float, numpy.ndarray
            The start of the interval for uniform sampling.
        scale: int, float, numpy.ndarray
            The width of the sample interval


        Examples
        --------
        Set variable across different macros

        >>> seed(1)
        >>> mg = LatinHyperCubeMacroGenerator(number_of_macros=8)
        >>> mg.add_set_value_LHS_uniform('Main.myvar1',1,2)
        >>> mg.add_set_value_LHS_uniform('Main.myvar2',10,10)
        >>> pprint( mg.generate_macros() )
        [['classoperation Main.myvar1 "Set Value" --value="2"',
          'classoperation Main.myvar2 "Set Value" --value="15"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.09919186856"',
          'classoperation Main.myvar2 "Set Value" --value="12.6154232435"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.79656505284"',
          'classoperation Main.myvar2 "Set Value" --value="15.6735209175"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.3547986286"',
          'classoperation Main.myvar2 "Set Value" --value="14.1819509088"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.5366889727"',
          'classoperation Main.myvar2 "Set Value" --value="10.9004056168"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.10425550118"',
          'classoperation Main.myvar2 "Set Value" --value="18.5976467955"'],
         ['classoperation Main.myvar1 "Set Value" --value="2.55111306243"',
          'classoperation Main.myvar2 "Set Value" --value="19.5880843877"'],
         ['classoperation Main.myvar1 "Set Value" --value="1.2500285937"',
          'classoperation Main.myvar2 "Set Value" --value="17.1065243755"']]

        """
        if isinstance(loc, list):
            loc = np.array(loc)
        if isinstance(scale, list):
            scale = np.array(scale)
        dist = distributions.uniform(loc, scale)
        self.add_set_value_LHS(variable, dist)
Example No. 35
 def add_set_value_LHS_uniform(self, variable, means, scale):
     """ Add a 'Set Value' macro command where the values are uniformly
     chosen using Latin Hyper Cube Sampling.
     
     Parameters
     ----------
     variable: string
         An AnyScript variable or a list of AnyScript variables. 
     means: int,float, numpy.ndarray
         The mean value of the sampled space
      scale: int, float, numpy.ndarray
          The width of the interval [means - scale/2, means + scale/2]

      Examples
      --------
         Set variable across different macros
         
     >>> seed(1)
     >>> mg = LatinHyperCubeMacroGenerator(number_of_macros=8)
     >>> mg.add_set_value_LHS_uniform('Main.myvar1',1,2)
     >>> mg.add_set_value_LHS_uniform('Main.myvar2',10,10)
     >>> pprint( mg.generate_macros() )
     [['classoperation Main.myvar1 "Set Value" --value="1"',
       'classoperation Main.myvar2 "Set Value" --value="10"'],
      ['classoperation Main.myvar1 "Set Value" --value="{1.09919186856}"',
       'classoperation Main.myvar2 "Set Value" --value="{7.61542324346}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{0.796565052844}"',
       'classoperation Main.myvar2 "Set Value" --value="{10.6735209175}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{1.3547986286}"',
       'classoperation Main.myvar2 "Set Value" --value="{9.1819509088}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{0.536688972704}"',
       'classoperation Main.myvar2 "Set Value" --value="{5.9004056168}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{0.104255501176}"',
       'classoperation Main.myvar2 "Set Value" --value="{13.5976467955}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{1.55111306243}"',
       'classoperation Main.myvar2 "Set Value" --value="{14.5880843877}"'],
      ['classoperation Main.myvar1 "Set Value" --value="{0.250028593704}"',
       'classoperation Main.myvar2 "Set Value" --value="{12.1065243755}"']]
         
     """        
     if isinstance(means,list):
         means = np.array(means)
     if isinstance(scale,list):
         scale = np.array(scale)
     dist = distributions.uniform(means-scale/2.0,scale)
     self.add_set_value_LHS(variable,dist)
Example No. 36
    def simulate_random_ics(self):
        """
        Randomize initial concentration parameters using latin hypercube sampling
        and run a time course

        """
        if self.from_pickle and os.path.isfile(self.pickle_file):
            return pd.read_pickle(self.pickle_file)

        ics = [
            i.replace('[', '').replace(']', '')
            for i in self.rr.getFloatingSpeciesConcentrationIds()
        ]

        original_ics = dict(
            zip(ics, self.rr.getFloatingSpeciesConcentrations()))
        sample = lhs(n=len(original_ics),
                     samples=self.n,
                     iterations=1,
                     criterion=None)
        # scipy's uniform takes (loc, scale), so the scale must be high - low
        sample = uniform(self.lower_bound,
                         self.upper_bound - self.lower_bound).ppf(sample)

        print('Simulating time series data')
        simulations = {}
        for i in range(sample.shape[0]):
            print('Percent Complete: {}%'.format(
                round(i / sample.shape[0] * 100, 2)))
            self.rr.reset()
            for j in range(sample.shape[1]):
                setattr(self.rr, ics[j], sample[i, j])
            data = self.rr.simulate(0, self.end_time,
                                    self.num_simulation_points)
            df = pd.DataFrame(data)
            df.columns = [
                i.replace('[', '').replace(']', '') for i in data.colnames
            ]
            simulations[i] = df.set_index('time')

        df = pd.concat(simulations)

        df.to_pickle(self.pickle_file)

        if self.subtract_ic_normalisation:
            df = self.normalise(df)
        return df
Example No. 37
    def generateLHS(hyperParamRange, nSamples):
        indexhash = {}  # for all hyperparameter
        lowBounds = []
        highBounds = []
        index = 0

        interestedHPlen = 0
        for key, eachHpRng in hyperParamRange.items():
            if isinstance(eachHpRng, Sequence) and eachHpRng[1] > eachHpRng[0]:
                lowBounds.append(eachHpRng[0])
                highBounds.append(eachHpRng[1])
                indexhash[key] = index
                interestedHPlen += 1
                index += 1

        design = lhs(interestedHPlen, samples=nSamples)

        for i in range(interestedHPlen):
            design[:, i] = uniform(loc=lowBounds[i],
                                   scale=highBounds[i] - lowBounds[i]).ppf(
                                       design[:, i])

        design = np.array(design)

        for key, eachHpRng in hyperParamRange.items():
            if not isinstance(eachHpRng, Sequence):
                design = np.concatenate(
                    (design, np.full((nSamples, 1), eachHpRng)), axis=1)
                indexhash[key] = index
                index += 1
            elif eachHpRng[1] <= eachHpRng[0]:
                design = np.concatenate(
                    (design, np.full((nSamples, 1), eachHpRng[0])), axis=1)
                indexhash[key] = index
                index += 1

        samples = Samples([])
        for point in design:
            hyperParam = HyperParameter({
                k: point[indexhash[k]]
                for k in hyperParamRange.get_param_names()
            })
            samples.append(Sample(hyperParam, 0.0, 0.0, 0.0))
        return samples
Example No. 38
    def __init__(self, low, high, transform=None, name=None):
        """Search space dimension that can take on integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive).

        * `transform` ["identity", "normalize", optional]:
            The following transformations are supported.

            - "identity", (default) the transformed space is the same as the
              original space.
            - "normalize", the transformed space is scaled to be between
              0 and 1.

        * `name` [str or None]:
            Name associated with dimension, e.g., "number of trees".
        """
        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))
        self.low = low
        self.high = high
        self.name = name

        if transform is None:
            transform = "identity"

        self.transform_ = transform

        if transform not in ["normalize", "identity"]:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))
        if transform == "normalize":
            self._rvs = uniform(0, 1)
            self.transformer = Normalize(low, high, is_int=True)
        else:
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
Example No. 39
def _xgboost_hyperband_model(task, numeric_features, categoric_features,
                             learning_rate):
    param_space = {
        'max_depth': randint(2, 11),
        'min_child_weight': randint(1, 11),
        'subsample': uniform(0.5, 0.5),
        'colsample_bytree': uniform(0.5, 0.5),
        'colsample_bylevel': uniform(0.5, 0.5),
        'gamma': uniform(0, 1),
        'reg_alpha': uniform(0, 1),
        'reg_lambda': uniform(0, 10),
        'base_score': uniform(0.1, 0.9),
        'scale_pos_weight': uniform(0.1, 9.9)
    }

    model = ContinuableXGBClassifier(learning_rate=learning_rate) \
        if task == 'classification' else ContinuableXGBRegressor(learning_rate=learning_rate)

    return make_pipeline(
        simple_proc_for_tree_algoritms(numeric_features, categoric_features),
        Hyperband(model, feat_space=param_space, task=task))
Example No. 40
def _compute_thresh(this_data, thresh_range, cv=10):
    """ Compute the rejection threshold for one channel.

    Parameters
    ----------
    this_data: array (n_epochs, n_times)
        Data for one channel.
    cv : iterator
        Iterator for cross-validation.
    """
    est = _ChannelAutoReject()

    # scipy's uniform takes (loc, scale), so the scale must be high - low
    param_dist = dict(thresh=uniform(thresh_range[0],
                                     thresh_range[1] - thresh_range[0]))
    rs = RandomizedSearchCV(est,  # XXX : is random really better than grid?
                            param_distributions=param_dist,
                            n_iter=20, cv=cv)
    rs.fit(this_data)
    best_thresh = rs.best_estimator_.thresh

    return best_thresh
Example No. 41
def init_distributions(pkey, kind='dpm', mu = None, sigma = None, nrvs=25, tb=.65):
    """ sample random parameter sets to explore global minima (called by
    Optimizer method __hop_around__())
    """
    if mu is None:
        mu = {'a': .15, 'tr': .02, 'v': 1., 'ssv': -1., 'z': .1, 'xb': 1., 'sso': .15, 'vi': .35, 'vd': .5}
    if sigma is None:
        sigma = {'a': .35, 'tr': .25, 'v': .5, 'ssv': .5, 'z': .05, 'xb': .5, 'sso': .01, 'vi': .4, 'vd': .5}
    normal_params = ['tr', 'v', 'vd', 'ssv', 'z', 'xb', 'sso']
    gamma_params = ['a', 'tr']
    uniform_params = ['vd', 'vi']
    if 'race' in kind:
        sigma['ssv'] = abs(mu['ssv'])
    bounds = get_bounds(kind=kind)[pkey]
    loc = mu[pkey]
    scale = sigma[pkey]
    # init and freeze dist shape
    if pkey in normal_params:
        dist = norm(loc, scale)
    elif pkey in gamma_params:
        dist = gamma(1.0, loc, scale)
    elif pkey in uniform_params:
        dist = uniform(loc, scale)
    else:
        raise ValueError("unknown parameter key: %s" % pkey)
    # generate random variates
    rvinits = dist.rvs(nrvs)
    while rvinits.min() < bounds[0]:
        # apply lower limit
        ix = rvinits.argmin()
        rvinits[ix] = dist.rvs()
    while rvinits.max() > bounds[1]:
        # apply upper limit
        ix = rvinits.argmax()
        rvinits[ix] = dist.rvs()
    if pkey =='tr':
        rvinits = np.abs(rvinits)
    return rvinits
Example No. 42
    def __init__(self, low, high, transform=None):
        """Search space dimension that can take on integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive).

        * `transform` ["identity", "normalize", optional]:
            The following transformations are supported.

            - "identity", (default) the transformed space is the same as the
              original space.
            - "normalize", the transformed space is scaled to be between
              0 and 1.
        """
        self.low = low
        self.high = high

        if transform is None:
            transform = "identity"

        self.transform_ = transform

        if transform not in ["normalize", "identity"]:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))
        if transform == "normalize":
            self._rvs = uniform(0, 1)
            self.transformer = Normalize(low, high, is_int=True)
        else:
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
Example No. 43
 def __init__(self, min=0, max=1):
     self.min = min
     self.max = max
     self.default = (min + max) / 2.0
     self.D = D.uniform(min, max - min)
Example No. 44
def uniform(min=0, max=1):
    return D.uniform(min, max - min)
Example No. 45
def _compute_thresh(this_data, method='bayesian_optimization',
                    cv=10, random_state=None):
    """Compute the rejection threshold for one channel.

    Parameters
    ----------
    this_data: array (n_epochs, n_times)
        Data for one channel.
    method : str
        'bayesian_optimization' or 'random_search'
    cv : iterator
        Iterator for cross-validation.
    random_state : int seed, RandomState instance, or None (default)
        The seed of the pseudo random number generator to use.

    Returns
    -------
    best_thresh : float
        The best threshold.

    Notes
    -----
    For method='random_search', the random_state parameter gives deterministic
    results only for scipy versions >= 0.16. This is why we recommend using
    autoreject with scipy version 0.16 or greater.
    """
    est = _ChannelAutoReject()
    all_threshes = np.sort(np.ptp(this_data, axis=1))

    if method == 'random_search':
        # scipy's uniform takes (loc, scale), so the scale is the range width
        param_dist = dict(thresh=uniform(all_threshes[0],
                                         all_threshes[-1] - all_threshes[0]))
        rs = RandomizedSearchCV(est,
                                param_distributions=param_dist,
                                n_iter=20, cv=cv,
                                random_state=random_state)
        rs.fit(this_data)
        best_thresh = rs.best_estimator_.thresh
    elif method == 'bayesian_optimization':
        from sklearn.cross_validation import cross_val_score
        cache = dict()

        def func(thresh):
            idx = np.where(thresh - all_threshes >= 0)[0][-1]
            thresh = all_threshes[idx]
            if thresh not in cache:
                est.set_params(thresh=thresh)
                obj = -np.mean(cross_val_score(est, this_data, cv=cv))
                cache.update({thresh: obj})
            return cache[thresh]

        n_epochs = all_threshes.shape[0]
        idx = np.concatenate((
            np.linspace(0, n_epochs, 40, endpoint=False, dtype=int),
            [n_epochs - 1]))  # ensure last point is in init
        idx = np.unique(idx)  # linspace may be non-unique if n_epochs < 40
        initial_x = all_threshes[idx]
        best_thresh, _ = bayes_opt(func, initial_x,
                                   all_threshes,
                                   expected_improvement,
                                   max_iter=10, debug=False,
                                   random_state=random_state)

    return best_thresh
Example No. 46
from scipy.stats.distributions import norm
from scipy.stats.distributions import uniform
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import choice
from uuid import uuid4
from collections import defaultdict



y_pos_dist = norm(300, 10)

cluster_x_dists = {
    'A': uniform(0, 50),
    'B': uniform(30, 50),
    'C': uniform(60, 50)
}

cluster_sizes = {
    'A': 8,
    'B': 10,
    'C': 8
}

cluster_colors = {
    'A': 'r',
    'B': 'b',
    'C': 'g'
}
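These frozen distributions drive the synthetic data generation; drawing the cluster coordinates looks roughly like this (a sketch, not part of the original module):

xs = {k: d.rvs(size=cluster_sizes[k], random_state=0)
      for k, d in cluster_x_dists.items()}
ys = y_pos_dist.rvs(size=sum(cluster_sizes.values()), random_state=1)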
Example No. 47
def uniform(lower=0.0, upper=1.0):
    return dists.uniform(loc=lower, scale=upper-lower)
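Like the wrapper in Example No. 44, this converts a [lower, upper] pair into scipy's (loc, scale) parameterization. Usage sketch:

d = uniform(lower=3.0, upper=7.0)
print(d.mean())  # 5.0, i.e. the midpoint of [3, 7]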
Example No. 48
n_k = 5 # number of random variables

D = 26 * 1.0e-6 # m
A = ( D / 2.0 ) ** 2 * pi

# set the mean and standard deviation of the two random variables
la_mean, la_stdev = 0.0, 0.2
xi_mean, xi_stdev = 0.019027, 0.0022891
E_mean, E_stdev = 70.0e+9, 15.0e+9
th_mean, th_stdev = 0.0, 0.01
A_mean, A_stdev = A * 0.3, 0.7 * A
print(A_mean, A_mean + A_stdev)

# construct the distributions and get the methods
# for the evaluation of the probability density functions
g_la = uniform( loc = la_mean, scale = la_stdev )
g_xi = norm( loc = xi_mean, scale = xi_stdev )
g_E = uniform( loc = E_mean, scale = E_stdev )
g_th = uniform( loc = th_mean, scale = th_stdev )
g_A = uniform( loc = A_mean, scale = A_stdev )

# generate the grids for integration covering major part of the random domains
Theta_la = linspace( la_mean + 0.5 * la_stdev / n_int, la_mean + la_stdev - 0.5 * la_stdev / n_int, n_int )
delta = 8 * xi_stdev / n_int  # width of [xi_mean - 4*stdev, xi_mean + 4*stdev] split into n_int cells
Theta_xi = linspace( xi_mean - ( 4 * xi_stdev ) + 0.5 * delta, xi_mean + ( 4 * xi_stdev ) - 0.5 * delta, n_int )
Theta_E = linspace( E_mean + 0.5 * E_stdev / n_int, E_mean + E_stdev - 0.5 * E_stdev / n_int, n_int )
Theta_th = linspace( th_mean + 0.5 * th_stdev / n_int, th_mean + th_stdev - 0.5 * th_stdev / n_int, n_int )
Theta_A = linspace( A_mean + 0.5 * A_stdev / n_int, A_mean + A_stdev - 0.5 * A_stdev / n_int, n_int )
# LHS generate the grids for integration covering major part of the random domains
T_la = g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) )
T_xi = g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) )
Example No. 49
from time import clock
from scipy.interpolate import interp1d

from scipy.weave import inline, converters


n_int = 10 # number of discretization points
n_k = 2 # number of random variables 

# set the mean and standard deviation of the two random variables
la_mean, la_stdev = 0.0, 0.2
xi_mean, xi_stdev = 0.019027, 0.0022891

# construct the distributions and get the methods
# for the evaluation of the probability density functions
g_la = uniform( loc = la_mean, scale = la_stdev )
g_xi = weibull_min( 10., scale = 0.02 )

# generate the grids for integration covering major part of the random domains
Theta_la = linspace( la_mean + 0.5 * la_stdev / n_int, la_mean + la_stdev - 0.5 * la_stdev / n_int, n_int )
delta_xi = 8 * xi_stdev / n_int  # width of [xi_mean - 4*stdev, xi_mean + 4*stdev] split into n_int cells
Theta_xi = linspace( xi_mean - ( 4 * xi_stdev ) + 0.5 * delta_xi, xi_mean + ( 4 * xi_stdev ) - 0.5 * delta_xi, n_int )
# LHS generate the grids for integration covering major part of the random domains
T_la = g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) )
T_xi = g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) )
# MC generation
T_la_MC = g_la.rvs( n_int ** n_k )
T_xi_MC = g_xi.rvs( n_int ** n_k )
#T_la_MC = array( zip( *sorted( zip( random( n_int ** n_k ), g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ** n_k ) ) ) ) )[1] )
#T_xi_MC = array( zip( *sorted( zip( random( n_int ** n_k ), g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ** n_k ) ) ) ) )[1] )
print(diff(sort(g_la.cdf(g_la.rvs(10)))))
Example No. 50
def _uniform_inclusive(loc=0.0, scale=1.0):
    # like scipy.stats.distributions but inclusive of `high`
    # XXX scale + 1. might not actually be a float after scale if
    # XXX scale is very large.
    return uniform(loc=loc, scale=np.nextafter(scale, scale + 1.))
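np.nextafter(scale, scale + 1.) bumps the width to the next representable float, so the otherwise half-open support [loc, loc + scale) effectively includes the upper endpoint. A quick check:

import numpy as np

print(np.nextafter(1.0, 2.0))  # 1.0000000000000002
d = _uniform_inclusive(0.0, 1.0)
print(d.ppf(1.0))              # 1.0000000000000002, i.e. >= 1.0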
Example No. 51
epochs_gt = clean_by_interp(epochs)

picks = mne.pick_types(epochs.info, meg='grad', eeg=False, stim=False,
                       eog=False, include=include, exclude='bads')

X = epochs.get_data()
X_gt = epochs_gt.get_data()
X = np.concatenate((X, X_gt), axis=0)
np.random.seed(42)
cv = KFold(X.shape[0], 10, random_state=42)

low, high = 4e-13, 900e-13
best_threshes = np.zeros((len(picks), ))
for idx, pick in enumerate(picks):
    est = ChannelAutoReject()
    # scipy's uniform takes (loc, scale), so the scale must be high - low
    param_dist = dict(thresh=uniform(low, high - low))
    rs = RandomizedSearchCV(est,
                            param_distributions=param_dist,
                            n_iter=20, cv=cv)
    rs.fit(X[:, pick])
    best_thresh = rs.best_estimator_.thresh
    best_threshes[idx] = best_thresh

unit = r'fT/cm'
scaling = 1e13

plt.figure(figsize=(6, 5))
plt.tick_params(axis='x', which='both', bottom='off', top='off')
plt.tick_params(axis='y', which='both', left='off', right='off')

counts, bins, _ = plt.hist(scaling * best_threshes, 30, color='g', alpha=0.4)
Example No. 52

def RMSE(predicted, expected):
    return np.linalg.norm(predicted - expected) / np.sqrt(len(predicted))

distributions = [
    norm(),
    t(df=5),
    gamma(a=2),
    gamma(a=4),
    gamma(a=8),
    expon(scale=1/0.5),
    expon(scale=1/1),
    expon(scale=1/2),
    rayleigh(),
    uniform(),
]

errors = []

for distribution in distributions:
    parameters = [k + '=' + str(v) for k, v in distribution.kwds.items()]
    name = "{name}({parameters})".format(
        name=distribution.dist.name,
        parameters=', '.join(parameters)
        )
    l, lm, lt, m, ut, um, u = distribution.ppf([0.05, 0.2625, 0.342, 0.5, 0.658, 0.7375, 0.95])
    candidates = [
        between(l, u),
        between(l, m, u),
        between(l, lt, ut, u),
Example No. 53
def sample_pspace(model, param_list=None, bounds=None, samples=100, seed=None):
    """
    A DataFrame where each row represents a location in the parameter
    space, locations distributed to exercise the full range of values
    that each parameter can take on.

    This is useful for quick and dirty application of tests to a bunch
    of locations in the sample space. Kind-of a fuzz-testing for
    the model.

    Uses Latin hypercube sampling, with random values within
    the sample bins. The LHS sampler shuffles the bins each time,
    so a subsequent call will yield a different sample from the
    parameter space.

    When a variable has both upper and lower bounds, a uniform sample
    is drawn between those bounds.

    When a variable has only one bound, an exponential distribution is
    used, with its scale set to the difference between the bound and the
    current model value (1 if they are equal).

    When a variable has neither bound, a normal distribution is used,
    centered on the current model value, with scale equal to the absolute
    value of the model value (1 if that magnitude is 0).

    Parameters
    ----------
    model: pysd.Model object

    param_list: None or list of strings
        The real names of parameters to include in the explored parameter
        space.
        If None, uses all of the constants in the model except TIME STEP,
        INITIAL TIME, etc.

    bounds: DataFrame, string filename, or None
        A range test matrix as used for bounds checking.
        If None, creates one from the model.
        These bounds can also place artificial limits on the
        parameter space you want to explore, even if the theoretical
        bounds on the variable are infinite.

    samples: int
        Number of samples to include in the result.

    seed: int, optional
        Seed for the random number generator, for reproducible sampling.

    Returns
    -------
    lhs : pandas DataFrame
        Distribution-weighted Latin hypercube samples.

    Note
    ----
    Executes the model for one time step to get the current parameter values.

    """
    if param_list is None:
        doc = model.doc()
        param_list = sorted(list(set(doc[doc['Type'] == 'constant']['Real Name']) -
                            {'FINAL TIME', 'INITIAL TIME', 'TIME STEP'}))

    if isinstance(bounds, _pd.DataFrame):
        bounds = bounds.set_index('Real Name')
    elif bounds is None:
        bounds = create_bounds_test_matrix(model).set_index('Real Name')
    elif isinstance(bounds, str):
        if bounds.split('.')[-1] in ['xls', 'xlsx']:
            bounds = _pd.read_excel(bounds, sheet_name='Bounds', index_col='Real Name')
        elif bounds.split('.')[-1] == 'csv':
            bounds = _pd.read_csv(bounds, index_col='Real Name', encoding='UTF-8')
        elif bounds.split('.')[-1] == 'tab':
            bounds = _pd.read_csv(bounds, sep='\t', index_col='Real Name', encoding='UTF-8')
        else:
            raise ValueError('Unknown file type for bounds: %s' % bounds)
    else:
        raise ValueError('Unknown type for bounds: %s' % type(bounds))

    if seed is not None:
        _np.random.seed(seed)

    unit_lhs = _pd.DataFrame(_pyDOE.lhs(n=len(param_list), samples=samples),
                             columns=param_list)  # raw latin hypercube sample

    res = model.run(return_timestamps=[model.components.initial_time()])
    lhs = _pd.DataFrame(index=unit_lhs.index)
    for param in param_list:
        lower, upper = bounds[['Min', 'Max']].loc[param]
        value = res[param].iloc[0]

        if lower == upper:
            lhs[param] = lower

        elif _np.isfinite(lower) and _np.isfinite(upper):  # np.isfinite(0)==True
            scale = upper - lower
            lhs[param] = _dist.uniform(lower, scale).ppf(unit_lhs[param])

        elif _np.isfinite(lower) and _np.isinf(upper):
            if lower == value:
                scale = 1
            else:
                scale = value - lower
            lhs[param] = _dist.expon(lower, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isfinite(upper):  # np.isinf(-np.inf)==True
            if upper == value:
                scale = 1
            else:
                scale = upper - value
            lhs[param] = upper - _dist.expon(0, scale).ppf(unit_lhs[param])

        elif _np.isinf(lower) and _np.isinf(upper):  # np.isinf(-np.inf)==True
            if value == 0:
                scale = 1
            else:
                scale = abs(value)
            lhs[param] = _dist.norm(value, scale).ppf(unit_lhs[param])

        else:
            raise ValueError('Problem with lower: %s or upper: %s bounds' % (lower, upper))

    return lhs
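A usage sketch follows; the model file name is hypothetical, and `sample_pspace` is assumed to be available as defined above:

import pysd

model = pysd.read_vensim('teacup.mdl')  # hypothetical Vensim model file

# 50 Latin hypercube samples over all constant parameters, reproducible
# through the seed; one column per parameter, one row per sample
samples = sample_pspace(model, samples=50, seed=42)
print(samples.head())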
Example #54
def design_lhs_exp(variables, maps, offsets=None, samples=int(1e4), project_linear=True):
    """ Design an LHS experiment """

    # `lhs` here is pyDOE's Latin hypercube sampler; `loguni_ppf` (used
    # below) is a log-uniform inverse CDF assumed defined elsewhere in
    # the original source
    design = lhs(len(variables), samples=samples, criterion="m", iterations=100)
    z_design = np.zeros_like(design)

    print "Computing LHS design..."
    if project_linear:
        print "   using linear re-projection for log variables"
    else:
        print "   using original variable coordinate"
    for i, v in enumerate(variables):
        dist, a, b = v[3]

        if project_linear:  # Re-sample in linear space
            if v[0].startswith("ln"):
                ## 9/4/2014
                ## This is an experimental correction to re-project the
                ## logarithmic variables into their normal coordinate
                ## system. It should only affect the sampling, and hopefully
                ## improve it by forcing it to even things out over the
                ## actual range we care about
                a = np.exp(a)
                b = np.exp(b)
                if offsets is not None:
                    offsets[i] = np.exp(offsets[i])

            elif v[0].startswith("log"):
                ## 10/26/2014
                ## In accordance with the above, but for log10 variables
                a = 10.0 ** a
                b = 10.0 ** b
                if offsets is not None:
                    offsets[i] = 10.0 ** offsets[i]

        if offsets is not None:
            ## These corrections with "offsets" re-center the interval
            ## so that the left endpoint is 0. I found that if arbitrary
            ## lower/upper limits were used, sometimes the PPF routines
            ## would really mess up when inverting the CDF.
            a, b = a - offsets[i], b - offsets[i]
        if dist == "uniform":
            design[:, i] = uniform(a, b).ppf(design[:, i])
        elif dist == "normal":
            design[:, i] = norm(a, b).ppf(design[:, i])
        elif dist == "loguniform":
            design[:, i] = loguni_ppf(design[:, i], a, b)
        else:
            raise ValueError("no dist defined for %s" % dist)

        if offsets is not None:
            ## Project back into the correct limits
            design[:, i] += offsets[i]
            a, b = a + offsets[i], b + offsets[i]

        if project_linear:
            if v[0].startswith("ln"):
                ## 9/4/2014
                ## Second half of correction
                a = np.log(a)
                b = np.log(b)
                design[:, i] = np.log(design[:, i])
            elif v[0].startswith("log"):
                ## 10/26/2014
                a = np.log10(a)
                b = np.log10(b)
                design[:, i] = np.log10(design[:, i])

        z_design[:, i] = maps[i](design[:, i], a, b)
    design = design.T  # in x-coords
    z_design = z_design.T

    return design, z_design
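A usage sketch under stated assumptions: only `v[0]` (the name) and `v[3]` (the `(dist, a, b)` spec) are read by the function, the middle tuple fields are unused placeholders here, and the normalizing `maps` are hypothetical:

import numpy as np

# scipy's (loc, scale) convention applies to the "uniform" parameters below
variables = [
    ("temperature", None, None, ("uniform", 250.0, 60.0)),  # support [250, 310]
    ("bias", None, None, ("normal", 0.0, 1.0)),             # mean 0, std 1
]

# hypothetical normalizers: shift by a, scale by b
maps = [lambda x, a, b: (x - a) / b for _ in variables]

design, z_design = design_lhs_exp(variables, maps, samples=200,
                                  project_linear=False)
print(design.shape, z_design.shape)  # (2, 200) each, after the transpose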