Example #1
def random_PolynomialLosses(dom, T, M, L, m_max, exponents, dist=uniform(), high_ratio=False):
    """ Creates T random L-Lipschitz PolynomialLossFunctions uniformly bounded
        (in dual norm) by M, with Lipschitz constant uniformly bounded by L.
        Here exponents is a (finite) set of possible exponents.
        This is a brute force implementation and horribly inefficient.
    """
    lossfuncs = []
    while len(lossfuncs) < T:
        if high_ratio:
            weights = np.ones(len(exponents))
        else:
            weights = np.linspace(1, 10, len(exponents))
        expon = [tuple(np.random.choice(exponents, size=dom.n, p=weights/np.sum(weights)))
                 for i in range(np.random.choice(np.arange(2,m_max)))]
        if high_ratio:
            coeffs = np.array([uniform(scale=np.max(expo)).rvs(1) for expo in expon]).flatten()
        else:
            coeffs = dist.rvs(len(expon))
        lossfunc = PolynomialLossFunction(dom, coeffs, expon)
        Ml, Ll = lossfunc.max(grad=True)
        ml = lossfunc.min()
        if (Ml-ml)>0:
            scaling = dist.rvs()*np.minimum(M/(Ml-ml), L/Ll)
        else:
            scaling = 1
        lossfuncs.append(scaling*(lossfunc + PolynomialLossFunction(dom, [-ml], [(0,)*dom.n])))
    return lossfuncs
Example #2
def fit_estimator(estimator, positive_data_matrix=None, negative_data_matrix=None, target=None, cv=10, n_jobs=-1, n_iter_search=40, random_state=1):
    # hyperparameter optimization
    param_dist = {"n_iter": randint(5, 100),
                  "power_t": uniform(0.1),
                  "alpha": uniform(1e-08, 1e-03),
                  "eta0": uniform(1e-03, 1),
                  "penalty": ["l1", "l2", "elasticnet"],
                  "learning_rate": ["invscaling", "constant", "optimal"]}
    scoring = 'roc_auc'
    random_search = RandomizedSearchCV(estimator,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       cv=cv,
                                       scoring=scoring,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       refit=True)
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    random_search.fit(X, y)

    logger.debug('\nClassifier:')
    logger.debug('%s' % random_search.best_estimator_)
    logger.debug('\nPredictive performance:')
    # assess the generalization capacity of the model via a 10-fold cross validation
    for scoring in ['accuracy', 'precision', 'recall', 'f1', 'average_precision', 'roc_auc']:
        scores = cross_validation.cross_val_score(random_search.best_estimator_, X, y, cv=cv, scoring=scoring, n_jobs=n_jobs)
        logger.debug('%20s: %.3f +- %.3f' % (scoring, np.mean(scores), np.std(scores)))

    return random_search.best_estimator_
Example #3
def train(XTrain, yTrain, testsize):
    XTrain = np.array(XTrain, dtype=float)
    yTrain = np.array(yTrain, dtype=float)
    params = {'C': uniform(1, 99),
              'gamma': uniform(0.01, 0.29),
              'kernel': ['rbf', 'poly']}
    kfold = cross_validation.KFold(len(XTrain), n_folds=4, shuffle=False)
    models = []
    for i in range(len(yTrain[0])):
        svr = svm.SVR()
        clf = grid_search.RandomizedSearchCV(svr, param_distributions=params, n_iter=30, cv=kfold,
                                             scoring='mean_squared_error', n_jobs=-1,verbose=1)
        clf.fit(transArray(XTrain), yTrain[:, i])
        models.append(clf.best_estimator_)
    yPredict = []
    XPredict = copy(XTrain[-1])
    for i in range(testsize):
        XPredict = np.delete(XPredict, 0, axis=0)
        XPredict = np.insert(XPredict, len(XPredict), yTrain[-1], axis=0)
        subyPredict = np.array([])
        for j in range(len(models)):
            models[j].fit(transArray(XTrain), yTrain[:, j])  # retrain the model at every step
            newPredict = models[j].predict([transRow(XPredict)])
            subyPredict = np.hstack((subyPredict, newPredict))
        XTrain = np.delete(XTrain, 0, axis=0)
        XTrain = np.insert(XTrain, len(XTrain), copy(XPredict), axis=0)
        yTrain = np.delete(yTrain, 0, axis=0)
        yTrain = np.insert(yTrain, len(yTrain), copy(subyPredict), axis=0)
        yPredict.append(copy(subyPredict[0]))
    return np.array(yPredict)
Example #4
def nb_params(text, names):
    tfidf_base_params = [('__tfidf__min_df', [2, 3]), 
                         ('__tfidf__max_df', ss.uniform(.8, .2)),
                         ('__tfidf__use_idf', [True, False]),
                         #('__tfidf__ngram_range', [(1,1), (2,2), (3,3)],
                        # ('__tfidf__norm', ['l1', None]),
                         ]

    params = {}
    for x in text:
        for param, dist in tfidf_base_params:
            if 'thomas' not in x and x.split('_')[1] in names:
                params['data__'+x+param] = dist

    for x in ['speaking_order']:#, 'text_advocate_petitioner', 'text_advocate_respondent']:
        for param, dist in tfidf_base_params:
            params['data__'+x+param] = dist

    params.update({'select__param': ss.uniform(0.05, .25)})

    params.update({#'predict__n_estimators':[5,10],
                   'predict__max_samples':ss.uniform(.8, .2),
                   'predict__max_features':ss.uniform(.5, .5)})

    return params
Example #5
def test_randomizedsearchcv_same_splits():
    """Ensure that all parameter combinations are tested on the same splits (we
    check their RMSE scores are the same once averaged over the splits, which
    should be enough). We use as much parallelism as possible."""

    data_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_file(data_file, reader=Reader('ml-100k'),
                                  rating_scale=(1, 5))
    kf = KFold(3, shuffle=True, random_state=4)

    # all RMSE should be the same (as param combinations are the same)
    param_distributions = {'n_epochs': [5], 'lr_all': uniform(.2, 0),
                           'reg_all': uniform(.4, 0), 'n_factors': [5],
                           'random_state': [0]}
    rs = RandomizedSearchCV(SVD, param_distributions, measures=['RMSE'], cv=kf,
                            n_jobs=1)
    rs.fit(data)

    rmse_scores = [m for m in rs.cv_results['mean_test_rmse']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # Note: actually, even when setting random_state=None in kf, the same folds
    # are used because we use product(param_comb, kf.split(...)). However, it's
    # needed to have the same folds when calling fit again:
    rs.fit(data)
    rmse_scores += [m for m in rs.cv_results['mean_test_rmse']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal
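The trick above hinges on scipy's loc/scale parameterization: uniform(.2, 0) has zero width, so every draw equals loc and all sampled parameter combinations coincide. A minimal standalone sketch of that behaviour (not part of the test suite):

from scipy.stats import uniform

degenerate = uniform(.2, 0)                   # support is [loc, loc + scale] = [.2, .2]
samples = degenerate.rvs(5, random_state=0)
assert (samples == .2).all()                  # every draw is exactly loc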
Example #6
def train(array, embedDim, interval):
    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, 1)
    kfold = cross_validation.KFold(len(XTrain), n_folds=5, shuffle=False)
    params = {'n_estimators': randint(20, 200),
              'loss': ['ls', 'lad', 'huber'],
              'learning_rate': uniform(0.01, 0.19),
              'subsample': uniform(0.5, 0.5),
              'max_depth': randint(1, 5),
              'min_samples_split': randint(1, 3),
              'min_samples_leaf': randint(1, 3),
              'max_features': randint(1, len(XTrain[0]))}
    bestModels = []
    for i in range(len(yTrain[0])):
        gbrt = GradientBoostingRegressor()
        clf = grid_search.RandomizedSearchCV(gbrt, param_distributions=params, n_iter=20,
                                             scoring='mean_squared_error', cv=kfold, n_jobs=-1)
        clf.fit(XTrain, yTrain[:, i])
        bestModels.append(clf.best_estimator_)

    for i in range(1, 12):
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, i)  # the model's prediction horizon grows each round
        XPredict = pp.makeXPredict(array, embedDim, interval, i)  # the prediction input grows accordingly
        subyPredict = []
        for j in range(len(yTrain[0])):
            bestModels[j].fit(XTrain, yTrain[:, j])
            subyPredict.append(bestModels[j].predict(XPredict))
        array = np.hstack((array, np.array(copy(subyPredict))))  # use one model's predictions as known data for training the next model
    yPredict = array[0, -65:-5]  # 66 days can be predicted in total; take the relevant slice
    return yPredict
Example #7
def main():
    #TODO Choose new random seed after testing
    np.random.seed(917)

    steps = 5
    eps = 0.25
    min_part = 10

    #stars = pickle.load(open('stars.pkl', 'rb'))
    stars = pickle.load(open('stars_trimmed.pkl', 'rb'))
    #obs = pickle.load(open('data.pkl', 'rb'))

    model = simple_model.MyModel(stars)
    model.set_prior([stats.uniform(0.5, 1.0),
                    stats.uniform(0, 1.0)])

    theta = (0.513265306122, 0.1)

    obs = model.generate_data(theta)
    model.set_data(obs)





    n_procs = [1, 2, 3, 4, 5, 6, 7, 8]

    start = time.time()
    OT = simple_abc.pmc_abc(model, obs, epsilon_0=eps, min_particles=min_part, steps=steps,
    						 target_epsilon=eps, parallel=False, plot=True)
    end = time.time()
    print('Serial took {}s'.format(end - start))
    out_pickle = open('simptest.pkl', 'wb')
    pickle.dump(OT, out_pickle)
    out_pickle.close()
Example #8
def f3TruncNormRVSnp(parameters):
    N = parameters['N']
    target = parameters['target']
    rv1 = ndarray(shape=(N,), dtype=float)
    rv2 = ndarray(shape=(N,), dtype=float)
    rv3 = ndarray(shape=(N,), dtype=float)

    # if parameters['ncpu']:
    #     ncpu = parameters['ncpu']
    # else:
    #     ncpu = mp.cpu_count()
    #
    # pool = mp.Pool(ncpu)
    # workers = []
    if not parameters['distribution']:
        print('No distribution set...abort')
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        a1, b1 = (parameters['min_intrv1'] - parameters['mu1']) / parameters['sigma1'], (parameters['max_intrv1'] - parameters['mu1']) / parameters['sigma1']
        a2, b2 = (parameters['min_intrv2'] - parameters['mu2']) / parameters['sigma2'], (parameters['max_intrv2'] - parameters['mu2']) / parameters['sigma2']
        a3, b3 = (parameters['min_intrv3'] - parameters['mu3']) / parameters['sigma3'], (parameters['max_intrv3'] - parameters['mu3']) / parameters['sigma3']
        rv1 = truncnorm(a1, b1, loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = truncnorm(a2, b2, loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = truncnorm(a3, b3, loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'norm':
        rv1 = norm(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = norm(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = norm(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'], loc=parameters['mu1'], scale=parameters['sigma1']).rvs(N)
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'], loc=parameters['mu2'], scale=parameters['sigma2']).rvs(N)
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'], loc=parameters['mu3'], scale=parameters['sigma3']).rvs(N)
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1']).rvs(N)
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2']).rvs(N)
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3']).rvs(N)
    else:
        print('Distribution not recognized...abort')
        exit(1)

    if parameters['scaling']:
        #scale the values of Qs in the allowed range such that sum(Q_i) = A
        r = ABS(parameters['Q1']) + ABS(parameters['Q2']) + ABS(parameters['Q3'])
        if r == 0.0:
            r = 1.

        # rounding the values, the sum could exceed A
        Q1 = ABS(parameters['Q1']) * parameters['A'] / r
        Q2 = ABS(parameters['Q2']) * parameters['A'] / r
        Q3 = parameters['A'] - Q1 - Q2
    else:
        # print "scaling = False"
        Q1 = parameters['Q1']
        Q2 = parameters['Q2']
        Q3 = parameters['Q3']

    return _f3(rv1, rv2, rv3, Q1, Q2, Q3, target)
Example #9
 def __setstate__(self, state):
     np.random.seed()
     self.__dict__ = state
     new_prior = [stats.uniform(**state['prior'][0]),
                  stats.uniform(**state['prior'][1]),
                  stats.uniform(**state['prior'][2]),
                  stats.uniform(**state['prior'][3])]
     self.__dict__['prior'] = new_prior
Example #10
def greedy_allocation3(parameters):
    """
    Greedy heuristic for 3 suppliers (the same as heu_allocation3 but with different parameters).
    Does not write to the file; returns the solution instead.
    :param parameters: parameters dict
    :return: dict with the allocation Q1, Q2, Q3 and the resulting probability
    """
    if not parameters['distribution']:
        print('No distribution set...abort')
        exit(1)
    elif parameters['distribution'] == 'truncnorm':
        rv1 = truncnorm_custom(parameters['min_intrv1'], parameters['max_intrv1'], parameters['mu1'], parameters['sigma1'])
        rv2 = truncnorm_custom(parameters['min_intrv2'], parameters['max_intrv2'], parameters['mu2'], parameters['sigma2'])
        rv3 = truncnorm_custom(parameters['min_intrv3'], parameters['max_intrv3'], parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'norm':
        rv1 = norm(parameters['mu1'], parameters['sigma1'])
        rv2 = norm(parameters['mu2'], parameters['sigma2'])
        rv3 = norm(parameters['mu3'], parameters['sigma3'])
    elif parameters['distribution'] == 'uniform':
        rv1 = uniform(loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = uniform(loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = uniform(loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'beta':
        rv1 = beta(a=parameters['min_intrv1'], b=parameters['max_intrv1'], loc=parameters['mu1'], scale=parameters['sigma1'])
        rv2 = beta(a=parameters['min_intrv2'], b=parameters['max_intrv2'], loc=parameters['mu2'], scale=parameters['sigma2'])
        rv3 = beta(a=parameters['min_intrv3'], b=parameters['max_intrv3'], loc=parameters['mu3'], scale=parameters['sigma3'])
    elif parameters['distribution'] == 'triang':
        rv1 = triang(loc=parameters['min_intrv1'], scale=parameters['max_intrv1'], c=parameters['mu1'])
        rv2 = triang(loc=parameters['min_intrv2'], scale=parameters['max_intrv2'], c=parameters['mu2'])
        rv3 = triang(loc=parameters['min_intrv3'], scale=parameters['max_intrv3'], c=parameters['mu3'])
    else:
        print('Distribution not recognized...abort')
        exit(1)

    A = parameters['A']
    Q = {i: 0 for i in range(3)}

    while A > 0:
        best_probability = -1
        best_retailer = -1
        for n, r in enumerate([rv1, rv2, rv3]):
            p = 1 - r.cdf(Q[n]+1)
            if p > best_probability:
                best_probability = p
                best_retailer = n

        Q[best_retailer] += 1
        A -= 1

    parameters['Q1'] = Q[0]
    parameters['Q2'] = Q[1]
    parameters['Q3'] = Q[2]

    return {'Q1': Q[0],
            'Q2': Q[1],
            'Q3': Q[2],
            'PROB': f3TruncNormRVSnp(parameters)}
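A hypothetical call might look as follows; the keys are inferred from the branches above and every number is invented, so treat this as a sketch rather than the module's actual configuration. The 'uniform' branch reads only the mu/sigma pairs, while N and target are forwarded to f3TruncNormRVSnp:

params = {
    'distribution': 'uniform',
    'mu1': 0.0, 'sigma1': 20.0,   # uniform branch: loc=mu, scale=sigma
    'mu2': 0.0, 'sigma2': 15.0,
    'mu3': 0.0, 'sigma3': 25.0,
    'A': 30,                      # total units to allocate one by one
    'N': 10000,                   # Monte Carlo sample size used downstream
    'target': 25.0,
    'scaling': False,
}
solution = greedy_allocation3(params)  # {'Q1': ..., 'Q2': ..., 'Q3': ..., 'PROB': ...}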
Example #11
def test_randomizedsearchcv_cv_results():
    """Test the cv_results attribute"""

    f = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_file(f, Reader('ml-100k'), rating_scale=(1, 5))
    kf = KFold(3, shuffle=True, random_state=4)
    param_distributions = {'n_epochs': [5], 'lr_all': uniform(.2, .3),
                           'reg_all': uniform(.4, .3), 'n_factors': [5],
                           'random_state': [0]}
    n_iter = 5
    rs = RandomizedSearchCV(SVD, param_distributions, n_iter=n_iter,
                            measures=['RMSE', 'mae'], cv=kf,
                            return_train_measures=True)
    rs.fit(data)

    # test keys split*_test_rmse, mean and std dev.
    assert rs.cv_results['split0_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['split1_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['split2_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['mean_test_rmse'].shape == (n_iter,)
    assert np.allclose(rs.cv_results['mean_test_rmse'],
                       np.mean([rs.cv_results['split0_test_rmse'],
                                rs.cv_results['split1_test_rmse'],
                                rs.cv_results['split2_test_rmse']], axis=0))
    assert np.allclose(rs.cv_results['std_test_rmse'],
                       np.std([rs.cv_results['split0_test_rmse'],
                               rs.cv_results['split1_test_rmse'],
                               rs.cv_results['split2_test_rmse']], axis=0))

    # test keys split*_train_rmse, mean and std dev.
    assert rs.cv_results['split0_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['split1_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['split2_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['mean_train_rmse'].shape == (n_iter,)
    assert np.allclose(rs.cv_results['mean_train_rmse'],
                       np.mean([rs.cv_results['split0_train_rmse'],
                                rs.cv_results['split1_train_rmse'],
                                rs.cv_results['split2_train_rmse']], axis=0))
    assert np.allclose(rs.cv_results['std_train_rmse'],
                       np.std([rs.cv_results['split0_train_rmse'],
                               rs.cv_results['split1_train_rmse'],
                               rs.cv_results['split2_train_rmse']], axis=0))

    # test fit and train times dimensions.
    assert rs.cv_results['mean_fit_time'].shape == (n_iter,)
    assert rs.cv_results['std_fit_time'].shape == (n_iter,)
    assert rs.cv_results['mean_test_time'].shape == (n_iter,)
    assert rs.cv_results['std_test_time'].shape == (n_iter,)

    assert rs.cv_results['params'] is rs.param_combinations

    # assert that best parameter in rs.cv_results['rank_test_measure'] is
    # indeed the best_param attribute.
    best_index = np.argmin(rs.cv_results['rank_test_rmse'])
    assert rs.cv_results['params'][best_index] == rs.best_params['rmse']
    best_index = np.argmin(rs.cv_results['rank_test_mae'])
    assert rs.cv_results['params'][best_index] == rs.best_params['mae']
Example #12
    def _random_pos(self, z_offset=0):
        """Random within-extent position generator.

        Returns:
            (tuple) of X,Y,Z uniform distributions.
        """
        return (stats.uniform(self.extent[0], self.extent[1]),
                stats.uniform(self.extent[2], self.extent[3]),
                stats.uniform(self.extent[4] + z_offset, self.extent[5]))
Example #13
def optimize_predictor(predictor = None, data_matrix = None, target = None, n_iter_search = 20, cv = 3, scoring = "roc_auc", n_jobs = -1):
	param_dist = {"n_iter": randint(5, 100),
		"power_t": uniform(0.1),
		"alpha": uniform(1e-08,1e-03),
		"eta0" : uniform(1e-03,10),
		"penalty": ["l1", "l2", "elasticnet"],
		"learning_rate": ["invscaling", "constant","optimal"]}
	optclf = RandomizedSearchCV(predictor, param_distributions = param_dist, n_iter = n_iter_search, cv = cv, scoring = scoring, refit = True, n_jobs = n_jobs)
	optclf.fit(data_matrix, target)
	return optclf.best_estimator_
Example #14
    def rvs(self, random_state=None):
        if random_state is None:
            exp = uniform(loc=self.lo, scale=self.scale).rvs()
        else:
            exp = uniform(loc=self.lo, scale=self.scale).rvs(random_state=random_state)

        if self.mass_on_zero > 0.0 and np.random.uniform() < self.mass_on_zero:
            return 0.0

        return self.base ** exp
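This rvs draws base**U with U uniform on [lo, lo + scale], returning 0.0 with probability mass_on_zero: a log-uniform with a point mass at zero, handy for sampling regularization strengths. A self-contained sketch of the same idea (the class name and defaults are assumptions, not the original):

import numpy as np
from scipy.stats import uniform

class LogUniformWithZero:
    def __init__(self, lo=-4.0, scale=4.0, base=10.0, mass_on_zero=0.1):
        self.lo, self.scale, self.base = lo, scale, base
        self.mass_on_zero = mass_on_zero

    def rvs(self, random_state=None):
        rng = np.random.default_rng(random_state)
        if self.mass_on_zero > 0.0 and rng.uniform() < self.mass_on_zero:
            return 0.0                                  # point mass at zero
        exp = uniform(loc=self.lo, scale=self.scale).rvs(random_state=rng)
        return self.base ** exp                         # log-uniform draw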
Example #15
    def test_ad_test(self):
        # Versus ad.test() from R goftest.
        result = ad_test((.1, .4, .7), uniform(0, 1))
        assert allclose(result, (.366028, .875957))
        result = ad_test((.1, .4, .7), norm(0, 1))
        assert allclose(result, (.921699, .390938))

        # Poles of the weight function.
        result = ad_test((0., .5), uniform(0, 1))
        assert allclose(result, (float('inf'), 0))
        result = ad_test((1., .5), uniform(0, 1))
        assert allclose(result, (float('inf'), 0))
Example #16
def train(XTrain, yTrain, XPredict):
    params = {'C': uniform(1, 999),
              'gamma': uniform(0.01, 0.29),
              'kernel': ['rbf', 'poly']}
    kfold = cross_validation.KFold(len(XTrain), n_folds=3, shuffle=False)
    svr = svm.SVR()
    clf = grid_search.RandomizedSearchCV(svr, param_distributions=params, n_iter=20,
                                         cv=kfold, scoring='mean_squared_error', n_jobs=-1)
    clf.fit(XTrain, yTrain)  # train the model in one go
    # print clf.best_score_, clf.best_estimator_
    yPredict = clf.predict(XPredict)
    return yPredict, clf.best_params_
Example #17
 def testUniform4(self):
     distV = uniform()
     distW = uniform(scale=2)
     v = 0.7
     w = 1.4
     expected = ([0,1],[0,1.2])
     bids = [v,w]
     distributions = [distV,distW]
     obtained = myersonAuction(bids,distributions)
     self.assertAlmostEqual(expected, obtained, 
                            msg="Myerson auction with inputs: " + str(bids) + ", " +
                                  str(distributions) + ". Expected " + str(expected) +
                                  " but obtained " + str(obtained) + ".")
Example #18
def test_legendre_consistency():
    import scipy.stats as stats
    dist = stats.uniform(-1, 1 - -1)
    p = LegendrePolynomials(normalised=False)
    _check_poly_consistency(p, dist)

    p = LegendrePolynomials(a=2, b=5, normalised=False)
    dist = stats.uniform(2, 5 - 2)
    _check_poly_consistency(p, dist)

    p = LegendrePolynomials(a= -2.5, b= -1.2, normalised=True)
    assert_equal(p.norm(4, False), 1)
    dist = stats.uniform(-2.5, -1.2 - -2.5)
    _check_poly_consistency(p, dist)
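A note on the idiom used here: scipy parameterizes uniform by loc and scale, with support [loc, loc + scale], so a uniform on [a, b] is written stats.uniform(a, b - a) — hence the deliberate 1 - -1 and 5 - 2 above. A quick standalone sanity check:

import scipy.stats as stats

a, b = -2.5, -1.2
dist = stats.uniform(a, b - a)        # uniform on [a, b]
lo, hi = dist.ppf(0), dist.ppf(1)     # endpoints of the support
assert abs(lo - a) < 1e-12 and abs(hi - b) < 1e-12
assert abs(dist.mean() - (a + b) / 2) < 1e-12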
Example #19
def train(XTrain, yTrain, XPredict):
    XTrain = np.array(XTrain, dtype=float)
    yTrain = np.array(yTrain, dtype=float)
    params = {'C': uniform(1, 999),
              'gamma': uniform(0.01, 0.29),
              'kernel': ['rbf', 'poly']}
    kfold = cross_validation.KFold(len(XTrain), n_folds=4, shuffle=False)
    svr = svm.SVR()
    clf = grid_search.RandomizedSearchCV(svr, param_distributions=params, n_iter=20,
                                         cv=kfold, scoring='mean_squared_error', n_jobs=-1)
    yPredict = []
    for i in range(yTrain.shape[1]):
        clf.fit(XTrain, yTrain[:, i])  # train one model per output column (distance models in total)
        yPredict.extend(clf.predict(XPredict))
    return np.array(yPredict)
Example #20
def generateToy():

  np.random.seed(12345)

  fig,ax = plt.subplots(4,sharex=True)
  #fig,ax = plt.subplots(2)

  powerlaw_arg = 2
  triang_arg=0.7
  n_samples = 500
  #generate simple line with slope 1, from 0 to 1
  frozen_powerlaw = powerlaw(powerlaw_arg) #powerlaw.pdf(x, a) = a * x**(a-1)
  #generate triangle with peak at 0.7
  frozen_triangle = triang(triang_arg) #up-sloping line from loc to (loc + c*scale) and then downsloping for (loc + c*scale) to (loc+scale).
  frozen_uniform = uniform(0.2,0.5)
  frozen_uniform2 = uniform(0.3,0.2)

  x = np.linspace(0,1)

  signal = np.random.normal(0.5, 0.1, n_samples//2)

  data_frame = pd.DataFrame({'powerlaw':powerlaw.rvs(powerlaw_arg,size=n_samples),
    'triangle':triang.rvs(triang_arg,size=n_samples),
    'uniform':np.concatenate((uniform.rvs(0.2,0.5,size=n_samples//2),uniform.rvs(0.3,0.2,size=n_samples//2))),
    'powerlaw_signal':np.concatenate((powerlaw.rvs(powerlaw_arg,size=n_samples//2),signal))})

  ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
  hist(data_frame['powerlaw'],bins=100,normed=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[0])
  #hist(data_frame['powerlaw'],bins='blocks',fitness='poly_events',normed=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[0])
  ax[0].legend(loc = 'best')

  ax[1].plot(x, frozen_triangle.pdf(x), 'k-', lw=2, label='triangle pdf')
  hist(data_frame['triangle'],bins=100,normed=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[1])
  hist(data_frame['triangle'],bins='blocks',fitness='poly_events',normed=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[1])
  ax[1].legend(loc = 'best')

  #ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
  hist(data_frame['powerlaw_signal'],bins=100,normed=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[2])
  #hist(data_frame['powerlaw_signal'],bins='blocks',normed=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[2])
  ax[2].legend(loc = 'best')

  ax[3].plot(x, frozen_uniform.pdf(x)+frozen_uniform2.pdf(x), 'k-', lw=2, label='uniform pdf')
  hist(data_frame['uniform'],bins=100,normed=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[3])
  #hist(data_frame['uniform'],bins='blocks',fitness = 'poly_events',p0=0.05,normed=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[3])
  ax[3].legend(loc = 'best')

  plt.show()
  fig.savefig('plots/toy_plots.png')
Example #21
    def unpack(self, relation_a_b_beginning, relation_a_b_ending):
        before_a_b_beginning, same_a_b_beginning, after_a_b_beginning = relation_a_b_beginning
        before_a_b_ending, same_a_b_ending, after_a_b_ending = relation_a_b_ending

        if almost_equals(before_a_b_beginning + same_a_b_beginning + same_a_b_ending + after_a_b_ending, 2.0, epsilon):
            return [], uniform_reference   # Inconsistent

        if almost_equals(before_a_b_beginning + before_a_b_ending, 2.0, epsilon):
            return [], uniform_reference

        if almost_equals(after_a_b_beginning + after_a_b_ending, 2.0, epsilon):
            return [], uniform_reference

        if almost_equals(before_a_b_ending, 1.0, epsilon):
            return self.unpack_partial(relation_a_b_beginning), uniform_reference

        if almost_equals(after_a_b_beginning, 1.0, epsilon):
            return self.unpack_partial(relation_a_b_ending), uniform_reference

        a_possibilities = self.unpack_partial(relation_a_b_beginning)
        if len(a_possibilities) == 1:
            a_possibility = a_possibilities[0]
        else:
            a_possibility_1, a_possibility_2 = a_possibilities
            a_possibility = a_possibility_1
            if a_possibility.args[1] < a_possibility_2.args[1]:
                a_possibility = a_possibility_2

        a_start_point, length_a = a_possibility.args

        if before_a_b_ending * same_a_b_ending * after_a_b_ending > 0:
            if almost_equals(before_a_b_beginning, 0, epsilon):
                length_b_ending = length_a * same_a_b_ending ** 2
            else:
                length_b_ending = same_a_b_ending ** 2 / same_a_b_beginning ** 2
            b_ending = uniform(a_start_point + before_a_b_ending * (length_a - length_b_ending) /
                               (before_a_b_ending + after_a_b_ending), length_b_ending)
            return [a_possibility], b_ending
        else:
            denominator = length_a * same_a_b_ending ** 2
            length_b_ending_lower_bound = (length_a * same_a_b_ending ** 2) ** 2 / denominator
            length_b_ending_upper_bound = (a_start_point + length_a - 1) ** 2 / denominator
            length_b_ending = (length_b_ending_lower_bound + length_b_ending_upper_bound) / 2.0
            b_start_point = a_start_point + length_a - same_a_b_ending * sqrt(length_b_ending * length_a)
            b_ending = uniform(b_start_point, length_b_ending)
            return [a_possibility], b_ending
Example #22
def test_random_grid():
    # get our train/test
    X_train, X_test, y_train, y_test = train_test_split(X, iris.target, train_size=0.75, random_state=42)

    # default CV does not shuffle, so we define our own
    custom_cv = KFold(n=y_train.shape[0], n_folds=5, shuffle=True, random_state=42)

    # build a pipeline
    pipe = Pipeline([
        ('retainer'    , FeatureRetainer()), # will retain all
        ('dropper'     , FeatureDropper()),  # won't drop any
        ('mapper'      , FunctionMapper()),  # pass through
        ('encoder'     , OneHotCategoricalEncoder()), # no object dtypes, so will pass through
        ('collinearity', MulticollinearityFilterer(threshold=0.85)),
        ('imputer'     , SelectiveImputer()), # pass through
        ('scaler'      , SelectiveScaler()),
        ('boxcox'      , BoxCoxTransformer()),
        ('nzv'         , NearZeroVarianceFilterer(threshold=1e-4)),
        ('pca'         , SelectivePCA(n_components=0.9)),
        ('model'       , RandomForestClassifier(n_jobs=1))
    ])

    # let's define a set of hyper-parameters over which to search
    hp = {
        'collinearity__threshold' : uniform(loc=.8, scale=.15),
        'collinearity__method'    : ['pearson','kendall','spearman'],
        'scaler__scaler'          : [StandardScaler(), RobustScaler()],
        'pca__n_components'       : uniform(loc=.75, scale=.2),
        'pca__whiten'             : [True, False],
        'model__n_estimators'     : randint(5,100),
        'model__max_depth'        : randint(2,25),
        'model__min_samples_leaf' : randint(1,15),
        'model__max_features'     : uniform(loc=.5, scale=.5),
        'model__max_leaf_nodes'   : randint(10,75)
    }

    # define the gridsearch
    search = RandomizedSearchCV(pipe, hp,
                                n_iter=2, # just to test it even works
                                scoring='accuracy',
                                cv=custom_cv,
                                random_state=42)

    # fit the search
    search.fit(X_train, y_train)

    # test the report
    the_report = report_grid_score_detail(search, charts=False)
Example #23
    def _lhs(self, dist, parms, siz=100):
        """
        Latin Hypercube sampling of any distribution.

        modified from code found `online <http://code.google.com/p/bayesian-inference/source/browse/trunk/BIP/Bayes/lhs.py?r=3cfbbaa5806f2b8cc9e2457d967b0a58a3ce459c>`_.
    
        :param dist: random number generator from `scipy.stats <http://docs.scipy.org/doc/scipy/reference/stats.html>`_
        :param parms: tuple of parameters as required for dist.
        :param siz: number or shape tuple for the output sample
    
        """
        if not isinstance(dist, (stats.rv_discrete, stats.rv_continuous)):
            raise TypeError("dist is not a scipy.stats distribution object")
        # number of samples
        n = siz
        if isinstance(siz, (tuple, list)):
            n = np.prod(siz)

        perc = np.arange(0, 1.0, 1.0 / n)
        np.random.shuffle(perc)
        smp = [stats.uniform(i, 1.0 / n).rvs() for i in perc]
        v = dist(*parms).ppf(smp)

        if isinstance(siz, (tuple, list)):
            v.shape = siz
        return v
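The stratification above is the whole of LHS: one uniform draw inside each of n equal-width slices of [0, 1), in shuffled order, pushed through the target's inverse CDF. A standalone sketch of the same logic as a plain function (assumes only numpy and scipy):

import numpy as np
from scipy import stats

def lhs_sample(dist, parms, n=100):
    edges = np.arange(0, 1.0, 1.0 / n)            # left edges of n equal strata
    np.random.shuffle(edges)
    u = edges + stats.uniform(0, 1.0 / n).rvs(n)  # one point inside each stratum
    return dist(*parms).ppf(u)                    # inverse-CDF transform

draws = lhs_sample(stats.norm, (0.0, 1.0), n=200)  # 200 stratified standard-normal draws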
Example #24
def make_theta_gen():
    """
    Generates prior for theta = angle between x axis and semi-MAJOR axis for the gaussian
    population. Returns angle in radians
    """
    # Will allow angle between 0 and 180 degrees (in radians)
    return stats.uniform(loc=0, scale=3.14159)
Example #25
def gillespie_logistique2(taille_ini, b1,b2,d1,d2,temps):
    """Another implementation of the Gillespie algorithm:
    the population size is only recorded at predefined time points"""
    taille = zeros(temps.size) # preallocate memory
    # initialise the current time and population size
    temps_courant, taille_courante = 0.0, taille_ini
    t_nais = (b1 + b2 * taille_courante) * taille_courante # birth rate
    t_mort = (d1 + d2 * taille_courante) * taille_courante # death rate
    tau    = t_nais + t_mort                               # total rate
    ee = expon()
    uu = uniform()
    delta_temps = ee.rvs() / tau
    for k in range(temps.size):
        # simulate without going past temps[k]
        while temps_courant + delta_temps < temps[k]:
            temps_courant += delta_temps  # update the current time
            if uu.rvs() < t_nais / tau:   # birth with probability t_nais / tau
                taille_courante += 1 # birth
            else:
                taille_courante -= 1 # death
            t_nais = (b1 + b2 * taille_courante) * taille_courante # birth rate
            t_mort = (d1 + d2 * taille_courante) * taille_courante # death rate
            tau    = t_nais + t_mort                               # total rate
            delta_temps = ee.rvs() / tau                           # sojourn time
        taille[k] = taille_courante
    return taille
Example #26
 def testUniformStd(self):
   with self.test_session():
     a = 10.0
     b = 100.0
     uniform = uniform_lib.Uniform(a=a, b=b)
     s_uniform = stats.uniform(loc=a, scale=b - a)
     self.assertAllClose(uniform.stddev().eval(), s_uniform.std())
Example #27
 def _rvs_helper(self):
   num_samples = 10000
   xs = gauss(0, 1).rvs((num_samples, 3))
   xs = divide(xs, reshape(norm(xs, 1), (num_samples, 1)))
   pvalues = self.pdf(xs, normalize=False)
   fmax = self.pdf_max(normalize=False)
   return xs[uniform(0, fmax).rvs(num_samples) < pvalues]
Example #28
def create_training_set ( parameters, minvals, maxvals, n_train=200 ):
    """Creates a training set for a set of parameters specified by 
    ``parameters`` (not actually used, but useful for debugging
    maybe). Parameters are assumed to be uniformly distributed
    between ``minvals`` and ``maxvals``. ``n_train`` input parameter
    sets will be produced, and returned with the actual distributions
    list. The latter is useful to create validation sets.

    Parameters
    -------------
    parameters: list
        A list of parameter names
    minvals: list
        The minimum value of the parameters. Same order as ``parameters``
    maxvals: list
        The maximum value of the parameters. Same order as ``parameters``
    n_train: int
        How many training points to produce

    Returns
    ---------
    The training set and a distributions object that can be used by
    ``create_validation_set``
    """

    distributions = []
    for i,p in enumerate(parameters):
        distributions.append ( ss.uniform ( loc=minvals[i], \
                            scale=(maxvals[i]-minvals[i] ) ) )
    samples = lhd ( dist=distributions, size=n_train )
    return samples, distributions
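A hypothetical call (the parameter names and ranges are invented, and the lhd helper is assumed to be whatever this module imports):

parameters = ['param_a', 'param_b']   # illustrative names only
minvals = [0.0, 10.0]
maxvals = [8.0, 80.0]
samples, dists = create_training_set(parameters, minvals, maxvals, n_train=100)
# samples should hold 100 input sets, one column per parameter; dists can then
# be reused to build a matching validation set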
Example #29
    def prior(self):
        """ Builds the list of prior distributions, one per key in
        self.ordered_keys.
        """

        priorz = []

        for key in self.ordered_keys:

            prior_key = self.prior_dict[key]

            if prior_key['shape'] == 'uniform':

                loc = prior_key['min']
                scale = prior_key['max'] - prior_key['min']

                priorz.append( uniform(loc, scale))

            elif prior_key['shape'] == 'gauss':
                loc = prior_key['mean']
                scale = prior_key['stddev']

                priorz.append( norm(loc, scale) )

            #else:
            #    raise ValueError("Not specified")

        return priorz
Example #30
def lincombo_hierregress_taubybeta(yy, stderrs, XX, maxtau=None, guess_range=False, draws=100):
    yy, stdvars, XX = helpers.check_arguments(yy, stderrs, XX)
    nummus = XX.shape[1]

    print("Sampling tau...")

    if maxtau is None:
        maxtau = pooling.estimated_maxlintau(yy, stderrs, XX)
        print("Using maximum tau =", maxtau)

    if maxtau[0] > 0:
        probability_prior_tau = uniform(0, maxtau)

        # Prepare to sample from p(tau | yy)

        # Create pdf for p(tau | yy)
        def pdf(tau):
            # Requires mus, but is invariant to them
            return probability_tau([np.mean(yy)] * nummus, tau, yy, stdvars, XX, probability_prior_tau)

        dist = ContinuousSampled(pdf, 2)
        if guess_range:
            mini, maxi = dist.guess_ranges([0, 0], maxtau, draws * 10)
        else:
            mini, maxi = 0, maxtau
        dist.prepare_draws(mini, maxi, count=draws)
    else:
        # maxtau == 0
        dist = MultivariateDelta(np.zeros(2))

    print("Sampling mus...")

    return sample_posterior(yy, stderrs, XX, dist, draws)
Example #31
    def __init__(self):
        self.prv = uniform(0, 1)

        self.qfx = np.vectorize(self.qf)
Example #32
net = NeuralNetClassifier(
    MLP,
    criterion=nn.CrossEntropyLoss,
    max_epochs=30,
    lr=0.1,
    module__input_size=11,
    module__num_classes=3,
    device='cuda'
)

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy.stats import uniform

params = {
    'net__lr': uniform(loc=0, scale=0.2),
    'net__module__hidden_size': randint(100, 1000),
    'net__optimizer__weight_decay': uniform(loc=0, scale=0.1),
    'net__batch_size': randint(10, 200)
}

model = Pipeline(steps=[("scaler",StandardScaler()), ("net",net)])

rs = RandomizedSearchCV(model, params, refit=True, cv=3, scoring='accuracy', n_iter=100, n_jobs=-1)

rs.fit(train_x, train_y)
print(rs.best_score_, rs.best_params_)

print(rs.score(test_x, test_y))
Example #33
from matplotlib import pyplot
from scipy.stats import norm, laplace, poisson, cauchy, uniform
import numpy

if __name__ == "__main__":

    sizes = [10, 50, 1000]
    densities = [
        norm(loc=0, scale=1),
        laplace(scale=1 / numpy.sqrt(2), loc=0),
        poisson(10),
        cauchy(),
        uniform(loc=-numpy.sqrt(3), scale=2 * numpy.sqrt(3))
    ]
    names = ["Normal", "Laplace", "Poisson", "Cauchy", "Uniform"]

    for size in sizes:
        n = norm.rvs(loc=0, scale=1, size=size)
        l = laplace.rvs(scale=1 / numpy.sqrt(2), loc=0, size=size)
        p = poisson.rvs(10, size=size)
        c = cauchy.rvs(size=size)
        u = uniform.rvs(loc=-numpy.sqrt(3), scale=2 * numpy.sqrt(3), size=size)
        distributions = [n, l, p, c, u]
        build = list(zip(distributions, densities, names))

        for histogram, density, name in build:
            fig, ax = pyplot.subplots(1, 1)
            ax.hist(histogram,
                    density=True,
                    histtype='stepfilled',
                    alpha=0.6)
Example #34
    def set_pipeline(self):

        if self.reference == 'a':

            if self.year == '2009':
                categorical_features_1 = [
                    'category_code', 'country_code', 'state_code',
                    'founded_at', 'timediff_founded_series_a',
                    'time_diff_series_a_now'
                ]

            elif self.year == '2014':
                categorical_features_1 = [
                    'category_code', 'country_code', 'state_code',
                    'founded_at', 'timediff_founded_series_a',
                    'time_diff_series_a_now'
                ]  #first use imputer /after ohe

            categorical_features_2 = [
                'participants_a', 'raised_amount_usd_a', 'rounds_before_a',
                'mean_comp_worked_before', 'founder_count', 'degree_count'
            ]  # impute first, after ordinals

            booleans_features = [
                'graduate', 'undergrad', 'professional', 'MBA_bool', 'cs_bool',
                'phd_bool', 'top_20_bool', 'mean_comp_founded_before',
                'female_ratio'
            ]  # ordinals/binaries

            #Defining imputers
            imputer = self.get_imputer()
            imputer_2 = SimpleImputer(strategy='most_frequent')

            #pipes for each feature

            pipe_1 = Pipeline([('imputer', imputer_2),
                               ('ohe', OneHotEncoder(handle_unknown='ignore'))
                               ])

            pipe_2 = Pipeline([('imputer_ord', imputer),
                               ('ord_encoder',
                                OneHotEncoder(handle_unknown='ignore'))])

            pipe_bool = Pipeline([('imputer_bool', imputer_2),
                                  ('ord_encoder',
                                   OneHotEncoder(handle_unknown='ignore'))])

            #process

            feateng_blocks = [('cat_ohe', pipe_1, categorical_features_1),
                              ('cat_ord', pipe_2, categorical_features_2),
                              ('cat_bool', pipe_bool, booleans_features)]

        elif self.reference == 0:

            if self.year == '2014':
                categorical_features_1 = [
                    'category_code', 'country_code', 'state_code',
                    'founded_at', 'timediff_founded_series_0',
                    'time_diff_series_0_now'
                ]  #first use imputer /after ohe

            categorical_features_2 = [
                'participants_0', 'raised_amount_usd_0', 'rounds_before_0',
                'mean_comp_worked_before', 'founder_count', 'degree_count'
            ]  # impute first, after ordinals

            booleans_features = [
                'graduate', 'undergrad', 'professional', 'MBA_bool', 'cs_bool',
                'phd_bool', 'top_20_bool', 'mean_comp_founded_before',
                'female_ratio'
            ]  # ordinals/binaries

            #Defining imputers
            imputer = self.get_imputer()
            imputer_2 = SimpleImputer(strategy='most_frequent')

            #pipes for each feature

            pipe_1 = Pipeline([('imputer', imputer_2),
                               ('ohe', OneHotEncoder(handle_unknown='ignore'))
                               ])

            pipe_2 = Pipeline([('imputer_ord', imputer),
                               ('ord_encoder',
                                OneHotEncoder(handle_unknown='ignore'))])

            pipe_bool = Pipeline([('imputer_bool', imputer_2),
                                  ('ord_encoder',
                                   OneHotEncoder(handle_unknown='ignore'))])
            #process

            feateng_blocks = [('cat_1', pipe_1, categorical_features_1),
                              ('cat_2', pipe_2, categorical_features_2),
                              ('cat_bool', pipe_bool, booleans_features)]

        #Columntransformer keeping order
        preprocessor = ColumnTransformer(feateng_blocks,
                                         remainder='passthrough')

        #final_pipeline
        self.pipeline = Pipeline(
            steps=[('preprocessing',
                    preprocessor), ('model_use', self.get_estimator())])

        if self.smote:

            smote = ADASYN(sampling_strategy='minority', n_neighbors=20)
            self.pipeline = Pipeline_imb([('prep', preprocessor),
                                          ('smote', smote),
                                          ('model_use', self.get_estimator())])

        # Random search
        if self.grid_search_choice:
            grid_search = RandomizedSearchCV(
                self.pipeline,
                param_distributions={
                    "model_use__learning_rate": uniform(0, 1),
                    "model_use__gamma": uniform(0, 2),
                    "model_use__max_depth": randint(1, 15),
                    "model_use__colsample_bytree": uniform(0.1, 0.9),
                    "model_use__subsample": [0.2, 0.4, 0.5],
                    "model_use__reg_alpha": uniform(0, 1),
                    "model_use__reg_lambda": uniform(1, 10),
                    "model_use__min_child_weight": randint(1, 10),
                    "model_use__n_estimators": randint(1000, 3000)
                },  #param depending of the model to use
                cv=35,
                scoring='f1',
                n_iter=10,
                n_jobs=-1)

            grid_search.fit(self.X_train, self.y_train)

            self.pipeline = grid_search.best_estimator_
            self.grid_params = grid_search.get_params

            self.set_tag('model_used', self.pipeline)
Example #35
def init_classifier(model="LDA"):
    if model == "LDA":
        clf = LinearDiscriminantAnalysis()
        distributions = dict()
    elif model == "KNN":
        clf = KNeighborsClassifier()
        distributions = dict(
            classifier__n_neighbors=np.arange(1, 16, 1),
            classifier__weights=["uniform", "distance"],
            classifier__metric=["minkowski", "euclidean", "manhattan"],
        )
    elif model == "SVM":
        clf = SVC(kernel="linear")
        distributions = {
            "classifier__C": [
                0.1,
                0.5,
                1,
                3,
                10,
                50,
                100,
                200,
                500,
                1000,
            ],  # uniform(loc=0, scale=100),
            "classifier__gamma": [5, 2, 1, 0.01, 0.001, 0.0001, 0.00001],
            "classifier__kernel":
            ["linear"],  # "rbf", "poly", "sigmoid", "linear"],
            "classifier__max_iter": [100, 500,
                                     1000],  # , 200, 300, 400, 500, 1000],
        }
    elif model == "DT":
        clf = DecisionTreeClassifier()
        distributions = dict(
            classifier__criterion=["gini", "entropy"],
            classifier__splitter=["best", "random"],
        )
    elif model == "LR":
        clf = LogisticRegression()
        distributions = dict(
            classifier__C=uniform(loc=0, scale=4),
            classifier__penalty=["l2", "l1", "elasticnet", "none"],
            classifier__solver=[
                "newton-cg", "lbfgs", "liblinear", "sag", "saga"
            ],
            classifier__multi_class=["auto", "ovr", "multinomial"],
            classifier__max_iter=[100, 200, 300, 400, 500, 1000],
        )
    elif model == "XGBC":
        clf = XGBClassifier()
        distributions = dict()
    elif model == "RF":
        clf = RandomForestClassifier()
        distributions = {
            "classifier__n_estimators": [10],  # try other values here
            "classifier__max_depth": [1, 2, 4, 8, 12, 16],
            "classifier__min_samples_split": [2, 4, 6, 8, 10, 12, 14, 16],
            "classifier__max_features": [0.25],
        }
    return clf, distributions
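The classifier__ prefixes above imply a pipeline whose estimator step is named classifier. A hypothetical way to wire it up with scikit-learn (a sketch; X and y are whatever training data you have):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV

clf, distributions = init_classifier("LR")
pipe = Pipeline([("scaler", StandardScaler()), ("classifier", clf)])
search = RandomizedSearchCV(pipe, distributions, n_iter=20, cv=5, n_jobs=-1)
# search.fit(X, y) then explores the distributions defined above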
Example #36
def augmented_data(
    f_sub=None,
    beta=None,
    f_sub_alt=None,
    beta_alt=None,
    f_sub_ref=None,
    beta_ref=None,
    f_sub_prior=uniform(0.001, 0.199),
    beta_prior=uniform(-2.5, 1.0),
    n_images=None,
    n_thetas_marginal=1000,
    draw_host_mass=True,
    draw_host_redshift=True,
    draw_alignment=True,
    mine_gold=True,
    calculate_dx_dm=False,
    return_dx_dm=False,
    roi_size=2.,
):
    """ Wraps around the population simulation, starts the simulation with parameters drawn from the prior and
    "mines the gold" appropriately """

    # Input
    if (f_sub is None or beta is None) and n_images is None:
        raise ValueError(
            "Either f_sub and beta or n_images have to be different from None")
    if n_images is None:
        n_images = len(f_sub)
    n_verbose = max(1, n_images // 100)

    # Hypothesis for sampling
    beta, f_sub = _draw_params(beta, beta_prior, f_sub, f_sub_prior, n_images)

    # Alternate hypothesis (test hypothesis when swapping num - den)
    beta_alt, f_sub_alt = _draw_params(beta_alt, beta_prior, f_sub_alt,
                                       f_sub_prior, n_images)

    # Reference hypothesis
    beta_ref, f_sub_ref = _draw_params(beta_alt, beta_prior, f_sub_alt,
                                       f_sub_prior, n_thetas_marginal - 1)
    params_ref = np.vstack((f_sub_ref, beta_ref)).T

    # Output
    all_params, all_params_alt, all_images = [], [], []
    all_t_xz, all_t_xz_alt, all_log_r_xz, all_log_r_xz_alt = [], [], [], []
    all_sub_latents, all_global_latents = [], []
    all_dx_dm = []

    # Main loop
    for i_sim in range(n_images):
        if (i_sim + 1) % n_verbose == 0:
            logger.info("Simulating image %s / %s", i_sim + 1, n_images)
        else:
            logger.debug("Simulating image %s / %s", i_sim + 1, n_images)

        # Prepare params
        this_f_sub = _pick_param(f_sub, i_sim, n_images)
        this_beta = _pick_param(beta, i_sim, n_images)
        this_f_sub_alt = _pick_param(f_sub_alt, i_sim, n_images)
        this_beta_alt = _pick_param(beta_alt, i_sim, n_images)

        params = np.asarray([this_f_sub, this_beta]).reshape((1, 2))
        params_alt = np.asarray([this_f_sub_alt, this_beta_alt]).reshape(
            (1, 2))
        params_eval = np.vstack(
            (params, params_alt, params_ref)) if mine_gold else None

        logger.debug("Numerator hypothesis: f_sub = %s, beta = %s", this_f_sub,
                     this_beta)

        if mine_gold:
            logger.debug("Evaluating joint log likelihood at %s", params_eval)

        # Simulate
        sim = LensingObservationWithSubhalos(
            m_200_min_sub=1.0e7 * M_s,
            m_200_max_sub_div_M_hst=0.01,
            m_min_calib=1.0e7 * M_s,
            m_max_sub_div_M_hst_calib=0.01,
            f_sub=this_f_sub,
            beta=this_beta,
            params_eval=params_eval,
            calculate_joint_score=mine_gold,
            draw_host_mass=draw_host_mass,
            draw_host_redshift=draw_host_redshift,
            draw_alignment=draw_alignment,
            calculate_msub_derivatives=calculate_dx_dm,
            roi_size=roi_size,
        )

        # Store information
        if calculate_dx_dm:
            sum_abs_dx_dm = np.sum(np.abs(sim.grad_msub_image).reshape(
                sim.grad_msub_image.shape[0], -1),
                                   axis=1)
            sub_latents = np.vstack(
                (sim.m_subs, sim.theta_xs, sim.theta_ys, sum_abs_dx_dm)).T
            if return_dx_dm:
                all_dx_dm.append(sim.grad_msub_image)
        else:
            sub_latents = np.vstack((sim.m_subs, sim.theta_xs, sim.theta_ys)).T
        global_latents = [
            sim.M_200_hst,  # Host mass
            sim.D_l,  # Host distance
            sim.z_l,  # Host redshift
            sim.sigma_v,  # sigma_V
            sim.theta_x_0,  # Source offset x
            sim.theta_y_0,  # Source offset y
            sim.theta_E,  # Host Einstein radius
            sim.n_sub_roi,  # Number of subhalos
            sim.f_sub_realiz,  # Fraction of halo mass in subhalos
            sim.n_sub_in_ring,  # Number of subhalos with r < 90% of host Einstein radius
            sim.f_sub_in_ring,  # Fraction of halo mass in subhalos with r < 90% of host Einstein radius
            sim.n_sub_near_ring,  # Number of subhalos with r within 10% of host Einstein radius
            sim.f_sub_near_ring,  # Fraction of halo mass in subhalos with r within 10% of host Einstein radius
        ]
        global_latents = np.asarray(global_latents)

        all_params.append(params)
        all_params_alt.append(params_alt)
        all_images.append(sim.image_poiss_psf)
        all_sub_latents.append(sub_latents)
        all_global_latents.append(global_latents)

        if mine_gold:
            all_log_r_xz.append(_extract_log_r(sim, 0, n_thetas_marginal))
            all_log_r_xz_alt.append(_extract_log_r(sim, 1, n_thetas_marginal))
            all_t_xz.append(sim.joint_scores[0])
            all_t_xz_alt.append(sim.joint_scores[1])

    if calculate_dx_dm and return_dx_dm:
        return (
            np.array(all_params).reshape((-1, 2)),
            np.array(all_params_alt).reshape((-1, 2)),
            np.array(all_images),
            np.array(all_t_xz) if mine_gold else None,
            np.array(all_t_xz_alt) if mine_gold else None,
            np.array(all_log_r_xz) if mine_gold else None,
            np.array(all_log_r_xz_alt) if mine_gold else None,
            all_sub_latents,
            np.array(all_global_latents),
        )
    return (
        np.array(all_params).reshape((-1, 2)),
        np.array(all_params_alt).reshape((-1, 2)),
        np.array(all_images),
        np.array(all_t_xz) if mine_gold else None,
        np.array(all_t_xz_alt) if mine_gold else None,
        np.array(all_log_r_xz) if mine_gold else None,
        np.array(all_log_r_xz_alt) if mine_gold else None,
        all_sub_latents,
        np.array(all_global_latents),
    )
Example #37
 def __init__(self, target_F):
     self.target_f = target_F
     self.uniform = stats.uniform()
Example #38
            while i<n:
                d = min(n-i, 750)
                u = self.uniform.rvs(size=d)
                sample = np.concatenate( [sample, fsolve( lambda y:self.target_f(y) - u, 0.5*np.ones(d) )] )
                i+=d
            return sample
        
        
        
if __name__=="__main__":
    F = lambda x: (x+x**2 + x**(5))/3.
    f = lambda x: (1 + 2*x + 5*x**(4))/3.
    N = 1e4
    print("Testing AR Method.")
    print("Generate %d variables:" % N)
    g = stats.uniform()
    M = f(1)
    ar = AR_method( target_f = f, sample_g = g, M = M)

    start = time.perf_counter()
    ar_test = ar.generateIII( N )
    print("Mean: %.3f, time taken: %.2f seconds" % (ar_test.mean(), time.perf_counter() - start))

    print()
    print("Testing Inverse Method.")
    print("Generate %d variables:" % N)
    iv = Inversion_method( target_F = F)

    start = time.perf_counter()
    iv_test = iv.generateII( N )
    print("Mean: %.3f, time taken: %.2f seconds" % (iv_test.mean(), time.perf_counter() - start))
Example #39
    def randomised_search(self):
        print_to_consol('Running randomized search to find best classifier')

        #create the decision forest
        clf1 = DecisionTreeClassifier(random_state=20,
                                      class_weight='balanced',
                                      max_features=self.numf)

        ada = AdaBoostClassifier(base_estimator=clf1,
                                 algorithm="SAMME.R",
                                 random_state=55)

        logging.info(f'Initialised classifier using balanced class weights \n')

        #set up randomized search
        param_dict = {
            'base_estimator__criterion': ['gini', 'entropy'],
            'n_estimators': randint(100,
                                    10000),  #number of base estimators to use
            'learning_rate': uniform(0.0001, 1.0),
            'base_estimator__min_samples_split': randint(2, 20),
            'base_estimator__max_depth': randint(1, 10),
            'base_estimator__min_samples_leaf': randint(1, 20),
            'base_estimator__max_leaf_nodes': randint(10, 20)
        }

        logging.info(
            f'Following parameters will be explored in randomized search \n'
            f'{param_dict} \n')

        #building and running the randomized search
        rand_search = RandomizedSearchCV(ada,
                                         param_dict,
                                         random_state=5,
                                         cv=self.cv,
                                         n_iter=self.numc,
                                         scoring='accuracy',
                                         n_jobs=-1)

        rand_search_fitted = rand_search.fit(self.X_train, self.y_train)

        best_parameters = rand_search_fitted.best_params_
        best_scores = rand_search_fitted.best_score_

        logging.info(
            f'Running randomised search for best parameters of classifier \n'
            f'Best parameters found: {best_parameters} \n'
            f'Best accuracy scores found: {best_scores} \n')

        self.model = rand_search_fitted.best_estimator_

        datestring = datetime.strftime(datetime.now(), '%Y%m%d_%H%M')
        joblib.dump(
            self.model,
            os.path.join(self.directory,
                         'best_predictor_' + datestring + '.pkl'))

        logging.info(f'Writing best classifier to disk in {self.directory} \n')

        print_to_consol(
            'Getting 95% confidence interval for uncalibrated classifier')

        alpha, upper, lower = get_confidence_interval(
            self.X_train, self.y_train, self.X_test, self.y_test, self.model,
            self.directory, self.bootiter, 'uncalibrated')

        logging.info(f'{alpha}% confidence interval {upper}% and {lower}% \n'
                     f'for uncalibrated classifier. \n')

        print_to_consol('Getting feature importances for best classifier')

        best_clf_feat_import = self.model.feature_importances_
        best_clf_feat_import_sorted = sorted(zip(best_clf_feat_import,
                                                 self.X_train.columns),
                                             reverse=True)

        logging.info(
            f'Feature importances for best classifier {best_clf_feat_import_sorted} \n'
        )

        all_clf_feat_import_mean = np.mean(
            [tree.feature_importances_ for tree in self.model.estimators_],
            axis=0)
        all_clf_feat_import_mean_sorted = sorted(zip(all_clf_feat_import_mean,
                                                     self.X_train.columns),
                                                 reverse=True)

        print_to_consol('Plotting feature importances for best classifier')

        feature_importances_best_estimator(best_clf_feat_import_sorted,
                                           self.directory)
        logging.info(
            f'Plotting feature importances for best classifier in decreasing order \n'
        )
        feature_importances_error_bars(self.model, self.X_train.columns,
                                       self.directory)
        logging.info(
            f'Plotting feature importances for best classifier with errorbars \n'
        )
Ejemplo n.º 40
0
def make_pi_gen():
    """
    piC = fraction of stars belonging to the gaussian
    """
    return stats.uniform(loc=0, scale=1)  #Uniform distribution from 0 - 1
Ejemplo n.º 41
0
# Let's use **logit** or **inverse sigmoid** transformation to map the support to real number line. Mathematically, $\zeta=logit(\theta)$.
#
# $$
# P(\zeta) = P(T^{-1}(\zeta))\,\lvert\det J_{T^{-1}}(\zeta)\rvert \\
# P(\zeta) = P(\mathrm{sig}(\zeta)) \cdot \mathrm{sig}(\zeta) \cdot (1 - \mathrm{sig}(\zeta))
# $$
#
# where $\mathrm{sig}$ is the sigmoid function; the second line follows because $T^{-1}(\zeta) = \mathrm{sig}(\zeta)$ and $\frac{d}{d\zeta}\mathrm{sig}(\zeta) = \mathrm{sig}(\zeta)\,(1 - \mathrm{sig}(\zeta))$.
#
# Converting this directly into Python code -

# +
theta = np.linspace(0, 1, 100)
zeta = np.linspace(-5, 5, 100)

dist = uniform()
p_theta = dist.pdf(theta)
sigmoid = sp.special.expit
p_zeta = dist.pdf(sigmoid(zeta)) * sigmoid(zeta) * (1 - sigmoid(zeta))

plot_transformation(theta, zeta, p_theta, p_zeta)
# -

# ### Mean Field ADVI Example
#
# Infer $\mu$ and $\sigma$ for Normal distribution.

# Generating data
mu = 12
sigma = 2.2
data = np.random.normal(mu, sigma, size=200)
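
# A minimal mean-field ADVI sketch for this inference problem, using PyMC3's
# pm.fit (an assumed continuation -- the original cell is not shown here;
# assumes `import pymc3 as pm`):
with pm.Model() as advi_model:
    mu_rv = pm.Normal("mu", mu=0, sd=10)
    sigma_rv = pm.HalfNormal("sigma", sd=10)
    pm.Normal("obs", mu=mu_rv, sd=sigma_rv, observed=data)
    approx = pm.fit(method="advi", n=30000)  # mean-field ADVI
    advi_trace = approx.sample(1000)         # draw from the fitted approximation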
Ejemplo n.º 42
0
        val_errors.append(mean_squared_error(y_val, y_val_predict))

    plt.plot(np.sqrt(train_errors), "r-", linewidth=2, label="train")

    plt.plot(np.sqrt(val_errors), "b-", linewidth=3, label="val")

    plt.ylim(0, 1)
    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("Training set size", fontsize=14)
    plt.ylabel("RMSE(log(y))", fontsize=14)


## Run
plot_learning_curves(SVR(), train_prepared, price_labels)
#%%
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(SVR(),
                                   param_distributions,
                                   n_iter=10,
                                   verbose=2,
                                   random_state=42)

rnd_search_cv.fit(train_prepared, price_labels)

#%%
rnd_search_cv.best_estimator_

#%%

# Logistic regression hyperparameter search
from sklearn import linear_model, datasets
from sklearn.model_selection import RandomizedSearchCV

# Load data
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Create logistic regression
logistic = linear_model.LogisticRegression(solver='liblinear')  # liblinear supports both l1 and l2

# Create regularization penalty space
penalty = ['l1', 'l2']

# Create regularization hyperparameter distribution using uniform distribution
C = uniform(loc=0, scale=4)

# Create hyperparameter options
hyperparameters = dict(C=C, penalty=penalty)

# Create randomized search 5-fold cross validation and 100 iterations
clf = RandomizedSearchCV(logistic,
                         hyperparameters,
                         random_state=1,
                         n_iter=100,
                         cv=5,
                         verbose=0,
                         n_jobs=-1)

# Fit randomized search
best_model = clf.fit(X, y)
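
# Inspect the selected hyperparameters (standard RandomizedSearchCV attributes):
print('Best penalty:', best_model.best_params_['penalty'])
print('Best C:', best_model.best_params_['C'])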
Ejemplo n.º 44
0
 def testUniformVariance(self):
   a = 10.0
   b = 100.0
   uniform = tfd.Uniform(low=a, high=b)
   s_uniform = sp_stats.uniform(loc=a, scale=b - a)
   self.assertAllClose(self.evaluate(uniform.variance()), s_uniform.var())
Ejemplo n.º 45
0
    C = 0
    for i in range(m):
        C += weights[i] * obj_functions[i]

    return C


if __name__ == "__main__":
    from scipy.stats import uniform

    n_design_variables = 3
    n_obj_functions = 2
    n_simulations = 100000

    x_sampler = [uniform(-5, 10) for i in range(n_design_variables)]

    column_names = ['id'] \
            + ['x{}'.format(i+1) for i in range(n_design_variables)] \
            + ['f{}'.format(i+1) for i in range(n_obj_functions)]
    data = []
    for i in range(n_simulations):
        x = [u.rvs() for u in x_sampler]
        f1, f2 = kurasawe(x)
        row = [i] + x + [f1, f2]
        data.append(row)

    df = pd.DataFrame(data, columns=column_names)
    print(df)

    import matplotlib.pyplot as plt
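
    # The plotting step is truncated above; a plausible continuation scatters
    # the two objectives to reveal the Kursawe Pareto front:
    plt.scatter(df['f1'], df['f2'], s=1, alpha=0.3)
    plt.xlabel('f1')
    plt.ylabel('f2')
    plt.show()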
Ejemplo n.º 46
0
 def testUniformStd(self):
   a = 10.0
   b = 100.0
   uniform = tfd.Uniform(low=a, high=b)
   s_uniform = sp_stats.uniform(loc=a, scale=b - a)
   self.assertAllClose(self.evaluate(uniform.stddev()), s_uniform.std())
Ejemplo n.º 47
0
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
from scipy.stats import norm, uniform
import seaborn as sns

# Config
os.chdir("/home/jovyan/work")
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (10, 5)
np.random.seed(42)

# Prepare the data
x = uniform(0, 20).rvs(30)
eps = norm(0, 4).rvs(30)
y = 11 + 3*x + eps
plt.scatter(x, y);

# Sampling
with pm.Model() as model:
    b_0 = pm.Normal("b_0", mu=0, sd=10)
    b_1 = pm.Normal("b_1", mu=0, sd=2)
    e = pm.HalfCauchy("e", 2)
    mu = pm.Deterministic("mu", b_0 + b_1*x)
    Y = pm.Normal("Y", mu=mu, sd=e, observed=y)
    trace = pm.sample(10000, step=pm.Metropolis())

pm.traceplot(trace[2000:]);
plt.savefig("./results/4-11-regression-half-cauchy.png")
Ejemplo n.º 48
0
data_x = np.array([1., 2., 3.])
data_y = np.array([1.4, 1.7, 4.1])
data_yerr = np.array([0.2, 0.15, 0.2])

# Define the loglikelihood function
def loglikelihood(theta):
    y = theta[1] * data_x + theta[0]
    chisq = np.sum(((data_y - y) / data_yerr)**2)
    return -chisq / 2.

if __name__ == '__main__':

    # Set up the list of sampled parameters: the prior is Uniform(-5:5) --
    # we are using a fixed uniform prior from scipy.stats
    parm_names = list(['m', 'b'])
    sampled_parameters = [SampledParameter(name=p, prior=uniform(loc=-5.0,scale=10.0)) for p in parm_names]

    # Set the active point population size
    population_size = 100
    # Setup the Nested Sampling run
    n_params = len(sampled_parameters)
    print("Sampling a total of {} parameters".format(n_params))
    #population_size = 10
    print("Will use NS population size of {}".format(population_size))
    # Construct the Nested Sampler
    MNNS = MultiNestNestedSampling(sampled_parameters,
                                   loglikelihood,
                                   population_size)
    #print(PCNS.likelihood(np.array([1.0])))
    #quit()
    # run it
Ejemplo n.º 49
0
# coding:utf8
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import pickle
from scipy import stats as sts

np.random.seed(0)
plt.switch_backend('agg')
root = Path('./ass2/savedoc/')

sigmas = [0.1, 0.5, 1, 2, 5]
num = int(1e4)
tpdf = sts.t(1).pdf
urv = sts.uniform(0, 1)

datadic = {}
accrates = {}
if not (root / 'p6.pkl').is_file():
    for sigma in sigmas:
        nrv = sts.norm(0, sigma)
        xk = 1
        datalst = []
        us = []
        probs = []
        while len(datalst) < num:
            eps = nrv.rvs()
            u = urv.rvs()
            y = xk + eps
            prob = np.min([tpdf(y) / tpdf(xk), 1])
            if u <= prob:
Ejemplo n.º 50
0
def get_pd(**kwargs):
    """
    Get probability distribution.

    Returns
    -------
    pd : scipy.stats.rv_frozen
        Desired probability distribution.

    Other Parameters
    ----------------
    use_hidden : bool
    pdf : str
    scale : str
    a : float
    b : float
    hidden_for_a : list or tuple
    hidden_for_b : list or tuple
    hidden_pdf : str
    """
    allowed_kwargs = {
        'use_hidden', 'pdf', 'scale', 'a', 'b', 'hidden_for_a', 'hidden_for_b',
        'hidden_pdf'
    }
    for key in allowed_kwargs:
        if key not in kwargs:
            raise ValueError(
                'Missing required keyword argument: {!r}.'.format(key))
    use_hidden = kwargs['use_hidden']
    pdf = kwargs['pdf']
    scale = kwargs['scale']
    a = kwargs['a']
    b = kwargs['b']
    hidden_for_a = kwargs['hidden_for_a']
    hidden_for_b = kwargs['hidden_for_b']
    hidden_pdf = kwargs['hidden_pdf']

    if use_hidden:
        if pdf == 'uniform':
            pd = RandUniform(hidden_for_a, hidden_for_b, scale, hidden_pdf)
        elif pdf == 'normal':
            if scale == 'linear':
                pd = RandTruncnorm(hidden_for_a, hidden_for_b, 0, np.inf,
                                   scale, hidden_pdf)
            elif scale == 'log10':
                pd = RandTruncnorm(hidden_for_a, hidden_for_b, -np.inf, np.inf,
                                   scale, hidden_pdf)
            else:
                raise ValueError('Unsupported scale: {!r}.'.format(scale))
        else:
            raise ValueError('Unsupported pdf: {!r}.'.format(pdf))
    else:
        if pdf == 'uniform':
            pd = uniform(a, b - a)
        elif pdf == 'normal':
            if scale == 'linear':
                _a = (0 - a) / b
                _b = (np.inf - a) / b
                pd = truncnorm(_a, _b, loc=a, scale=b)
            elif scale == 'log10':
                pd = norm(loc=a, scale=b)
            else:
                raise ValueError('Unsupported scale: {!r}.'.format(scale))
        else:
            raise ValueError('Unsupported pdf: {!r}.'.format(pdf))

    return pd
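
# A minimal usage sketch (parameter values are illustrative only): with
# use_hidden=False and pdf='uniform' this returns a frozen scipy.stats
# uniform distribution on [a, b].
pd = get_pd(use_hidden=False, pdf='uniform', scale='linear', a=0.0, b=1.0,
            hidden_for_a=None, hidden_for_b=None, hidden_pdf=None)
samples = pd.rvs(size=10)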
Ejemplo n.º 51
0
 def new_pd(self):
     self._new_para()
     self.pd = uniform(loc=self.loc, scale=self.scale)
     self.pd.random_state.seed()  # re-seed
Ejemplo n.º 52
0
def make_v_gen():
    """
    Generates the prior for the Gaussian X velocity.
    """
    return stats.uniform(loc=-4, scale=12)
Ejemplo n.º 53
0
 def __init__(self, a_range, b_range, scale, hidden_pdf):
     super(RandUniform, self).__init__(a_range, b_range, scale, hidden_pdf)
     self._new_para()
     self.pd = uniform(loc=self.loc, scale=self.scale)
     self.pd.random_state.seed()  # re-seed
Ejemplo n.º 54
0
def find_best_model_parameters(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_val: np.ndarray,
    y_val: np.ndarray,
    model,
    pca: PCA,
    best_k: int,
    n_jobs: int = 1,
    n_iter: int = 60,
):

    est = model()

    split, X_combined, y_combined = get_train_test_split(
        X_train, y_train, X_val, y_val)

    if model == SVR:
        distributions = {
            'model__C': loguniform(1e-1, 1e3),
            'model__gamma': loguniform(1e-4, 1e0),
            'model__kernel': ['poly', 'rbf', 'sigmoid'],
            'model__degree': [1, 2, 3, 4, 5, 6],
            'model__cache_size': [500],
        }

    elif model == RandomForestRegressor:
        distributions = {
            'model__n_estimators': randint(10, 2000),
            'model__max_features': uniform(0.01, 0.99),  # 0.01-1.0
            'model__max_depth': randint(10, 110),
            'model__min_samples_split': randint(2, 10),
            'model__min_samples_leaf': randint(1, 10),
            'model__bootstrap': [True, False],
            'model__n_jobs': [n_jobs],
        }

        # It's more efficient to parallelize each forest rather than the
        # search: with many cores, some can idle while one long-running
        # parameter set finishes.
        n_jobs = 1

    else:
        print(
            f'HP search for {str(model)} is not implemented. Returning default parameters.'
        )
        return est

    pipe = Pipeline(steps=[
        ('kbest', SelectKBest(score_func=f_regression, k=best_k)),
        ('pca', pca),
        ('model', est),
    ])

    search = RandomizedSearchCV(pipe,
                                distributions,
                                cv=split,
                                n_iter=n_iter,
                                n_jobs=n_jobs,
                                refit=False)

    search.fit(X_combined, y_combined)

    # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    #     print(pd.DataFrame(search.cv_results_))

    # Remove 'model__' from keys
    best_params = {k[7:]: v for k, v in search.best_params_.items()}
    # print('best model parameters:', best_params)

    return model(**best_params)
Ejemplo n.º 55
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson, uniform, norm
from scipy.integrate import trapz

# invent a "recurrence time"
T = 150
L = 1000
NPTS = 100
NSAMP = 10000
ones = np.ones(NSAMP)

# set Poisson recurrence for earthquakes
T_prior = poisson(T)
X_prior = uniform(0, L)

# get the samples
Dt = T_prior.rvs(size=NSAMP)
x = X_prior.rvs(size=NSAMP)
t = np.cumsum(Dt)


def likelihood_data_mu(data, mu):
    return np.prod([poisson.pmf(d, mu) for d in data])


def post_mu(data):
    mu_mean = np.average(data)
    mu_std = np.std(data)
    mu_prior = norm(mu_mean, mu_std)
    mu = np.linspace(mu_prior.ppf(0.01), mu_prior.ppf(0.99), NPTS)
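    # Assumed completion (the original is truncated here): evaluate the
    # unnormalised posterior on the grid and normalise it with trapz.
    post = np.array([likelihood_data_mu(data, m) for m in mu]) * mu_prior.pdf(mu)
    post /= trapz(post, mu)
    return mu, post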
Ejemplo n.º 56
0
            mock_log_model.assert_called_once()

        query = "tags.{} = '{}'".format(MLFLOW_PARENT_RUN_ID, run.info.run_id)
        assert len(mlflow.search_runs([run.info.experiment_id])) == 1
        assert len(mlflow.search_runs([run.info.experiment_id], query)) == 0


@pytest.mark.parametrize(
    "cv_class, search_space",
    [
        (sklearn.model_selection.GridSearchCV, {
            "kernel": ("linear", "rbf"),
            "C": [1, 5, 10]
        }),
        (sklearn.model_selection.RandomizedSearchCV, {
            "C": uniform(loc=0, scale=4)
        }),
    ],
)
@pytest.mark.parametrize("backend", [None, "threading", "loky"])
def test_parameter_search_estimators_produce_expected_outputs(
        cv_class, search_space, backend):
    mlflow.sklearn.autolog()

    svc = sklearn.svm.SVC()
    cv_model = cv_class(svc, search_space, n_jobs=5, return_train_score=True)
    X, y = get_iris()

    def train_cv_model():
        if backend is None:
            cv_model.fit(X, y)
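        # Assumed completion of the truncated helper: run the fit inside the
        # requested joblib backend when one is given (assumes `import joblib`).
        else:
            with joblib.parallel_backend(backend):
                cv_model.fit(X, y)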
Ejemplo n.º 57
0
#First one must be zero, for the prior.
DataIndices = [0, 1, 2, 100]

#True regression parameters that we wish to recover. Do not set these outside the range of [-1,1]
a0 = -0.3
a1 = 0.5

NPoints = 100  #Number of (x,y) training points
noiseSD = 0.2  #True noise standard deviation
priorPrecision = 2.0  #Fix the prior precision, alpha. We will use a zero-mean isotropic Gaussian.
likelihoodSD = noiseSD  # Assume the likelihood precision, beta, is known.
likelihoodPrecision = 1.0 / (likelihoodSD**2)

#Because of how the axes are set up, x and y values should be in the same range as the coefficients.

x = 2 * uniform().rvs(NPoints) - 1
y = a0 + a1 * x + norm(0, noiseSD).rvs(NPoints)


def MeanCovPost(x, y):
    #Given data vectors x and y, this returns the posterior mean and covariance.
    X = np.array([[1, x1] for x1 in x])
    Precision = np.diag(
        [priorPrecision] * 2) + likelihoodPrecision * X.T.dot(X)
    Cov = np.linalg.inv(Precision)
    Mean = likelihoodPrecision * Cov.dot(X.T.dot(y))
    return {'Mean': Mean, 'Cov': Cov}


def GaussPdfMaker(mean, cov):
    #For a given (mean, cov) pair, this returns a vectorized pdf function.
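    # Assumed completion (the original is truncated here): freeze a 2-D
    # Gaussian and vectorise its pdf over coordinate pairs (assumes
    # `from scipy.stats import multivariate_normal`).
    rv = multivariate_normal(mean=mean, cov=cov)
    return np.vectorize(lambda w0, w1: rv.pdf([w0, w1]))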
Ejemplo n.º 58
0
f = interpolate.interp1d(x, y, kind='quadratic')
x_new = np.arange(0, 10, 0.1)
y_new = f(x_new)

# Generate normal distribution

mu = 2.0
sigma = 0.5
norm_rv = sts.norm(loc=mu, scale=sigma)
x = norm_rv.rvs(size=4)  # [2.42471807,  2.89001427,  1.5406754 ,  2.218372]

# Generate uniform distribution

a = 1
b = 4
uniform_rv = sts.uniform(a, b - a)
x = uniform_rv.rvs(
    size=4)  # [2.90068986,  1.30900927,  2.61667386,  1.82853085]

# Generate Bernoulli distribution

p = 0.7
bernoulli_rv = sts.bernoulli(p)
x = bernoulli_rv.rvs(size=4)  # [1, 1, 1, 0]

# Generate binomial distribution

n = 20
p = 0.7
binom_rv = sts.binom(n, p)
x = binom_rv.rvs(size=4)  # [13, 15, 13, 14]
Ejemplo n.º 59
0
steps = [ ('extract', FBCSP(fs,4,40,4,4,n_components=4)),
          ('select', SelectKBest()),          
          ('classify',SVC())
        ]

pipeline = Pipeline(steps = steps)

param_dist = {'extract__n_components':[4],
              'extract__fs':[fs],
              'extract__f_low':[4],
              'extract__f_high':[40],
              'extract__bandwidth':[4],
              'extract__step':[4],
              'select__score_func':[mutual_info_classif],
              'select__k':randint(1,145),              
              'classify__C':uniform(1e-2,1e2),
              'classify__kernel':['linear']
              }


# Map Cohen's kappa from [-1, 1] onto [0, 1] so it can be used as a scorer
kappa_corr = lambda target, output: (cohen_kappa_score(target, output) + 1) / 2

search = RandomizedSearchCV(pipeline, param_distributions=param_dist,
                            scoring=make_scorer(kappa_corr),
                            n_iter=5,n_jobs=5,verbose=10,cv=10)

search.fit(Xdata,labels)
cv_results = search.cv_results_
cv_results = pd.DataFrame.from_dict(cv_results)
cv_results.to_csv(savename)
Ejemplo n.º 60
0
def make_sig_gen():
    """
    Generates the prior for the Gaussian semi-major-axis velocity standard deviation.
    """
    return stats.uniform(loc=0, scale=8)