def predict_proba(self, X):
    probs = X
    calibrated_probs = numpy.zeros(len(probs))
    ind_1, ind_2 = self._compute_inds(len(probs))
    probs_1 = probs[ind_1]
    probs_2 = probs[ind_2]
    if self.logistic:
        probs_1 = numpy.clip(probs_1, 0.001, 0.999)
        probs_2 = numpy.clip(probs_2, 0.001, 0.999)
        probs_1 = probs_1[:, numpy.newaxis]
        probs_2 = probs_2[:, numpy.newaxis]
        calibrated_probs[ind_1] = self.calibrators[1].predict_proba(logit(probs_1))[:, 1]
        calibrated_probs[ind_2] = self.calibrators[0].predict_proba(logit(probs_2))[:, 1]
    else:
        calibrated_probs[ind_1] = self.calibrators[1].transform(probs_1)
        calibrated_probs[ind_2] = self.calibrators[0].transform(probs_2)
    numpy.random.seed(self.random_state)
    calibrated_probs = calibrated_probs + numpy.random.normal(size=len(calibrated_probs)) * 0.001
    return calibrated_probs
def test_logistic_cross(self):
    mle = maxlike.Logistic()
    mle.model = Sum(2)
    mle.model.add(X(), 0, 0)
    mle.model.add(-X(), 0, 1)
    mle.model.add(-Scalar(), 1, [])
    mle.add_constraint([0], Linear([1]))

    # fetch and prepare data
    df = pd.read_csv(data_folder + "data_proba.csv", index_col=[0, 1])
    df['w'] = df['-1'] + df['1']
    kwargs, _ = prepare_dataframe(df, 'w', '1', {'X': np.sum})
    N = kwargs['N']
    S = kwargs['X']
    u = -logit(S.sum(0) / N.sum(0))
    v = logit(S.sum(1) / N.sum(1))
    a = (u + v) / 2
    h = ((u - v) / 2).mean()
    mle.add_param(a)
    mle.add_param(h)

    tol = 1e-8
    mle.fit(**kwargs, verbose=self.verbose)
    a, h = mle.params
    s_a, s_h = mle.std_error()

    df = pd.read_csv(data_folder + "test_logistic_cross.csv")
    self.assertAlmostEqual(h, 0.3059389232047434, delta=tol)
    self.assertAlmostEqual(s_h, 0.1053509333552778, delta=tol)
    np.testing.assert_allclose(a, df['a'], atol=tol)
    np.testing.assert_allclose(s_a, df['s_a'], atol=tol)
def main():
    get_s = block_get_s
    #get_s = mixture_get_s
    objective = partial(generic_objective, get_s)
    neg_ll = partial(generic_neg_ll, get_s)

    edge_rates = [1, 30, 1, 30, 30]
    kappa = 0.2
    theta = 0.5
    alpha = 2.0
    #edge_rates = [1, 2, 3, 1, 10]
    #alpha = 1.0
    #kappa = 3.0
    #theta = 0.5
    X0 = np.array(edge_rates + [kappa, logit(theta), alpha], dtype=float)
    print('%.20g' % neg_ll(X0))
    desired_ll = 85.030942031997312824

    #edge_rates = [1, 2, 3, 1, 10]
    #kappa = 3
    X0 = np.array(edge_rates + [kappa, logit(theta), alpha], dtype=float)
    a = 1e-6
    bounds = [(a, None) for i in X0[:5]] + [(a, None), (a, 1), (a, None)]
    result = optimize.minimize(
        objective, X0, method='L-BFGS-B', jac=True, bounds=bounds)
    #result = optimize.minimize(
    #    neg_ll, X0, method='L-BFGS-B', bounds=bounds)
    print(result)
def logser_solver(ab):
    """Given abundance data, solve for MLE of logseries parameter p."""
    ab = check_for_support(ab, lower=1)
    BOUNDS = [0, 1]
    DIST_FROM_BOUND = 10 ** -15
    y = lambda x: 1 / log(1 / (1 - expit(x))) * expit(x) / (1 - expit(x)) - sum(ab) / len(ab)
    x = bisect(y, logit(BOUNDS[0] + DIST_FROM_BOUND), logit(BOUNDS[1] - DIST_FROM_BOUND),
               xtol=1.490116e-08)
    return expit(x)
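# Hedged usage note (not part of the original source): logser_solver matches the sample
# mean abundance to the logseries mean, -p / ((1 - p) * log(1 - p)). A quick sanity check
# of that mean relation using only scipy, with a hypothetical parameter value:
import numpy as np
from scipy import stats

p_true = 0.9
ab_sim = stats.logser.rvs(p_true, size=10000, random_state=0)
print(ab_sim.mean(), stats.logser.mean(p_true))  # sample mean vs analytic logseries mean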
def calibrate_probs(labels, weights, probs, logistic=False, random_state=11, threshold=0.,
                    return_calibrator=False, symmetrize=False):
    """
    Calibrate output to probabilities using 2-folding to calibrate all data

    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, to set labels 0/1
    :param logistic: bool, use logistic or isotonic regression
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    :return: calibrated probabilities
    """
    labels = (labels > threshold) * 1
    ind = numpy.arange(len(probs))
    ind_1, ind_2 = train_test_split(ind, random_state=random_state, train_size=0.5)

    calibrator = LogisticRegression(C=100) if logistic else IsotonicRegression(
        y_min=0, y_max=1, out_of_bounds='clip')
    est_calib_1, est_calib_2 = clone(calibrator), clone(calibrator)
    probs_1 = probs[ind_1]
    probs_2 = probs[ind_2]

    if logistic:
        probs_1 = numpy.clip(probs_1, 0.001, 0.999)
        probs_2 = numpy.clip(probs_2, 0.001, 0.999)
        probs_1 = logit(probs_1)[:, numpy.newaxis]
        probs_2 = logit(probs_2)[:, numpy.newaxis]
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1 - probs_1],
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0])
            est_calib_2.fit(numpy.r_[probs_2, 1 - probs_2],
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0])
        else:
            est_calib_1.fit(probs_1, labels[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2])
    else:
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1 - probs_1],
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0],
                            numpy.r_[weights[ind_1], weights[ind_1]])
            est_calib_2.fit(numpy.r_[probs_2, 1 - probs_2],
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0],
                            numpy.r_[weights[ind_2], weights[ind_2]])
        else:
            est_calib_1.fit(probs_1, labels[ind_1], weights[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2], weights[ind_2])

    calibrated_probs = numpy.zeros(len(probs))
    if logistic:
        calibrated_probs[ind_1] = est_calib_2.predict_proba(probs_1)[:, 1]
        calibrated_probs[ind_2] = est_calib_1.predict_proba(probs_2)[:, 1]
    else:
        calibrated_probs[ind_1] = est_calib_2.transform(probs_1)
        calibrated_probs[ind_2] = est_calib_1.transform(probs_2)
    if return_calibrator:
        return calibrated_probs, (est_calib_1, est_calib_2)
    else:
        return calibrated_probs
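# Hedged usage sketch (not part of the original module): calling calibrate_probs on
# synthetic scores. Assumes the module-level imports the function relies on are present
# (numpy, scipy.special.logit, sklearn's LogisticRegression / IsotonicRegression,
# train_test_split and clone). Variable names below are hypothetical.
import numpy

rng = numpy.random.RandomState(0)
toy_labels = rng.randint(0, 2, size=1000)                                      # binary targets
toy_scores = numpy.clip(0.3 * toy_labels + rng.uniform(size=1000), 0.0, 1.0)   # raw scores in [0, 1]
toy_weights = numpy.ones(1000)

toy_calibrated = calibrate_probs(toy_labels, toy_weights, toy_scores, logistic=True)
print(toy_calibrated.min(), toy_calibrated.max())   # calibrated probabilities in [0, 1]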
def plot(x, y, logit_scale=False, output_file=None):
    if logit_scale:
        x = logit(x)
        y = logit(y)
    plt.scatter(x, y, alpha=0.05)
    if output_file:
        plt.savefig(output_file)
    else:
        plt.show()
    plt.close()
def test_update_p_3():
    from scipy.special import logit
    from update_params_linear_regression import update_p_3
    N = 4
    p0 = 0.3
    res = update_p_3(p0, N)
    expected_res = np.array([logit(p0), logit(p0), logit(p0), logit(p0)])
    np.testing.assert_array_equal(res, expected_res)
def main():
    "Calculates the mean k and mean responses for each participant."
    with open(KSPEED_CURVES_FN, 'rb') as inpf:
        mpl_kcurves = [pickle.load(inpf) for k in range(KMAXCURVE + 1)]

    # Determine interval with maximum difference in mean response
    ini, end = None, None
    dif = 0
    for i in range(NTRIALS - 100):
        j = i + 100
        this_dif = 0
        for trial in range(i, j):
            this_dif += mpl_kcurves[0][trial] - mpl_kcurves[2][trial]
        this_dif /= 100
        if this_dif > dif:
            dif = this_dif
            ini, end = i, j
    print(dif, ini, end)

    if not os.path.exists('mean_k.pickle'):
        samples = get_samples()
        samples = samples.sample(10000)
        mean_k = [get_subject_meank(samples, i) for i in range(N)]
        with open('mean_k.pickle', 'wb') as outf:
            pickle.dump((ini, end), outf)
            pickle.dump(mean_k, outf)
    else:
        with open('mean_k.pickle', 'rb') as inpf:
            ini, end = pickle.load(inpf)
            mean_k = pickle.load(inpf)

    # ini, end = 200, 300
    mean_resp = [np.mean(y[ini:end]) for x, y in bdata]
    mod = sm.OLS(logit(mean_resp), [(1, k) for k in mean_k])
    res = mod.fit()
    print(res.summary())
def _pack_acgt(pi):
    a, c, g, t = pi
    ag = a + g  # purines
    ct = c + t  # pyrimidines
    a_div_ag = a / ag
    c_div_ct = c / ct
    return logit([ag, a_div_ag, c_div_ct])
def _graph(instances, use_prob=True):
    """
    Builds a directed graph for instances.

    Instances are quadruplets of the form:
        edu_source, edu_target, probability_of_attachment, relation

    Returns a Digraph.
    """
    root_id = _get_root(set(e for s, t, _, _ in instances for e in (s, t))).id
    targets = defaultdict(list)
    labels = dict()
    scores = dict()
    for source, target, prob, rel in instances:
        src, tgt = source.id, target.id
        # Ignore all edges directed to the root
        if tgt == root_id:
            continue
        scores[src, tgt] = _cap_score(logit(prob)) if use_prob else prob
        labels[src, tgt] = rel
        targets[src].append(tgt)
    return Digraph(targets,
                   lambda s, t: scores[s, t],
                   lambda s, t: labels[s, t])
def pack_params(nt_distn, kappa, alpha, v):
    # does not include edge rates
    a, c, g, t = nt_distn
    return np.concatenate([
        #logit([a+g, a/(a+g), c/(c+t)]),
        logit([c+g]),
        np.log([kappa, alpha, v])])
def testGetLogitsAndProbProbabilityMultidimensional(self):
    p = np.array([[0.3, 0.4, 0.3], [0.1, 0.5, 0.4]], dtype=np.float32)
    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_prob(
            p=p, multidimensional=True, validate_args=True)
        self.assertAllClose(special.logit(p), new_logits.eval())
        self.assertAllClose(p, new_p.eval())
def inverse_activation_function(self, x):
    if x == 1:
        x = 0.999999
    elif x == 0:
        x = 0.000001
    #print 'logit', x, logit(x)
    return logit(x)
def test_nan(self):
    expected = np.array([np.nan] * 4)
    olderr = np.seterr(invalid='ignore')
    try:
        actual = logit(np.array([-3., -2., 2., 3.]))
    finally:
        np.seterr(**olderr)
    assert_equal(expected, actual)
def testGetLogitsAndProbsProbability(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)
    with self.test_session():
        new_logits, new_p = distribution_util.get_logits_and_probs(
            probs=p, validate_args=True)
        self.assertAllClose(special.logit(p), new_logits.eval())
        self.assertAllClose(p, new_p.eval())
def compactspace(scale, n):
    r"""
    Returns points :math:`x` spaced in the open interval
    :math:`(-\infty, \infty)` by linearly spacing in the compactified
    coordinate :math:`s(x) = e^{-\alpha x} / (1 + e^{-\alpha x})^2`,
    where :math:`\alpha` is a scale factor.
    """
    logit = logistic(scale=scale).ppf
    compact_xs = np.linspace(0, 1, n + 2)[1:-1]
    return logit(compact_xs)
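# Hedged usage sketch (added here, not in the original): compactspace places n points on
# the real line so that the logistic CDF of those points is evenly spaced in (0, 1).
# Assumes numpy and scipy.stats.logistic are imported at module level, as the function uses.
import numpy as np
from scipy.stats import logistic

xs = compactspace(scale=1.0, n=5)
print(xs)                               # symmetric grid covering (-inf, inf)
print(logistic(scale=1.0).cdf(xs))      # recovers np.linspace(0, 1, 7)[1:-1]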
def correlation_curve_ngrams(texts, ngram_orders):
    corrs = []
    targets = texts.target_words()
    cloze_probs = HumanPredictor().batch_predict(texts)
    for order in ngram_orders:
        x = []
        y = []
        ngram_probs = NgramPredictor(order).batch_predict(texts)
        for target, cloze_prob, ngram_prob in zip(targets, cloze_probs, ngram_probs):
            x.append(cloze_prob)
            y.append(ngram_prob)
        lx = logit(x)
        ly = logit(y)
        corrs.append(pearsonr(lx, ly)[0])
    return corrs
def codePatches(self, patches, currentParts):
    flatpatches = patches.reshape((patches.shape[0], -1))
    print(flatpatches.shape)
    part_logits = np.rollaxis(logit(currentParts).astype(np.float64), 0, 4)
    part_logits = part_logits.reshape(
        part_logits.shape[0] * part_logits.shape[1] * part_logits.shape[2], -1)
    print(part_logits.shape)
    constant_terms = np.apply_over_axes(
        np.sum, np.log(1 - currentParts).astype(np.float64), [1, 2, 3]).ravel()
    print(constant_terms.shape)
    codeParts = np.dot(flatpatches, part_logits)
    codeParts = codeParts + constant_terms
    print(codeParts.shape)
    return np.argmax(codeParts, axis=1)
def check_logit_out(self, dtype, expected):
    a = np.linspace(0, 1, 10)
    a = np.array(a, dtype=dtype)
    olderr = np.seterr(divide='ignore')
    try:
        actual = logit(a)
    finally:
        np.seterr(**olderr)
    assert_almost_equal(actual, expected)
    assert_equal(actual.dtype, np.dtype(dtype))
def correlation_curve_cache(texts, ngram_order, cache_lambdas):
    corrs = []
    targets = texts.target_words()
    cloze_probs = HumanPredictor().batch_predict(texts)
    ngram_probs = NgramPredictor(ngram_order).batch_predict(texts)
    for cache_lambda in cache_lambdas:
        x = []
        y = []
        cache_probs = UnigramCachePredictor().batch_predict(texts)
        for target, cloze_prob, ngram_prob, cache_prob in zip(targets, cloze_probs,
                                                              ngram_probs, cache_probs):
            x.append(cloze_prob)
            y.append(cache_lambda * cache_prob + (1 - cache_lambda) * ngram_prob)
        lx = logit(x)
        ly = logit(y)
        corrs.append(pearsonr(lx, ly)[0])
    return corrs
def bern_y(X, p1, base_prob=.25, beta_sd=1):
    n, p = X.shape
    X_1 = X[:, :p1]
    v = 0
    while v < 1E-5:
        beta = npran.randn(p1) * beta_sd
        if p1 > 0:
            eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
            y = npran.binomial(1, invlogit(eta), n)
        else:
            y = npran.binomial(1, base_prob, n)
        v = np.min(nplin.svd(np.hstack((X, y[:, np.newaxis])))[1])
    return y
def from_simplex(x):
    r"""
    Interprets the last index of x as unit simplices and returns a real
    array of the same shape in logit space.

    Inverse to :func:`to_simplex`; see that function for more details.

    :param np.ndarray: Array of unit simplices along the last index.
    :rtype: ``np.ndarray``
    """
    n = x.shape[-1]
    # z are the stick breaking fractions in [0,1]
    # the last one is always 1, so don't worry about it
    z = np.empty(shape=x.shape)
    z[..., 0] = x[..., 0]
    z[..., 1:-1] = x[..., 1:-1] / (1 - x[..., :-2].cumsum(axis=-1))

    # now z are the logit-transformed breaking fractions
    # (dtype=float replaces the removed np.float alias; behavior is unchanged)
    z[..., :-1] = logit(z[..., :-1]) - logit(1 / (n - np.arange(n - 1, dtype=float)))
    # set this to 0 manually to avoid subtracting inf-inf
    z[..., -1] = 0

    return z
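# Minimal sketch (not from the original source): mapping one point on the unit simplex
# into logit space with from_simplex. Assumes numpy and scipy.special.logit are available
# at module level, as used above.
import numpy as np

simplex_point = np.array([0.2, 0.3, 0.5])   # nonnegative, sums to 1
print(from_simplex(simplex_point))          # real-valued; last entry is 0 by construction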
def genXy_bern_X_norm_beta(seed, n, p1, pnull, x_prob=.25, base_prob=.25, beta_sd=1):
    """The X are Bernoulli. p1 predictive vars, pnull null vars. beta on the p1 vars is
    ~normal(0, beta_sd) and the intercept is logit(base_prob)."""
    if seed is not None:
        npran.seed(seed)
    X_1 = npran.binomial(1, x_prob, (n, p1))
    X_null = npran.binomial(1, x_prob, (n, pnull))
    X = np.concatenate((X_1, X_null), axis=1)
    beta = npran.randn(p1) * beta_sd
    if p1 > 0:
        eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
        y = npran.binomial(1, invlogit(eta), n)
    else:
        y = npran.binomial(1, base_prob, n)
    return X, y
def extract(self, X):
    assert self._parts is not None, "Must be trained before calling extract"
    th = self._settings['threshold']
    part_logits = np.rollaxis(logit(self._parts).astype(np.float64), 0, 4)
    constant_terms = np.apply_over_axes(
        np.sum, np.log(1 - self._parts).astype(np.float64), [1, 2, 3]).ravel()

    from pnet.cyfuncs import code_index_map_multi
    feature_map = code_index_map_multi(X, part_logits, constant_terms, th,
                                       outer_frame=self._settings['outer_frame'],
                                       min_llh=self._settings.get('min_llh', -np.inf),
                                       n_coded=self._settings.get('n_coded', 1))
    return (feature_map, self._num_parts)
def check_logit_out(self, dtype, expected):
    a = np.linspace(0, 1, 10)
    a = np.array(a, dtype=dtype)
    olderr = np.seterr(divide='ignore')
    try:
        actual = logit(a)
    finally:
        np.seterr(**olderr)
    if np.__version__ >= '1.6':
        assert_almost_equal(actual, expected)
    else:
        assert_almost_equal(actual[1:-1], expected[1:-1])
    assert_equal(actual.dtype, np.dtype(dtype))
def _extract(self, phi, data):
    X = phi(data)
    XX = X[:, np.newaxis, np.newaxis]
    theta = self._models[np.newaxis]

    S = self._settings.get('standardize')
    if S:
        llh = XX * logit(theta)
        bb = np.apply_over_axes(np.sum, llh, [-3, -2, -1])[..., 0, 0, 0]
        bb = (bb - self._means) / self._sigmas
        yhat = np.argmax(bb.max(-1), axis=1)
    else:
        llh = XX * np.log(theta) + (1 - XX) * np.log(1 - theta)
        bb = np.apply_over_axes(np.sum, llh, [-3, -2, -1])[..., 0, 0, 0]
        yhat = np.argmax(bb.max(-1), axis=1)

    return yhat
def genXy_binary_X_norm_beta(seed, n, p1, pnull, base_prob=.25, beta_sd=1,
                             A_base_diag=-1, A_sd=.2):
    '''X is binary from the Ising model, with the coefficients drawn from a normal.
    Y is binary, with beta's coefficients also from a normal.'''
    if seed is not None:
        npran.seed(seed)
    p = p1 + pnull
    # use the A_sd parameter rather than a hard-coded 0.2
    A = npran.normal(0, A_sd, (p, p)) - np.diag(A_base_diag * np.ones(p))
    X = draw_random_binary(n, A)
    X_1 = X[:, :p1]
    X_null = X[:, p1:]
    beta = npran.randn(p1) * beta_sd
    if p1 > 0:
        eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
        y = npran.binomial(1, invlogit(eta), n)
    else:
        y = npran.binomial(1, base_prob, n)
    return X, y
def preprocess_feature(cls, feature, parameters):
    is_not_empty = 1 - np.isclose(feature, MISSING_VALUE)
    if parameters.feature_type == identify_types.BINARY:
        # Binary features are always 1 unless they are 0
        return ((feature != 0) * is_not_empty).astype(np.float32)
    if parameters.boxcox_lambda is not None:
        feature = stats.boxcox(
            np.maximum(feature + parameters.boxcox_shift, BOX_COX_MARGIN),
            parameters.boxcox_lambda,
        )
    # No *= to ensure consistent out-of-place operation.
    if parameters.feature_type == identify_types.PROBABILITY:
        feature = np.clip(feature, 0.01, 0.99)
        feature = special.logit(feature)
    elif parameters.feature_type == identify_types.QUANTILE:
        transformed_feature = np.zeros_like(feature)
        for i in six.moves.range(feature.shape[0]):
            transformed_feature[i] = cls.value_to_quantile(
                feature[i], parameters.quantiles
            )
        feature = transformed_feature
    elif parameters.feature_type == identify_types.ENUM:
        possible_values = parameters.possible_values
        mapping = {}
        for i, possible_value in enumerate(possible_values):
            mapping[possible_value] = i
        output_feature = np.zeros((len(feature), len(possible_values)))
        for i, val in enumerate(feature):
            if abs(val - MISSING_VALUE) < 1e-2:
                # This check is required by the PT preprocessing but not C2
                continue
            output_feature[i][mapping[val]] = 1.0
        return output_feature
    elif parameters.feature_type == identify_types.CONTINUOUS_ACTION:
        min_value = parameters.min_value
        max_value = parameters.max_value
        feature = (
            (feature - min_value) * ((1 - 1e-6) * 2 / (max_value - min_value))
            - 1
            + 1e-6
        )
    else:
        feature = feature - parameters.mean
        feature /= parameters.stddev
        feature = np.clip(feature, MIN_FEATURE_VALUE, MAX_FEATURE_VALUE)
    feature *= is_not_empty
    return feature
def genXy_given_X_norm_beta(seed, data, n, p1, pnull, base_prob=.25, beta_sd=1):
    '''X is resampled (rows and columns) from the given binary data matrix.
    Y is binary, with beta's coefficients drawn from a normal.'''
    if seed is not None:
        npran.seed(seed)
    p = p1 + pnull
    h, w = data.shape
    rows = npran.choice(h, n)
    X = data[rows, :][:, npran.choice(w, p)]
    X_1 = X[:, :p1]
    X_null = X[:, p1:]
    beta = npran.randn(p1) * beta_sd
    if p1 > 0:
        eta = cutoff(np.dot(X_1, beta) + logit(base_prob))
        y = npran.binomial(1, invlogit(eta), n)
    else:
        y = npran.binomial(1, base_prob, n)
    return X, y
def nbinom_lower_trunc_solver(ab):
    """Given abundance data, solve for MLE of negative binomial (lower-truncated at 1)
    parameters n and p"""
    ab = check_for_support(ab, lower=1)
    mu = np.mean(ab)
    var = np.var(ab, ddof=1)
    p0 = 1 - mu / var
    if p0 < 0:
        p0 = 10 ** -5
    elif p0 > 1:
        p0 = 1 - 10 ** -5
    logit_p0 = logit(p0)
    log_n0 = log(mu * (1 - p0) / p0)

    def negbin_func(x):
        return -nbinom_lower_trunc_ll(ab, exp(x[0]), expit(x[1]))

    log_n, logit_p = optimize.fmin(negbin_func, x0=[log_n0, logit_p0])
    return exp(log_n), expit(logit_p)
def model3(gene_name, abkt, y_g, num_random_restarts, minrr):
    '''
    optimization with 1 pg, 2 theta
    :param abkt:
    :param y_g:
    :param num_random_restarts:
    :param minrr:
    :return: min object with lowest negative log-likelihood
    '''
    theta_lower0, theta_upper0, p_lower0, p_upper0, std_lower0, std_upper0, \
        theta_lower1, theta_upper1, p_lower1, p_upper1, std_lower1, std_upper1 = \
        get_rr_range_grp(y_g, group_info)
    real_params_g_rtimes = column_stack(
        (uniform(theta_lower0, theta_upper0, num_random_restarts),
         uniform(theta_lower1, theta_upper1, num_random_restarts),
         log(uniform(min(std_lower0, std_lower1), max(std_upper0, std_upper1),
                     num_random_restarts)),
         logit(uniform(min(p_lower0, p_lower1), max(p_upper0, p_upper1),
                       num_random_restarts))))
    arg_min_x = []
    val_min_x = []
    for i in range(num_random_restarts):
        log_fh.log('tasc free theta optimization #' + str(i) + ' for gene ' + gene_name)
        real_params_g = real_params_g_rtimes[i, :]
        optim_result_obj = minimize(
            likelihood.neg_log_sum_marginal_likelihood_free_theta,
            x0=real_params_g,
            args=(abkt, y_g, group_info),
            method='L-BFGS-B')
        if optim_result_obj.success and (not np.isnan(optim_result_obj.fun)) \
                and (optim_result_obj.fun != 0):
            arg_min_x.append(optim_result_obj)
            val_min_x.append(optim_result_obj.fun)
        if len(arg_min_x) >= minrr:
            break
    if len(arg_min_x) == 0:
        return None
    else:
        return arg_min_x[np.argmin(val_min_x)]
def sampling(self, samples, sigmoids, epsilon=1e-8, shift_percent=95.0, rank=None):
    # astype(float) replaces the removed np.float alias; behavior is unchanged
    sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)

    # Update upper bound
    D_tilde = logit(sigmoids)
    self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))

    # Compute probability
    D_delta = D_tilde - self.D_tilde_M
    F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
    if shift_percent is not None:
        gamma = np.percentile(F, shift_percent)
        # print("gamma", gamma)
        F = F - gamma
    P = np.squeeze(logistic(F))

    # Filter out samples
    # accept = np.random.rand(len(D_delta)) < P
    # good_samples = samples[accept]
    # print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(len(D_delta), np.sum(accept), np.sum(accept)/len(D_delta)))
    if rank is not None:
        order = np.argsort(P)[::-1]
        accept = order[:int(rank * len(D_delta))]
        good_samples = samples[accept, :]
        print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
            len(D_delta), np.size(accept, 0), np.size(accept, 0) / len(D_delta)))
    else:
        accept = np.random.rand(len(D_delta)) < P
        good_samples = samples[accept]
        print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
            len(D_delta), np.sum(accept), np.sum(accept) / len(D_delta)))
    return good_samples
def pbo_core_calc(Cs, Ms, Ms_values, Ms_index, metric_func, verbose=False):
    # make sure chunks are concatenated in their original order
    order = [x for x, _ in Cs]
    sort_ind = np.argsort(order)
    Cs_values = np.array([v for _, v in Cs])

    if verbose:
        print("Cs index = {}, ".format(order), end="")
    J_x = np.concatenate(Cs_values[sort_ind, :])

    # find Cs_bar
    Cs_bar_index = list(sorted(Ms_index - set(order)))
    if verbose:
        print("Cs_bar_index = {}".format(Cs_bar_index))
    J_bar_x = np.concatenate(Ms_values[Cs_bar_index, :])

    R_x = metric_func(J_x)
    R_bar_x = metric_func(J_bar_x)

    R_rank_x = ss.rankdata(R_x)
    R_bar_rank_x = ss.rankdata(R_bar_x)

    rn_x = np.argmax(R_rank_x)
    rn_bar_x = R_bar_rank_x[rn_x]
    w_bar_x = float(rn_bar_x) / len(R_bar_rank_x)
    logit_x = spec.logit(w_bar_x)

    core = PBOCore(
        J_x, J_bar_x, R_x, R_bar_x, R_rank_x, R_bar_rank_x,
        rn_x, rn_bar_x, w_bar_x, logit_x,
    )
    return core
def get_example(load_example, eval_tracker, model, get_offsets):
    """Generates individual training examples.

    Args:
      load_example: callable returning a tuple of image and label ndarrays
          as well as the seed coordinate and volume name of the example
      eval_tracker: EvalTracker object
      model: FFNModel object
      get_offsets: iterable of (x, y, z) offsets to investigate within the
          training patch

    Yields:
      tuple of:
        seed array, shape [1, z, y, x, 1]
        image array, shape [1, z, y, x, 1]
        label array, shape [1, z, y, x, 1]
    """
    seed_shape = train_canvas_size(model).tolist()[::-1]

    while True:
        full_patches, full_labels, loss_weights, coord, volname = load_example()
        # Always start with a clean seed.
        seed = logit(mask.make_seed(seed_shape, 1, pad=FLAGS.seed_pad))

        for off in get_offsets(model, seed):
            predicted = mask.crop_and_pad(seed, off, model.input_seed_size[::-1])
            patches = mask.crop_and_pad(full_patches, off, model.input_image_size[::-1])
            labels = mask.crop_and_pad(full_labels, off, model.pred_mask_size[::-1])
            weights = mask.crop_and_pad(loss_weights, off, model.pred_mask_size[::-1])

            # Necessary, since the caller is going to update the array and these
            # changes need to be visible in the following iterations.
            assert predicted.base is seed
            yield predicted, patches, labels, weights

        eval_tracker.add_patch(full_labels, seed, loss_weights, coord, volname,
                               full_patches)
def addContextToChromosomeTable(self, chromosome, chromMatrix, motifMatrix):
    chromMatrix = sc.logit(chromMatrix)
    #print chromMatrix
    gamma, realBeta = self.gamma, self.beta
    n = self.n
    for char in range(n):
        relevantIndex = self.motifIndexByChar(char)
        charMotifMatrix = np.delete(motifMatrix, np.s_[relevantIndex], 1)
        betaBychar = np.delete(realBeta, np.s_[relevantIndex])
        gammaByChar = np.delete(gamma, np.s_[relevantIndex])
        coeffByChar = gammaByChar * betaBychar
        additionOfcontextBychar = np.dot(coeffByChar, charMotifMatrix.T)
        chromMatrix[:, char] += additionOfcontextBychar
    chromMatrix = sc.expit(chromMatrix)
    # Normalizing the matrix back to being a stochastic matrix
    chromMatrix = preprocessing.normalize(chromMatrix, norm='l1', axis=1)
    #print np.sum(chromMatrix,axis=1)
    return chromMatrix
def mean_coh_logit(coh, weights=None, axis=None):
    # logit transform of R, ensuring to nan out any infinities
    z = logit(np.sqrt(coh))
    z[np.isinf(z)] = np.nan

    if axis is None:
        z = np.nanmean(z)
    else:
        # this is needed since nanmean doesn't accept a tuple as the axis argument,
        # so we need to loop over each axis
        if not isinstance(axis, collections.Iterable):
            axis = (axis, )

        # perform the mean over each desired axis
        zm = np.ma.array(z, mask=np.isnan(z))
        zm = np.ma.average(zm, axis=axis, weights=weights)
        z = zm.filled()

    # inverse logit transform, returning to R^2
    return expit(z)**2
def predict(st, norm, bounds):
    rew = np.log(1 + (st[:, -1:]))
    a_x = bounds[0]
    b_x = bounds[2]
    eps = 1e-5
    rew = np.clip(rew, a_x + eps, b_x - eps)
    rew = logit((rew - a_x) / (b_x - a_x))
    st[:, -1:] = rew
    State = np.zeros((1, 61))
    State[0, :] = np.hstack((st[0, 0], st[:, [1, 2, 3, -1]].ravel()))
    X = (State - norm[0]) / norm[1]
    return np.round(policy_network(X)[0, :], 4)
def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
    self.inodes = inputnodes    # number of input-layer nodes
    self.hnodes = hiddennodes   # hidden-layer nodes
    self.onodes = outputnodes   # output-layer nodes

    self.lr = learningrate      # learning rate, i.e. the gradient-descent step size

    # weight matrix between the input and hidden layers
    self.wih = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
    # weight matrix between the hidden and output layers
    self.who = numpy.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

    # activation function (sigmoid)
    self.activation_function = lambda x: sigmoid(x)
    # inverse activation function for the backward pass
    self.inverse_activation_function = lambda x: logit(x)
    pass
def run_model(self, model_number, x, calc_gt=False, n_exp=1):
    mean1 = [3, 3, 3]
    cov1 = np.eye(3) * 0.75
    mean2 = [-2, -2, -2]
    cov2 = np.eye(3) * 0.75
    mean3 = [1, 1, 1]
    cov3 = np.eye(3) * 1.0
    prob = multivariate_normal.pdf(x, mean=mean1, cov=cov1) \
        + multivariate_normal.pdf(x, mean=mean2, cov=cov2) \
        + multivariate_normal.pdf(x, mean=mean3, cov=cov3)
    prob *= 3.0
    if calc_gt:
        return logit(prob)
    if n_exp > 1:
        return np.random.binomial(n=n_exp, p=prob)
    clicked = int(flip(prob))
    return clicked
def get_policy_fn(request, ffn_model):
    """Returns a policy class based on the InferenceRequest proto."""
    if request.movement_policy_name:
        movement_policy_class = globals().get(request.movement_policy_name, None)
        if movement_policy_class is None:
            movement_policy_class = import_symbol(request.movement_policy_name)
    else:  # Default / fallback.
        movement_policy_class = FaceMaxMovementPolicy

    if request.movement_policy_args:
        kwargs = json.loads(request.movement_policy_args)
    else:
        kwargs = {}
    if 'deltas' not in kwargs:
        kwargs['deltas'] = ffn_model.deltas[::-1]
    if 'score_threshold' not in kwargs:
        kwargs['score_threshold'] = logit(request.inference_options.move_threshold)

    return lambda canvas: movement_policy_class(canvas, **kwargs)
def setUp(self):
    self.op_type = "sigmoid_cross_entropy_with_logits"
    self.python_api = test_fluid_sigmoid
    batch_size = 64
    num_classes = 20
    self.inputs = {
        'X': logit(
            np.random.uniform(0, 1, (batch_size, num_classes))
            .astype("float64")),
        'Label': np.random.randint(0, 2, (batch_size, num_classes))
        .astype("float64")
    }

    # Fw Pass is implemented as elementwise sigmoid followed by
    # elementwise logistic loss
    # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X))
    sigmoid_X = expit(self.inputs['X'])
    term1 = self.inputs['Label'] * np.log(sigmoid_X)
    term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
    self.outputs = {'Out': -term1 - term2}
def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
    self.inodes = inputnodes
    self.hnodes = hiddennodes
    self.onodes = outputnodes
    self.lr = learningrate

    self.wih = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
    self.who = np.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))
    # self.who = loadedho
    # self.wih = loadedih

    self.activation_function = lambda x: sks.expit(x)
    self.inverse_activation_function = lambda x: sks.logit(x)
    pass
def estimate_student(normalized_ranks):
    """This fits a PyMC3 model. All the model does is fit the parameters of a
    t distribution, since it is clear (in the author's opinion) that the
    logit-transformed ranks are very well described by a t distribution.
    The logit ranks are thus the observations, and the model finds the ranges
    of parameters consistent with those obs."""
    with pm.Model() as model:
        nu = pm.HalfNormal('nu', 50)            # very broad priors
        mu = pm.Normal('mu', mu=0, sigma=50)    # very broad priors
        sigma = pm.HalfNormal('sig', 50)        # very broad priors
        lik = pm.StudentT('t', nu=nu, mu=mu, sigma=sigma,
                          observed=logit(normalized_ranks))
        trace = pm.sample(1000, tune=1000)
    return trace, model
def fit_treatment_model(df, term_counts):
    indices = df.post_index.values
    tc = term_counts[indices, :]
    tc = tc.toarray()
    f_z = logit(df.treatment_probability.values)
    print(f_z.shape, tc.shape)
    features = np.column_stack((f_z, tc))
    labels = df.treatment.values
    true_model = LogisticRegression(solver='liblinear')
    true_model.fit(features, labels)
    coeffs = np.array(true_model.coef_).flatten()[1:]
    print(coeffs.mean(), coeffs.std())

    np.random.shuffle(tc)
    features = np.column_stack((f_z, tc))
    permuted = LogisticRegression(solver='liblinear')
    permuted.fit(features, labels)
    permuted_coeffs = np.array(permuted.coef_).flatten()[1:]
    print(permuted_coeffs.mean(), permuted_coeffs.std())
def add_annotations(df):
    fe_cols = [
        col for col in df.columns
        if 'Fraction edited' in col and 'logit' not in col
    ]
    mean_edit_fqs = df[fe_cols].apply(np.nanmean, axis='columns')
    df['Obs edit frequency'] = mean_edit_fqs

    from scipy.special import logit, expit
    mean_logit_edit_fq = logit(
        np.mean(df[df['TrainTest_GBTR'] == 'train']['Obs edit frequency']))

    # Need to choose logit std to convert data
    # std_logit_edit_fq = 1.1
    std_logit_edit_fq = 2

    df['Pred edit frequency'] = expit((df['y_pred_GBTR'] * std_logit_edit_fq) +
                                      mean_logit_edit_fq)
    return df
def max_pred_offsets(model, seed):
    """Generates offsets with the policy used for inference."""
    # Always start at the center.
    queue = deque([(0, 0, 0)])
    done = set()

    train_image_radius = train_image_size(model) // 2
    input_image_radius = np.array(model.input_image_size) // 2

    while queue:
        offset = queue.popleft()

        # Drop any offsets that would take us beyond the image fragment we
        # loaded for training.
        if np.any(np.abs(np.array(offset)) + input_image_radius >
                  train_image_radius):
            continue

        # Ignore locations that were visited previously.
        quantized_offset = (
            offset[0] // max(model.deltas[0], 1),
            offset[1] // max(model.deltas[1], 1),
            offset[2] // max(model.deltas[2], 1))

        if quantized_offset in done:
            continue

        done.add(quantized_offset)

        yield offset

        # Look for new offsets within the updated seed.
        curr_seed = mask.crop_and_pad(seed, offset, model.pred_mask_size[::-1])
        todos = sorted(
            movement.get_scored_move_offsets(
                model.deltas[::-1],
                curr_seed[0, ..., 0],
                threshold=logit(FLAGS.threshold)), reverse=True)
        queue.extend((x[2] + offset[0], x[1] + offset[1], x[0] + offset[2])
                     for _, x in todos)
def preprocess_feature(self, feature, parameters):
    is_not_empty = 1 - np.isclose(feature, normalization.MISSING_VALUE)
    if parameters.feature_type == identify_types.BINARY:
        # Binary features are always 1 unless they are 0
        return ((feature != 0) * is_not_empty).astype(np.float32)
    if parameters.boxcox_lambda is not None:
        feature = stats.boxcox(
            np.maximum(feature + parameters.boxcox_shift,
                       normalization.BOX_COX_MARGIN),
            parameters.boxcox_lambda,
        )
    # No *= to ensure consistent out-of-place operation.
    if parameters.feature_type == identify_types.PROBABILITY:
        feature = special.logit(np.clip(feature, 1e-6, 1.0))
    elif parameters.feature_type == identify_types.QUANTILE:
        transformed_feature = np.zeros_like(feature)
        for i in six.moves.range(feature.shape[0]):
            transformed_feature[i] = self._value_to_quantile(
                feature[i], parameters.quantiles)
        feature = transformed_feature
    elif parameters.feature_type == identify_types.ENUM:
        possible_values = parameters.possible_values
        mapping = {}
        for i, possible_value in enumerate(possible_values):
            mapping[possible_value] = i
        output_feature = np.zeros((len(feature), len(possible_values)))
        for i, val in enumerate(feature):
            if abs(val - MISSING_VALUE) < 1e-2:
                continue
            output_feature[i][mapping[val]] = 1.0
        return output_feature
    elif parameters.feature_type == identify_types.CONTINUOUS_ACTION:
        min_value = parameters.min_value
        max_value = parameters.max_value
        feature = ((feature - min_value) *
                   ((1 - 1e-6) * 2 / (max_value - min_value)) - 1 + 1e-6)
    else:
        feature = feature - parameters.mean
        feature /= parameters.stddev
    feature *= is_not_empty
    return feature
def load_trn_data_newTF(filedir, params):
    # loading data. Files in the archive are 'params' and 'stats'
    data = np.load(filedir)  # samples_dir = results/samples/

    # 7 parameters: Na+ current, CaT current (T-type Calcium, low-threshold), CaS current,
    # A current (transient potassium current), KCa current, Kd current,
    # H current (hyperpolarization current)
    sample_params = data["params"]  # there are 7 parameters in the network
    # there are 15 summary_stats in 'PrinzStats' (see the params variable above).
    # These 15 stats can be seen in summstats.py. They are: cycle_period, burst_length*3,
    # end_to_start*2, start_to_end*2, duty_cycle*3, phase_gap*2, phase*2
    sample_stats = data["stats"]

    prior = netio.create_prior(params, log=True)
    lower = np.asarray(prior.lower)
    upper = np.asarray(prior.upper)
    inputscale = lambda x: (x - lower) / (upper - lower)
    bijection = lambda x: logit(inputscale(x))  # logit function with scaled input
    sample_params = bijection(sample_params)

    # normalize data
    params_mean = np.mean(sample_params, axis=0)
    params_std = np.std(sample_params, axis=0)
    sample_params = (sample_params - params_mean) / params_std

    # extract number of training samples
    sample_params_pilot = sample_params[:params.pilot_samples]
    sample_stats_pilot = sample_stats[:params.pilot_samples]
    sample_params_train = sample_params[params.pilot_samples:params.pilot_samples + params.n_train]
    sample_stats_train = sample_stats[params.pilot_samples:params.pilot_samples + params.n_train]

    pilot_data = (sample_params_pilot, sample_stats_pilot)
    # taking log of conductances to get the training data
    trn_data = [sample_params_train, sample_stats_train]

    return pilot_data, trn_data, params_mean, params_std
def conwayMaxwellBinomialPriorKernel(com_params, a, b, c, m):
    """
    For calculating the kernel of the conjugate prior of the Conway-Maxwell binomial distribution.
    Arguments:  com_params, p, nu, the parameters of the Conway-Maxwell binomial distribution
                a, hyperparameter corresponding to the first sufficient stat,
                b, hyperparameter corresponding to the second sufficient stat,
                c, hyperparameter corresponding to the pseudocount
                m, int, the number of bernoulli variables, considered fixed and known
    Returns:    The value of the kernel of the conjugate prior
    """
    conjugateProprietyTest(a, b, c, m)
    # propriety_dist = norm(0, 1)
    p, nu = com_params
    if (p == 1) | (p == 0):
        return 0
    test_dist = ConwayMaxwellBinomial(p, nu, m)
    natural_params = np.array([logit(p), nu])
    pseudodata_part = np.dot(natural_params, np.array([a, b]))
    partition_part = np.log(test_dist.normaliser) - (nu * getLogFactorial(m)) - (m * np.log(1 - p))
    # propriety_part = norm.pdf(logit(p)) * norm.pdf(nu - 1)
    return np.exp(pseudodata_part - c * partition_part)
def scores_vs_llrs(self):
    """
    Returns score and llr points, convenient for plotting purposes.

    A score vector and an llr vector are returned, each with 2*nbins
    elements, where nbins is the number of bins in this PAV solution. The
    scores vector alternates the minimum and maximum score in each bin.
    There is only one llr value associated with each bin, but those values
    are duplicated, to correspond to the scores.

    The resulting plot of scores vs llrs is steppy, with exactly horizontal
    and vertical line segments. The initial and final llr bins may be -inf
    and +inf.
    """
    p = self.p
    LLRs = np.empty_like(self.scores)
    llr = LLRs[:, 0]
    llr[:] = logit(p)
    llr -= np.log(self.T / self.N)
    LLRs[:, 1] = llr
    return self.scores.ravel(), LLRs.ravel()
def ensemble_submissions(submission_fnames, weights, mus=None, sigmas=None):
    assert len(submission_fnames) > 0, "Must provide at least one submission to ensemble."
    # Check that we have a weight for each submission
    assert len(submission_fnames) == len(weights), "Number of submissions and weights must match."
    # Get the id column of the submissions
    ids = pd.read_csv(submission_fnames[0])['id'].values
    # Read in all the submission values
    submissions = [pd.read_csv(sub_fname)[LABEL_NAMES].values for sub_fname in submission_fnames]
    # Combine them based on their respective weights
    combined = 0
    for j, sub in enumerate(submissions):
        if np.all((0 <= sub) & (sub <= 1.)):
            logging.info("Applying logit to submission %s" % submission_fnames[j])
            sub = logit(sub)
        if mus is not None and sigmas is not None:
            logging.info("Standardizing with mean %s and std %s" % (mus, sigmas))
            sub = sub - mus[np.newaxis]
            sub = sub / (sigmas[np.newaxis] + 1e-9)
        combined = combined + weights[j][np.newaxis] * sub
    # combined = expit(combined)
    return ids, combined
def cross_entropy(tar, non, Ptar=0.5, deriv=False):
    baseline = -Ptar * np.log(Ptar) - (1 - Ptar) * np.log(1 - Ptar)
    logitprior = logit(Ptar)
    if not deriv:
        t = np.mean(softplus(-tar - logitprior))
        n = np.mean(softplus(non + logitprior))
        return (Ptar * t + (1 - Ptar) * n) / baseline

    t, back1 = softplus(-tar - logitprior, deriv=True)
    n, back2 = softplus(non + logitprior, deriv=True)
    k1 = Ptar / (len(t) * baseline)
    k2 = (1 - Ptar) / (len(n) * baseline)
    y = k1 * t.sum() + k2 * n.sum()

    def back(dy):
        dtar = back1(-dy * k1)
        dnon = back2(dy * k2)
        return dtar, dnon

    return y, back
def logit_transform(a, t=5):
    """Apply logit function, setting a max threshold instead of +/- inf

    Args:
        a (np.array): array to transform
        t (float): max threshold for +/- inf values

    Returns:
        np.array of logit values
    """
    if type(a) is not np.ndarray:
        a = np.array(a)
    y = logit(a)
    # cap inf relative to max and min values - not implemented
    #ub = np.max(y[(y!=inf)&(y!=-inf)])
    #lb = np.min(y[(y!=inf)&(y!=-inf)])

    # replace inf values with threshold
    y[y == inf] = t
    y[y == -inf] = -t
    return y
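# Hedged usage sketch (added for illustration): endpoints 0 and 1 map to -t and +t instead
# of -inf / +inf. Assumes numpy, scipy.special.logit and numpy's inf are imported as the
# function expects.
import numpy as np

vals = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
print(logit_transform(vals, t=5))   # approximately [-5., -1.0986, 0., 1.0986, 5.]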
def rand_eval(all_coords, model, whole_images, whole_labels, criterion):
    global index, max_index
    if index < max_index - 10:
        # draw 10 scalar indices (size=10 rather than (1, 10), so index_list[i] is a scalar)
        index_list = np.random.randint(low=index + 1, high=max_index, size=10)
        eval_loss = 0
        for i in range(10):
            patch, labels = get_one_input(all_coords[index_list[i]],
                                          np.array(FLAGS.fov_size),
                                          whole_images, whole_labels)
            patch = patch.cuda()
            labels = labels.cuda()
            seed = logit(utils.initial_seed(FLAGS.fov_size))
            seed = seed.cuda()
            pred_seed = model(t.cat((patch, seed), 1))
            seed += pred_seed
            eval_loss += criterion(seed, labels)
        eval_loss /= 10
        # Tensor.data is an attribute, not a method
        return eval_loss.data.cpu()
def interp_loop(snr, tdp, threshold, c):
    """
    function to loop over to perform spline interpolation

    :param snr:
    :param tdp:
    :param threshold:
    :param c: fuzzfactor
    """
    # find min value
    min_tdp = np.min(tdp)

    # take logit of the data to avoid negative values
    tmp_tdp = special.logit(tdp - min_tdp + c)

    # interpolate with spline interpolation
    tck = interpolate.splrep(snr, tmp_tdp)

    # new x and y values (np.linspace expects an integer number of samples)
    snr_new = np.linspace(1, 10, 1000)
    tmp_tdp_new = interpolate.splev(snr_new, tck, der=0)

    # return to linear space
    tdp_new = special.expit(tmp_tdp_new) + min_tdp - c

    return snr_new, tdp_new
def _cdf(self, x, mu, sigma):
    r"""
    cumulative probability distribution function

    Parameters
    ----------
    mu : array_like
        mean of the logit of `x`

    sigma : array_like
        standard deviation of the logit of `x`

    Notes
    -----
    """
    sigma = np.atleast_1d(sigma).astype(np.float64)
    mu = np.atleast_1d(mu).astype(np.float64)
    norm = 1.0 / 2.0
    return norm * (1.0 + erf((logit(x) - mu) / (np.sqrt(2.0 * sigma**2))))
def beta_to_m(betas, covgs, k):
    """Transform beta values into m values.

    Inputs -
        betas - pd.Series of beta values
        covgs - pd.Series of covg values
        k     - number of pseudoreads for smoothing

    Returns pd.Series of m values
    """
    b = list(betas)
    c = list(covgs)
    s = []
    for i in range(len(c)):
        m = (c[i] * b[i])
        u = (c[i] - m)
        s.append((m + k) / ((m + k) + (u + k)))
    out = logit(s)
    return pd.Series(out)
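# Hedged usage sketch (not from the original source): the k pseudoreads keep the logit
# finite even when a site is fully methylated or unmethylated. Assumes pandas, numpy and
# scipy.special.logit are imported at module level; the toy Series below are hypothetical.
import pandas as pd

toy_betas = pd.Series([0.0, 0.5, 1.0])
toy_covgs = pd.Series([10, 10, 10])
print(beta_to_m(toy_betas, toy_covgs, k=1))   # finite M values at beta = 0 and beta = 1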
def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.0):
    """Synthetic data with difficult nuisance components and an easy treatment effect

    From Setup A in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous
    Treatment Effects'

    Args:
        n (int, optional): number of observations
        p (int, optional): number of covariates (>=5)
        sigma (float): standard deviation of the error term
        adj (float): adjustment term for the distribution of propensity, e. Higher values
            shift the distribution to 0.

    Returns:
        (tuple): Synthetically generated samples with the following outputs:
            - y ((n,)-array): outcome variable.
            - X ((n,p)-ndarray): independent variables.
            - w ((n,)-array): treatment flag with value 0 or 1.
            - tau ((n,)-array): individual treatment effect.
            - b ((n,)-array): expected outcome.
            - e ((n,)-array): propensity of receiving treatment.
    """
    X = np.random.uniform(size=n * p).reshape((n, -1))
    b = (
        np.sin(np.pi * X[:, 0] * X[:, 1])
        + 2 * (X[:, 2] - 0.5) ** 2
        + X[:, 3]
        + 0.5 * X[:, 4]
    )
    eta = 0.1
    e = np.maximum(
        np.repeat(eta, n),
        np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1 - eta, n)),
    )
    e = expit(logit(e) - adj)
    tau = (X[:, 0] + X[:, 1]) / 2
    w = np.random.binomial(1, e, size=n)
    y = b + (w - 0.5) * tau + sigma * np.random.normal(size=n)

    return y, X, w, tau, b, e
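# Hedged usage sketch (added, not in the original): drawing one synthetic dataset from the
# Nie & Wager Setup A generator above. Assumes numpy and scipy.special.expit/logit are
# imported at module level.
import numpy as np

np.random.seed(42)
y, X, w, tau, b, e = simulate_nuisance_and_easy_treatment(n=500, p=5)
print(X.shape, w.mean(), tau.mean())   # (500, 5), treated fraction, mean treatment effect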
def test_logistic_lmm():
    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV_l ~ IV1+ (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)

    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs["Estimate"], estimates, atol=0.001)

    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)

    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)

    assert np.allclose(model.coefs.loc[:, "Estimate"], model.fixef.mean(), atol=0.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True), model.data.fits)
    assert np.allclose(
        model.predict(model.data, use_rfx=True, pred_type="link"),
        logit(model.data.fits),
    )

    # Test RFX only
    model = Lmer("DV_l ~ 0 + (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    model = Lmer("DV_l ~ 0 + (IV1|Group) + (1|IV3)", data=df, family="binomial")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 2)
    assert model.fixef[1].shape == (3, 2)
def main_dtclassifier(datastruct, min_samples_leaf=0.1, experiment_id=None):
    print("Starting experiment Decision Trees")
    mlflow.set_experiment("Santander Kaggle")
    df, train_x, train_y, test_x, test_y = datastruct
    metrics = {}

    with mlflow.start_run():
        print("Training model")
        start_timer = time.time()

        # train 200 small models
        models = []
        for var in train_x.columns:
            clf = DecisionTreeClassifier(min_samples_leaf=min_samples_leaf, random_state=0)
            clf.fit(train_x[var].values.reshape(-1, 1), train_y)
            models.append(clf)

        stop_timer = time.time()
        print("Model trained")

        predictions = [
            m.predict_proba(x.reshape(-1, 1))[:, 1]
            for (m, x) in zip(models, test_x.values.T)
        ]
        pred_y = np.array(predictions).T.mean(axis=1)
        pred_y_logit = logit(np.array(predictions).T).sum(axis=1)

        metrics['roc_auc'] = roc_auc_score(test_y, pred_y)
        metrics['roc_auc_logit'] = roc_auc_score(test_y, pred_y_logit)
        metrics['elapsed_time'] = (stop_timer - start_timer)

        # mlflow logging
        mlflow.log_param('model_type', "200 Decision Trees")
        mlflow.log_param('features', train_x.columns)
        mlflow.log_param('sample_size', df.shape)
        mlflow.log_param('min_samples_leaf', min_samples_leaf)
        mlflow.log_metrics(metrics)
    print("Completed")