Example #1
    def predict_proba(self, X):
        probs = X
        calibrated_probs = numpy.zeros(len(probs))
        ind_1, ind_2 = self._compute_inds(len(probs))

        probs_1 = probs[ind_1]
        probs_2 = probs[ind_2]
        
        if self.logistic:
            probs_1 = numpy.clip(probs_1, 0.001, 0.999)
            probs_2 = numpy.clip(probs_2, 0.001, 0.999)
            probs_1 = probs_1[:, numpy.newaxis]
            probs_2 = probs_2[:, numpy.newaxis]
            calibrated_probs[ind_1] = self.calibrators[1].predict_proba(logit(probs_1))[:, 1]
            calibrated_probs[ind_2] = self.calibrators[0].predict_proba(logit(probs_2))[:, 1]
        else:
            calibrated_probs[ind_1] = self.calibrators[1].transform(probs_1)
            calibrated_probs[ind_2] = self.calibrators[0].transform(probs_2)
            numpy.random.seed(self.random_state)
            calibrated_probs = calibrated_probs + numpy.random.normal(size=len(calibrated_probs)) * 0.001
        return calibrated_probs
Example #2
    def test_logistic_cross(self):
        mle = maxlike.Logistic()
        mle.model = Sum(2)
        mle.model.add(X(), 0, 0)
        mle.model.add(-X(), 0, 1)
        mle.model.add(-Scalar(), 1, [])
        mle.add_constraint([0], Linear([1]))

        # fetch and prepare data
        df = pd.read_csv(data_folder + "data_proba.csv", index_col=[0, 1])
        df['w'] = df['-1'] + df['1']
        kwargs, _ = prepare_dataframe(df, 'w', '1', {'X': np.sum})
        N = kwargs['N']
        S = kwargs['X']
        u = -logit(S.sum(0) / N.sum(0))
        v = logit(S.sum(1) / N.sum(1))
        a = (u + v) / 2
        h = ((u - v) / 2).mean()

        mle.add_param(a)
        mle.add_param(h)

        tol = 1e-8
        mle.fit(**kwargs, verbose=self.verbose)
        
        a, h = mle.params
        s_a, s_h = mle.std_error()

        df = pd.read_csv(data_folder + "test_logistic_cross.csv")

        self.assertAlmostEqual(h,   0.3059389232047434, delta=tol)
        self.assertAlmostEqual(s_h, 0.1053509333552778, delta=tol)
        np.testing.assert_allclose(a, df['a'], atol=tol)
        np.testing.assert_allclose(s_a, df['s_a'], atol=tol)
def main():
    get_s = block_get_s
    #get_s = mixture_get_s

    objective = partial(generic_objective, get_s)
    neg_ll = partial(generic_neg_ll, get_s)

    edge_rates = [1, 30, 1, 30, 30]
    kappa = 0.2
    theta = 0.5
    alpha = 2.0
    #edge_rates = [1, 2, 3, 1, 10]
    #alpha = 1.0
    #kappa = 3.0
    #theta = 0.5
    X0 = np.array(edge_rates + [kappa, logit(theta), alpha], dtype=float)
    print('%.20g' % neg_ll(X0))

    desired_ll = 85.030942031997312824
    #edge_rates = [1, 2, 3, 1, 10]
    #kappa = 3
    X0 = np.array(edge_rates + [kappa, logit(theta), alpha], dtype=float)
    a = 1e-6
    bounds = [(a, None) for i in X0[:5]] + [(a, None), (a, 1), (a, None)]
    result = optimize.minimize(
            objective, X0, method='L-BFGS-B', jac=True, bounds=bounds)
    #result = optimize.minimize(
            #neg_ll, X0, method='L-BFGS-B', bounds=bounds)
    print(result)
def logser_solver(ab):
    """Given abundance data, solve for MLE of logseries parameter p."""
    ab = check_for_support(ab, lower=1)
    BOUNDS = [0, 1]
    DIST_FROM_BOUND = 10 ** -15
    y = lambda x: 1 / log(1 / (1 - expit(x))) * expit(x) / (1 - expit(x)) - sum(ab) / len(ab)
    x = bisect(y, logit(BOUNDS[0] + DIST_FROM_BOUND), logit(BOUNDS[1] - DIST_FROM_BOUND), xtol=1.490116e-08)
    return expit(x)
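A self-contained sketch of the same idea, with made-up abundance data: bisect on the logit scale so the search for the log-series parameter p stays strictly inside (0, 1). The data and bracket values below are illustrative, not from the original code.

import numpy as np
from math import log
from scipy.special import logit, expit
from scipy.optimize import bisect

ab = np.array([1, 1, 2, 3, 5, 8, 13, 21])  # hypothetical abundance counts
target_mean = ab.mean()

def mean_gap(x):
    p = expit(x)  # map the unconstrained variable back into (0, 1)
    return p / ((1 - p) * log(1 / (1 - p))) - target_mean

x_hat = bisect(mean_gap, logit(1e-12), logit(1 - 1e-12))
print("estimated p:", expit(x_hat))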
Example #5
def calibrate_probs(labels, weights, probs, logistic=False, random_state=11, threshold=0., return_calibrator=False, symmetrize=False):
    """
    Calibrate classifier output to probabilities using a 2-fold scheme, so that all data are calibrated out-of-fold
    
    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels 
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, to set labels 0/1 
    :param logistic: bool, use logistic or isotonic regression
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    
    :return: calibrated probabilities
    """
    labels = (labels > threshold) * 1
    ind = numpy.arange(len(probs))
    ind_1, ind_2 = train_test_split(ind, random_state=random_state, train_size=0.5)
    
    calibrator = LogisticRegression(C=100) if logistic else IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
    est_calib_1, est_calib_2 = clone(calibrator), clone(calibrator)
    probs_1 = probs[ind_1]
    probs_2 = probs[ind_2]
    
    if logistic:
        probs_1 = numpy.clip(probs_1, 0.001, 0.999)
        probs_2 = numpy.clip(probs_2, 0.001, 0.999)
        probs_1 = logit(probs_1)[:, numpy.newaxis]
        probs_2 = logit(probs_2)[:, numpy.newaxis]
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1-probs_1], 
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0])
            est_calib_2.fit(numpy.r_[probs_2, 1-probs_2], 
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0])
        else:
            est_calib_1.fit(probs_1, labels[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2])
    else:
        if symmetrize:
            est_calib_1.fit(numpy.r_[probs_1, 1-probs_1], 
                            numpy.r_[labels[ind_1] > 0, labels[ind_1] <= 0],
                            numpy.r_[weights[ind_1], weights[ind_1]])
            est_calib_2.fit(numpy.r_[probs_2, 1-probs_2], 
                            numpy.r_[labels[ind_2] > 0, labels[ind_2] <= 0],
                            numpy.r_[weights[ind_2], weights[ind_2]])
        else:
            est_calib_1.fit(probs_1, labels[ind_1], weights[ind_1])
            est_calib_2.fit(probs_2, labels[ind_2], weights[ind_2])
        
    calibrated_probs = numpy.zeros(len(probs))
    if logistic:
        calibrated_probs[ind_1] = est_calib_2.predict_proba(probs_1)[:, 1]
        calibrated_probs[ind_2] = est_calib_1.predict_proba(probs_2)[:, 1]
    else:
        calibrated_probs[ind_1] = est_calib_2.transform(probs_1)
        calibrated_probs[ind_2] = est_calib_1.transform(probs_2)
    if return_calibrator:
        return calibrated_probs, (est_calib_1, est_calib_2)
    else:
        return calibrated_probs
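The docstring above describes two-fold calibration: each half of the data is scored by a calibrator fitted on the other half. Below is a minimal self-contained sketch of that scheme with isotonic regression and synthetic data; the numbers are illustrative only.

import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(11)
raw = rng.uniform(size=1000)                      # uncalibrated scores
labels = (rng.uniform(size=1000) < raw ** 2) * 1  # deliberately miscalibrated targets

ind_1, ind_2 = train_test_split(np.arange(len(raw)), train_size=0.5, random_state=11)
iso_1 = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip').fit(raw[ind_1], labels[ind_1])
iso_2 = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip').fit(raw[ind_2], labels[ind_2])

calibrated = np.zeros_like(raw)
calibrated[ind_1] = iso_2.transform(raw[ind_1])   # each fold is scored by the other fold's model
calibrated[ind_2] = iso_1.transform(raw[ind_2])
print(calibrated[:5])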
def plot(x, y, logit_scale=False, output_file=None):
    if logit_scale:
        x = logit(x)
        y = logit(y)
    plt.scatter(x, y, alpha=0.05)
    if output_file:
        plt.savefig(output_file)
    else:
        plt.show()
        plt.close()
def test_update_p_3():
    from scipy.special import logit
    from update_params_linear_regression import update_p_3

    N = 4
    p0 = 0.3

    res = update_p_3(p0, N)
    expected_res = np.array([logit(p0), logit(p0), logit(p0), logit(p0)])

    np.testing.assert_array_equal(res, expected_res)
Example #8
def main():
    "Calculates the mean k and mean responses for each participant."
    with open(KSPEED_CURVES_FN, 'rb') as inpf:
        mpl_kcurves = [pickle.load(inpf) for k in range(KMAXCURVE + 1)]
    # Determine interval with maximum difference in mean response
    ini, end = None, None
    dif = 0
    for i in range(NTRIALS - 100):
        j = i + 100
        this_dif = 0
        for trial in range(i, j):
            this_dif += mpl_kcurves[0][trial] - mpl_kcurves[2][trial]
        this_dif /= 100
        if this_dif > dif:
            dif = this_dif
            ini, end = i, j
    print(dif, ini, end)
    if not os.path.exists('mean_k.pickle'):
        samples = get_samples()
        samples = samples.sample(10000)
        mean_k = [get_subject_meank(samples, i) for i in range(N)]
        with open('mean_k.pickle', 'wb') as outf:
            pickle.dump((ini, end), outf)
            pickle.dump(mean_k, outf)
    else:
        with open('mean_k.pickle', 'rb') as inpf:
            ini, end = pickle.load(inpf)
            mean_k = pickle.load(inpf)
    # ini, end = 200, 300
    mean_resp = [np.mean(y[ini:end]) for x, y in bdata]
    mod = sm.OLS(logit(mean_resp), [(1, k) for k in mean_k])
    res = mod.fit()
    print(res.summary())
Example #9
def _pack_acgt(pi):
    a, c, g, t = pi
    ag = a+g  # purines
    ct = c+t  # pyrimidines
    a_div_ag = a / ag
    c_div_ct = c / ct
    return logit([ag, a_div_ag, c_div_ct])
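A round-trip sketch of the packing above: because the purine and pyrimidine totals determine each other, three logits are enough to recover the full ACGT distribution. The numbers and the unpacking code are illustrative assumptions, not part of the original module.

import numpy as np
from scipy.special import expit, logit

pi = np.array([0.3, 0.2, 0.25, 0.25])  # a, c, g, t (sums to 1)
a, c, g, t = pi
packed = logit([a + g, a / (a + g), c / (c + t)])

ag, a_frac, c_frac = expit(packed)
unpacked = np.array([ag * a_frac,               # a
                     (1 - ag) * c_frac,         # c
                     ag * (1 - a_frac),         # g
                     (1 - ag) * (1 - c_frac)])  # t
print(np.allclose(unpacked, pi))  # True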
Example #10
def _graph(instances, use_prob=True):
    """ Builds a directed graph for instances

        instances are quadruplets of the form:
            edu_source, edu_target, probability_of_attachment, relation

        returns a Digraph
    """

    root_id = _get_root(set(e for s, t, _, _ in instances for e in (s, t))).id

    targets = defaultdict(list)
    labels = dict()
    scores = dict()

    for source, target, prob, rel in instances:
        src, tgt = source.id, target.id

        # Ignore all edges directed to the root
        if tgt == root_id:
            continue

        scores[src, tgt] = _cap_score(logit(prob)) if use_prob else prob
        labels[src, tgt] = rel
        targets[src].append(tgt)

    return Digraph(targets,
                   lambda s, t: scores[s, t],
                   lambda s, t: labels[s, t])
def pack_params(nt_distn, kappa, alpha, v):
    # does not include edge rates
    a, c, g, t = nt_distn
    return np.concatenate([
            #logit([a+g, a/(a+g), c/(c+t)]),
            logit([c+g]),
            np.log([kappa, alpha, v])])
  def testGetLogitsAndProbProbabilityMultidimensional(self):
    p = np.array([[0.3, 0.4, 0.3], [0.1, 0.5, 0.4]], dtype=np.float32)

    with self.test_session():
      new_logits, new_p = distribution_util.get_logits_and_prob(
          p=p, multidimensional=True, validate_args=True)

      self.assertAllClose(special.logit(p), new_logits.eval())
      self.assertAllClose(p, new_p.eval())
Example #13
    def inverse_activation_function(self, x):
      if x == 1:
        x = 0.999999
      elif x == 0:
        x = 0.000001 
        
      #print 'logit', x, logit(x)

      return logit(x)
Example #14
    def test_nan(self):
        expected = np.array([np.nan]*4)
        olderr = np.seterr(invalid='ignore')
        try:
            actual = logit(np.array([-3., -2., 2., 3.]))
        finally:
            np.seterr(**olderr)

        assert_equal(expected, actual)
  def testGetLogitsAndProbsProbability(self):
    p = np.array([0.01, 0.2, 0.5, 0.7, .99], dtype=np.float32)

    with self.test_session():
      new_logits, new_p = distribution_util.get_logits_and_probs(
          probs=p, validate_args=True)

      self.assertAllClose(special.logit(p), new_logits.eval())
      self.assertAllClose(p, new_p.eval())
Example #16
def compactspace(scale, n):
    r"""
    Returns points :math:`x` spaced in the open interval
    :math:`(-\infty, \infty)`  by linearly spacing in the compactified
    coordinate :math:`s(x) = 1 / (1 + e^{-x / \alpha})`,
    where :math:`\alpha` is a scale factor.
    """
    logit = logistic(scale=scale).ppf
    compact_xs = np.linspace(0, 1, n + 2)[1:-1]
    return logit(compact_xs)
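An equivalent standalone spelling of the same trick for one concrete case: evenly spaced CDF values in (0, 1) pushed through the logistic quantile function give points covering the whole real line. The parameters here are arbitrary.

import numpy as np
from scipy.stats import logistic

scale, n = 1.0, 5
xs = logistic(scale=scale).ppf(np.linspace(0, 1, n + 2)[1:-1])
print(xs)                              # symmetric about 0
print(logistic(scale=scale).cdf(xs))   # 1/6, 2/6, ..., 5/6 -- equal probability mass between points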
def correlation_curve_ngrams(texts, ngram_orders):
    corrs = []
    targets = texts.target_words()
    cloze_probs = HumanPredictor().batch_predict(texts)

    for order in ngram_orders:
        x = []
        y = []

        ngram_probs = NgramPredictor(order).batch_predict(texts)

        for target, cloze_prob, ngram_prob in zip(targets, cloze_probs, ngram_probs):
            x.append(cloze_prob)
            y.append(ngram_prob)

        lx = logit(x)
        ly = logit(y)

        corrs.append(pearsonr(lx, ly)[0])
    return corrs
 def codePatches(self,patches,currentParts):
     flatpatches = patches.reshape((patches.shape[0],-1))
     print(flatpatches.shape)
     part_logits = np.rollaxis(logit(currentParts).astype(np.float64),0,4)
     part_logits = part_logits.reshape(part_logits.shape[0] * part_logits.shape[1] * part_logits.shape[2], -1)
     print(part_logits.shape)
     constant_terms = np.apply_over_axes(np.sum, np.log(1-currentParts).astype(np.float64),[1,2,3]).ravel()
     print(constant_terms.shape)
     codeParts = np.dot(flatpatches,part_logits)
     codeParts = codeParts + constant_terms
     print(codeParts.shape)
     return np.argmax(codeParts, axis = 1)
Example #19
    def check_logit_out(self, dtype, expected):
        a = np.linspace(0,1,10)
        a = np.array(a, dtype=dtype)
        olderr = np.seterr(divide='ignore')
        try:
            actual = logit(a)
        finally:
            np.seterr(**olderr)

        assert_almost_equal(actual, expected)

        assert_equal(actual.dtype, np.dtype(dtype))
def correlation_curve_cache(texts, ngram_order, cache_lambdas):
    corrs = []
    targets = texts.target_words()
    cloze_probs = HumanPredictor().batch_predict(texts)
    ngram_probs = NgramPredictor(ngram_order).batch_predict(texts)

    for cache_lambda in cache_lambdas:
        x = []
        y = []

        cache_probs = UnigramCachePredictor().batch_predict(texts)

        for target, cloze_prob, ngram_prob, cache_prob in zip(targets, cloze_probs, ngram_probs, cache_probs):
            x.append(cloze_prob)
            y.append(cache_lambda * cache_prob + (1 - cache_lambda) * ngram_prob)

        lx = logit(x)
        ly = logit(y)

        corrs.append(pearsonr(lx, ly)[0])
    return corrs
Example #21
def bern_y(X,p1,base_prob=.25,beta_sd=1):
    n,p = X.shape
    X_1    = X[:,:p1]
    v = 0 
    while v<1E-5:
        beta   = npran.randn(p1)*beta_sd
        if p1>0:
            eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
            y      = npran.binomial(1,invlogit(eta),n)
        else:
            y      = npran.binomial(1,base_prob,n)
        v = np.min(nplin.svd(np.hstack((X,y[:,np.newaxis])))[1])
    return y
Example #22
def from_simplex(x):
    r"""
    Interprets the last index of x as unit simplices and returns a
    real array of the same shape in logit space.

    Inverse to :func:`to_simplex` ; see that function for more details.

    :param np.ndarray x: Array of unit simplices along the last index.
    
    :rtype: ``np.ndarray``
    """
    n = x.shape[-1]
    # z are the stick breaking fractions in [0,1]
    # the last one is always 1, so don't worry about it
    z = np.empty(shape=x.shape)
    z[..., 0] = x[..., 0]
    z[..., 1:-1] = x[..., 1:-1] / (1 - x[..., :-2].cumsum(axis=-1))

    # now z are the logit-transformed breaking fractions
    z[..., :-1] = logit(z[..., :-1]) - logit(1 / (n - np.arange(n-1, dtype=float)))
    # set this to 0 manually to avoid subtracting inf-inf
    z[..., -1] = 0
    return z
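A hand-worked sketch of the stick-breaking transform above for a single simplex point, with the steps spelled out on a concrete (made-up) vector:

import numpy as np
from scipy.special import logit

x = np.array([0.2, 0.3, 0.5])               # lies on the unit 2-simplex
n = x.shape[-1]
z = np.empty_like(x)
z[0] = x[0]
z[1:-1] = x[1:-1] / (1 - x[:-2].cumsum())   # stick-breaking fractions in (0, 1)
z[:-1] = logit(z[:-1]) - logit(1 / (n - np.arange(n - 1)))
z[-1] = 0.0                                 # last fraction is always 1; pinned to 0
print(z)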
def genXy_bern_X_norm_beta(seed,n,p1,pnull,x_prob=.25,base_prob=.25,beta_sd=1):
    """ The X are normal. p1 predictive vars, pnull null vars. beta on the p1 vars is ~normal(0,beta_sd) and the intercept is logit(base_prob)"""
    if not seed == None:
        npran.seed(seed)
    X_1    = npran.binomial(1,x_prob,(n,p1))
    X_null = npran.binomial(1,x_prob,(n,pnull))
    X      = np.concatenate((X_1,X_null),axis=1)
    beta   = npran.randn(p1)*beta_sd
    if p1>0:
        eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
        y      = npran.binomial(1,invlogit(eta),n)
    else:
        y      = npran.binomial(1,base_prob,n)
    return X,y
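The generators in this family all share one idea: the outcome's log-odds are a linear predictor plus logit(base_prob), so with zero signal the positive rate is base_prob. A minimal self-contained version (made-up sizes, without the cutoff helper):

import numpy as np
from scipy.special import logit, expit

rng = np.random.RandomState(0)
n, p1, base_prob, beta_sd = 200, 3, 0.25, 1.0
X = rng.binomial(1, 0.25, (n, p1))       # Bernoulli design matrix
beta = rng.randn(p1) * beta_sd
eta = X @ beta + logit(base_prob)        # log-odds with base-rate intercept
y = rng.binomial(1, expit(eta))
print(y.mean())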
    def extract(self,X):
        assert self._parts is not None, "Must be trained before calling extract"
        th = self._settings['threshold']
        part_logits = np.rollaxis(logit(self._parts).astype(np.float64),0,4)
        constant_terms = np.apply_over_axes(np.sum, np.log(1-self._parts).astype(np.float64), [1, 2, 3]).ravel()

        from pnet.cyfuncs import code_index_map_multi

        feature_map = code_index_map_multi(X, part_logits, constant_terms, th,
                                           outer_frame=self._settings['outer_frame'], 
                                           min_llh=self._settings.get('min_llh', -np.inf),
                                           n_coded=self._settings.get('n_coded', 1))


        return (feature_map, self._num_parts)
Example #25
    def check_logit_out(self, dtype, expected):
        a = np.linspace(0,1,10)
        a = np.array(a, dtype=dtype)
        olderr = np.seterr(divide='ignore')
        try:
            actual = logit(a)
        finally:
            np.seterr(**olderr)

        if np.__version__ >= '1.6':
            assert_almost_equal(actual, expected)
        else:
            assert_almost_equal(actual[1:-1], expected[1:-1])

        assert_equal(actual.dtype, np.dtype(dtype))
    def _extract(self, phi, data):
        X = phi(data)
        XX = X[:, np.newaxis, np.newaxis]
        theta = self._models[np.newaxis]

        S = self._settings.get('standardize')
        if S:
            llh = XX * logit(theta)
            bb = np.apply_over_axes(np.sum, llh, [-3, -2, -1])[..., 0, 0, 0]
            bb = (bb - self._means) / self._sigmas
            yhat = np.argmax(bb.max(-1), axis=1)
        else:
            llh = XX * np.log(theta) + (1 - XX) * np.log(1 - theta)
            bb = np.apply_over_axes(np.sum, llh, [-3, -2, -1])[..., 0, 0, 0]
            yhat = np.argmax(bb.max(-1), axis=1)
        return yhat
def genXy_binary_X_norm_beta(seed,n,p1,pnull,base_prob=.25,beta_sd=1,A_base_diag=-1,A_sd=.2):
    ''' X is binary from the Ising model, with the coefficients drawn from a normal. Y is binary, with the beta coefficients also drawn from a normal '''
    if not seed == None:
        npran.seed(seed)
    p = p1 + pnull
    A = npran.normal(0,.2,(p,p))-np.diag(A_base_diag*np.ones(p))
    X = draw_random_binary(n,A)
    X_1    = X[:,:p1]
    X_null = X[:,p1:]
    beta   = npran.randn(p1)*beta_sd
    if p1>0:
        eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
        y      = npran.binomial(1,invlogit(eta),n)
    else:
        y      = npran.binomial(1,base_prob,n)
    return X,y
Example #28
 def preprocess_feature(cls, feature, parameters):
     is_not_empty = 1 - np.isclose(feature, MISSING_VALUE)
     if parameters.feature_type == identify_types.BINARY:
         # Binary features are always 1 unless they are 0
         return ((feature != 0) * is_not_empty).astype(np.float32)
     if parameters.boxcox_lambda is not None:
         feature = stats.boxcox(
             np.maximum(feature + parameters.boxcox_shift, BOX_COX_MARGIN),
             parameters.boxcox_lambda,
         )
     # No *= to ensure consistent out-of-place operation.
     if parameters.feature_type == identify_types.PROBABILITY:
         feature = np.clip(feature, 0.01, 0.99)
         feature = special.logit(feature)
     elif parameters.feature_type == identify_types.QUANTILE:
         transformed_feature = np.zeros_like(feature)
         for i in six.moves.range(feature.shape[0]):
             transformed_feature[i] = cls.value_to_quantile(
                 feature[i], parameters.quantiles
             )
         feature = transformed_feature
     elif parameters.feature_type == identify_types.ENUM:
         possible_values = parameters.possible_values
         mapping = {}
         for i, possible_value in enumerate(possible_values):
             mapping[possible_value] = i
         output_feature = np.zeros((len(feature), len(possible_values)))
         for i, val in enumerate(feature):
             if abs(val - MISSING_VALUE) < 1e-2:
                 # This check is required by the PT preprocessing but not C2
                 continue
             output_feature[i][mapping[val]] = 1.0
         return output_feature
     elif parameters.feature_type == identify_types.CONTINUOUS_ACTION:
         min_value = parameters.min_value
         max_value = parameters.max_value
         feature = (
             (feature - min_value) * ((1 - 1e-6) * 2 / (max_value - min_value))
             - 1
             + 1e-6
         )
     else:
         feature = feature - parameters.mean
         feature /= parameters.stddev
         feature = np.clip(feature, MIN_FEATURE_VALUE, MAX_FEATURE_VALUE)
     feature *= is_not_empty
     return feature
def genXy_given_X_norm_beta(seed,data,n,p1,pnull,base_prob=.25,beta_sd=1):
    ''' X is drawn by sampling rows and columns from the given data. Y is binary, with the beta coefficients drawn from a normal '''
    if not seed == None:
        npran.seed(seed)
    p = p1 + pnull
    h,w = data.shape
    rows = npran.choice(h,n)
    X      = data[rows,:][:,npran.choice(w,p)]
    X_1    = X[:,:p1]
    X_null = X[:,p1:]
    beta   = npran.randn(p1)*beta_sd
    if p1>0:
        eta    = cutoff(np.dot(X_1,beta)+logit(base_prob))
        y      = npran.binomial(1,invlogit(eta),n)
    else:
        y      = npran.binomial(1,base_prob,n)
    return X,y
def nbinom_lower_trunc_solver(ab):
    """Given abundance data, solve for MLE of negative binomial (lower-truncated at 1) parameters n and p"""
    ab = check_for_support(ab, lower=1)
    mu = np.mean(ab)
    var = np.var(ab, ddof=1)
    p0 = 1 - mu / var
    if p0 < 0:
        p0 = 10 ** -5
    elif p0 > 1:
        p0 = 1 - 10 ** -5
    logit_p0 = logit(p0)
    log_n0 = log(mu * (1 - p0) / p0)

    def negbin_func(x):
        return -nbinom_lower_trunc_ll(ab, exp(x[0]), expit(x[1]))

    log_n, logit_p = optimize.fmin(negbin_func, x0=[log_n0, logit_p0])
    return exp(log_n), expit(logit_p)
Example #31
def model3(gene_name, abkt, y_g, num_random_restarts, minrr):
    '''
    optimization with 1 pg, 2 theta
    :param abkt:
    :param y_g:
    :param num_random_restarts:
    :param minrr:
    :return: min object with lowest negative log-likelihood
    '''
    theta_lower0, theta_upper0, p_lower0, p_upper0, std_lower0, std_upper0, theta_lower1, theta_upper1, p_lower1, p_upper1, std_lower1, std_upper1 = get_rr_range_grp(
        y_g, group_info)
    real_params_g_rtimes = column_stack(
        (uniform(theta_lower0, theta_upper0, num_random_restarts),
         uniform(theta_lower1, theta_upper1, num_random_restarts),
         log(
             uniform(min(std_lower0, std_lower1), max(std_upper0, std_upper1),
                     num_random_restarts)),
         logit(
             uniform(min(p_lower0, p_lower1), max(p_upper0, p_upper1),
                     num_random_restarts))))
    arg_min_x = []
    val_min_x = []
    for i in range(num_random_restarts):
        log_fh.log('tasc free theta optimization #' + str(i) + ' for gene ' +
                   gene_name)
        real_params_g = real_params_g_rtimes[i, :]
        optim_result_obj = minimize(
            likelihood.neg_log_sum_marginal_likelihood_free_theta,
            x0=real_params_g,
            args=(abkt, y_g, group_info),
            method='L-BFGS-B')
        if optim_result_obj.success and (not np.isnan(
                optim_result_obj.fun)) and (optim_result_obj.fun != 0):
            arg_min_x.append(optim_result_obj)
            val_min_x.append(optim_result_obj.fun)
        if len(arg_min_x) >= minrr:
            break

    if len(arg_min_x) == 0:
        return None
    else:
        return arg_min_x[np.argmin(val_min_x)]
Example #32
    def sampling(self,
                 samples,
                 sigmoids,
                 epsilon=1e-8,
                 shift_percent=95.0,
                 rank=None):

        sigmoids = np.clip(sigmoids.astype(float), 1e-14, 1 - 1e-14)

        # Update upper bound
        D_tilde = logit(sigmoids)
        self.D_tilde_M = np.maximum(self.D_tilde_M, np.amax(D_tilde))

        # Compute probability
        D_delta = D_tilde - self.D_tilde_M
        F = D_delta - np.log(1 - np.exp(D_delta - epsilon))
        if shift_percent is not None:
            gamma = np.percentile(F, shift_percent)
            # print("gamma", gamma)
            F = F - gamma
        P = np.squeeze(logistic(F))

        # Filter out samples
        # accept = np.random.rand(len(D_delta)) < P
        # good_samples = samples[accept]
        # print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(len(D_delta), np.sum(accept), np.sum(accept)/len(D_delta) ))

        if rank is not None:
            order = np.argsort(P)[::-1]
            accept = order[:int(rank * len(D_delta))]
            good_samples = samples[accept, :]
            print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
                len(D_delta), np.size(accept, 0),
                np.size(accept, 0) / len(D_delta)))
        else:
            accept = np.random.rand(len(D_delta)) < P
            good_samples = samples[accept]
            print("[!] total: {:d}, accept: {:d}, percent: {:.2f}".format(
                len(D_delta), np.sum(accept),
                np.sum(accept) / len(D_delta)))

        return good_samples
Example #33
def pbo_core_calc(Cs, Ms, Ms_values, Ms_index, metric_func, verbose=False):
    # make sure chucks are concatenated in their original order
    order = [x for x, _ in Cs]
    sort_ind = np.argsort(order)

    Cs_values = np.array([v for _, v in Cs])
    if verbose:
        print("Cs index = {}, ".format(order), end="")
    J_x = np.concatenate(Cs_values[sort_ind, :])

    # find Cs_bar
    Cs_bar_index = list(sorted(Ms_index - set(order)))
    if verbose:
        print("Cs_bar_index = {}".format(Cs_bar_index))
    J_bar_x = np.concatenate(Ms_values[Cs_bar_index, :])

    R_x = metric_func(J_x)
    R_bar_x = metric_func(J_bar_x)

    R_rank_x = ss.rankdata(R_x)
    R_bar_rank_x = ss.rankdata(R_bar_x)

    rn_x = np.argmax(R_rank_x)
    rn_bar_x = R_bar_rank_x[rn_x]

    w_bar_x = float(rn_bar_x) / len(R_bar_rank_x)
    logit_x = spec.logit(w_bar_x)

    core = PBOCore(
        J_x,
        J_bar_x,
        R_x,
        R_bar_x,
        R_rank_x,
        R_bar_rank_x,
        rn_x,
        rn_bar_x,
        w_bar_x,
        logit_x,
    )

    return core
Example #34
def get_example(load_example, eval_tracker, model, get_offsets):
    """Generates individual training examples.

  Args:
    load_example: callable returning a tuple of image and label ndarrays
                  as well as the seed coordinate and volume name of the example
    eval_tracker: EvalTracker object
    model: FFNModel object
    get_offsets: iterable of (x, y, z) offsets to investigate within the
        training patch

  Yields:
    tuple of:
      seed array, shape [1, z, y, x, 1]
      image array, shape [1, z, y, x, 1]
      label array, shape [1, z, y, x, 1]
  """
    seed_shape = train_canvas_size(model).tolist()[::-1]

    while True:
        full_patches, full_labels, loss_weights, coord, volname = load_example(
        )
        # Always start with a clean seed.
        seed = logit(mask.make_seed(seed_shape, 1, pad=FLAGS.seed_pad))

        for off in get_offsets(model, seed):
            predicted = mask.crop_and_pad(seed, off,
                                          model.input_seed_size[::-1])
            patches = mask.crop_and_pad(full_patches, off,
                                        model.input_image_size[::-1])
            labels = mask.crop_and_pad(full_labels, off,
                                       model.pred_mask_size[::-1])
            weights = mask.crop_and_pad(loss_weights, off,
                                        model.pred_mask_size[::-1])

            # Necessary, since the caller is going to update the array and these
            # changes need to be visible in the following iterations.
            assert predicted.base is seed
            yield predicted, patches, labels, weights

        eval_tracker.add_patch(full_labels, seed, loss_weights, coord, volname,
                               full_patches)
 def addContextToChromosomeTable(self, chromosome, chromMatrix,
                                 motifMatrix):
     chromMatrix = sc.logit(chromMatrix)
     #print chromMatrix
     gamma, realBeta = self.gamma, self.beta
     n = self.n
     for char in range(n):
         relevantIndex = self.motifIndexByChar(char)
         charMotifMatrix = np.delete(motifMatrix, np.s_[relevantIndex], 1)
         betaBychar = np.delete(realBeta, np.s_[relevantIndex])
         gammaByChar = np.delete(gamma, np.s_[relevantIndex])
         coeffByChar = gammaByChar * betaBychar
         additionOfcontextBychar = np.dot(coeffByChar, charMotifMatrix.T)
         chromMatrix[:, char] += additionOfcontextBychar
     chromMatrix = sc.expit(chromMatrix)
     chromMatrix = preprocessing.normalize(
         chromMatrix, norm='l1',
         axis=1)  #Normlizing the matrix back to being a stoch. matrix
     #print np.sum(chromMatrix,axis=1)
     return chromMatrix
Example #36
def mean_coh_logit(coh, weights=None, axis=None):

    # logit transform of R, ensuring to nan out any infinities
    z = logit(np.sqrt(coh))
    z[np.isinf(z)] = np.nan

    if axis is None:
        z = np.nanmean(z)
    else:
        # this is needed since nanmean doesn't accept a tuple as the axis argument, so we need to loop over each axis
        if not isinstance(axis, collections.abc.Iterable):
            axis = (axis, )

        # perform the mean over each desired axis
        zm = np.ma.array(z, mask=np.isnan(z))
        zm = np.ma.average(zm, axis=axis, weights=weights)
        z = zm.filled()

    # inverse logit transform, returning to R^2
    return expit(z)**2
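A compact self-contained illustration of the averaging scheme above, with made-up coherence values: average on the logit scale of R and square the back-transform, rather than averaging R^2 directly.

import numpy as np
from scipy.special import logit, expit

coh = np.array([0.10, 0.40, 0.80, 0.95])   # hypothetical coherence (R^2) values
z = logit(np.sqrt(coh))
z[np.isinf(z)] = np.nan                    # guard against exact 0 or 1
print(expit(np.nanmean(z)) ** 2)           # logit-domain mean
print(coh.mean())                          # plain mean, for comparison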
Example #37
def predict(st, norm, bounds):

    rew = np.log(1 + (st[:, -1:]))

    a_x = bounds[0]
    b_x = bounds[2]

    eps = 1e-5

    rew = np.clip(rew, a_x + eps, b_x - eps)

    rew = logit((rew - a_x) / (b_x - a_x))

    st[:, -1:] = rew

    State = np.zeros((1, 61))
    State[0, :] = np.hstack((st[0, 0], st[:, [1, 2, 3, -1]].ravel()))

    X = (State - norm[0]) / norm[1]
    return np.round(policy_network(X)[0, :], 4)
Example #38
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        self.inodes = inputnodes  # number of input-layer nodes
        self.hnodes = hiddennodes  # number of hidden-layer nodes
        self.onodes = outputnodes  # number of output-layer nodes
        self.lr = learningrate  # learning rate, i.e. the gradient-descent step size

        # weight matrix for the connections from the input layer to the hidden layer
        self.wih = numpy.random.normal(0.0, pow(self.hnodes, -0.5),
                                       (self.hnodes, self.inodes))

        # weight matrix for the connections from the hidden layer to the output layer
        self.who = numpy.random.normal(0.0, pow(self.onodes, -0.5),
                                       (self.onodes, self.hnodes))

        # activation function (sigmoid)
        self.activation_function = lambda x: sigmoid(x)

        # inverse activation function for the backward pass
        self.inverse_activation_function = lambda x: logit(x)
        pass
    def run_model(self, model_number, x, calc_gt=False, n_exp=1):
        mean1 = [3, 3, 3]
        cov1 = np.eye(3) * 0.75
        mean2 = [-2, -2, -2]
        cov2 = np.eye(3) * 0.75
        mean3 = [1, 1, 1]
        cov3 = np.eye(3) * 1.0

        prob = multivariate_normal.pdf(x, mean=mean1, cov=cov1) + multivariate_normal.pdf(x, mean=mean2, cov=cov2) \
               + multivariate_normal.pdf(x, mean=mean3, cov=cov3)
        prob *= 3.0

        if calc_gt:
            return logit(prob)

        if n_exp > 1:
            return np.random.binomial(n=n_exp, p=prob)

        clicked = int(flip(prob))
        return clicked
Example #40
def get_policy_fn(request, ffn_model):
  """Returns a policy class based on the InferenceRequest proto."""

  if request.movement_policy_name:
    movement_policy_class = globals().get(request.movement_policy_name, None)
    if movement_policy_class is None:
      movement_policy_class = import_symbol(request.movement_policy_name)
  else:  # Default / fallback.
    movement_policy_class = FaceMaxMovementPolicy

  if request.movement_policy_args:
    kwargs = json.loads(request.movement_policy_args)
  else:
    kwargs = {}
  if 'deltas' not in kwargs:
    kwargs['deltas'] = ffn_model.deltas[::-1]
  if 'score_threshold' not in kwargs:
    kwargs['score_threshold'] = logit(request.inference_options.move_threshold)

  return lambda canvas: movement_policy_class(canvas, **kwargs)
    def setUp(self):
        self.op_type = "sigmoid_cross_entropy_with_logits"
        self.python_api = test_fluid_sigmoid
        batch_size = 64
        num_classes = 20
        self.inputs = {
            'X': logit(
                np.random.uniform(0, 1, (batch_size, num_classes))
                .astype("float64")),
            'Label': np.random.randint(0, 2, (batch_size, num_classes))
            .astype("float64")
        }

        # Fw Pass is implemented as elementwise sigmoid followed by
        # elementwise logistic loss
        # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X))
        sigmoid_X = expit(self.inputs['X'])
        term1 = self.inputs['Label'] * np.log(sigmoid_X)
        term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
        self.outputs = {'Out': -term1 - term2}
Example #42
    def __init__(self, inputnodes, hiddennodes, outputnoddes, learningrate):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnoddes

        self.lr = learningrate

        self.wih = np.random.normal(0.0, pow(self.hnodes, -0.5),
                                    (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.onodes, -0.5),
                                    (self.onodes, self.hnodes))

        # self.who = loadedho
        # self.wih = loadedih

        self.activation_function = lambda x: sks.expit(x)

        self.inverse_activation_function = lambda x: sks.logit(x)

        pass
Example #43
def estimate_student(normalized_ranks):
    """This fits a PyMC3 model. All the model does is
    fit the parameters of a t distribution, since it is clear
    (in the author's opinion) that the logit-transformed ranks
    are very well described by a t distribution. The logit
    ranks are thus the observations, and the model finds the 
    ranges of parameters consistent with those obs."""

    with pm.Model() as model:
        nu = pm.HalfNormal('nu', 50)  #very broad priors
        mu = pm.Normal('mu', mu=0, sigma=50)  #very broad priors
        sigma = pm.HalfNormal('sig', 50)  #very broad priors

        lik = pm.StudentT('t',
                          nu=nu,
                          mu=mu,
                          sigma=sigma,
                          observed=logit(normalized_ranks))
        trace = pm.sample(1000, tune=1000)
    return trace, model
Example #44
def fit_treatment_model(df, term_counts):
    indices = df.post_index.values
    tc = term_counts[indices, :]
    tc = tc.toarray()
    f_z = logit(df.treatment_probability.values)
    print(f_z.shape, tc.shape)
    features = np.column_stack((f_z, tc))
    labels = df.treatment.values

    true_model = LogisticRegression(solver='liblinear')
    true_model.fit(features, labels)
    coeffs = np.array(true_model.coef_).flatten()[1:]
    print(coeffs.mean(), coeffs.std())

    np.random.shuffle(tc)
    features = np.column_stack((f_z, tc))
    permuted = LogisticRegression(solver='liblinear')
    permuted.fit(features, labels)
    permuted_coeffs = np.array(permuted.coef_).flatten()[1:]
    print(permuted_coeffs.mean(), permuted_coeffs.std())
def add_annotations(df):
    fe_cols = [
        col for col in df.columns
        if 'Fraction edited' in col and 'logit' not in col
    ]
    mean_edit_fqs = df[fe_cols].apply(np.nanmean, axis='columns')
    df['Obs edit frequency'] = mean_edit_fqs

    from scipy.special import logit, expit
    mean_logit_edit_fq = logit(
        np.mean(df[df['TrainTest_GBTR'] == 'train']['Obs edit frequency']))

    # Need to choose logit std to convert data
    # std_logit_edit_fq = 1.1
    std_logit_edit_fq = 2

    df['Pred edit frequency'] = expit((df['y_pred_GBTR'] * std_logit_edit_fq) +
                                      mean_logit_edit_fq)

    return df
Example #46
def max_pred_offsets(model, seed):
  """Generates offsets with the policy used for inference."""
  # Always start at the center.
  queue = deque([(0, 0, 0)])
  done = set()

  train_image_radius = train_image_size(model) // 2
  input_image_radius = np.array(model.input_image_size) // 2

  while queue:
    offset = queue.popleft()

    # Drop any offsets that would take us beyond the image fragment we
    # loaded for training.
    if np.any(np.abs(np.array(offset)) + input_image_radius >
              train_image_radius):
      continue

    # Ignore locations that were visited previously.
    quantized_offset = (
        offset[0] // max(model.deltas[0], 1),
        offset[1] // max(model.deltas[1], 1),
        offset[2] // max(model.deltas[2], 1))

    if quantized_offset in done:
      continue

    done.add(quantized_offset)

    yield offset

    # Look for new offsets within the updated seed.
    curr_seed = mask.crop_and_pad(seed, offset, model.pred_mask_size[::-1])
    todos = sorted(
        movement.get_scored_move_offsets(
            model.deltas[::-1],
            curr_seed[0, ..., 0],
            threshold=logit(FLAGS.threshold)), reverse=True)
    queue.extend((x[2] + offset[0],
                  x[1] + offset[1],
                  x[0] + offset[2]) for _, x in todos)
Example #47
 def preprocess_feature(self, feature, parameters):
     is_not_empty = 1 - np.isclose(feature, normalization.MISSING_VALUE)
     if parameters.feature_type == identify_types.BINARY:
         # Binary features are always 1 unless they are 0
         return ((feature != 0) * is_not_empty).astype(np.float32)
     if parameters.boxcox_lambda is not None:
         feature = stats.boxcox(
             np.maximum(feature + parameters.boxcox_shift,
                        normalization.BOX_COX_MARGIN),
             parameters.boxcox_lambda,
         )
     # No *= to ensure consistent out-of-place operation.
     if parameters.feature_type == identify_types.PROBABILITY:
         feature = special.logit(np.clip(feature, 1e-6, 1.0))
     elif parameters.feature_type == identify_types.QUANTILE:
         transformed_feature = np.zeros_like(feature)
         for i in six.moves.range(feature.shape[0]):
             transformed_feature[i] = self._value_to_quantile(
                 feature[i], parameters.quantiles)
         feature = transformed_feature
     elif parameters.feature_type == identify_types.ENUM:
         possible_values = parameters.possible_values
         mapping = {}
         for i, possible_value in enumerate(possible_values):
             mapping[possible_value] = i
         output_feature = np.zeros((len(feature), len(possible_values)))
         for i, val in enumerate(feature):
             if abs(val - MISSING_VALUE) < 1e-2:
                 continue
             output_feature[i][mapping[val]] = 1.0
         return output_feature
     elif parameters.feature_type == identify_types.CONTINUOUS_ACTION:
         min_value = parameters.min_value
         max_value = parameters.max_value
         feature = ((feature - min_value) *
                    ((1 - 1e-6) * 2 / (max_value - min_value)) - 1 + 1e-6)
     else:
         feature = feature - parameters.mean
         feature /= parameters.stddev
     feature *= is_not_empty
     return feature
Example #48
def load_trn_data_newTF(filedir, params):

    # loading data. Files in the archive are 'params' and 'stats'
    data = np.load(filedir)  # samples_dir = results/samples/

    # 7 parameters: Na+ current, CaT current (T-type Calcium, low-threshold), CaS current, A current (transient potassium current), KCa current, Kd current, H current (hyperpolarization current)
    sample_params = data["params"]  # there are 7 parameters in the network
    sample_stats = data[
        "stats"]  # there are 15 summary_stats in 'PrinzStats' (see the params variable above).
    # These 15 stats can be seen in summstats.py. They are: cycle_period, burst_length*3, end_to_start*2, start_to_end*2, duty_cycle*3, phase_gap*2, phase*2

    prior = netio.create_prior(params, log=True)

    lower = np.asarray(prior.lower)
    upper = np.asarray(prior.upper)
    inputscale = lambda x: (x - lower) / (upper - lower)
    bijection = lambda x: logit(inputscale(x))  # logit of the rescaled input

    sample_params = bijection(sample_params)

    # normalize data
    params_mean = np.mean(sample_params, axis=0)
    params_std = np.std(sample_params, axis=0)
    sample_params = (sample_params - params_mean) / params_std

    # extract number of training samples
    sample_params_pilot = sample_params[:params.pilot_samples]
    sample_stats_pilot = sample_stats[:params.pilot_samples]
    sample_params_train = sample_params[params.
                                        pilot_samples:params.pilot_samples +
                                        params.n_train]
    sample_stats_train = sample_stats[params.
                                      pilot_samples:params.pilot_samples +
                                      params.n_train]

    pilot_data = (sample_params_pilot, sample_stats_pilot)
    trn_data = [sample_params_train, sample_stats_train
                ]  # taking log of conductances to get the training data

    return pilot_data, trn_data, params_mean, params_std
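The bijection used above, in isolation: parameters bounded by (lower, upper) are rescaled to (0, 1) and mapped through logit so the network sees an unbounded representation, and expit undoes it. The bounds and values below are made up.

import numpy as np
from scipy.special import logit, expit

lower = np.array([0.0, 1.0])
upper = np.array([10.0, 5.0])
theta = np.array([2.5, 3.0])                      # a hypothetical parameter vector

z = logit((theta - lower) / (upper - lower))      # unbounded representation
print(expit(z) * (upper - lower) + lower)         # recovers theta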
Example #49
def conwayMaxwellBinomialPriorKernel(com_params, a, b, c, m):
    """
    For calculating the kernel of the conjugate prior of the Conway-Maxwell binomial distribution. 
    Arguments:  com_params, p, nu, the parameters of the Conway-Maxwell binomial distribution
                a, hyperparameter corresponding to the first sufficient stat,
                b, hyperparameter corresponding to the second sufficient stat,
                c, hyperparameter corresponding to the pseudocount
                m, int, the number of bernoulli variables, considered fixed and known
    Returns:    The value of the kernel of the conjugate prior 
    """
    conjugateProprietyTest(a,b,c,m)
    # propriety_dist = norm(0, 1)
    p, nu = com_params
    if (p == 1) | (p == 0):
        return 0
    test_dist = ConwayMaxwellBinomial(p, nu, m)
    natural_params = np.array([logit(p), nu])
    pseudodata_part = np.dot(natural_params, np.array([a,b]))
    partition_part = np.log(test_dist.normaliser) - (nu * getLogFactorial(m)) - (m * np.log(1-p))
    # propriety_part = norm.pdf(logit(p)) * norm.pdf(nu - 1)
    return np.exp(pseudodata_part - c * partition_part)
Example #50
 def scores_vs_llrs(self):
     """
     Returns score and llr points, convenient for plotting purposes. A score 
     vector and an llr vector are returned, each with 2*nbins elements, 
     where nbins is the number of bins in this PAV solution. The scores 
     vector alternates the minimum and maximum score in each bin. There is 
     only one llr value associated with each bin, but those values are 
     duplicated, to correspond to the scores. The resulting plot of 
     scores vs llrs is steppy, with exactly horizontal and vertical line 
     segments.
     
     The initial and final llr bins may be -inf and +inf.
     
     """
     p = self.p
     LLRs = np.empty_like(self.scores)
     llr = LLRs[:, 0]
     llr[:] = logit(p)
     llr -= np.log(self.T / self.N)
     LLRs[:, 1] = llr
     return self.scores.ravel(), LLRs.ravel()
Example #51
def ensemble_submissions(submission_fnames, weights, mus=None, sigmas=None):
    assert len(submission_fnames) > 0, "Must provide at least one submission to ensemble."
    # Check that we have a weight for each submission
    assert len(submission_fnames) == len(weights), "Number of submissions and weights must match."
    # Get the id column of the submissions
    ids = pd.read_csv(submission_fnames[0])['id'].values
    # Read in all the submission values
    submissions = [pd.read_csv(sub_fname)[LABEL_NAMES].values for sub_fname in submission_fnames]
    # Combine them based on their respective weights
    combined = 0
    for j, sub in enumerate(submissions):
        if np.all((0 <= sub) & (sub <= 1.)):
            logging.info("Applying logit to submission %s" % submission_fnames[j])
            sub = logit(sub)
        if mus is not None and sigmas is not None:
            logging.info("Standardizing with mean %s and std %s" % (mus, sigmas))
            sub = sub - mus[np.newaxis]
            sub = sub / (sigmas[np.newaxis] + 1e-9)
        combined = combined + weights[j][np.newaxis] * sub
    # combined = expit(combined)
    return ids, combined
Example #52
def cross_entropy(tar, non, Ptar=0.5, deriv=False):

    baseline = -Ptar * np.log(Ptar) - (1 - Ptar) * np.log(1 - Ptar)
    logitprior = logit(Ptar)
    if not deriv:
        t = np.mean(softplus(-tar - logitprior))
        n = np.mean(softplus(non + logitprior))
        return (Ptar * t + (1 - Ptar) * n) / baseline

    t, back1 = softplus(-tar - logitprior, deriv=True)
    n, back2 = softplus(non + logitprior, deriv=True)
    k1 = Ptar / (len(t) * baseline)
    k2 = (1 - Ptar) / (len(n) * baseline)
    y = k1 * t.sum() + k2 * n.sum()

    def back(dy):
        dtar = back1(-dy * k1)
        dnon = back2(dy * k2)
        return dtar, dnon

    return y, back
Example #53
def logit_transform(a, t=5):
    """Apply logit function, setting a max threshold instead of +/- inf
    
    Args:
        a (np.array): array to transform
        t (float): max threshold for +/- inf values
        
    Returns:
        np.array of logit values
    """
    if type(a) is not np.ndarray:
        a = np.array(a)
    y = logit(a)
    # cap inf relative to max and min values - not implemented
    #ub = np.max(y[(y!=inf)&(y!=-inf)])
    #lb = np.min(y[(y!=inf)&(y!=-inf)])

    # replace inf values with threshold
    y[y == inf] = t
    y[y == -inf] = -t
    return y
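A standalone check of the capping behaviour described in the docstring: exact 0 and 1 map to -t and +t instead of -inf/+inf (values chosen arbitrarily).

import numpy as np
from scipy.special import logit

a = np.array([0.0, 0.25, 0.5, 1.0])
t = 5.0
y = logit(a)
y[np.isneginf(y)] = -t   # replace -inf with the lower cap
y[np.isposinf(y)] = t    # replace +inf with the upper cap
print(y)                 # [-5.     -1.0986  0.      5.    ]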
Example #54
def rand_eval(all_coords, model, whole_images, whole_labels, criterion):
    global index, max_index
    if index < max_index - 10:
        index_list = np.random.randint(low=index + 1,
                                       high=max_index,
                                       size=10)

    eval_loss = 0
    for i in range(10):
        patch, labels = get_one_input(all_coords[index_list[i]],
                                      np.array(FLAGS.fov_size), whole_images,
                                      whole_labels)
        patch = patch.cuda()
        labels = labels.cuda()
        seed = logit(utils.initial_seed(FLAGS.fov_size))
        seed = seed.cuda()
        pred_seed = model(t.cat((patch, seed), 1))
        seed += pred_seed
        eval_loss += criterion(seed, labels)
    eval_loss /= 10
    return eval_loss.data.cpu()
Example #55
def interp_loop(snr, tdp, threshold, c):
    """
    function to loop over to perform spline interpolation
    :param snr:
    :param tdp:
    :param threshold:
    :param c: fuzzfactor
    """
    # find min value
    min_tdp = np.min(tdp)
    # logit-transform the shifted data to avoid negative values
    tmp_tdp = special.logit(tdp - min_tdp + c)
    # interpolate with spline interpolation
    tck = interpolate.splrep(snr, tmp_tdp)
    # new x and y values
    snr_new = np.linspace(1, 10, 1000)
    tmp_tdp_new = interpolate.splev(snr_new, tck, der=0)
    # return to linear space
    tdp_new = special.expit(tmp_tdp_new) + min_tdp - c

    return snr_new, tdp_new
Example #56
    def _cdf(self, x, mu, sigma):
        r"""
        cumulative probability distribution function

        Parameters
        ----------
        mu : array_like
            mean of the logit of `x`

        sigma : array_like
            standard deviation of the logit of `x`

        Notes
        -----
        """

        sigma = np.atleast_1d(sigma).astype(np.float64)
        mu = np.atleast_1d(mu).astype(np.float64)

        norm = 1.0 / 2.0
        return norm * (1.0 + erf((logit(x) - mu) / (np.sqrt(2.0 * sigma**2))))
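A quick numerical check of the closed form above: the logit-normal CDF at x equals the normal CDF evaluated at logit(x). The values are arbitrary.

import numpy as np
from scipy.special import erf, logit
from scipy.stats import norm

x, mu, sigma = 0.3, 0.1, 0.8
lhs = 0.5 * (1.0 + erf((logit(x) - mu) / np.sqrt(2.0 * sigma ** 2)))
print(lhs, norm.cdf(logit(x), loc=mu, scale=sigma))  # the two numbers agree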
def beta_to_m(betas, covgs, k):
    """Transform beta values into m values.

    Inputs -
        betas - pd.Series of beta values
        covgs - pd.Series of covg values
        k     - number of pseudoreads for smoothing
    Returns
        pd.Series of m values
    """
    b = list(betas)
    c = list(covgs)

    s = []
    for i in range(len(c)):
        m = (c[i] * b[i])
        u = (c[i] - m)
        s.append((m + k) / ((m + k) + (u + k)))
    out = logit(s)

    return pd.Series(out)
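A worked example of the smoothing above with made-up beta values and coverages: k pseudoreads are added to both the methylated and unmethylated counts before the logit, so betas of exactly 0 or 1 stay finite.

import pandas as pd
from scipy.special import logit

betas = pd.Series([0.0, 0.5, 1.0])
covgs = pd.Series([10, 20, 30])
k = 1

meth = covgs * betas              # methylated read counts
unmeth = covgs - meth             # unmethylated read counts
m_values = logit((meth + k) / (meth + unmeth + 2 * k))
print(list(m_values))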
Example #58
def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.0):
    """Synthetic data with a difficult nuisance components and an easy treatment effect
        From Setup A in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'

    Args:
        n (int, optional): number of observations
        p (int optional): number of covariates (>=5)
        sigma (float): standard deviation of the error term
        adj (float): adjustment term for the distribution of propensity, e. Higher values shift the distribution toward 0.

    Returns:
        (tuple): Synthetically generated samples with the following outputs:

            - y ((n,)-array): outcome variable.
            - X ((n,p)-ndarray): independent variables.
            - w ((n,)-array): treatment flag with value 0 or 1.
            - tau ((n,)-array): individual treatment effect.
            - b ((n,)-array): expected outcome.
            - e ((n,)-array): propensity of receiving treatment.
    """

    X = np.random.uniform(size=n * p).reshape((n, -1))
    b = (
        np.sin(np.pi * X[:, 0] * X[:, 1])
        + 2 * (X[:, 2] - 0.5) ** 2
        + X[:, 3]
        + 0.5 * X[:, 4]
    )
    eta = 0.1
    e = np.maximum(
        np.repeat(eta, n),
        np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1 - eta, n)),
    )
    e = expit(logit(e) - adj)
    tau = (X[:, 0] + X[:, 1]) / 2

    w = np.random.binomial(1, e, size=n)
    y = b + (w - 0.5) * tau + sigma * np.random.normal(size=n)

    return y, X, w, tau, b, e
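Usage sketch, assuming the function above is in scope together with numpy and scipy.special's expit/logit:

y, X, w, tau, b, e = simulate_nuisance_and_easy_treatment(n=500, p=5, sigma=1.0)
print(X.shape, y.shape)    # (500, 5) (500,)
print(w.mean(), e.mean())  # treated fraction should sit near the mean propensity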
Example #59
def test_logistic_lmm():

    df = pd.read_csv(os.path.join(get_resource_path(), "sample_data.csv"))
    model = Lmer("DV_l ~ IV1+ (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)

    assert model.coefs.shape == (2, 13)
    estimates = np.array([-0.16098421, 0.00296261])
    assert np.allclose(model.coefs["Estimate"], estimates, atol=0.001)

    assert isinstance(model.fixef, pd.core.frame.DataFrame)
    assert model.fixef.shape == (47, 2)

    assert isinstance(model.ranef, pd.core.frame.DataFrame)
    assert model.ranef.shape == (47, 2)

    assert np.allclose(model.coefs.loc[:, "Estimate"],
                       model.fixef.mean(),
                       atol=0.01)

    # Test prediction
    assert np.allclose(model.predict(model.data, use_rfx=True),
                       model.data.fits)
    assert np.allclose(
        model.predict(model.data, use_rfx=True, pred_type="link"),
        logit(model.data.fits),
    )

    # Test RFX only
    model = Lmer("DV_l ~ 0 + (IV1|Group)", data=df, family="binomial")
    model.fit(summarize=False)
    assert model.fixef.shape == (47, 2)

    model = Lmer("DV_l ~ 0 + (IV1|Group) + (1|IV3)",
                 data=df,
                 family="binomial")
    model.fit(summarize=False)
    assert isinstance(model.fixef, list)
    assert model.fixef[0].shape == (47, 2)
    assert model.fixef[1].shape == (3, 2)
Example #60
def main_dtclassifier(datastruct, min_samples_leaf=0.1, experiment_id=None):
    print("Starting experiment Decision Trees")
    mlflow.set_experiment("Santander Kaggle")
    df, train_x, train_y, test_x, test_y = datastruct
    metrics = {}

    with mlflow.start_run():
        print("Training model")
        start_timer = time.time()

        # train 200 small models
        models = []
        for var in train_x.columns:
            clf = DecisionTreeClassifier(min_samples_leaf=min_samples_leaf,
                                         random_state=0)
            clf.fit(train_x[var].values.reshape(-1, 1), train_y)
            models.append(clf)

        stop_timer = time.time()
        print("Model trained")

        predictions = [
            m.predict_proba(x.reshape(-1, 1))[:, 1]
            for (m, x) in zip(models, test_x.values.T)
        ]
        pred_y = np.array(predictions).T.mean(axis=1)
        pred_y_logit = logit(np.array(predictions).T).sum(axis=1)

        metrics['roc_auc'] = roc_auc_score(test_y, pred_y)
        metrics['roc_auc_logit'] = roc_auc_score(test_y, pred_y_logit)
        metrics['elapsed_time'] = (stop_timer - start_timer)

        #mlflow logging
        mlflow.log_param('model_type', "200 Decision Trees")
        mlflow.log_param('features', train_x.columns)
        mlflow.log_param('sample_size', df.shape)
        mlflow.log_param('min_samples_leaf', min_samples_leaf)
        mlflow.log_metrics(metrics)

        print("Completed")