def test_forwardS_same_as_old(self):
        self.forS.astep(0.)
        pS0_Test = self.forS.compute_S0_GIVEN_X0(
            self.forS.computeLikelihoodOfS(
                self.myTestPoint['X'],
                logistic.cdf(self.myTestPoint['B_logodds']),
                logistic.cdf(self.myTestPoint['B0_logodds'])))
        pS0_Correct = load(open('pS0_Test_data.pkl', 'rb'))

        pSnt_Test = np.zeros((self.nObs, self.M))
        for n in xrange(self.N):
            n0 = self.zeroIndices[n]
            for t in xrange(self.T[n] - 1):
                pSnt_Test[n0 + t] = self.forS.compute_pSt_GIVEN_St1(
                    n0, t, self.myTestPoint['S'][n0 + t])
        pSnt_Test = pSnt_Test / pSnt_Test.sum(axis=1)[:, np.newaxis]
        pSnt_Correct = load(open('pSnt_Test_data.pkl', 'rb'))
        pSnt_Correct = np.concatenate(
            [pSnt_Correct[i, 0:self.T[i], :] for i in range(self.N)])
        pSnt_Correct = pSnt_Correct / pSnt_Correct.sum(axis=1)[:, np.newaxis]

        #import pdb; pdb.set_trace()
        np.testing.assert_array_almost_equal(pS0_Test,
                                             pS0_Correct,
                                             err_msg="forwardS test off",
                                             decimal=6)
        np.testing.assert_array_almost_equal(pSnt_Test,
                                             pSnt_Correct,
                                             err_msg="forwardS test off",
                                             decimal=6)
Example No. 2
def sim_t_fixed_data():
    n = 10000
    np.random.seed(1011)
    df = pd.DataFrame()
    df['W1'] = np.random.normal(size=n)
    df['W2'] = np.random.binomial(1, size=n, p=logistic.cdf(df['W1']))
    df['W3'] = np.random.normal(size=n)
    df['A'] = np.random.binomial(1,
                                 size=n,
                                 p=logistic.cdf(-1 + 2 * df['W1']**2))
    df['Ya1'] = np.random.binomial(
        1,
        size=n,
        p=logistic.cdf(-0.5 + 2 * df['W1']**2 + 0.5 * df['W2'] - 0.5 * 1 +
                       1.1 * df['W3']))
    df['Ya0'] = np.random.binomial(
        1,
        size=n,
        p=logistic.cdf(-0.5 + 2 * df['W1']**2 + 0.5 * df['W2'] +
                       1.1 * df['W3']))
    df['Y'] = np.where(df['A'] == 1, df['Ya1'], df['Ya0'])
    df['W1_sq'] = df['W1']**2
    df['t'] = 1
    df['t0'] = 0
    df['id'] = df.index
    return df
Example No. 3
def train(iterations, confidence):    # scratch implementation to train the ANN; `confidence` acts as the learning rate
    no_units=6
    no_features=4
    W1,b1,W2,b2=init_para(no_units,no_features)
    X,y=get_Xy()                                    # X ---> train_X, y ---> train_y
    m=5              #training examples
    for i in range(iterations):
        Z1 = np.dot(W1, X) + b1  # forward prop begins...
        A1 = logistic.cdf(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = logistic.cdf(Z2)  # forward prop ends...

        log = np.multiply(np.log(A2), y) + np.multiply((1 - y), np.log(1 - A2))   #cross entropy function
        cost = -np.sum(log) / float(m)  # cost function...

        # dA2 = -(y/A2)+((1-y)/(1-A2))         #back prop begins...
        dZ2 = A2 - y
        dW2 = (np.dot(dZ2, A1.T)) / m
        db2 = np.sum(dZ2, axis=1, keepdims=True)
        # dA1 = dZ2*(W2)
        dZ1 = np.multiply(np.dot(W2.T, dZ2), A1 * (1 - A1))  # sigmoid derivative: A1*(1-A1)
        dW1 = (np.dot(dZ1, X.T)) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True)  # backprop ends...
        print (cost)
        W1 = W1 - confidence * dW1  # update begins...
        b1 = b1 - confidence * db1
        W2 = W2 - confidence * dW2
        b2 = b2 - confidence * db2  # update ends...

    return W1,b1,W2,b2
Example No. 4
def statin_dgm_truth(network, pr_a, shift=False, restricted=False):
    graph = network.copy()
    data = network_to_df(graph)

    # Running Data Generating Mechanism for A
    if shift:  # If a shift in the Odds distribution is instead specified
        prob = logistic.cdf(-5.3 + 0.2 * data['L'] + 0.15 * (data['A'] - 30) +
                            0.4 * np.where(data['R_1'] == 1, 1, 0) +
                            0.9 * np.where(data['R_2'] == 2, 1, 0) +
                            1.5 * np.where(data['R_3'] == 3, 1, 0))
        odds = probability_to_odds(prob)
        pr_a = odds_to_probability(np.exp(np.log(odds) + pr_a))

    statin = np.random.binomial(n=1, p=pr_a, size=nx.number_of_nodes(graph))
    data['statin'] = statin

    if restricted:  # removing other observations from the restricted set
        attrs = exposure_restrictions(network=network.graph['label'],
                                      exposure='statin')
        exclude = list(attrs.keys())
        data = data.loc[~data.index.isin(exclude)].copy()

    # Running Data Generating Mechanism for Y
    pr_y = logistic.cdf(-5.05 - 0.8 * data['statin'] + 0.37 *
                        (np.sqrt(data['A'] - 39.9)) + 0.75 * data['R'] +
                        0.75 * data['L'])
    cvd = np.random.binomial(n=1, p=pr_y, size=data.shape[0])
    return np.mean(cvd)
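
The shift branch above operates on the log-odds scale: convert the model probability to odds, add the shift, and convert back. Assuming probability_to_odds and odds_to_probability are the usual p/(1-p) and o/(1+o), this is the same as expit(logit(p) + shift); a small self-contained check:

import numpy as np
from scipy.special import expit, logit

p = np.array([0.1, 0.4, 0.7])                 # model probabilities
shift = 0.5                                   # log-odds shift (pr_a above)
odds = p / (1 - p)                            # probability_to_odds
shifted_odds = np.exp(np.log(odds) + shift)   # shift on the log-odds scale
np.testing.assert_allclose(shifted_odds / (1 + shifted_odds),   # odds_to_probability
                           expit(logit(p) + shift))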
Example No. 5
def sofrygin_observational(graph):
    """Simulates the exposure and outcome according to the mechanisms specified in Sofrygin & van der Laan 2017

    A ~ Bernoulli(expit(-1.2 + 1.5*W + 0.6*map(W)))
    Y ~ Bernoulli(expit(-2.5 + 1.5*W + 0.5*A + 1.5*map(A) + 1.5*map(W)))

    Returns
    -------
    Network object with node attributes
    """
    n = len(graph.nodes())
    w = np.array([d['W'] for n, d in graph.nodes(data=True)])

    # Calculating map(W), generating A, and adding to network
    w_s = exp_map(graph, 'W', measure='sum')
    a = np.random.binomial(n=1, p=logistic.cdf(-1.2 + 1.5*w + 0.6*w_s), size=n)
    for node in graph.nodes():
        graph.node[node]['A'] = a[node]

    # Calculating map(A), generating Y, and adding to network
    a_s = exp_map(graph, 'A', measure='sum')
    y = np.random.binomial(n=1, p=logistic.cdf(-2.5 + 1.5*w + 0.5*a + 1.5*a_s + 1.5*w_s), size=n)
    for node in graph.nodes():
        graph.node[node]['Y'] = y[node]

    return graph
Example No. 6
def yardage_distribution_table(yds_increment):
    # this creates a 3-dimensional array that indicates the probability
    # an offense gets a certain amount of yard chunks according to the offensive &
    # defensive playcalls
    global mu_array
    mu = mu_array
    global s_array
    s = s_array
    global turnover_array
    global d_list
    d = d_list
    global zero_index
    x_shape = np.shape(mu)[0]
    y_shape = np.shape(mu)[1]
    d_shape = np.shape(d)[0]
    table1 = np.empty((x_shape, y_shape, d_shape))
    for x in range(np.shape(mu)[0]):
        for y in range(np.shape(mu)[1]):
            for d_1 in range(np.shape(d)[0]):
                table1[x,y,d_1] = (logistic.cdf((yds_increment)*d[d_1]+(yds_increment/2),mu[x,y],s[x,y]) - \
                      logistic.cdf((yds_increment)*d[d_1]-(yds_increment/2),mu[x,y],s[x,y]))
            # the above line rounds yardage gained to the nearest chunk; note
            # we are using a logistic fit for this data
            normalizing_factor = sum(table1[x, y])
            #ensures each table sums to 1 for probabilities sake
            table1[x, y] = (table1[x, y] / normalizing_factor)
    return table1
Example No. 7
def vwma(vals: pd.Series,
         mean_alpha: float = 0.125,
         verbose: bool = False,
         inverse: bool = False):
    orig_idx = vals.index
    diff_vals = vals / vals.shift(1)
    if verbose:
        print(diff_vals)
        print(len(diff_vals))
    diff_vals.dropna(inplace=True)
    scaler_std = sk_prep.StandardScaler()
    # normal_vol_ewma = vals.ewm(alpha=mean_alpha).std()
    # if verbose:
    #     print(normal_vol_ewma)
    normal_vol_ewma = [
        v[0] for v in scaler_std.fit_transform(diff_vals.values.reshape(-1, 1))
    ]
    if inverse:
        normal_vol_ewma = [1 - logistic.cdf(v) for v in normal_vol_ewma]
    else:
        normal_vol_ewma = [logistic.cdf(v) for v in normal_vol_ewma]

    avg_ewm_factor = mean_alpha / 0.5
    alphas = [v * avg_ewm_factor for v in normal_vol_ewma]
    alphas = [mean_alpha] + alphas
    if verbose:
        print('Length of alphas list: ', len(alphas))
        print('Length of values list: ', len(vals))
    final_data = pd.DataFrame(data=list(zip(vals, alphas)),
                              columns=['vals', 'alpha'],
                              index=orig_idx)
    cume_alphas = None
    last_vwma = None
    for idx, val, alpha in final_data.itertuples():
        if not cume_alphas:
            cume_alphas = mean_alpha
            vwma = val
        else:
            cume_alphas += (alpha * (1 - cume_alphas))
            adj_alpha = alpha / cume_alphas
            vwma = (val * adj_alpha) + (last_vwma * (1 - adj_alpha))
        final_data.at[idx, 'cume_alphas'] = cume_alphas
        final_data.at[idx, 'vwma'] = vwma
        last_vwma = vwma
        # print(val, alpha)

    # print(sum(normal_vol_ewma)/len(normal_vol_ewma))
    if verbose:
        print('==== Head ====')
        print(final_data.head(10))
        print('==== Tail ====')
        print(final_data.tail(10))
        print(len(final_data['vwma']))

    # final_data.set_index(orig_idx)
    return final_data['vwma']
 def test_forwardX_same_as_old(self):
     Psi = self.forX.computePsi(self.myTestPoint['S'],logistic.cdf(self.myTestPoint['B_logodds']))
     LikelihoodOfX = self.forX.computeLikelihoodOfX(self.myTestPoint['X'],self.Z_original,logistic.cdf(self.myTestPoint['L_logodds']))
     beta = self.forX.computeBeta(Psi,LikelihoodOfX)
     pX_Test = self.forX.computePX(beta,logistic.cdf(self.myTestPoint['B0_logodds']),self.myTestPoint['S'],self.myTestPoint['X'],LikelihoodOfX,Psi)
     pX_Test = pX_Test[:,:,0] / (pX_Test[:,:,0]+pX_Test[:,:,1])
     pX_Correct = load(open('pX_Test_data.pkl','rb'))
     pX_Correct = np.concatenate([pX_Correct[i,0:self.T[i],:,:] for i in range(self.N)])
     pX_Correct = pX_Correct[:,:,0] / (pX_Correct[:,:,0]+pX_Correct[:,:,1])
     #import pdb; pdb.set_trace()
     np.testing.assert_array_almost_equal(pX_Test, pX_Correct, err_msg="forwardX likelihood test off",decimal = 6)
Example No. 9
 def prediction(self, modfv):
     """For each altMod, multiples its feature vector with featureWeights,
     subtracts featureBias, applies sigmoid, returns sum."""
     assert len(modfv.sAltMods) == len(modfv.altModFvs)
     numSoftMods = 0
     for alt in xrange(len(modfv.sAltMods)):
         dot = np.asscalar(np.dot(modfv.altModFvs[alt],
                                  self.featureWeights))
         numSoftMods += logistic.cdf(dot - self.featureBias)
     # 1 - ... because we need to output noncomp confidence.
     return (1. - logistic.cdf(numSoftMods / self.numSoftModsScale -
                               self.numSoftModsBias))
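
The per-altMod loop can be collapsed into a single vectorized expression. A minimal sketch, assuming altModFvs stacks into a 2-D array and featureWeights is a 1-D vector (the names below are illustrative, not the project's API):

import numpy as np
from scipy.stats import logistic

def prediction_vectorized(alt_mod_fvs, feature_weights, feature_bias,
                          num_soft_mods_scale, num_soft_mods_bias):
    # One dot product per altMod, then the same sigmoid-and-sum as the loop above.
    dots = np.asarray(alt_mod_fvs) @ np.asarray(feature_weights)
    num_soft_mods = logistic.cdf(dots - feature_bias).sum()
    # 1 - ... because the caller expects noncomp confidence.
    return 1. - logistic.cdf(num_soft_mods / num_soft_mods_scale - num_soft_mods_bias)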
Example No. 10
 def continuous_data(self):
     n = 10000
     np.random.seed(1011)
     df = pd.DataFrame()
     df['W1'] = np.random.normal(size=n)
     df['W2'] = np.random.binomial(1, size=n, p=logistic.cdf(df['W1']))
     df['W3'] = np.random.normal(size=n)
     df['A'] = np.random.binomial(1, size=n, p=logistic.cdf(-1 + 2 * df['W1'] ** 2))
     df['Y'] = -0.5 + 2*df['W1'] + 0.5*df['W2'] - 0.5*df['A'] + 1.1*df['W3'] + np.random.normal(size=n)
     df['t'] = 1
     df['id'] = df.index
     return df
Example No. 11
def weight_matrix(x_data, y_data, model, phi, s, alpha):
    """
    Function to calculate Wmap given the input data and corresponding labels
    
    Parameters:
    x_data      - Independent variables
    y_data      - labels
    model       - Type of model (logistic, poisson, ordinal)
    phi         - Threshold values for ordinal regression and empty list for others
    s and alpha - Control parameter for the spread of the distribution
    
    Returns:
    Wmap for the given data and number of iterations it took to converge
    """
    #y_data = y_data.reshape(-1,1)
    w = np.zeros((x_data.shape[1], 1))
    count = 0
    while True:
        a = x_data.dot(w)

        if model == "logistic":
            yi = logistic.cdf(a)
            d = y_data - yi
            r = yi * (1 - yi)
        elif model == "poisson":
            yi = np.exp(a)
            d = np.subtract(y_data, yi)
            r = yi
        else:
            yi = logistic.cdf(s * (phi - a))
            d = [
                yi[i][y_data[i]] + yi[i][y_data[i] - 1] - 1
                for i in range(len(x_data))
            ]
            d = np.array(d)
            r = [
                s**2 * ((yi[i][y_data[i]] * (1 - yi[i][y_data[i]])) +
                        (yi[i][y_data[i] - 1] * (1 - yi[i][y_data[i] - 1])))
                for i in range(len(x_data))
            ]
            r = np.array(r)

        g = x_data.transpose().dot(d) - (alpha * w)
        r = np.diagflat(r)
        h_inv = inv(-x_data.transpose().dot(r).dot(x_data) -
                    (alpha * np.identity(x_data.shape[1])))
        w_new = w - h_inv.dot(g)
        if np.divide(norm(w_new - w, 2), norm(w, 2)) < 0.001 or count == 100:
            break
        w = w_new
        count += 1
    return w_new, count
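
A minimal usage sketch for the logistic branch, on synthetic data; it assumes weight_matrix above is in scope together with its inv and norm helpers (e.g., from numpy.linalg), and that y_data is a column vector, as the commented-out reshape suggests:

import numpy as np
from scipy.stats import logistic

rng = np.random.default_rng(0)
n = 200
x_data = np.hstack([np.ones((n, 1)), rng.normal(size=(n, 2))])   # bias column + 2 features
true_w = np.array([[0.5], [1.0], [-1.0]])
y_data = rng.binomial(1, logistic.cdf(x_data @ true_w))          # (n, 1) Bernoulli labels

w_map, n_iter = weight_matrix(x_data, y_data, model="logistic",
                              phi=[], s=None, alpha=1.0)
print(w_map.ravel(), n_iter)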
Example No. 12
def naloxone_dgm_truth(network, pr_a, shift=False, restricted=False):
    graph = network.copy()
    data = network_to_df(graph)
    adj_matrix = nx.adjacency_matrix(graph, weight=None)
    data['O_sum'] = fast_exp_map(adj_matrix,
                                 np.array(data['O']),
                                 measure='sum')
    data['O_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['O']),
                                  measure='mean')
    data['G_sum'] = fast_exp_map(adj_matrix,
                                 np.array(data['G']),
                                 measure='sum')
    data['G_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['G']),
                                  measure='mean')

    # Running Data Generating Mechanism for A
    if shift:  # If a shift in the Odds distribution is instead specified
        prob = logistic.cdf(-1.3 - 1.5 * data['P'] +
                            1.5 * data['P'] * data['G'] +
                            0.95 * data['O_mean'] + 0.95 * data['G_mean'])
        odds = probability_to_odds(prob)
        pr_a = odds_to_probability(np.exp(np.log(odds) + pr_a))

    naloxone = np.random.binomial(n=1, p=pr_a, size=nx.number_of_nodes(graph))
    data['naloxone'] = naloxone
    if restricted:  # if we are in the restricted scenarios
        attrs = exposure_restrictions(network=network.graph['label'],
                                      exposure='naloxone')
        data.update(
            pd.DataFrame(list(attrs.values()),
                         index=list(attrs.keys()),
                         columns=['naloxone']))
        exclude = list(attrs.keys())

    # Creating network summary variables
    data['naloxone_sum'] = fast_exp_map(adj_matrix,
                                        np.array(data['naloxone']),
                                        measure='sum')

    # Running Data Generating Mechanism for Y
    pr_y = logistic.cdf(-1.1 - 0.2 * data['naloxone_sum'] + 1.7 * data['P'] -
                        0.9 * data['G'] + 0.75 * data['O_mean'] -
                        0.75 * data['G_mean'])
    overdose = np.random.binomial(n=1, p=pr_y, size=nx.number_of_nodes(graph))
    if restricted:
        data['overdose'] = overdose
        data = data.loc[~data.index.isin(exclude)].copy()
        overdose = np.array(data['overdose'])

    return np.mean(overdose)
Example No. 13
def naloxone_baseline_dgm(graph, number_of_nodes):
    """Simulates baseline variables for the naloxone & overdose data set

    G ~ Bernoulli(0.325)
    Uc ~ Bernoulli(0.65)
    P ~ Bernoulli(expit(B + B*G + B*sum(G)))
    O ~ Bernoulli(P==1: 0.1,
                  P==0: 0.3)

    Returns
    -------
    pandas DataFrame with the distribution of W
    """
    # Gender
    g = np.random.binomial(
        n=1, p=0.325, size=number_of_nodes
    )  # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4454335/
    for node, value in zip(graph.nodes(), g):
        graph.node[node]['G'] = value

    g_s = exp_map(graph, 'G', measure='mean')

    # Trust in authorities (unobserved variable)
    c = np.random.binomial(n=1, p=0.75, size=number_of_nodes)

    # Recently released from prison
    beta_p = {0: -1.1, 1: 0.5, 2: 0.1}  # Beta parameters
    mp = logistic.cdf(beta_p[0] + beta_p[1] * g + beta_p[2] * g_s)  # model
    p = np.random.binomial(
        n=1, p=mp, size=number_of_nodes)  # Generating values from above

    # Prior overdose
    beta_o = {0: -1.7, 1: 0.1, 2: 0.1, 3: 0.6}  # Beta parameters
    mo = logistic.cdf(beta_o[0] + beta_o[1] * g + beta_o[2] * g_s +
                      beta_o[3] * p)  # model
    o = np.random.binomial(
        n=1, p=mo, size=number_of_nodes)  # Generating values from above

    # Output W distribution data set
    nodes = []
    for nod, d in graph.nodes(data=True):
        nodes.append(nod)

    data = pd.DataFrame()
    data['id'] = nodes
    data['G'] = g
    data['Uc'] = c
    data['P'] = p
    data['O'] = o
    return data
def test_calc_b():
    from scipy.stats import logistic
    from update_params_linear_regression import calc_b

    p_2_new = np.array([1.0, 2.0, 3.0])
    p_3 = np.array([2.0, 1.0, 1.0])
    m_1 = np.array([2.0, 4.0, 3.0])
    v_1 = np.array([1.0, 1.0, 1.0])
    v_s = np.array([3.0, 1.0, 2.0])

    res = calc_b(p_2_new, p_3, m_1, v_1, v_s)
    expected_res = np.array([0.0, 3.5, 2.0 / 3.0]) * logistic.cdf(np.array([3.0, 3.0, 4.0])) + \
                   np.array([3.0, 15.0, 8.0]) * logistic.cdf(np.array([-3.0, -3.0, -4.0]))

    np.testing.assert_array_almost_equal(res, expected_res)
 def run(self, x, y):
     # N: # of Examples, k: # of features
     (N, k) = x.shape
     cumu_false = 0.0
     cumu_false_negative = 0.0
     if self.w is None:
         self.w = csc_matrix(np.zeros((k, 1)))
     # Start PA
     for i in range(N):
         if i % 100 == 0:
             print('step: ', i)
             print('Cumulative Error Rate in this day', cumu_false / (i + 1))
             print('Cumulative False Negative Rate in this day', cumu_false_negative / (i + 1))
         xi = x[i, :].T
         yi = y[i, :][0]
         tmp = (self.w.T).dot(xi)
         prob_of_positive = logistic.cdf(tmp[0, 0])
         predict = 1 if prob_of_positive >= 0.5 else -1
         mistake = 1 if (predict != yi) else 0
         false_negative = 1 if yi == 1 and mistake else 0
         cumu_false += mistake
         cumu_false_negative += false_negative
         # update w
         self.w = self.w + self.gamma * xi * ((yi + 1) / 2 - prob_of_positive)
     return (cumu_false, cumu_false_negative)
def label_generator(problem, X, param, difficulty=1, beta=None, important=None):
        
    if important is None or important > X.shape[-1]:
        important = X.shape[-1]
    dim_latent = sum([important**i for i in range(1, difficulty+1)])
    if beta is None:
        beta = np.random.normal(size=[1, dim_latent])
    important_dims = np.random.choice(X.shape[-1], important, replace=False)
    funct_init = lambda inp: np.sum(beta * generate_features(inp[:,important_dims], difficulty), -1)
    batch_size = max(100, min(len(X), 10000000//dim_latent))
    y_true = np.zeros(len(X))
    while True:
        try:
            for itr in range(int(np.ceil(len(X)/batch_size))):
                y_true[itr * batch_size: (itr+1) * batch_size] = funct_init(
                    X[itr * batch_size: (itr+1) * batch_size])
            break
        except MemoryError:
            batch_size = batch_size//2
    mean, std = np.mean(y_true), np.std(y_true)
    funct = lambda x: (np.sum(beta * generate_features(
        x[:, important_dims], difficulty), -1) - mean) / std
    y_true = (y_true - mean)/std
    if problem == 'classification':
        y_true = logistic.cdf(param * y_true)
        y = (np.random.random(X.shape[0]) < y_true).astype(int)
    elif problem == 'regression':
        y = y_true + param * np.random.normal(size=len(y_true))
    else:
        raise ValueError('Invalid problem specified!')
    return beta, y, y_true, funct
    def getProblems(self):
        problems = {}

        def cycleGen(items, speed):
            i = 0
            while True:
                item = items[i]
                for s in range(speed):
                    yield i, item
                i += 1
                if i >= len(items):
                    i = 0

        getQuestionDifficulty = cycleGen(self.questionDifficulty, 1)
        getQuestionSkill = cycleGen(self.questionSkill, 2)

        for pid in range(1, self.questionCount + 1):

            questionDifficultyGroup, questionDifficulty = next(
                getQuestionDifficulty)
            questionSkillGroup, questionSkill = next(getQuestionSkill)

            problem = {
                'id': pid,
                'title': str(pid),
                'statement': 'none',
                'performance': {},
                'difficulty': np.random.normal(questionDifficulty),
                'difficultyGroup': questionDifficultyGroup,
                'skillGroup': questionSkillGroup
            }
            problems[pid] = problem

        userSkills = np.random.normal(size=(self.userCount,
                                            len(self.questionSkill)))

        for u in range(self.userCount):

            if self.lambdaSolvedLevels:
                groups = self.lambdaSolvedLevels(u)
            else:
                groups = []
                allGroups = list(range(len(self.questionDifficulty)))
                shuffle(allGroups)
                for group in allGroups:
                    groups.append(group)
                    if random() > self.probabilitySolvingNextLevel:
                        break

            for problem in problems.values():

                if problem['difficultyGroup'] not in groups:
                    continue

                a = np.random.random()
                r = logistic.cdf(userSkills[u, problem['skillGroup']] -
                                 problem['difficulty'])
                problem['performance'][u] = 1.0 if a >= r else 0.0

        return problems
    def RF(self, args):  ## Random Forest

        logger.info("Running Random Forest... ")

        if args.predictor.lower() == 'classifier':
            from sklearn.ensemble import RandomForestClassifier as randomforest
            rf = randomforest(  #n_estimators = 5000,
                criterion='entropy', random_state=42)

        elif args.predictor.lower() == 'regressor':
            from sklearn.ensemble import RandomForestRegressor as randomforest
            ## Initialize RandomForest
            rf = randomforest(n_estimators=5000,
                              min_samples_leaf=0.12,
                              criterion='entropy',
                              warm_start=True,
                              max_depth=8)

        rf.fit(self.X_train, self.y_train)

        # Get the predicted values
        self.y_pred = rf.predict(self.X_data)

        if args.predictor.lower() == 'regressor':
            self.y_pred = logistic.cdf(self.y_pred)
        self.data['boosting_score'] = self.y_pred
        self.model = rf
        return self
Example No. 19
 def predict_proba(self, X):
     try:
         X = RegressionModel.augment_matrix(X)
         proba_y = logistic.cdf(np.matmul(X, self.w_))
         return proba_y
     except TypeError:
         raise RuntimeError("Unfitted model")
Example No. 20
def diet_baseline_dgm(graph, number_of_nodes):
    """Simulates baseline variables for the diet & BMI data set

    Returns
    -------
    pandas DataFrame with the distribution of W
    """
    # Gender
    g = np.random.binomial(n=1, p=0.5, size=number_of_nodes)

    # Baseline BMI
    b = np.random.lognormal(3.4, sigma=0.2, size=number_of_nodes)

    # Exercise
    pe = logistic.cdf(
        -0.25)  # logistic.cdf(-0.25 + 0.3*g + -0.0515*b + 0.001*b*b)
    e = np.random.binomial(
        n=1, p=pe, size=number_of_nodes)  # Generating values from above

    # Output W distribution data set
    nodes = []
    for nod, d in graph.nodes(data=True):
        nodes.append(nod)

    data = pd.DataFrame()
    data['id'] = nodes
    data['G'] = g
    data['B'] = b
    data['E'] = e
    return data
Example No. 21
    def _sigmoid(self, sigmoid_spacing, n_drifts):
        """
        Function that generates a periodic sigmoid, as required.
        """

        period = (
            int((self.n_samples) / (n_drifts)) if n_drifts > 0 else int(self.n_samples)
        )
        css = sigmoid_spacing if sigmoid_spacing is not None else 9999
        _probabilities = (
            logistic.cdf(
                np.concatenate(
                    [
                        np.linspace(
                            -css if i % 2 else css, css if i % 2 else -css, period
                        )
                        for i in range(n_drifts)
                    ]
                )
            )
            if n_drifts > 0
            else np.ones(self.n_samples)
        )

        # Quick fix so that a number of drifts that does not divide evenly can still be passed through
        probabilities = np.ones(self.n_chunks * self.chunk_size) * _probabilities[-1]
        probabilities[: _probabilities.shape[0]] = _probabilities

        return (period, probabilities)
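
A standalone sketch of the same idea, with illustrative constants in place of the instance attributes: n_drifts back-and-forth ramps of width period are concatenated and squashed through the logistic CDF, so the resulting probability oscillates between roughly 1 and 0 once per drift:

import numpy as np
from scipy.stats import logistic

n_samples, n_drifts, css = 1000, 4, 5
period = n_samples // n_drifts
ramps = [np.linspace(-css if i % 2 else css, css if i % 2 else -css, period)
         for i in range(n_drifts)]
probabilities = logistic.cdf(np.concatenate(ramps))   # ~1 -> ~0 -> ~1 -> ...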
Example No. 22
def sigmoid(x):
    """ Activation function for LSTM gate layers
        x: a vector, to pass through the sigmoid, squashing it to the range [0,1]
    """
    # Could experiment with more activation functions:
    #return np.tanh(x)
    return logistic.cdf(x)
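
With the default loc=0 and scale=1, logistic.cdf(x) is exactly the standard sigmoid 1/(1 + e^(-x)), i.e. scipy.special.expit; a quick self-contained check:

import numpy as np
from scipy.special import expit
from scipy.stats import logistic

z = np.linspace(-5, 5, 11)
np.testing.assert_allclose(logistic.cdf(z), expit(z))   # same curve
assert np.isclose(logistic.cdf(0.0), 0.5)               # sigmoid(0) = 0.5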
Example No. 23
    def cal_single_user_error(self, r):
        err = 0.0
        row = self.matrix[r]

        poshidprobs = np.zeros(self.numhids)
        poshidprobs += self.hidbiaises
        for i in range(len(row)):
            poshidprobs += self.Wijk[row[i][1] - 1][row[i][0]]
        poshidprobs = logistic.cdf(poshidprobs)

        #end of positive phase
        poshidstates = poshidprobs > np.random.rand(self.numhids)
        #print 'poshidstates', np.mean(poshidstates)

        for en in self.matrix1[r]:
            item = en[0]
            pred = en[1]
            negdata = np.zeros(self.five)
            for tmp in range(self.five):
                negdata[tmp] = np.sum(self.Wijk[tmp][item] * poshidstates)
                negdata[tmp] += self.visbiaises[tmp][item]
            negdata = np.exp(negdata)
            sum1 = np.sum(negdata)
            negdata /= sum1
            tmp = np.zeros(self.five)
            for i in range(self.five):
                tmp[i] = i + 1
            score = np.sum(negdata * tmp)
            err += abs(pred - score)
        return err  # return after accumulating the error over all of this user's items
Example No. 24
def diet_dgm(network, restricted=False):
    """
    Parameters
    ----------
    network:
        input network
    restricted:
        whether to use the restricted treatment assignment
    """
    graph = network.copy()
    data = network_to_df(graph)

    adj_matrix = nx.adjacency_matrix(graph, weight=None)
    data['G_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['G']),
                                  measure='mean')
    data['E_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['E']),
                                  measure='mean')
    data['E_sum'] = fast_exp_map(adj_matrix,
                                 np.array(data['E']),
                                 measure='sum')
    data['B_mean_dist'] = fast_exp_map(adj_matrix,
                                       np.array(data['B']),
                                       measure='mean_dist')
    data['B_mean'] = fast_exp_map(adj_matrix,
                                  np.array(data['B']),
                                  measure='mean')

    # Running Data Generating Mechanism for A
    pr_a = logistic.cdf(-0.5 + 0.05 * (data['B'] - 30) +
                        0.25 * data['G'] * data['E'] + 0.05 * data['E_mean'])
    diet = np.random.binomial(n=1, p=pr_a, size=nx.number_of_nodes(graph))
    data['diet'] = diet
    if restricted:  # if we are in the restricted scenarios
        attrs = exposure_restrictions(network=network.graph['label'],
                                      exposure='diet')
        data.update(
            pd.DataFrame(list(attrs.values()),
                         index=list(attrs.keys()),
                         columns=['diet']))

    data['diet_sum'] = fast_exp_map(adj_matrix,
                                    np.array(data['diet']),
                                    measure='sum')
    data['diet_t3'] = np.where(data['diet_sum'] > 3, 1, 0)

    # Running Data Generating Mechanism for Y
    bmi = (3 + data['B'] - 5 * data['diet'] - 5 * data['diet_t3'] +
           3 * data['G'] - 3 * data['E'] - 0.5 * data['E_sum'] +
           data['B_mean_dist'] +
           np.random.normal(0, scale=1, size=nx.number_of_nodes(graph)))
    data['bmi'] = bmi

    # Adding node information back to graph
    for n in graph.nodes():
        graph.nodes[n]['diet'] = int(data.loc[data.index == n, 'diet'].values)
        graph.nodes[n]['bmi'] = float(data.loc[data.index == n, 'bmi'].values)

    return graph
Example No. 25
    def weightWithDropslop(self, weighted, scale):
        'weight the adjacency matrix with the sudden drop of the time series for each column'
        if weighted:
            colWeights = np.multiply(self.tspim.dropslops,
                                     self.tspim.dropfalls)
        else:
            colWeights = self.tspim.dropslops
        if scale == 'logistic':
            from scipy.stats import logistic
            from sklearn import preprocessing
            'zero mean scale'
            colWeights = preprocessing.scale(colWeights)
            colWeights = logistic.cdf(colWeights)
        elif scale == 'linear':
            from sklearn import preprocessing
            # add a base level of suspicion for each edge
            colWeights = preprocessing.minmax_scale(colWeights) + 1
        elif scale == 'plusone':
            colWeights += 1
        elif scale == 'log1p':
            colWeights = np.log1p(colWeights) + 1
        else:
            print '[Warning] no scale for the prior weight'

        n = self.nV
        colDiag = lil_matrix((n, n))
        colDiag.setdiag(colWeights)
        self.graphr = self.graphr * colDiag.tocsr()
        self.graph = self.graphr.tocoo(copy=False)
        self.graphc = self.graph.tocsc(copy=False)
        print "finished computing weight matrix"
Example No. 26
    def RF(self, args):  ## Random Forest

        logger.info("Running Random Forest... ")

        if args.predictor.lower() == 'classifier':
            from sklearn.ensemble import RandomForestClassifier as randomforest

            rf = randomforest(criterion='entropy',
                              class_weight='balanced',
                              random_state=42)

        elif args.predictor.lower() == 'regressor':
            from sklearn.ensemble import RandomForestRegressor as randomforest
            ## Initialize RandomForest
            rf = randomforest(n_estimators=20000,
                              max_depth=4,
                              random_state=42,
                              max_samples=0.6,
                              n_jobs=-1)

        rf.fit(self.X_train, self.y_train)

        # Get the predicted values
        self.y_pred = rf.predict(self.X_data)

        if args.predictor.lower() == 'regressor':
            self.y_pred = logistic.cdf(self.y_pred)
        self.data['boosting_score'] = self.y_pred
        self.model = rf
        return self
    def run(self, x, y, U):
        # N: # of Examples, k: # of features
        (N, k) = x.shape
        (tmp, kNew) = U.shape
        UT = U.T

        if self.w is None:
            self.w = csc_matrix(np.zeros((kNew, 1)))
        # Start PA
        for i in range(N):
            if i % 100 == 0:
                print ('step: ', i)
                print ('Cumulative Error Rate', self.cumu_false / self.cumu_data)
                print ('Cumulative False Negative Rate', self.cumu_false_negative / self.cumu_data)
            xi = x[i, :].T
            yi = y[i, :][0]

            xiNew = UT.dot(xi)
            tmp = (self.w.T).dot(xiNew)
            prob_of_positive = logistic.cdf(tmp[0, 0])
            
            predict = 1 if prob_of_positive >= 0.5 else -1
            mistake = 1 if (predict != yi) else 0
            false_negative = 1 if yi == 1 and mistake else 0
            self.cumu_false += mistake
            self.cumu_false_negative += false_negative
            self.cumu_data += 1
            # update w
            self.w = self.w + self.gamma * xiNew * ((yi + 1) / 2 - prob_of_positive)

        return (self.cumu_false, self.cumu_false_negative, self.w)
Example No. 28
def random_data(N=5000, K=3, unobservables=False, **kwargs):

	"""
	Function that generates data according to one of two simple models that
	satisfies the Unconfoundedness assumption.

	The covariates and error terms are generated according to
		X ~ N(mu, Sigma), epsilon ~ N(0, Gamma).
	The counterfactual outcomes are generated by
		Y0 = X*beta + epsilon_0,
		Y1 = delta + X*(beta+theta) + epsilon_1,
	Selection is done according to the following propensity score function:
		P(D=1|X) = Lambda(X*beta),
	where Lambda is the standard logistic CDF.

	Expected args
	-------------
		N: integer
			Number of units to draw. Defaults to 5000.
		K: integer
			Number of covariates. Defaults to 3.
		unobservables: Boolean
			Returns potential outcomes and true propensity score
			in addition to observed outcome and covariates if True.
			Defaults to False.
		mu, Sigma, Gamma, beta, delta, theta: NumPy ndarrays
			Parameter values appearing in data generating process.

	Returns
	-------
		Tuple (Y, D, X) or (Y, D, X, Y0, Y1), where
			Y: N-dimensional array of observed outcomes
			D: N-dimensional array of treatment indicator,
			   with 1=treated, 0=control
			X: N-by-K matrix of covariates
			Y0: N-dimensional array of non-treated outcomes
			Y1: N-dimensional array of treated outcomes
	"""

	mu = kwargs.get('mu', np.zeros(K))
	beta = kwargs.get('beta', np.ones(K))
	theta = kwargs.get('theta', np.ones(K))
	delta = kwargs.get('delta', 3)
	Sigma = kwargs.get('Sigma', np.identity(K))
	Gamma = kwargs.get('Gamma', np.identity(2))

	X = np.random.multivariate_normal(mean=mu, cov=Sigma, size=N)
	Xbeta = X.dot(beta)
	pscore = logistic.cdf(Xbeta)
	D = np.array([np.random.binomial(1, p, size=1) for p in pscore]).flatten()

	epsilon = np.random.multivariate_normal(mean=np.zeros(2), cov=Gamma, size=N)
	Y0 = Xbeta + epsilon[:,0]
	Y1 = delta + X.dot(beta+theta) + epsilon[:,1]
	Y = (1-D)*Y0 + D*Y1

	if unobservables:
		return Y, D, X, Y0, Y1, pscore
	else:
		return Y, D, X
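
A minimal usage sketch, assuming the function above is importable alongside numpy and scipy.stats.logistic:

Y, D, X = random_data(N=1000, K=3)
print(Y.shape, D.shape, X.shape)    # (1000,) (1000,) (1000, 3)

# With unobservables=True, the potential outcomes and true propensity score are returned too.
Y, D, X, Y0, Y1, pscore = random_data(N=1000, K=3, unobservables=True)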
Example No. 29
def vaccine_baseline_dgm(graph, number_of_nodes):
    """Simulates baseline variables for the vaccine & infection data set

    A ~ Bernoulli(0.12)
    H ~ Bernoulli(0.65)

    Returns
    -------
    pandas DataFrame with the distribution of W
    """
    data = pd.DataFrame()
    nodes = []
    for nod, d in graph.nodes(data=True):
        nodes.append(nod)

    data['id'] = nodes

    # Asthma
    a = np.random.binomial(n=1, p=0.15, size=number_of_nodes)
    data['A'] = a

    # Hand hygiene
    d = np.random.binomial(n=1,
                           p=logistic.cdf(-0.15 + 0.1 * a),
                           size=number_of_nodes)
    data['H'] = d

    # Output W distribution data set
    return data
Example No. 30
def get_data():
    np.random.seed(0)
    beta0 = 2
    beta = np.array([1] * 10 + [-1] * 10 + [0] * 80)[None, :]
    X = np.random.uniform(0, 1, p * n).reshape((p, n))  # n (samples) and p (features) are assumed to be defined at module level
    f_true = logistic.cdf(beta0 + beta @ X)[0]
    return X, f_true
Example No. 31
def random_data(N=5000, K=3, unobservables=False, **kwargs):
    """
	Function that generates data according to one of two simple models that
	satisfies the unconfoundedness assumption.

	The covariates and error terms are generated according to
		X ~ N(mu, Sigma), epsilon ~ N(0, Gamma).

	The counterfactual outcomes are generated by
		Y0 = X*beta + epsilon_0,
		Y1 = delta + X*(beta+theta) + epsilon_1.

	Selection is done according to the following propensity score function:
		P(D=1|X) = Lambda(X*beta).

	Here Lambda is the standard logistic CDF.

	Parameters
	----------
	N: int
		Number of units to draw. Defaults to 5000.
	K: int
		Number of covariates. Defaults to 3.
	unobservables: bool
		Returns potential outcomes and true propensity score
		in addition to observed outcome and covariates if True.
		Defaults to False.
	mu, Sigma, Gamma, beta, delta, theta: NumPy ndarrays, optional
		Parameter values appearing in data generating process.

	Returns
	-------
	tuple
		A tuple in the form of (Y, D, X) or (Y, D, X, Y0, Y1) of
		observed outcomes, treatment indicators, covariate matrix,
		and potential outcomes.
	"""
    mu = kwargs.get('mu', np.zeros(K))
    beta = kwargs.get('beta', np.ones(K))
    theta = kwargs.get('theta', np.ones(K))
    delta = kwargs.get('delta', 3)
    Sigma = kwargs.get('Sigma', np.identity(K))
    Gamma = kwargs.get('Gamma', np.identity(2))

    X = np.random.multivariate_normal(mean=mu, cov=Sigma, size=N)
    Xbeta = X.dot(beta)
    pscore = logistic.cdf(Xbeta)
    D = np.array([np.random.binomial(1, p, size=1) for p in pscore]).flatten()

    epsilon = np.random.multivariate_normal(mean=np.zeros(2),
                                            cov=Gamma,
                                            size=N)
    Y0 = Xbeta + epsilon[:, 0]
    Y1 = delta + X.dot(beta + theta) + epsilon[:, 1]
    Y = (1 - D) * Y0 + D * Y1

    if unobservables:
        return Y, D, X, Y0, Y1, pscore
    else:
        return Y, D, X
Example No. 32
def plot_logistic_fit(models, data, CV_info,num_columns = 2):
    num_cv = CV_info.n_folds
    num_rows = int(np.ceil(float(num_cv)/float(num_columns)))
    fig_temp = plot.subplots(nrows=num_rows, ncols=num_columns)
    fig = fig_temp[0]
    fig.tight_layout()
    axes = fig_temp[1]
    cv = 0
    #
    for train,test in CV_info:
        row_n = int(np.ceil(cv/num_columns))
        col_n = int(np.mod(float(cv),float(num_columns)))
        axes[row_n,col_n].set_title('CV fold %i' % (cv+1))
        intercept = models[cv].intercept_
        parameters = np.squeeze(np.asarray(models[cv].coef_))
#------------------------------------------------------------------------------ 
        # For plotting data along collapsed dimension
        collapsed_x_data = intercept + np.dot(parameters,data[test].transpose())
        y_data = models[cv].predict(data[test])
        y_data = np.asarray(y_data)
        axes[row_n,col_n].scatter(collapsed_x_data,y_data)
#------------------------------------------------------------------------------ 
        # For plotting function
        x_func = np.linspace(np.min(collapsed_x_data),np.max(collapsed_x_data),100)
        y_func = logistic.cdf(x_func)
        axes[row_n,col_n].plot(x_func,y_func)
#------------------------------------------------------------------------------ 
        cv += 1
#------------------------------------------------------------------------------ 
    plot.show()
Example No. 33
    def SGBoost(self, args): ## Stochastic gradient Boosting

        logger.info("Running Stochastic Gradient Boosting ... ")
        
        if args.predictor.lower() == 'classifier': 
            from sklearn.ensemble import GradientBoostingClassifier as sgbt
        elif args.predictor.lower() == 'regressor':
            from sklearn.ensemble import GradientBoostingRegressor as sgbt
        
        ## Initialize model
        sgbt = sgbt(max_depth=6, 
                    subsample= 0.6,
                    n_estimators = 5000)
        

        ## Fit regressor to the training set
        sgbt.fit(self.X_train, self.y_train)
    
        ## Predict the labels
        self.y_pred = sgbt.predict(self.X_data)

        if args.predictor.lower() == 'regressor':
            self.y_pred = logistic.cdf(self.y_pred)
        
        self.data['boosting_score'] = self.y_pred
        self.model = sgbt

        return self
Example No. 34
    def fit(self, X, y, beta=0., max_iter=10000, eps=10e-6):
        X, num_features = self.init_params_(X)

        n_iter = 0
        conv_criterion = True

        l_w = LogisticRegression.log_likelihood(
            self.eta_, X, y) + beta * np.inner(self.w_, self.w_) / 2

        while conv_criterion and n_iter < max_iter:
            nabla_l_w = LogisticRegression.grad_log_likelihood(
                self.eta_, X, y) + beta * self.w_
            Hl_w = LogisticRegression.hess_log_likelihood(
                self.eta_, X) + beta * np.eye(num_features + 1)
            inv_Hl_w = np.linalg.inv(Hl_w)
            self.w_ = self.w_ - np.matmul(inv_Hl_w, nabla_l_w)

            n_iter += 1
            conv_criterion = l_w
            self.eta_ = logistic.cdf(np.matmul(X, self.w_))
            l_w = LogisticRegression.log_likelihood(
                self.eta_, X, y) + beta * np.inner(self.w_, self.w_) / 2
            det_H = np.linalg.det(Hl_w)
            conv_criterion = (np.abs(conv_criterion - l_w) >
                              eps) and (np.abs(det_H) > eps)
Example No. 35
def sigmoid(x):
    """ Activation function for LSTM gate layers
        x: a vector, to pass through the sigmoid, squashing it to the range [0,1]
    """
    # Could experiment with more activation functions: 
    #return np.tanh(x)
    return logistic.cdf(x) 
Example No. 36
    def precompute_single_recommendations(self, user, N):

        user_ratings_so_far = self.matrix[user]
        #positive phase
        poshidprobs = np.zeros(self.numhids)
        poshidprobs += self.hidbiaises

        for i in range(len(user_ratings_so_far)):
            poshidprobs += self.Wijk[user_ratings_so_far[i][1] -
                                     1][user_ratings_so_far[i][0]]
        poshidprobs = logistic.cdf(poshidprobs)
        poshidstates = poshidprobs > np.random.rand(self.numhids)
        print np.mean(poshidstates)

        #start negative phase
        negdata = np.zeros([self.five, self.numdims])
        for tmp in range(self.five):
            negdata[tmp] = np.dot(self.Wijk[tmp], poshidstates)
            negdata[tmp] += self.visbiaises[tmp]
        negdata = np.exp(negdata)
        sum1 = np.sum(negdata, axis=0)
        negdata /= sum1
        tmp = np.zeros([self.five, self.numdims])
        for i in range(self.five):
            tmp[i] = i + 1
        score = np.sum(negdata * tmp, axis=0)
        self.score[user] = score
        print np.mean(score)
        for en in user_ratings_so_far:
            score[en[0]] = -1

        return list(np.argsort(score)[-N:])
    def XGBoost(self, args):  ## Gradient Boosting

        logger.info("Running Gradient Boosting ... ")

        if args.predictor.lower() == 'classifier':
            from xgboost import XGBClassifier as xgb
        elif args.predictor.lower() == 'regressor':
            from xgboost import XGBRegressor as xgb

        xg_regression_model = xgb(objective='binary:logistic',
                                  n_estimators=20000,
                                  colsample_bytree=0.6,
                                  max_depth=6)

        ## Fit the regressor to the training set
        xg_regression_model.fit(self.X_train, self.y_train)

        ## Predict the labels
        self.y_pred = xg_regression_model.predict(self.X_data)
        if args.predictor.lower() == 'regressor':
            self.y_pred = logistic.cdf(self.y_pred)
        self.data['boosting_score'] = self.y_pred
        self.model = xg_regression_model

        return self
Example No. 38
def diff_to_prob(
                differentials,
                sigmoid_mean = 0,
                sigmoid_slope = .19
                ):
    # Ted's slope is 1/scale
    prob = logistic.cdf(differentials,loc=sigmoid_mean,scale=1/sigmoid_slope)
    return prob
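
As the comment notes, scipy's scale is the reciprocal of the slope: logistic.cdf(x, loc, scale) equals expit((x - loc) / scale), so passing scale=1/sigmoid_slope gives expit(sigmoid_slope * (x - sigmoid_mean)). A small self-contained check:

import numpy as np
from scipy.special import expit
from scipy.stats import logistic

differentials = np.array([-10.0, 0.0, 5.0, 20.0])
sigmoid_mean, sigmoid_slope = 0.0, 0.19
np.testing.assert_allclose(
    logistic.cdf(differentials, loc=sigmoid_mean, scale=1 / sigmoid_slope),
    expit(sigmoid_slope * (differentials - sigmoid_mean)))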
Example No. 39
 def test(self, data):
     (rows, columns) = data.shape
     data = numpy.insert(data, 0, numpy.ones(rows), axis=1)
     (rows, columns) = data.shape
     y = logistic.cdf(numpy.dot(data,self.w))
     pred = numpy.around(y)
     
     return (y, pred)
     
Example No. 40
File: lr.py Project: ay27/mpips
 def train(self):
     self.theta = np.random.rand(self.Ndim)
     if self.wk_rank == 0:
         self.push_vector("theta", self.theta)
     self.sync()
     for i in range(self.iter_num):
         # print('iter: %d' % i)
         self.local_theta = np.zeros(self.Ndim)
         for x, y in self.train_set:
             coef = self.learning_rate * (y - logistic.cdf(np.inner(x, self.theta)))
             self.local_theta += (coef * x)
         self.push_vector("theta", self.local_theta)
         self.sync()
         self.theta = self.pull_vector("theta")
     if self.wk_rank == 0:
         print(self.theta)
Example No. 41
def forward(x0, w):
    """ NN forward propagation algorithm.

    Inputs (x0, w):
        x0: Network input values
        w: Tuple of weights, one for each layer
    
    Outputs (zL, xl_list):
        zL: Output neuron discriminative function (NOT passed through sigmoid)
        xl_list: list of activation values for each layer (YES passed through sigmoids)
    """
    xl = x0
    xl_list = [xl]
    for wl in w:
        zl = np.dot(wl, xl)
        xl = logistic.cdf(zl)
        xl_list.append(xl)
    return np.asscalar(zl), xl_list
Example No. 42
def get_hull(x, y, n_bins):
    bins = np.linspace(-5., 5., n_bins)
    bins = logistic.cdf(bins) * (2) - 1
    bins = np.linspace(x.min(), x.max(), n_bins)
    down_hull = np.zeros((n_bins-1, ))
    up_hull = np.zeros((n_bins-1, ))
    x_hull = np.zeros((n_bins-1, ))
    for i in range(n_bins-1):
        down, up = bins[i], bins[i+1]
        bin_ids = (x >= down) & (x < up)
        down_id = y[bin_ids].argmin()
        up_id = y[bin_ids].argmax()
        down_hull[i] = y[bin_ids][down_id]
        up_hull[i] = y[bin_ids][up_id]
        x_hull[i] = x[bin_ids][down_id]

    x_hull = bins[:-1] + (bins[1] - bins[0])/2
    return x_hull, down_hull, up_hull
def calc_log_evidence(m, v_2, sigma_0, X, y, m_1, v_1, m_2, v, p, p_3, p_2, v_s, bias):
    ## TODO: Calculate properly with the bias!!

    sigma_0_inv = 1. / sigma_0
    V_2 = np.diag(v_2)
    v_2_inv = 1. / v_2
    V_2_inv = np.diag(v_2_inv)
    v_1_inv = 1. / v_1
    v_inv = 1. / v
    m_1_s_v_1 = np.multiply(m_1 ** 2, v_1_inv)
    m_2_s_v_2 = np.multiply(m_2 ** 2, v_2_inv)
    m_s_v = np.multiply(m ** 2, v_inv)

    n, d = X.shape
    try:
        alpha = scipy.linalg.det(np.identity(d) + sigma_0_inv * np.dot(V_2, np.dot(X.T, X)))
    except ValueError:
        import ipdb;
        ipdb.set_trace()

    cdf_p3 = log.cdf(p_3)
    cdf_m_p3 = log.cdf(-p_3)
    cdf_p2 = log.cdf(p_2)
    cdf_m_p2 = log.cdf(-p_2)
    # import ipdb; ipdb.set_trace()
    c = cdf_p3 * norm.pdf(0, m_1[:d - bias], np.sqrt(v_1 + v_s)[: d - bias]) + \
        cdf_m_p3 * norm.pdf(0, m_1[: d - bias], np.sqrt(v_1)[: d - bias])
    c[np.where(c == 0)[0]] = 0.0000000001

    log_s1 = 0.5 * (np.dot(m.T, np.dot(V_2_inv, m_2) + sigma_0_inv * np.dot(X.T, y)) -
                    n * np.log(2 * np.pi * sigma_0) - sigma_0_inv * np.dot(y.T, y) -
                    np.dot(m_2.T, np.dot(V_2_inv, m_2)) - np.log(alpha) +
                    np.sum(np.log(1. + np.multiply(v_2, v_1_inv)) + m_1_s_v_1 + m_2_s_v_2 - m_s_v))
    # import ipdb; ipdb.set_trace()
    log_s2 = 0.5 * np.sum(2. * np.log(c) + np.log(1. + np.multiply(v_1, v_2_inv)[: d - bias]) +
                          m_1_s_v_1[: d - bias] + m_2_s_v_2[: d - bias] - m_s_v[: d - bias] +
                          2. * np.log(log.cdf(p) * cdf_m_p3 + log.cdf(-p) * cdf_p3)
                          - 2. * np.log(cdf_m_p3 * cdf_p3))

    res = log_s1 + log_s2 + 0.5 * d * np.log(2. * np.pi) + \
          0.5 * np.sum(np.log(v) + m_s_v - m_1_s_v_1 - m_2_s_v_2) + \
          np.sum(np.log(np.multiply(cdf_p2, cdf_p3) + np.multiply(cdf_m_p2, cdf_m_p3)))

    if np.isinf(res) or np.isnan(res):
        import ipdb;
        ipdb.set_trace()
    return res
Example No. 44
 def train(self, data, target, lamda, iterations, tolerance, learning_rate):
     (rows, columns) = data.shape
     data = numpy.insert(data, 0, numpy.ones(rows), axis=1)
     (rows, columns) = data.shape
     
     self.w = numpy.zeros(columns)
     
     ew_old = -numpy.inf
     for i in range(iterations):
         yx = numpy.dot(data, self.w)
         s = logistic.cdf(yx)
         
         ew = (target*numpy.log(s) + (1-target)*numpy.log(1-s)).sum() - (0.5*lamda)*(numpy.matmul(numpy.transpose(self.w), self.w))
         print('Iteration: {}, Cost function: {}'.format(i, ew));
         
         if abs(ew - ew_old) < tolerance:
             break;
         
         gradient = numpy.matmul(numpy.transpose(data), target-s) - lamda*self.w
         
         self.w = self.w + learning_rate*gradient
         ew_old = ew
def calc_b(p_2_new, p_3, m_1, v_1, v_s):
    tmp_1 = logistic.cdf(p_2_new + p_3) * (m_1 ** 2 - v_1 - v_s) / (v_1 + v_s) ** 2
    tmp_2 = logistic.cdf(- p_2_new - p_3) * (m_1 ** 2 * v_1 ** -2 - 1.0 / v_1)

    return tmp_1 + tmp_2
Example No. 46
def sigmoid(value):
    return logistic.cdf(value)
    def test_forwardS_same_as_old(self):
        self.forS.astep(0.)
        pS0_Test = self.forS.compute_S0_GIVEN_X0(self.forS.computeLikelihoodOfS(self.myTestPoint['X'],logistic.cdf(self.myTestPoint['B_logodds']),logistic.cdf(self.myTestPoint['B0_logodds'])))
        pS0_Correct = load(open('pS0_Test_data.pkl', 'rb'))

        pSnt_Test = np.zeros((self.nObs,self.M))
        for n in xrange(self.N):
            n0 = self.zeroIndices[n]
            for t in xrange(self.T[n]-1):
                pSnt_Test[n0+t] = self.forS.compute_pSt_GIVEN_St1(n0,t,self.myTestPoint['S'][n0+t])
        pSnt_Test = pSnt_Test / pSnt_Test.sum(axis=1)[:,np.newaxis]
        pSnt_Correct = load(open('pSnt_Test_data.pkl', 'rb'))
        pSnt_Correct = np.concatenate([pSnt_Correct[i,0:self.T[i],:] for i in range(self.N)])
        pSnt_Correct = pSnt_Correct / pSnt_Correct.sum(axis=1)[:,np.newaxis]

        #import pdb; pdb.set_trace()
        np.testing.assert_array_almost_equal(pS0_Test, pS0_Correct, err_msg="forwardS test off",decimal = 6)
        np.testing.assert_array_almost_equal(pSnt_Test, pSnt_Correct, err_msg="forwardS test off",decimal = 6)
def sigmoid(z):

    """Notice that z can be a scalar, a vector or a matrix.
    Returns the sigmoid of the input z."""

    return logistic.cdf(z)
Example No. 49
    def __init__(self, num_features, num_output=1, hidden_layer=None, 
                 activation=("expit", 1), learn_rate=1, default_bias="random", 
                 max_epochs=10, scale=1, verbose=False, temperature=1,
                 online=True):
        """Constructor for the NeuralNet class.

        num_features:   The number of features that each sample has. This will
                        equal the number of neurons in the input layer.
        num_output:     The number of output labels each sample has. This will
                        equal the number of neurons in the output layer.
        hidden_layer:   A list containing the number of nodes in the (i+1)th 
                        hidden layer (for i starting at 0). If set to None (default
                        value), then the neural network will have one hidden
                        layer with num_features * 1.5 hidden nodes.
        activation:     The default activation function to use in each neuron.
                        Default is the logistic (sigmoid) function with 
                        temperature = 1:
                        s(x) = 1/(1 + e^(-x)). 
                        This uses scipy for optimization purposes.
        learn_rate:     The learning rate applied to the training process. Default
                        value is 1.
        default_bias:   The default weight assigned to the weight vector. Default
                        value is random, uniformly between (-scale, scale).
        max_epochs:     The max number of iterations on the training set. Default
                        value is 10.
        scale:          Determines the range of random values for the initial
                        weights of the model. The value of the weights will range
                        from (-scale, scale). For example, if scale=2, then the
                        initial weights can range from -2 to 2. Default
                        value is 1.
        verbose:        Used to see how fast the neural network is being trained.
                        Indicates when an epoch has finished.
        online:         Indicates that the weights should be updated
                        for every training example.
        """

        self.verbose = verbose
        self.num_features = num_features
        self.num_output = num_output
        self.hidden_layer = hidden_layer

        self.learn_rate = learn_rate
        self.default_bias = default_bias

        self.max_epochs = max_epochs
        self.scale = scale

        self.online = online

        # NOTE: There is no proven evidence that the ideal number of nodes in
        # the hidden layer is 1.5 times the number of input features, but it
        # is often suggested. Citation needed.
        if self.hidden_layer is None:
            num_nodes = int(math.floor(num_features * 1.5))
            self.hidden_layer = [num_nodes]

        # TODO: Address this.
        if len(self.hidden_layer) > 1:
            raise NotImplementedError(one_line("""Neural network containing 
            more than one hidden layer has not been implemented yet."""))

        # TODO: Address this.
        if self.num_output != 1:
            raise NotImplementedError(one_line("""Neural network containing more 
            than one output label has not been implemented yet."""))

        # Assign activation function here, depending on the argument.
        if activation[0] == "expit":
            temp = activation[1]

            # This is just an optimization using scipy
            if temp == 1:
                self.default_act = expit
                self.default_deriv = np.vectorize(lambda x: x * (1 - x))
            else:
                self.default_act = lambda x: logistic.cdf(x, scale=temp)
                self.default_deriv = np.vectorize(lambda x: temp * x * (1 - x))

#        elif activation[0] == "tanh":
#            if activation[1] != 1: print(one_line("""Warning: other temperatures
#            for the tanh activation function have not been implemented."""))
#            self.default_act = lambda x: 2 * expit(x) - 1
#            self.default_deriv = np.vectorize(lambda x: 2 * x * (1 - x))

        else:
            raise NotImplementedError(one_line("""Activation function not 
            supported yet: {0}""".format(activation)))

        self.init_weights()
Example No. 50
import os
from scipy.stats import logistic

flag = "prod"
conf = Config(flag, "prod" , 300)

model = np.load(conf.path_model_npy + ".npy")
word_embed = model[0]
prod_embed = model[1]
transfer_w = model[2]
transfer_b = model[3]

dp = DataProvider(conf)

weight = np.dot(word_embed, transfer_w)
weight = logistic.cdf(np.add(weight, transfer_b))

for topic_id in range(conf.dim_item):
    word_ids = weight[:,topic_id]
    word_ids = np.argsort(word_ids)[::-1][:50]
    words = [(dp.idx2word[word_id], weight[word_id, topic_id]) for word_id in word_ids if weight[word_id, topic_id] > .9]
    print("Topic", topic_id)
    print(words)
    print("=========================\n")

print("finish")




Example No. 51
def sigmoid(x):
    return logistic.cdf(x)
Example No. 52
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import logistic
from sat_exp import saturating_exp

F = lambda x, (a,b,l): 0.5 + (1-0.5-l)*logistic.cdf(x, loc=a, scale=b)
Finv = lambda thresh_val, (a,b,l): logistic.ppf((thresh_val - 0.5)/(1-0.5-l), loc=a, scale=b)

def color_list(n, cmap=None):
    cm = plt.get_cmap("RdYlGn" if cmap is None else cmap)
    colors = [cm(i) for i in np.linspace(0, 1, n)]
    return colors*(n/len(colors)) + colors[:n%len(colors)]

def plot_pmf(cohs, pcor, res):
    cmap = color_list(len(res)+1, 'cool')
    xs = np.array(cohs)
    xsf = np.linspace(min(xs), max(xs), 50)
    for i, (theta, thresh) in enumerate(res):
        plt.scatter(cohs, pcor[i, :]/100.0, color=cmap[i])
        plt.plot(xsf, F(xsf, theta), color=cmap[i], linestyle='-')
    plt.xlim([0.01, None])
    plt.ylim([0.45, 1.05])
    plt.xscale('log')
    plt.xlabel('signal strength')
    plt.ylabel('accuracy')
    plt.show()

def plot_pmf_thresh(reses):
    cmap = color_list(len(reses)+1, 'Greens')
    for i, res in enumerate(reses):
        durs = np.arange(1, len(res)+1)
Example No. 53
filename = "skeletons & matching cropped pics/cropped pics/v018-penn.9-1uB2D2-cropm.png"

img = io.imread(filename)
img = (img - np.mean(img)) / np.std(img)


sigma = .75

Hxx, Hxy, Hyy = hessian_matrix(img, sigma=sigma, mode="wrap")


e1, e2 = hessian_matrix_eigvals(Hxx, Hxy, Hyy)

# How much bigger is the first eigenvalue's magnitude
# compared with the second?

log_condition = np.log(abs(e1/e2))
log_condition = log_condition / np.std(log_condition)

out = logistic.cdf(log_condition)

markers = np.zeros_like(out)
markers[out < 0] = 1
markers[out > np.percentile(out, 90)] = 2

plt.imshow(out)
plt.set_cmap('binary')
plt.colorbar()
plt.show()

Example No. 54
def trainProx(wRows, wData, n, b, X, y, eta, l1, l2, outputFreq):
    nr,nc = X.shape
    nl = y.shape[1]
    assert y.shape[0] == nr
    assert b.size == nl
    
    if useAdaGrad:
        if useSharedStep:
            assert n.size == nc
        else:
            assert n.shape == (nc,nl)

    # vector of time step at which each coordinate is up-to-date
    tVec = np.zeros(nc, dtype=np.int64)
    
    onlineLoss = 0
    totalOnlineLoss = 0
    
    subEpoch = 0    
    for t in range(nr):
    
        if t % 100 == 0:
            print "training row: " + str(t)

        (row, xInds, xVals) = getSample(X, t)
        
        if xInds.size == 0:
            continue

        # 1. Lazily update relevant rows of w, storing them in tempW        
        totalNnzW = sum(wRows[xInd].size for xInd in xInds)
            
        tempW = np.ndarray(totalNnzW)
        kVec = np.ndarray(totalNnzW, dtype=np.int64)

        if useAdaGrad:
            etaVec = np.ndarray(totalNnzW)
        else:
            etaVec = eta

        pos = 0
        for xInd in xInds:
            numW = wRows[xInd].size
            endPos = pos+numW
            kVec[pos:endPos] = t - tVec[xInd]
            if useAdaGrad:
                if useSharedStep:
                    etaVec[pos:endPos] = eta / (1 + math.sqrt(n[xInd]))
                else:
                    etaVec[pos:endPos] = eta / (1 + np.sqrt(n[xInd,wRows[xInd]]))
            tempW[pos:endPos] = wData[xInd]
            pos = endPos

        tempW = iteratedProx(tempW, kVec, l1*etaVec, l2*etaVec)
        tVec[xInds] = t
        
        # 2. Compute scores
        scores = b.copy()
        pos = 0
        for (xInd, xVal) in zip(xInds, xVals):
            numW = wRows[xInd].size
            endPos = pos+numW
            scores[wRows[xInd]] += tempW[pos:endPos] * xVal
            pos = endPos

        # 3. Compute loss and subtract labels from (transformed) scores for gradient        
        (startY, endY) = y.indptr[row], y.indptr[row+1]
        yCols = y.indices[startY:endY]
        yVals = y.data[startY:endY]
        
        if useSqErr:
            # linear probability model
            # quadratic loss for incorrect prediction, no penalty for invalid (out of range) correct prediction
            scores[yCols] = yVals - scores[yCols]
            scores = np.clip(scores, 0, np.inf)
            scores[yCols] *= -1
            loss = 0.5 * np.dot(scores, scores)
            onlineLoss += loss
            totalOnlineLoss += loss
        else:
            pos = logistic.logcdf(scores)
            neg = logistic.logcdf(-scores)
            pos -= neg
            
            scores = logistic.cdf(scores)
            loss = -np.dot(pos[yCols], yVals)-neg.sum()
            scores[yCols] -= yVals

            onlineLoss += loss
            totalOnlineLoss += loss
 
        # 4. Compute gradient as outer product
        # this will be dense in general, unfortunately       
        g = np.outer(xVals, scores)

        # 5. Compute updated point (store it in g)            
        if useAdaGrad:
            if useSharedStep:
                n[xInds] += np.square(g).sum(1)
                etaVec = np.tile(eta/(1+np.sqrt(n[xInds])), (nl,1)).T
            else:
                n[xInds,:] += np.square(g)
                etaVec = eta/(1+np.sqrt(n[xInds,:]))
        else:
            etaVec = eta
            
        g *= -etaVec

        pos = 0
        for xI in range(xInds.size):
            xInd = xInds[xI]
            numW = wRows[xInd].size
            endPos = pos+numW
            g[xI,wRows[xInd]] += tempW[pos:endPos]
            pos = endPos

        # 6. Sparsify updated point and store it back to W
        # now g holds dense (over labels) W - eta*g        
        reassignToConvertedW(wRows, wData, xInds, g)

        # Print output periodically        
        if (t+1) % outputFreq == 0:
            bringAllUpToDate(wRows, wData, tVec, t+1)
            tVec = np.tile(t+1, nc)
            printOutputLine(subEpoch, wRows, wData, b, testX, testY, l1, l2, onlineLoss / outputFreq)
            subEpoch += 1
            onlineLoss = 0

    # print output for whole epoch
    if nr % outputFreq != 0: # otherwise we are already up to date
        bringAllUpToDate(wRows, wData, tVec, nr)
    printOutputLine("*", wRows, wData, b, testX, testY, l1, l2, totalOnlineLoss / nr)
    print
Example No. 55
def _make_weight(year):
    scaled = 2*(year-YEARS[0]) / (YEARS[-1] - YEARS[0]) - 1
    scaled *= -4
    return logistic.cdf(scaled)
def calc_a(p_2_new, p_3, m_1, v_1, v_s):
    tmp_1 = logistic.cdf(p_2_new + p_3) * m_1 / (v_1 + v_s)
    tmp_2 = logistic.cdf(-p_2_new - p_3) * m_1 / v_1

    return tmp_1 + tmp_2
Example No. 57
 def sigmoid(self):
     return FeatureObj(logistic.cdf(self.v), True) # TODO pay attention to rounding errors
Example No. 58
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 15 15:16:27 2017

@author: michellezhao
"""

from scipy.stats import logistic
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(1,1)


x = np.linspace(0,50)
ax.plot(x, logistic.cdf(x, loc = 3, scale = 1), 'r-', lw=5, alpha=0.6, label= 'logistic cdf')
plt.show()