def predict(theta1, theta2, input_layer):
    layer2 = sigmoid.sigmoid(
        theta1.dot(input_layer))  # (25, 401) * (401, 5000) = (25, 5000)
    layer2_new = np.r_[np.ones((1, input_layer.shape[1])),
                       layer2]  # add bias row -> (26, 5000)
    output_layer = sigmoid.sigmoid(
        theta2.dot(layer2_new))  # (10, 26) * (26, 5000) = (10, 5000)
    return np.argmax(output_layer, axis=0) + 1
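Both predict variants here, and most of the examples below, assume a sigmoid helper that is imported either as a module (sigmoid.sigmoid) or as a bare function; the tests near the end of this page also pass it center/min_val/max_val keywords. A minimal sketch consistent with those call patterns (an assumption, not the original module):

import numpy as np

def sigmoid(x, center=0.0, min_val=0.0, max_val=1.0):
    # Logistic curve rescaled to (min_val, max_val) with its midpoint at
    # `center`; with the defaults this reduces to the plain element-wise
    # logistic 1 / (1 + exp(-x)).
    x = np.asarray(x, dtype=float)
    return min_val + (max_val - min_val) / (1.0 + np.exp(-(x - center)))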
def predict(t1, t2, X):

    row, _ = X.shape
    X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)

    a1 = sigmoid(X @ t1.T)
    pad = np.ones((row, 1))
    a1 = np.concatenate((pad, a1), axis=1)

    a2 = sigmoid(a1 @ t2.T)
    p = np.argmax(a2, axis=1)

    return p
Example #3
def compute3(x, y, theta):
    m = y.size
    value = x.dot(theta)
    v = 1.0
    for idx, item in enumerate(y):
        if item == 1:
            v *= sigmoid.sigmoid(value[idx, 0])
        else:
            v *= (1 - sigmoid.sigmoid(value[idx, 0]))

    ret = -np.log(v) / m
    if np.isnan(ret):
        return np.inf
    return ret
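compute3 multiplies the per-example probabilities together before taking a single log, which underflows for even moderately sized datasets. A vectorized sketch that sums log-probabilities instead, assuming the same sigmoid module and a 0/1 label vector y:

def compute3_stable(x, y, theta):
    # Same negative log-likelihood as compute3, but summing log-probabilities
    # rather than multiplying raw probabilities, which avoids underflow.
    m = y.size
    h = sigmoid.sigmoid(x.dot(theta)).ravel()
    yv = np.asarray(y).ravel()
    return -np.sum(yv * np.log(h) + (1 - yv) * np.log(1 - h)) / m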
Example #4
def negSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               dataset,
                               K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample
    size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """

    # Sampling of indices is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    # cost from target sample
    uo = outputVectors[target, ].reshape((1, -1))  # (1, d)
    vc = predicted.reshape((-1, 1))  # (d, 1)
    uovc = np.dot(uo, vc)
    s_uovc = sigmoid(uovc)
    cost_from_ts = -1 * np.log(s_uovc)

    # cost from negative sample
    uk = outputVectors[indices[1:], ]  # (mk, d)
    ukvc = np.dot(uk, vc)  # (mk, 1)
    s_ukvc = sigmoid(-1 * ukvc)  # (mk, 1)
    cost_from_ns = -1 * np.sum(np.log(s_ukvc))

    cost = cost_from_ts + cost_from_ns

    # gradient of the predict vectors
    grad = np.zeros_like(outputVectors)
    gradPred = ((s_uovc - 1) * outputVectors[target, ] - np.dot(
        (s_ukvc.T - 1), uk)).reshape((-1, ))
    grad_k = -1 * np.dot((s_ukvc - 1.0), predicted.reshape((1, -1)))
    grad_target = (s_uovc - 1) * predicted

    for i, k in enumerate(indices[1:]):
        grad[k] += grad_k[i]
    grad[target] = grad_target

    return cost, gradPred, grad
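The snippet above calls getNegativeSamples, which does not appear on this page. A minimal sketch of what it might look like, assuming dataset exposes a sampleTokenIdx() method as suggested by the docstring's pointer to test_word2vec:

def getNegativeSamples(target, dataset, K):
    # Draw K indices for negative samples, re-drawing any that collide
    # with the target word's index.
    indices = []
    for _ in range(K):
        idx = dataset.sampleTokenIdx()
        while idx == target:
            idx = dataset.sampleTokenIdx()
        indices.append(idx)
    return indices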
Example #5
    def get_crown_burn(self, FGPathway_object, loc, weather_today, sppr_dec):
        """Determines whether the overstory trees burn in a given fire

        RETURNS
        -------
        boolean, True indicates that this cell has a burned crown, False not.
        """
        #in the default model, ladder fuels range from 0 to 1.3 or so
        ladder_fuels = FGPathway_object.get_ladder_fuel(loc)

        #fwi varies from 0 to over 100 and is in units of meters/minute
        fwi = weather_today["FWI"]

        #forming an index for crown-fire risk, based on the ladder fuel load, and
        # weather conditions
        severe_fwi = 20 #meters per minute... seems like 20 is getting pretty fast??
        adjusted_fwi = sigmoid(min( fwi/severe_fwi, severe_fwi), center=severe_fwi, min_val=0.0, max_val=2.0)
        crowning_danger_index = adjusted_fwi + ladder_fuels

        #the cut-off for crown-fire
        flash_point = 1.5

        return crowning_danger_index > flash_point
def gradient_reg(theta, reg, *args):
    y = args[1]
    X = args[0]
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta.reshape(-1, 1)))
    grad = (1 / m) * X.T.dot(h - y) + (reg/m) * np.r_[[[0]], theta[1:].reshape(-1, 1)]
    return grad.flatten()
def cost_function_reg(theta, reg, *args):
    y = args[1]
    X = args[0]
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta))
    # epsilon is a small module-level constant (not shown here) that keeps the log terms finite
    J = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) + np.log(1 - h + epsilon).T.dot(1 - y)) + (reg/(2*m))*np.sum(np.square(theta[1:]))
    return J[0]
Example #8
def predict(theta, X):

    p = np.round(sigmoid(X.dot(theta)))
    return p
Example #9
    def ladder_fuel_function_PIPO(self, stand_age, years_since_fire):
        """The ladder fuel value in a stand given the time since a fire.

        In Pinus ponderosa stands,
        the historical fire regime which maintained old trees had return intervals from 1-30 years.
        With fire exclusion, the understory will begin to fill with shade tolerant species. They
        should start representing a threat of crown fire sometime after 30 years or so, to simulate
        this historical dynamic.



        """
        #lodgepole-style build-up pattern:
        #change parameter 'center' to adjust where fuel hits its halfway point
        lodgepole_style_fuels = sigmoid(x=min(years_since_fire,stand_age), center=30, min_val=0.0, max_val=1.3)
        
        #in lodgepole, pretty much any fire is stand-replacing. In Ponderosa, after about age 20, 
        # stands are very resilient to fire. Surface fires then act to reduce ladder fuels and 
        # maintain the overstory

        #so here, instead of stand age, the years_since_fire value is used. As long as fires of some
        #kind are happening every 30 years or so, the ladder fuel value will never rise very high 
        # (in the case above, with center=30, max=1.3, at thirty years, the fuel_loading will be 0.65)

        return lodgepole_style_fuels
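As a rough check of the parameterization described in the comments above (center=30, max_val=1.3), the curve can be evaluated directly; a sketch assuming the same sigmoid(x, center, min_val, max_val) helper used by the tests further down this page:

for years in (0, 10, 20, 30, 60, 100):
    fuel = sigmoid(x=years, center=30, min_val=0.0, max_val=1.3)
    print(years, round(fuel, 2))  # at 30 years the load is 0.65, half of max_val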
def gradient(theta, X, y, _lambda):
    m, n = X.shape

    from utils.sigmoid import sigmoid
    h = sigmoid(np.dot(X, theta))
    t = h.T - y
    tmp = np.dot(t, X)
    # Note: _lambda is accepted but not applied; this is the unregularized gradient.
    grad_reg_without_reg = tmp / m
    return grad_reg_without_reg.flatten()
def predict_tweet(tweet, freqs, theta):

    # extract the features of the tweet and store it into x
    x = extract_features.extract_features(tweet, freqs)

    # make the prediction using x and theta
    y_pred = sigmoid.sigmoid(np.dot(x, theta))

    return y_pred
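Assuming freqs and theta come from an already-trained sentiment model (both names are taken from the snippet above), usage reduces to thresholding the returned probability:

y_hat = predict_tweet("I loved this movie!", freqs, theta)
label = 1 if np.asarray(y_hat).ravel()[0] > 0.5 else 0  # 1 = positive, 0 = negative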
Example #12
def predict(Theta1, Theta2, X):
    # PREDICT Predict the label of an input given a trained neural network
    #   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
    #   trained weights of a neural network (Theta1, Theta2)

    # Useful values
    if X.ndim == 1:
        X = np.reshape(X, (-1, X.shape[0]))
    m = np.shape(X)[0]
    num_labels = np.shape(Theta2)[0]

    # You need to return the following variables correctly
    X = np.column_stack((np.ones((m, 1)), X))
    a2 = sigmoid(np.dot(X, Theta1.T))
    a2 = np.column_stack((np.ones((m, 1)), a2))
    a3 = sigmoid(np.dot(a2, Theta2.T))
    p = a3.argmax(axis=1) + 1
    return p
def predictOneVsAll(all_theta, X):
    num_labels, _ = all_theta.shape
    X = np.insert(X, 0, 1, axis=1)

    from utils.sigmoid import sigmoid

    max_value = sigmoid(np.dot(X, all_theta.T))
    p = max_value.argmax(axis=1)
    return p
Example #14
def gradient_descent(x, y, alpha, iteration):
    m = y.size
    theta = np.zeros(3).reshape(3, -1)
    j_list = []
    for i in range(iteration):
        v = computer_cost.compute2(x, y, theta)
        j_list.append(v)
        values = (sigmoid.sigmoid(x.dot(theta)) - y).T.dot(x).T * (alpha / m)
        theta = theta - values
    return theta, np.array(j_list)
Example #15
def predictOneVsAll(all_theta, X):

    row, col = X.shape
    X_0 = np.ones((row, 1))
    X = np.concatenate((X_0, X), axis=1)

    hypothesis = sigmoid(X @ all_theta.T)
    p = np.argmax(hypothesis, axis=1)

    return p
def costFunction(theta, X, y):

    # Initialize useful parameters
    m = len(y)
    J = 0
    grad = np.zeros(theta.shape)

    hypothesis = sigmoid(X.dot(theta))
    J = (1 / m) * np.sum(-y.dot(np.log(hypothesis)) -
                         (1 - y).dot(np.log(1 - hypothesis)))
    grad = (1 / m) * (hypothesis - y).dot(X)

    return (J, grad)
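Because costFunction returns the pair (J, grad), it can be handed straight to scipy.optimize.minimize with jac=True, which is apparently how the truncated optimization snippet in Example #28 below uses it. A hedged sketch, where X_padded (a design matrix with a leading bias column), y, and the iteration limit are assumptions:

from scipy import optimize

initial_theta = np.zeros(X_padded.shape[1])
res = optimize.minimize(costFunction, initial_theta,
                        args=(X_padded, y),
                        jac=True,              # costFunction returns (J, grad)
                        method='TNC',
                        options={'maxiter': 400})
print(res.fun, res.x)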
def lrCostFunction(theta, X, y, l):

    m = len(y)

    hypothesis = sigmoid(X @ theta)
    reg_parameter = (l / (2 * m)) * np.sum(np.power(theta[1:], 2))

    J = (1 / m) * np.sum((-(y.T) @ np.log(hypothesis)) -
                         (1 - y.T) @ (np.log(1 - hypothesis))) + reg_parameter

    grad = (1 / m) * X.T @ (hypothesis - y)
    grad_parameter = theta[1:] * (l / m)
    grad[1:] += grad_parameter

    return J, grad
Example #18
def costFunctionReg(theta, X, y, lamda=1):

    # Initialize useful parameters
    m = len(y)
    J = 0
    grad = np.zeros(theta.shape)

    hypothesis = sigmoid(X.dot(theta))
    regularization_parameter = (lamda / (2 * m)) * np.sum(np.power(theta[1:], 2))
    J = (1 / m) * np.sum(-y.dot(np.log(hypothesis)) - (1 - y).dot(np.log(1 - hypothesis))) + regularization_parameter

    grad = (1 / m) * (hypothesis - y).dot(X)
    grad_parameter = theta[1:] * lamda / m
    grad[1:] = grad[1:] + grad_parameter

    return (J, grad)
Example #19
def main():
    x = np.linspace(-8, 8, 1000)
    y = sigmoid.sigmoid(x)
    plot_data.plot(x, y, 'x', 'y', {
        'fmt': 'b-',
        'title': 'sigmoid',
        'label': 'sigmoid',
        'show': False
    })
    y2 = np.full_like(x, 0.5)
    plot_data.plot(x, y2, 'x', 'y', {
        'fmt': 'g-',
        'label': 'y = 0.5',
        'show': True
    })
Example #20
def test_center():
    #function signature:
    #sigmoid(x, center=0.0, min_val=0.0, max_val=1.0)
    test_count = 1000
    sample_count = 1

    for i in range(test_count):
        a = random.uniform(-1000,1000)
        b = random.uniform(-1000,1000)
        while a == b:
            b = random.uniform(-1000,1000)

        max_val = max(a, b)
        min_val = min(a, b)

        center = random.uniform(-1000,1000)
        mid_val = (max_val - min_val)/2.0 + min_val

        for j in range(sample_count):
            center_val = sigmoid.sigmoid(x=center, center=center, min_val=min_val, max_val=max_val)
            assert np.allclose( center_val , mid_val)
    def _forward_propagate(self, X, w, b):
        """
        Computes h = sigmoid(w*x + b)

        Parameters
        ----------
        X : ndarray, shape (m_samples, n_features)
            Training data.
        w : ndarray, shape (n_features, 1)
            Coefficient vector.
        b : float or ndarray, shape (1,)
            Intercept (bias) term.

        Returns
        -------
        h : ndarray, shape(m_samples, 1)
            Activation values (hypothesis) (probability to be class 1)
        """
        z = np.dot(X, w) + b
        h = sigmoid(z)
        return h
Example #22
def test_min_max():
    #function signature:
    #sigmoid(x, center=0.0, min_val=0.0, max_val=1.0)
    test_count = 1000
    sample_count = 1000

    for i in range(test_count):
        a = random.uniform(-1000,1000)
        b = random.uniform(-1000,1000)
        while a == b:
            b = random.uniform(-1000,1000)

        max_val = max(a, b)
        min_val = min(a, b)

        center = random.uniform(-1000,1000)

        samples = [ sigmoid.sigmoid(random.uniform(-10000,10000), center, min_val, max_val) for j in range(sample_count)]

        sample_range = max_val - min_val

        assert (min(samples) - min_val) < (0.05 * sample_range)
        assert (max_val - max(samples)) < (0.05 * sample_range)
def main():
    fig, axes = plt.subplots(1, 3, sharey=True, figsize=(17, 5))
    data = load_data.load('data2.txt', dtype=np.float128)
    X = data[:, 0:2]
    X_map = feature_map.map(X)
    y = data[:, 2].reshape(-1, 1)
    initial_theta = np.zeros(X_map.shape[1])
    #C = 0
    #res = minimize(cost_function_reg, initial_theta, args=(C, X_map, y), method=None, jac=gradient_reg, options={'maxiter': 3000})
    #print(res)
    for i, C in enumerate([0, 1, 100]):
        # Optimize costFunctionReg
        res2 = minimize(cost_function_reg,
                        initial_theta,
                        args=(C, X_map, y),
                        method=None,
                        jac=gradient_reg,
                        options={'maxiter': 3000})
        accuracy = 100 * sum(predict(res2.x, X_map) == y.ravel()) / y.size
        plotData(data, 'Microchip Test 1', 'Microchip Test 2', 'y = 1',
                 'y = 0',
                 axes.flatten()[i])
        # Plot decision boundary
        x1_min, x1_max = X[:, 0].min(), X[:, 0].max(),
        x2_min, x2_max = X[:, 1].min(), X[:, 1].max(),
        xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max),
                               np.linspace(x2_min, x2_max))
        h = sigmoid.sigmoid(
            feature_map.map(np.c_[xx1.ravel(),
                                  xx2.ravel()]).dot(res2.x.reshape(-1, 1)))
        h = h.reshape(xx1.shape)
        axes.flatten()[i].contour(xx1, xx2, h, [0.5], linewidths=1, colors='g')
        axes.flatten()[i].set_title(
            'Train accuracy {}% with Lambda = {}'.format(
                np.round(accuracy, decimals=2), C))

    plt.show()
def predict(score1, score2, theta):
    return sigmoid.sigmoid(np.array([1, score1, score2]).dot(theta.reshape(3, -1)).sum())
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, _lambda):
    # NNCOSTFUNCTION Implements the neural network cost function for a two-layer
    # neural network which performs classification.
    # [J, grad] = NNCOSTFUNCTION(nn_params, hidden_layer_size, num_labels,
    # X, y, lambda) computes the cost and gradient of the neural network. The
    # parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    #
    # The returned parameter grad should be an "unrolled" vector of the
    # partial derivatives of the neural network.
    #

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1),
                        order='F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1),
                        order='F')

    # Setup some useful variables
    m = len(X)

    # # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #

    # add bias 1
    X = np.column_stack((np.ones((m, 1)), X))

    # a2 = X . dot Theta1.T
    a2 = sigmoid(np.dot(X, Theta1.T))

    # a2 add bias
    a2 = np.column_stack((np.ones((a2.shape[0], 1)), a2))

    a3 = sigmoid(np.dot(a2, Theta2.T))

    # Recall that whereas the original labels (in the variable y) were 1, 2, ..., 10,
    # for the purpose of training a neural network we need to recode the labels as
    # vectors containing only the values 0 or 1. For example, if x(i) is an image of
    # the digit 5, the corresponding y(i) (used with the cost function) should be a
    # 10-dimensional vector with y_5 = 1 and the other elements equal to 0.
    labels = y
    y = np.zeros((m, num_labels))
    for i in range(m):
        y[i, labels[i] - 1] = 1

    # You should implement the feedforward computation that computes hθ(x(i)) for every example i and sum the cost
    # over all examples. Your code should also work for a dataset of any size, with any number of labels (you can
    # assume that there are always at least K ≥ 3 labels).
    cost = 0
    for i in range(m):
        cost += np.sum(y[i] * np.log(a3[i]) + (1 - y[i]) * np.log(1 - a3[i]))

    J = -(1.0 / m) * cost

    theta1_square = np.sum(np.square(Theta1[:, 1:]))
    theta2_square = np.sum(np.square(Theta2[:, 1:]))

    J = J + _lambda * (theta1_square + theta2_square) / 2 / m

    from ex4_NN_back_propagation.sigmoidGradient import sigmoidGradient
    delta_l_1 = 0
    delta_l_2 = 0
    for i in range(m):
        # 1. Set the input layer's values a(1) to the i-th training example x(i).
        #    Perform a feedforward pass, computing the activations z(2), a(2), z(3), a(3)
        #    for layers 2 and 3. Note that a +1 term must be added so that the activation
        #    vectors for layers 1 and 2 include the bias unit. In Octave, if a1 is a
        #    column vector, adding the bias corresponds to a1 = [1; a1].
        a1 = X[i]
        z2 = np.dot(a1, Theta1.T)
        a2 = sigmoid(z2)
        a2 = np.concatenate((np.ones(1), a2))
        z3 = np.dot(a2, Theta2.T)
        a3 = sigmoid(z3)

        # 2. For each output unit k in layer 3 (the output layer), set
        #    delta_k(3) = a_k(3) - y_k, where y_k ∈ {0, 1} indicates whether the
        #    current training example belongs to class k (y_k = 1) or to a
        #    different class (y_k = 0). Logical arrays can be helpful here
        #    (explained in the previous programming exercise).
        delta3 = np.zeros(num_labels)
        for l in range(num_labels):
            delta3[l] = a3[l] - y[i, l]

        # 3. For the hidden layer (l = 2), set
        #    delta(2) = (Theta(2))^T delta(3) .* g'(z(2)),
        #    where g' is the sigmoid gradient.
        delta2 = np.dot(Theta2[:, 1:].T, delta3) * sigmoidGradient(z2)

        # 4. Accumulate the gradient from this example using the formula below. Note
        #    that the bias entry delta_0(2) should be skipped or removed; in Octave,
        #    removing it corresponds to delta2 = delta2(2:end).
        #    Delta(l) = Delta(l) + delta(l+1) * (a(l))^T
        delta_l_1 += np.outer(delta2, a1.T)
        delta_l_2 += np.outer(delta3, a2.T)

    # 5. Obtain the (unregularized) gradient for the neural network cost function by dividing the accumulated
    # gradients by 1/m
    Theta1_grad = delta_l_1 / m
    Theta2_grad = delta_l_2 / m

    # After you have successfully implemented the backpropagation algorithm, you will
    # add regularization to the gradient. To account for regularization, it turns out
    # that you can add it as an additional term after computing the gradients using
    # backpropagation. Specifically, after you have computed Delta(l)_ij with
    # backpropagation, you should add the regularization term:
    Theta1_grad_unreg = np.copy(Theta1_grad)
    Theta2_grad_unreg = np.copy(Theta2_grad)
    Theta1_grad += _lambda / m * Theta1
    Theta2_grad += _lambda / m * Theta2
    Theta1_grad[:, 0] = Theta1_grad_unreg[:, 0]
    Theta2_grad[:, 0] = Theta2_grad_unreg[:, 0]

    # Unroll gradients
    grad = np.concatenate((Theta1_grad.reshape(np.size(Theta1_grad),
                                               order='F'),
                           Theta2_grad.reshape(Theta2_grad.size, order='F')))

    return J, grad
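The instructions above refer to checkNNGradients; a minimal finite-difference check of the unrolled gradient might look like the sketch below (the helper name, sample count, and step size are assumptions):

def numerical_gradient_check(cost_fn, nn_params, *args, eps=1e-4, samples=10):
    # Compare a few analytic gradient entries against central finite
    # differences of the cost.
    _, analytic = cost_fn(nn_params, *args)
    for idx in np.random.choice(nn_params.size, samples, replace=False):
        step = np.zeros_like(nn_params)
        step[idx] = eps
        J_plus, _ = cost_fn(nn_params + step, *args)
        J_minus, _ = cost_fn(nn_params - step, *args)
        print(idx, analytic[idx], (J_plus - J_minus) / (2 * eps))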
def costFunction(theta, X, y, _lambda):
    m, n = X.shape
    from utils.sigmoid import sigmoid
    item1 = y * (np.log(sigmoid(np.dot(X, theta))))
    item2 = (1 - y) * (np.log(1 - sigmoid(np.dot(X, theta))))
    return np.sum(-item1 - item2) / m
def gradient(theta, X, y):
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta.reshape(-1, 1)))
    grad = (1 / m) * X.T.dot(h - y)
    return grad.flatten()
Example #28
                        jac=True,
                        method='TNC',
                        options=options)

cost = res.fun
theta = res.x

print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))
print('Expected cost (approx): 0.203')

print('Theta:{:.3f}, {:.3f}, {:.3f}'.format(*theta))
print('Expected theta (approx):-25.161, 0.206, 0.201')

# Plot decision boundary
plotDecisionBoundary(theta, X_padded, y)

# Predict probability for a student with score 45 on exam 1 and score 85 on exam 2

grades = np.array([1, 45, 85])
prob = sigmoid(grades.dot(theta))
print(
    'For a student with scores 45 and 85, we predict an admission probability of {:.3f}'
    .format(prob))
print('Expected value: 0.775 +/- 0.002')

# Compute accuracy on our training set
p = predict(theta, X_padded)
accuracy = np.mean(y == p) * 100
print('Train Accuracy: {}%'.format(accuracy))
print('Expected accuracy (approx): 89.0')
def cost_function(theta, X, y):
    m = y.size
    h = sigmoid.sigmoid(X.dot(theta))
    J = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) + np.log(1 - h + epsilon).T.dot(1 - y))
    return J[0]
Example #30
def compute2(x, y, theta):
    m = y.size
    h = sigmoid.sigmoid(x.dot(theta))
    j = -1 * (1 / m) * (np.log(h + epsilon).T.dot(y) + np.log(1 - h + epsilon).T.dot(1 - y))
    return j[0]
def predict(theta, X, threshold=0.5):
    p = sigmoid.sigmoid(X.dot(theta.T)) >= threshold
    return (p.astype('int'))