        (4.2, 78000, 0),
        (1.1, 54000, 0),
        (6.2, 60000, 0),
        (2.9, 59000, 0),
        (2.1, 52000, 0),
        (8.2, 87000, 0),
        (4.8, 73000, 0),
        (2.2, 42000, 1),
        (9.1, 98000, 0),
        (6.5, 84000, 0),
        (6.9, 73000, 0),
        (5.1, 72000, 0),
        (9.1, 69000, 1),
        (9.8, 79000, 1),
    ]

    ##### 16.1 The Problem
    # change tuples to lists
    data = list(map(list, data))
    # each element is [1, experience, salary]
    x = [[1] + row[:2] for row in data]
    # each element is paid_account
    y = [row[2] for row in data]

    print("linear regression:")
    rescaled_x = rescale(x)
    beta = estimate_beta(rescaled_x, y)
    print(beta)
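
# rescale (used above and again in the main block below) is defined elsewhere
# in the project. A minimal sketch of a plausible definition: standardize each
# non-constant column to mean 0 and standard deviation 1, leaving constant
# columns (such as the leading 1) untouched. The original may differ.
def rescale(data):
    """return a copy of data with every non-constant column standardized"""
    num_cols = len(data[0])
    means = [sum(row[j] for row in data) / len(data) for j in range(num_cols)]
    stdevs = [(sum((row[j] - means[j]) ** 2 for row in data) / (len(data) - 1)) ** 0.5
              for j in range(num_cols)]
    return [[(row[j] - means[j]) / stdevs[j] if stdevs[j] > 0 else row[j]
             for j in range(num_cols)]
            for row in data]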

    ##### 16.2 The Logistic Function
# What happens if we try to model this kind of binary outcome with an ordinary linear regression?
#
# We will try to predict whether or not an individual has a PhD based on other information.
###########################################################################################

# This relationship doesn't show much promise.
plt.scatter(work_hours_per_day, noPhD)
plt.title("noPhD by work_hours_per_day")
plt.xlabel("work_hours_per_day")
plt.ylabel("noPhD")
plt.show()

# The r-squared is about as low as it can be.
random.seed(0)
xtest = work_hours_per_day_list
beta = estimate_beta(xtest, noPhD)
print("\nbeta=", beta, "\nr-squared=", multiple_r_squared(xtest, noPhD, beta), "\n\n")

# The regression line is almost flat.
plt.scatter(work_hours_per_day, noPhD)
plt.title("noPhD by work_hours_per_day")
plt.xlabel("work_hours_per_day")
plt.ylabel("noPhD")
plt.plot([min(work_hours_per_day), max(work_hours_per_day)],
         [predict([1, min(work_hours_per_day)], beta),
          predict([1, max(work_hours_per_day)], beta)])
plt.show()
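
# multiple_r_squared (used above) is defined elsewhere in the project.
# A minimal sketch of a plausible definition: the fraction of the variation
# in y captured by the model beta; the original may differ in details.
def multiple_r_squared(x, y, beta):
    """R-squared = 1 - (sum of squared residuals) / (total sum of squares)"""
    predictions = [sum(x_ij * b_j for x_ij, b_j in zip(x_i, beta)) for x_i in x]
    mean_y = sum(y) / len(y)
    ss_res = sum((y_i - p_i) ** 2 for y_i, p_i in zip(y, predictions))
    ss_tot = sum((y_i - mean_y) ** 2 for y_i in y)
    return 1.0 - ss_res / ss_tot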

###########################################################################################

# This one is a bit more promising, but there is a lot of overlap.
plt.scatter(daily_minutes_good, noPhD)
plt.title("noPhD by daily_minutes_good")
def squared_error_ridge_gradient(x_i, y_i, beta, alpha):
    """the gradient corresponding to the i-th squared error term,
    including its ridge penalty"""
    return add_vectors(squared_error_gradient(x_i, y_i, beta),
                       ridge_penalty_gradient(beta, alpha))


def estimate_beta_ridge(xs, ys, alpha):
    """use stochastic gradient descent to fit a ridge regression
    with penalty strength alpha"""
    beta_initial = [random.random() for _ in range(len(xs[0]))]

    return minimize_stochastic(
        partial(squared_error_ridge, alpha=alpha),
        partial(squared_error_ridge_gradient, alpha=alpha), xs, ys,
        beta_initial, 0.0001)
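
# squared_error_gradient, ridge_penalty_gradient, squared_error_ridge and
# add_vectors (used above) are defined elsewhere in the project. A minimal
# sketch of plausible definitions, assuming the usual convention of not
# penalizing the intercept beta[0]; the originals may differ.
def add_vectors(v, w):
    """componentwise sum of two vectors"""
    return [v_i + w_i for v_i, w_i in zip(v, w)]


def squared_error_gradient(x_i, y_i, beta):
    """gradient of the i-th squared error with respect to beta"""
    error_i = y_i - sum(x_ij * b_j for x_ij, b_j in zip(x_i, beta))
    return [-2 * x_ij * error_i for x_ij in x_i]


def ridge_penalty_gradient(beta, alpha):
    """gradient of just the ridge penalty (no penalty on the intercept)"""
    return [0] + [2 * alpha * beta_j for beta_j in beta[1:]]


def squared_error_ridge(x_i, y_i, beta, alpha):
    """the i-th squared error plus the ridge penalty on beta"""
    error_i = y_i - sum(x_ij * b_j for x_ij, b_j in zip(x_i, beta))
    return error_i ** 2 + alpha * sum(b_j ** 2 for b_j in beta[1:])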


random.seed(0)
trainer_beta = estimate_beta(trainer_party_stats, trainer_badge_counts)
print('trainer_beta = %s' % trainer_beta)

random.seed(0)
trainer_beta_ridge_0 = estimate_beta_ridge(trainer_party_stats,
                                           trainer_badge_counts, 0)
print('trainer_beta_ridge_0 = %s' % trainer_beta_ridge_0)

random.seed(0)
trainer_beta_ridge_point_01 = estimate_beta_ridge(trainer_party_stats,
                                                  trainer_badge_counts, 0.01)
print('trainer_beta_ridge_point_01 = %s' % trainer_beta_ridge_point_01)

random.seed(0)
trainer_beta_ridge_point_1 = estimate_beta_ridge(trainer_party_stats,
                                                 trainer_badge_counts, 0.1)
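print('trainer_beta_ridge_point_1 = %s' % trainer_beta_ridge_point_1)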
def estimate_sample_beta(sample):
    """fit a regression to one bootstrap sample of (x_i, y_i) pairs"""
    x_sample, y_sample = zip(*sample)
    return estimate_beta(x_sample, y_sample)


def logistic_log_gradient(x, y, beta):
    """the gradient of the log likelihood over the whole data set
    (in Python 3, reduce lives in functools)"""
    return reduce(vector_add,
                  [logistic_log_gradient_i(x_i, y_i, beta)
                   for x_i, y_i in zip(x, y)])
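
# logistic and logistic_log_gradient_i (used above) are defined elsewhere in
# the project. A minimal sketch of plausible definitions, assuming the standard
# logistic model where p_i is the logistic of the dot product of x_i and beta;
# the originals may differ.
import math


def logistic(t):
    """the logistic (sigmoid) function"""
    return 1.0 / (1 + math.exp(-t))


def logistic_log_gradient_i(x_i, y_i, beta):
    """gradient of the log likelihood at the i-th data point: (y_i - p_i) * x_i"""
    p_i = logistic(sum(x_ij * b_j for x_ij, b_j in zip(x_i, beta)))
    return [(y_i - p_i) * x_ij for x_ij in x_i]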

if __name__ == "__main__":

    data = [(0.7,48000,1),(1.9,48000,0),(2.5,60000,1),(4.2,63000,0),(6,76000,0),(6.5,69000,0),(7.5,76000,0),(8.1,88000,0),(8.7,83000,1),(10,83000,1),(0.8,43000,0),(1.8,60000,0),(10,79000,1),(6.1,76000,0),(1.4,50000,0),(9.1,92000,0),(5.8,75000,0),(5.2,69000,0),(1,56000,0),(6,67000,0),(4.9,74000,0),(6.4,63000,1),(6.2,82000,0),(3.3,58000,0),(9.3,90000,1),(5.5,57000,1),(9.1,102000,0),(2.4,54000,0),(8.2,65000,1),(5.3,82000,0),(9.8,107000,0),(1.8,64000,0),(0.6,46000,1),(0.8,48000,0),(8.6,84000,1),(0.6,45000,0),(0.5,30000,1),(7.3,89000,0),(2.5,48000,1),(5.6,76000,0),(7.4,77000,0),(2.7,56000,0),(0.7,48000,0),(1.2,42000,0),(0.2,32000,1),(4.7,56000,1),(2.8,44000,1),(7.6,78000,0),(1.1,63000,0),(8,79000,1),(2.7,56000,0),(6,52000,1),(4.6,56000,0),(2.5,51000,0),(5.7,71000,0),(2.9,65000,0),(1.1,33000,1),(3,62000,0),(4,71000,0),(2.4,61000,0),(7.5,75000,0),(9.7,81000,1),(3.2,62000,0),(7.9,88000,0),(4.7,44000,1),(2.5,55000,0),(1.6,41000,0),(6.7,64000,1),(6.9,66000,1),(7.9,78000,1),(8.1,102000,0),(5.3,48000,1),(8.5,66000,1),(0.2,56000,0),(6,69000,0),(7.5,77000,0),(8,86000,0),(4.4,68000,0),(4.9,75000,0),(1.5,60000,0),(2.2,50000,0),(3.4,49000,1),(4.2,70000,0),(7.7,98000,0),(8.2,85000,0),(5.4,88000,0),(0.1,46000,0),(1.5,37000,0),(6.3,86000,0),(3.7,57000,0),(8.4,85000,0),(2,42000,0),(5.8,69000,1),(2.7,64000,0),(3.1,63000,0),(1.9,48000,0),(10,72000,1),(0.2,45000,0),(8.6,95000,0),(1.5,64000,0),(9.8,95000,0),(5.3,65000,0),(7.5,80000,0),(9.9,91000,0),(9.7,50000,1),(2.8,68000,0),(3.6,58000,0),(3.9,74000,0),(4.4,76000,0),(2.5,49000,0),(7.2,81000,0),(5.2,60000,1),(2.4,62000,0),(8.9,94000,0),(2.4,63000,0),(6.8,69000,1),(6.5,77000,0),(7,86000,0),(9.4,94000,0),(7.8,72000,1),(0.2,53000,0),(10,97000,0),(5.5,65000,0),(7.7,71000,1),(8.1,66000,1),(9.8,91000,0),(8,84000,0),(2.7,55000,0),(2.8,62000,0),(9.4,79000,0),(2.5,57000,0),(7.4,70000,1),(2.1,47000,0),(5.3,62000,1),(6.3,79000,0),(6.8,58000,1),(5.7,80000,0),(2.2,61000,0),(4.8,62000,0),(3.7,64000,0),(4.1,85000,0),(2.3,51000,0),(3.5,58000,0),(0.9,43000,0),(0.9,54000,0),(4.5,74000,0),(6.5,55000,1),(4.1,41000,1),(7.1,73000,0),(1.1,66000,0),(9.1,81000,1),(8,69000,1),(7.3,72000,1),(3.3,50000,0),(3.9,58000,0),(2.6,49000,0),(1.6,78000,0),(0.7,56000,0),(2.1,36000,1),(7.5,90000,0),(4.8,59000,1),(8.9,95000,0),(6.2,72000,0),(6.3,63000,0),(9.1,100000,0),(7.3,61000,1),(5.6,74000,0),(0.5,66000,0),(1.1,59000,0),(5.1,61000,0),(6.2,70000,0),(6.6,56000,1),(6.3,76000,0),(6.5,78000,0),(5.1,59000,0),(9.5,74000,1),(4.5,64000,0),(2,54000,0),(1,52000,0),(4,69000,0),(6.5,76000,0),(3,60000,0),(4.5,63000,0),(7.8,70000,0),(3.9,60000,1),(0.8,51000,0),(4.2,78000,0),(1.1,54000,0),(6.2,60000,0),(2.9,59000,0),(2.1,52000,0),(8.2,87000,0),(4.8,73000,0),(2.2,42000,1),(9.1,98000,0),(6.5,84000,0),(6.9,73000,0),(5.1,72000,0),(9.1,69000,1),(9.8,79000,1),]
    data = list(map(list, data)) # change tuples to lists

    x = [[1] + row[:2] for row in data] # each element is [1, experience, salary]
    y = [row[2] for row in data]        # each element is paid_account

    print("linear regression:")

    rescaled_x = rescale(x)
    beta = estimate_beta(rescaled_x, y)
    print(beta)

    print("logistic regression:")

    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.33)

    # want to maximize log likelihood on the training data
    fn = partial(logistic_log_likelihood, x_train, y_train)
    gradient_fn = partial(logistic_log_gradient, x_train, y_train)

    # pick a starting point (all ones here)
    beta_0 = [1, 1, 1]

    # and maximize using gradient descent
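    # The fragment cuts off here. A minimal sketch of the maximization step as
    # a plain fixed-step gradient-ascent loop; the original most likely calls a
    # library helper (e.g. a maximize_batch-style function) instead.
    beta_hat = beta_0
    step_size = 0.01
    for _ in range(5000):
        gradient = gradient_fn(beta_hat)
        beta_hat = [b_j + step_size * g_j
                    for b_j, g_j in zip(beta_hat, gradient)]
    print("beta_hat", beta_hat)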
# build a simple seasonal indicator: flip the sign of the daily adjustment at
# the season-boundary indices and accumulate a small step each day
for i, _ in enumerate(list_of_dates):
    if i in (34, 217, 399, 582):
        multiplyer = multiplyer * -1

    season[i - 1] = prev + (multiplyer / 100.0)
    prev = season[i - 1]

alpha = [1] * 731
random.seed(0)
x_y = list(zip(alpha, discount_jack, season, count_by_city[0][0]))
training = random.sample(x_y, 183)  # 25% of the entire data
alpha_train, discount_train, season_train, count_j_train = zip(*training)
x = list(zip(alpha_train, discount_train, season_train))
y = count_j_train
print(len(x))
random.seed(0)
beta1 = estimate_beta(x, y)
print("beta", beta1)
print("r-squared", multiple_r_squared(x, y, beta1))
print()
#%% 1. Plot
plot_set1 = sorted(zip(alpha_train, discount_train, season_train, y),
                   key=lambda row: row[2])
alpha_train1, discount_train1, season_train1, y1 = zip(*plot_set1)
y_pred1 = []
for i in range(len(count_j_train)):
    y_pred1.append(beta1[0] + beta1[1] * discount_train1[i] +
                   beta1[2] * float(season_train1[i]))
plt.figure(51)
plt.scatter(discount_train1, y1, color='g', label='Data')
plt.scatter(discount_train1, y_pred1, color='r', label='Regression', s=10)
plt.legend()