Ejemplo n.º 1
0
def compare_polynomials(x: np.ndarray, y: np.ndarray, i: int) -> None:
    """Fit a linear model on a split of (x, y) and plot its test cost as a bar.

    Args:
        x: 2-D feature matrix; its column count sets the number of thetas.
        y: Targets, split together with ``x`` by ``data_spliter``.
        i: Identifier of this run; used as the bar position and in the
            printed line and the bar label.

    The function returns nothing: it prints the run id with its cost, prints
    the fitted thetas, and adds one bar to the current matplotlib figure.
    """
    # One starting coefficient per feature column plus one extra
    # (presumably the intercept -- confirm against MyLR).
    n_params = x.shape[1] + 1
    linear_model = MyLR([2.5] * n_params, 1e-8, int(1e+6))

    # 0.6 is the proportion handed to data_spliter for the training part.
    x_train, x_test, y_train, y_test = data_spliter(x, y, 0.6)
    linear_model.fit_(x_train, y_train)

    # Cost is evaluated on the held-out part only.
    this_cost = linear_model.cost_(y_test, linear_model.predict_(x_test))

    print(i, this_cost)
    print(linear_model.thetas)
    plt.bar(i, this_cost, label="$%d_{th} cost: %.3f$" % (i, this_cost))
Ejemplo n.º 2
0
def main():
    """Train one classifier per origin category and report test-set hits.

    Reads the census features and the origin labels from the two CSV files,
    splits them with ``data_spliter`` (ratio 0.7), trains one model per
    category through ``train_cat`` from hard-coded starting thetas, predicts
    the test set with ``predict_with_one_vs_all`` and prints how many
    predictions match / do not match the true labels.
    """
    citizen_data = pd.read_csv("../resources/solar_system_census.csv")
    origin_data = pd.read_csv("../resources/solar_system_census_planets.csv")

    features = np.array(citizen_data[["height", "weight", "bone_density"]])
    labels = np.array(origin_data["Origin"])

    # Train / test split
    x, x_test, y, y_test = data_spliter(features, labels, 0.7)

    # (category, starting theta) pairs -- one classifier is trained per pair,
    # in this order.
    seeds = (
        (0., [[8.99820209], [-0.04211505], [-0.05411521], [3.46389832]]),
        (1., [[1.66331158e+00], [-5.00887941e-02], [2.81684154e-03],
              [7.91124245e+00]]),
        (2., [[-5.44251112], [-0.02352356], [0.13866041], [-5.77201278]]),
        (3., [[-4.81852712], [0.10656838], [-0.10904182], [-9.14504867]]),
    )
    classifiers = [
        train_cat(x, y, category, theta=start, alpha=4e-4, n_cycle=1000)
        for category, start in seeds
    ]
    categories = np.array([category for category, _ in seeds])

    # Predict the held-out samples
    y_hat = predict_with_one_vs_all(classifiers, categories, x_test)

    # Tally correct (True) versus incorrect (False) predictions
    outcome, tally = np.unique(y_hat == y_test, return_counts=True)
    print(dict(zip(outcome, tally)))
Ejemplo n.º 3
0
def main():
    """Run a one-vs-all classification of the census data and print results.

    Loads labels and features from the CSV files (first column used as the
    index), splits them with ``data_spliter`` (ratio 0.7), calls ``ofa`` once
    per zipcode with its hard-coded starting theta, and picks for every test
    sample the zipcode whose column holds the highest value. Prints a
    side-by-side table of true vs. chosen zipcode, then the count of matching
    and non-matching predictions.
    """
    y_data = pd.read_csv("../resources/solar_system_census_planets.csv",
                         index_col=0)
    x_data = pd.read_csv("../resources/solar_system_census.csv", index_col=0)
    y, x = np.array(y_data), np.array(x_data)

    zipcodes = [0, 1, 2, 3]
    # Starting theta for each zipcode, in the same order as `zipcodes`.
    thetas = (
        [[4.90348242], [-0.02999681], [-0.03250215], [2.40047782]],
        [[1.28802845], [-0.06179008], [0.01894334], [8.00000601]],
        [[-4.75798975], [-0.00574743], [0.09731946], [-4.55362614]],
        [[-2.20593027], [0.08724529], [-0.09877385], [-8.59898021]],
    )

    x_train, x_test, y_train, y_test = data_spliter(x, y, 0.7)

    # One result per zipcode; presumably each is a column of scores for
    # x_test -- confirm against ofa.
    per_zipcode = []
    for zipcode, theta in zip(zipcodes, thetas):
        per_zipcode.append(ofa(x_train, y_train, zipcode, x_test, theta))
    scores = np.concatenate(per_zipcode, axis=1)

    # The index of the best column is used directly as the predicted zipcode
    # (the zipcodes are 0..3, listed in column order).
    y_best = np.argmax(scores, axis=1).reshape(-1, 1)

    side_by_side = np.concatenate((y_test, y_best), axis=1)
    print(pd.DataFrame(side_by_side.astype("int64")))

    # Tally correct (True) versus incorrect (False) predictions
    outcome, tally = np.unique(y_best == y_test, return_counts=True)
    print(dict(zip(outcome, tally)))
    y:a numpy.ndarray for the correct labels y_hat:a numpy.ndarray for the predicted labels
    Returns: The accuracy score as a float. None on any error.
    Raises: This function should not raise any Exception. """
    return np.count_nonzero(y == y_hat) / float(len(y))


# Load the census features and the planet-of-origin labels.
csv_data_x = pd.read_csv("../resources/solar_system_census.csv")
csv_data_y = pd.read_csv("../resources/solar_system_census_planets.csv")

x = zscore(np.array(csv_data_x[["height", "weight", "bone_density"]]))
y = np.array(csv_data_y["Origin"]).reshape(-1, 1)

# Split with ratio 0.5; data_spliter's result is indexed as
# (x_train, x_test, y_train, y_test).
temp = data_spliter(x, y, 0.5)
x_train, x_test, y_train, y_test = temp[0], temp[1], temp[2], temp[3]

# Data preprocessing for each classification model: every class gets its own
# 0/1 column vector (1 where the label equals that class, 0 elsewhere).
y_train0 = (y_train == 0).astype(int).reshape(-1, 1)
y_test0 = (y_test == 0).astype(int).reshape(-1, 1)

y_train1 = (y_train == 1).astype(int).reshape(-1, 1)
y_test1 = (y_test == 1).astype(int).reshape(-1, 1)

y_train2 = (y_train == 2).astype(int).reshape(-1, 1)
from my_logistic_regression import MyLogisticRegression as MyLR


# Load the census features (data1) and the planet-of-origin labels (data2).
data1 = pd.read_csv("../resources/solar_system_census.csv")
data2 = pd.read_csv("../resources/solar_system_census_planets.csv")

X = np.array(data1[['height', 'weight', 'bone_density']]).reshape(-1, 3)
Y = np.array(data2.Origin).reshape(-1, 1)

# Distinct origin values, in ascending order.
zipcodes = np.sort(np.array(data2.Origin.drop_duplicates()))

x_train, x_test, y_train, y_test = data_spliter(X, Y, 0.8)

# One model per zipcode, stored at the same index as in `zipcodes`.
mylrs = []
for i, zipcode in enumerate(zipcodes):
    print("For zipcode = {}".format(zipcode))

    # 0/1 target column: 1 where the training label equals this zipcode.
    y_train_z = (y_train == zipcode).astype(int).reshape(-1, 1)

    mylrs.append(MyLR([0., 0., 0., 0.]))

    # Cost before training, for comparison with the value after fit_.
    cost = mylrs[i].cost_(x_train, y_train_z)
    print("Initial cost = {}".format(cost))

    mylrs[i].fit_(x_train, y_train_z, alpha=1e-4, n_cycle=100000)
    print("new thetas = {}".format(mylrs[i].thetas))

    cost = mylrs[i].cost_(x_train, y_train_z)
Ejemplo n.º 6
0
		y_hat: has to be an numpy.ndarray, a vector of dimension m * 1.
		eps: has to be a float, epsilon (default=1e-15)
		Returns:
		The logistic loss value as a float.
		None on any error.
		Raises:
		This function should not raise any Exception.
		"""
		return -(1 / y.shape[0]) * np.sum((y * np.log(y_hat + eps)) + (1 - y) * np.log(1 - y_hat + eps))
	
if __name__ == "__main__":
	data = pd.read_csv("../resources/solar_system_census.csv")
	X = np.array(data[['height','weight','bone_density']])
	data = pd.read_csv("../resources/solar_system_census_planets.csv")
	Y = np.array(data[['Origin']])
	lst = data_spliter(X, Y, 0.5)
	x_train = lst[0]
	y_train = lst[2]
	y_train = y_train[:, np.newaxis]

	x_test = lst[1]
	y_test = lst[3]
	y_test = y_test[:, np.newaxis]

	j = 0.0
	mat = np.zeros((60,4))

	while j < 4.0:
		y_test2 = np.copy(y_test)
		y_train2 = np.copy(y_train)
		for i in range(y_train2.shape[0]):
Ejemplo n.º 7
0
import numpy as np
from data_spliter import data_spliter

# Demo of data_spliter on 1-D and 2-D feature arrays.
# The "Sample output" comments show one possible result of each call.
separator = "-------------------------------------------------------------------------------"

x1 = np.array([1, 42, 300, 10, 59])
y = np.array([0, 1, 0, 1, 0])

# Example 1: 1-D features, proportion 0.8
print(data_spliter(x1, y, 0.8))
# Sample output:
# (array([  1,  59,  42, 300]), array([10]), array([0, 0, 0, 1]), array([1]))

# Example 2: 1-D features, proportion 0.5
print(separator)
print(data_spliter(x1, y, 0.5))
# Sample output:
# (array([59, 10]), array([  1, 300,  42]), array([0, 1]), array([0, 1, 0]))

x2 = np.array([[1, 42], [300, 10], [59, 1], [300, 59], [10, 42]])
y = np.array([0, 1, 0, 1, 0])

# Example 3: 2-D features, proportion 0.8
print(separator)
print(data_spliter(x2, y, 0.8))
# Sample output:
# (array([[ 10,  42],[300,  59],[ 59,   1],[300,  10]]), array([[ 1, 42]]),
#  array([0, 1, 0, 1]), array([0]))

# Example 4: 2-D features, proportion 0.5
print(separator)
print(data_spliter(x2, y, 0.5))
# Sample output:
# (array([[59,  1],[10, 42]]), array([[300,  10],[300,  59],[  1,  42]]),
#  array([0, 0]), array([1, 1, 0]))
Ejemplo n.º 8
0
		None if there is a matching dimension problem.
		Raises:
		This function should not raise any Exceptions.
		"""
        if len(y) < 1 or len(y_hat) < 1 or y.shape != y_hat.shape:
            return None
        return np.sum((y_hat - y)**2) / float(y.shape[0])


if __name__ == "__main__":
    data = pd.read_csv("../resources/spacecraft_data.csv")
    x = np.array(data[['Age', 'Thrust_power', 'Terameters']])
    y = np.array(data[['Sell_price']])
    #my_lreg = MyLinearRegression([[1.0], [1.0], [1.0], [1.0]])

    lst = data_spliter(x, y, 0.7)
    x_train = lst[0]
    y_train = lst[2]
    y_train = y_train[:, np.newaxis]
    x_test = lst[1]
    y_test = lst[3]
    y_test = y_test[:, np.newaxis]

    i = 0.0
    while i <= 1.0:
        my_lr = MyRidge(np.ones(4).reshape(-1, 1), 0.1, 1000, i)
        my_lr.fit_(x_train, y_train)

        y_hat = my_lr.predict_(x_test)

        print(my_lr.mse_(y_test, y_hat))