def one_versus_all_train(data, lambda_):
    """Train one regularised one-vs-all model per category.

    For each distinct label found in ``data[2]`` a fresh ``MLR`` model is
    fitted on the degree-3 expansion of ``data[0]`` against a 0/1 target that
    marks the samples carrying that label.  Every training sample is then
    assigned the label whose model scored it highest.

    Args:
        data: indexable container; ``data[0]`` is read as the training
            features and ``data[2]`` as the training labels.
        lambda_: regularisation strength forwarded to ``MLR.fit_``.

    Returns:
        list: ``[val, theta_list]`` where ``val`` is a float array with the
        winning label per training sample and ``theta_list`` holds the fitted
        ``theta`` of each model, in ``np.unique(data[2])`` order.
    """
    y_train = data[2]
    cat_unique = np.unique(y_train)
    n_samples = data[0].shape[0]
    # Samples whose score never exceeds 0 for any category keep the label 1.0.
    val = np.ones(n_samples)
    prob = np.zeros(n_samples)
    x_train = add_polynomial_features_extend(data[0], 3)
    theta_list = []
    for planete in cat_unique:
        y_zero_train = (y_train == planete).astype(float)
        mlr = MLR(np.ones(x_train.shape[1] + 1))
        # NOTE(review): alpha=5e-15 is extremely small; presumably tuned for
        # unscaled polynomial features -- confirm.
        mlr.fit_(x_train, y_zero_train, alpha=5e-15, n_cycle=100000,
                 lambda_=lambda_)
        # First column of the prediction is the score of each sample.
        scores = np.asarray(mlr.predict_(x_train))[:, 0]
        # Strict '<' keeps the earliest category on ties.
        better = prob < scores
        val[better] = planete
        prob[better] = scores[better]
        theta_list.append(mlr.theta)
    return [val, theta_list]
def train_cat(x, y, cat, theta=None, alpha=0.001, n_cycle=1000):
    """Fit a ``MyLogR`` model that separates ``cat`` from the other labels.

    Args:
        x: feature matrix; ``x.shape[1]`` is used to size the default theta.
        y: labels, passed to ``select_category`` together with ``cat``.
        cat: the category to train for.
        theta: initial parameters; when ``None`` a vector of ones with
            ``x.shape[1] + 1`` entries is used.
        alpha: forwarded positionally to ``MyLogR``.
        n_cycle: forwarded positionally to ``MyLogR``.

    Returns:
        The fitted ``MyLogR`` instance.
    """
    start_theta = np.ones(x.shape[1] + 1) if theta is None else theta
    target = select_category(y, cat)
    model = MyLogR(start_theta, alpha, n_cycle)
    model.fit_(x, target)
    return model
def solve_multiclass_classification(self, iterations=500, learning_rate=0.01):
    """Run the multi-class experiment with the custom model and scikit-learn.

    Fits ``MyLogisticRegression`` (``multi_class=True``) on the training
    split, prints the expected and the predicted test labels, hands the
    predictions to ``self.eval_classification`` and finally prints the
    classification report of a scikit-learn one-vs-rest model trained on the
    same split.

    Args:
        iterations: forwarded positionally to ``MyLogisticRegression``.
        learning_rate: forwarded positionally to ``MyLogisticRegression``.
    """
    own_model = MyLogisticRegression(iterations, learning_rate, multi_class=True)
    own_model.fit(self.train_inputs, self.train_outputs)
    own_predictions = own_model.predict(self.test_inputs)
    for labels in (self.test_outputs, own_predictions):
        print(labels)
    self.eval_classification(own_predictions)

    # scikit-learn baseline on the same split
    sk_model = linear_model.LogisticRegression(multi_class="ovr")
    sk_model.fit(self.train_inputs, self.train_outputs)
    sk_predictions = sk_model.predict(self.test_inputs)
    report = sklearn.metrics.classification_report(
        self.test_outputs,
        sk_predictions,
        target_names=self.output_names,
    )
    print(report)
def solve_binary_classification(self, iterations=100, learning_rate=0.01):
    """Run the binary experiment with the custom model and scikit-learn.

    Fits ``MyLogisticRegression`` on the training split, prints the learned
    linear model, evaluates the test predictions through
    ``self.eval_classification`` and repeats the exercise with scikit-learn's
    ``LogisticRegression``.  When the first training sample has exactly two
    entries the data and the predictions are also plotted.

    Args:
        iterations: forwarded positionally to ``MyLogisticRegression``.
        learning_rate: forwarded positionally to ``MyLogisticRegression``.
    """

    def _formula(intercept, coefficients):
        # Renders "f(x) = b0 + b1*x1 + b2*x2 ..." for printing.
        pieces = ["f(x) = " + str(intercept)]
        for position in range(len(coefficients)):
            pieces.append(
                " + " + str(coefficients[position]) + "*x" + str(position + 1)
            )
        return "".join(pieces)

    # Plotting helpers are only used when a sample has two entries.
    plottable = len(self.train_inputs[0]) == 2
    if plottable:
        plot_classification_data(self.train_inputs, self.train_outputs,
                                 self.input_features, self.output_names)

    own_model = MyLogisticRegression(iterations, learning_rate)
    own_model.fit(self.train_inputs, self.train_outputs)
    coefficients = own_model.get_coef()
    bias = own_model.get_intercept()
    print("model: " + _formula(bias, coefficients))

    own_predictions = own_model.predict(self.test_inputs)
    if plottable:
        plot_predictions(self.test_inputs, self.test_outputs, own_predictions,
                         self.input_features, self.output_names)
    self.eval_classification(own_predictions)

    # scikit-learn baseline on the same split
    sk_model = linear_model.LogisticRegression()
    sk_model.fit(self.train_inputs, self.train_outputs)
    sk_coefficients = sk_model.coef_.tolist()[0]
    sk_bias = sk_model.intercept_[0]
    print("model sk: " + _formula(sk_bias, sk_coefficients))
    sk_predictions = sk_model.predict(self.test_inputs)
    print(sklearn.metrics.classification_report(
        self.test_outputs, sk_predictions, target_names=self.output_names))
def one_versus_all_test(data, result):
    """Label the test samples with previously trained one-vs-all models.

    One ``MLR`` model is rebuilt per distinct label of ``data[2]`` from the
    matching entry of ``result``; each test sample receives the label whose
    model scored it highest.

    Args:
        data: indexable container; ``data[1]`` is read as the test features
            and ``data[2]`` as the training labels (source of the category
            list).
        result: sequence of model parameters, one per category, indexed in
            ``np.unique(data[2])`` order.  Presumably the ``theta_list``
            returned by ``one_versus_all_train`` -- verify against caller.

    Returns:
        np.ndarray: float array with the winning label per test sample.
    """
    cat_unique = np.unique(data[2])
    # NOTE(review): one_versus_all_train fits on
    # add_polynomial_features_extend(data[0], 3) while data[1] is used as-is
    # here; presumably the caller passes already-expanded test features --
    # confirm.
    x_test = data[1]
    n_samples = x_test.shape[0]
    # Samples whose score never exceeds 0 for any category keep the label 1.0.
    val = np.ones(n_samples)
    prob = np.zeros(n_samples)
    for j, planete in enumerate(cat_unique):
        mlr = MLR(result[j])
        # First column of the prediction is the score of each sample.
        scores = np.asarray(mlr.predict_(x_test))[:, 0]
        # Strict '<' keeps the earliest category on ties.
        better = prob < scores
        val[better] = planete
        prob[better] = scores[better]
    return val
def ofa(x: np.ndarray, y: np.ndarray, zipcode: float, x_test: np.ndarray,
        theta) -> np.ndarray:
    """Train a one-for-all model for ``zipcode`` and score ``x_test``.

    Args:
        x (np.ndarray): training features, shape (m, n).
        y (np.ndarray): training labels, shape (m, 1).
        zipcode (float): the zipcode opposed to all the others.
        x_test (np.ndarray): samples to score with the fitted model.
        theta: initial value for theta, handed to ``MyLR``.

    Returns:
        np.ndarray: output of ``MyLR.predict_`` on ``x_test`` for the model
        trained on "is ``zipcode``" versus "is not".
        None: if ``y`` is not a single-column 2-D array or its row count
        differs from ``x``.
    """
    # Check the rank first: indexing y.shape[1] on a 1-D array would raise
    # IndexError instead of returning the documented None.
    if y.ndim != 2 or y.shape[1] != 1 or x.shape[0] != y.shape[0]:
        return None
    is_zipcode = (y == zipcode)
    alpha = 3e-4
    max_iter = int(3e+5)
    lr_model = MyLR(theta, alpha, max_iter)
    lr_model.fit_(x, is_zipcode)
    return lr_model.predict_(x_test)
def test_logistic_regression():
    """Print the logistic-regression walkthrough on a three-sample data set.

    Shows the prediction and cost before training, the fitted ``theta`` and
    then the prediction and cost after training.  Nothing is asserted; the
    reference values are listed in the comments.
    """
    features = np.array([[1., 1., 2., 3.],
                         [5., 8., 13., 21.],
                         [3., 5., 9., 14.]])
    targets = np.array([[1], [0], [1]])
    model = MyLR([2, 0.5, 7.1, -4.3, 2.09], alpha=0.001, n_cycle=22000)

    def show(value):
        # Every result is followed by one blank line.
        print(value, end="\n\n")

    # Example 0: prediction with the initial parameters.
    # Reference output:
    # array([[0.99930437],
    #        [1.],
    #        [1.]])
    show(model.predict_(features))

    # Example 1: cost with the initial parameters.
    # Reference output:
    # 11.513157421577004
    show(model.cost_(features, targets))

    # Example 2: parameters after training.
    # Reference output:
    # array([[1.04565272],
    #        [0.62555148],
    #        [0.38387466],
    #        [0.15622435],
    #        [-0.45990099]])
    model.fit_(features, targets)
    show(model.theta)

    # Example 3: prediction after training.
    # Reference output:
    # array([[0.72865802],
    #        [0.40550072],
    #        [0.45241588]])
    show(model.predict_(features))

    # Example 4: cost after training.
    show(model.cost_(features, targets))
# test.py :+: :+: :+: # # +:+ +:+ +:+ # # By: mli <*****@*****.**> +#+ +:+ +#+ # # +#+#+#+#+#+ +#+ # # Created: 2020/12/26 18:14:12 by mli #+# #+# # # Updated: 2020/12/26 18:49:06 by mli ### ########.fr # # # # **************************************************************************** # import numpy as np from my_logistic_regression import MyLogisticRegression as MyLR if __name__ == "__main__": X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]]) Y = np.array([[1], [0], [1]]) mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09], max_iter=int(2e4)) # Example 0: Y_HAT = mylr.predict_(X) print(Y_HAT) # Output: """ array([[0.99930437], [1. ], [1. ]]) """ # Example 1: print(mylr.cost_(Y, Y_HAT)) # Output: """
census = np.array(dcensus[["height", "weight", "bone_density"]]).reshape(-1, 3)
planets = np.array(dplanets[["Origin"]]).reshape(-1, 1)
data = data_spliter(census, planets, 0.7)

# The two data sets (training and test), each split into X and Y.
# Features get the degree-3 expansion from adp.
features_split, labels_split = data[0], data[1]
xtr_set = adp(features_split[0], 3)
xtest_set = adp(features_split[1], 3)
ytr_set = labels_split[0]
ytest_set = labels_split[1]
theta = np.zeros(xtr_set.shape[1] + 1)

# Only the training features go through regularize; the matching call for
# the test features is disabled.
xtr_set = regularize(xtr_set)
# xtest_set = regularize(xtest_set)

# One classifier per zipcode: the labels are turned into a boolean
# "equals zipcode i or not" target.  Each model contributes one column of
# test-set predictions.
columns = []
for i in range(4):
    mylr = MyLR(theta, alpha=0.0001, n_cycle=100, penalty=1)
    mylr.fit_(xtr_set, np.array(ytr_set == i))
    columns.append(np.array(mylr.predict_(xtest_set).reshape(-1, 1)))
pred = np.hstack(columns)

# Predicted class per test sample: index of the highest-scoring column.
n_answer = []
result = [val.argmax() for val in pred]
answer = np.array(result).reshape(-1, 1)

# Column by column, record a 1 for every prediction above 0.01.
for j in range(pred.shape[1]):
    for i in range(pred.shape[0]):
        if pred[i][j] > 0.01:
            n_answer.append(1)
Y = np.array(planets[["Origin"]]).reshape(-1, 1)
X = np.array(people[["height", "weight", "bone_density"]]).reshape(-1, 3)

# Add polynomial features up to degree 3.
X = add_polynomial_features(X, 3)

# Normalise the data one column at a time.
for col in range(X.shape[1]):
    column = slice(col, col + 1)
    X[:, column] = minmax(X[:, column])

# Split the data into training and testing sets.
X_train, X_test, Y_train, Y_test = data_splitter(X, Y, 0.5)

thetas = np.zeros(X.shape[1] + 1)
mlogr = MLogR(thetas, alpha=0.02, n_cycle=2000, penalty='l2')

# For each category (0, 1, 2, 3) run a logistic regression and concatenate
# the 4 predicted probability vectors, giving a matrix with the probability
# that each element belongs to each category.  The whole procedure is
# repeated for lambda = 0.0, 1.0, ..., 9.0.
f1_scores = []
for l in range(0, 100, 10):
    for i in range(4):
        # NOTE(review): the same `thetas` array is re-assigned on every pass;
        # if fit_ updates it in place this does not restart from zeros --
        # confirm.
        mlogr.thetas = thetas
        mlogr.lambda_ = float(l / 10)
        Y_train_label = select_label(Y_train, i)
        mlogr.fit_(X_train, Y_train_label)
        probabilities = mlogr.predict_(X_test).reshape(-1, 1)
        if i == 0:
            results = probabilities
        else:
            results = np.concatenate((results, probabilities), axis=1)
for i in y_train]).reshape(-1, 1) #각각의 분류모델 데이터 전처리 y_test0 = np.array([1 if i == 0 else 0 for i in y_test]).reshape(-1, 1) y_train1 = np.array([1 if i == 1 else 0 for i in y_train]).reshape(-1, 1) #각각의 분류모델 데이터 전처리 y_test1 = np.array([1 if i == 1 else 0 for i in y_test]).reshape(-1, 1) y_train2 = np.array([1 if i == 2 else 0 for i in y_train]).reshape(-1, 1) #각각의 분류모델 데이터 전처리 y_test2 = np.array([1 if i == 2 else 0 for i in y_test]).reshape(-1, 1) y_train3 = np.array([1 if i == 3 else 0 for i in y_train]).reshape(-1, 1) #각각의 분류모델 데이터 전처리 y_test3 = np.array([1 if i == 3 else 0 for i in y_test]).reshape(-1, 1) mylr0 = MyLR([[-1.32069828], [-1.02177506], [-0.64913889], [-0.06329356]]) # The flying cities of Venus (0) mylr0.fit_(x_train, y_train0) mylr0.alpha = 0.03 mylr0.fit_(x_train, y_train0) mylr0.alpha = 0.3 mylr0.fit_(x_train, y_train0) mylr1 = MyLR([[-1.56373886], [-0.58824757], [0.28303058], [2.20809316]]) # United Nations of Earth (1) mylr1.fit_(x_train, y_train1) mylr1.alpha = 0.03 mylr1.fit_(x_train, y_train1) mylr1.alpha = 0.3 mylr1.fit_(x_train, y_train1) mylr2 = MyLR([[-2.58616195], [0.60780971], [2.8277886],
census = np.array(dcensus[["height", "weight", "bone_density"]]).reshape(-1, 3)
planets = np.array(dplanets[["Origin"]]).reshape(-1, 1)
data = data_spliter(census, planets, 0.5)

# The two data sets (training and test), each split into X and Y.
xtr_set = data[0][0]
ytr_set = data[1][0]
xtest_set = data[0][1]
ytest_set = data[1][1]

# Only the training features go through regularize; the matching call for
# the test features is disabled.
# NOTE(review): predictions below are therefore made on unscaled test
# features -- confirm this is intended.
xtr_set = regularize(xtr_set)
# xtest_set = regularize(xtest_set)

# One-vs-all: for each zipcode the labels are converted to a boolean
# "equals zipcode i or not" target.  Two models with different learning
# rates are trained side by side.
for i in range(4):
    mylr = MyLogisticRegression([0, 0, 0, 0], alpha=0.0001, n_cycle=10000)
    mylr_t = MyLogisticRegression([0, 0, 0, 0], alpha=0.001, n_cycle=10000)
    # Fit on the TRAINING labels: the test labels must not be used here, and
    # only matched the training features' row count because of the 0.5 split.
    is_zipcode = np.array(ytr_set == i)
    mylr.fit_(xtr_set, is_zipcode)
    # The second model has to be trained as well before it can predict.
    mylr_t.fit_(xtr_set, is_zipcode)
    column = mylr.predict_(xtest_set).reshape(-1, 1)
    column_t = mylr_t.predict_(xtest_set).reshape(-1, 1)
    if i == 0:
        pred = np.array(column)
        pred_t = np.array(column_t)
    else:
        pred = np.c_[pred, column]
        # Extend pred_t with its own history, not with pred's columns.
        pred_t = np.c_[pred_t, column_t]

# Predicted class per test sample: index of the highest-scoring column.
result = [val.argmax() for val in pred]
result_t = [val.argmax() for val in pred_t]
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from my_logistic_regression import MyLogisticRegression as MyLR

# Load the census features and the planet of origin of each citizen.
census = pd.read_csv("solar_system_census.csv")
origins = pd.read_csv("solar_system_census_planets.csv")
X = np.array(census[['height', 'weight', 'bone_density']]).reshape(-1, 3)
Y = np.array(origins['Origin']).reshape(-1, 1)

mylr = MyLR([1, 1, 1, 1])

# Pre-processing helpers are provided by the model class itself.
X = mylr.zscore(X)
Y = mylr.label(Y, 2)
x_train, x_test, y_train, y_test = mylr.data_spliter(X, Y, 0.6)

# Train, then report what fit_ returned and the cost on the training split.
fit_output = mylr.fit_(x_train, y_train)
print(fit_output)
print(mylr.cost_(x_train, y_train))

# Plot targets (blue) and predictions (green) against the first feature.
first_feature = x_train[:, 0]
plt.plot(first_feature, y_train, 'b.')
plt.plot(first_feature, mylr.predict_(x_train), 'g.')
plt.show()
def _one_vs_rest(labels, planet):
    """Return a single-column array of 1/0 flags: 1 where a label equals planet."""
    flags = [1 if label == planet else 0 for label in labels]
    return np.array(flags).reshape(-1, 1)


# Data pre-processing for each per-planet classification model.
y_test0 = _one_vs_rest(y_test, 0)
y_train1 = _one_vs_rest(y_train, 1)
y_test1 = _one_vs_rest(y_test, 1)
y_train2 = _one_vs_rest(y_train, 2)
y_test2 = _one_vs_rest(y_test, 2)
y_train3 = _one_vs_rest(y_train, 3)
y_test3 = _one_vs_rest(y_test, 3)

# Ten parameters, all initialised to 1.0; the same array is handed to each
# of the four models.
theta = np.array([[1]] * 10, dtype=float)
mylr0 = MyLR(theta, lambda_=0)  # The flying cities of Venus (0)
mylr1 = MyLR(theta, lambda_=0)  # United Nations of Earth (1)
mylr2 = MyLR(theta, lambda_=0)  # Mars Republic (2)
mylr3 = MyLR(theta, lambda_=0)  # The Asteroids' Belt colonies (3)
y_n = []
y_n2 = []

# Starting parameters restored on the Venus model before every pass.
venus_start_thetas = [[-0.38004857], [0.12257596], [-1.13496089],
                      [0.64144711], [0.13721429], [-0.46771826],
                      [-1.18485222], [-0.46742162], [0.03928006],
                      [-0.1718098]]
for i in range(10):
    mylr0.thetas = np.array(venus_start_thetas)
    # The first fit of a pass runs with whatever alpha is currently set on
    # the model (the constructor's value on the first pass, 0.00007 after).
    mylr0.fit_(x_train_add_poly, y_train0)
    for rate in (0.00003, 0.00007):
        mylr0.alpha = rate
        mylr0.fit_(x_train_add_poly, y_train0)
import numpy as np

from my_logistic_regression import MyLogisticRegression as MyLR

# Three samples with four features each, and their binary targets.
X = np.array([
    [1., 1., 2., 3.],
    [5., 8., 13., 21.],
    [3., 5., 9., 14.],
])
Y = np.array([[1], [0], [1]])
initial_thetas = [[2], [0.5], [7.1], [-4.3], [2.09]]
mylr = MyLR(initial_thetas)

# Example 0: prediction with the initial parameters.
# Expected output:
# array([[0.99930437],
#        [1.        ],
#        [1.        ]])
initial_prediction = mylr.predict_(X)
print(initial_prediction)

# Example 1: cost with the initial parameters.
# Expected output:
# 11.513157421577004
initial_cost = mylr.cost_(X, Y)
print(initial_cost)

# Example 2: parameters after training.
# Expected output:
# array([[ 1.04565272],
#        [ 0.62555148],
#        [ 0.38387466],
#        [ 0.15622435],
#        [-0.45990099]])
mylr.fit_(X, Y)
print(mylr.thetas)

# Example 3: prediction after training.
trained_prediction = mylr.predict_(X)
print(trained_prediction)
x_test = add_polynomial_features(x_test, 3)

# Training: one l2-penalised model per zipcode.
print("Train models")
thetas = [0.] * 10
alpha = 1e-4
n_cycle = 100000
lambda_ = 0
models = []
for i, zipcode in enumerate(zipcodes):
    # lambda_ grows by 0.1 before each model is built, so model i is trained
    # with roughly (i + 1) * 0.1.
    lambda_ += 0.1
    print("For zipcode = {}".format(zipcode))
    # Column of 1/0 flags: 1 where the training label equals this zipcode.
    flags = [1 if y_train[j] == zipcode else 0 for j in range(len(x_train))]
    y_train_z = np.array(flags).reshape(-1, 1)
    mn = MyLR(thetas=thetas, alpha=alpha, n_cycle=n_cycle, penalty='l2',
              lambda_=lambda_)
    mn.fit_(x_train, y_train_z)
    print("new thetas = {}".format(mn.thetas))
    models.append(mn)

# Evaluation: print the score of every model on the test set.
for i, model in enumerate(models):
    # NOTE(review): `l` is not used in the visible code and differs from the
    # lambda_ the model was trained with (see above).
    l = i * 0.1
    y_hat = model.predict_(x_test)
    print(f1_score_(y_test, y_hat))
return np.array(new).reshape(-1, 1) planets = pd.read_csv("../subjects/day03/resources/solar_system_census_planets.csv") people = pd.read_csv("../subjects/day03/resources/solar_system_census.csv") origins = np.array(planets[["Origin"]]).reshape(-1,1) X = np.array(people[["height", "weight", "bone_density"]]).reshape(-1, 3) #normalise the data X = np.concatenate((minmax(X[:, :1]), minmax(X[:, 1:2]), minmax(X[:, 2:3])), axis=1) #split data into training and testing sets X_train, X_test, origins_train, origins_test = data_splitter(X, origins, 0.5) thetas = np.zeros(4) mlogr = MLogR(thetas, alpha=0.9, n_cycle=2000) for i in range(4): mlogr.thetas = thetas origins_train_label = select_label(origins_train, i) origins_test_label = select_label(origins_test, i) #mlogr.fit_(X_train, origins_train_label) plt.title(('label: ' + str(i))) mlogr.plot_convergence(X_train, origins_train_label) if i == 0: results = mlogr.predict_(X_test).reshape(-1, 1) if i > 0: results = np.concatenate((results, mlogr.predict_(X_test).reshape(-1, 1)), axis=1) def get_biggest_index(row): biggest = -1 index = -1
import numpy as np

from my_logistic_regression import MyLogisticRegression as MyLR

# Three samples with four features each, and their binary targets.
X = np.array([
    [1., 1., 2., 3.],
    [5., 8., 13., 21.],
    [3., 5., 9., 14.],
])
Y = np.array([[1], [0], [1]])

# The model starts from all-zero parameters.
# NOTE(review): the reference outputs below presumably belong to the
# starting point [2, 0.5, 7.1, -4.3, 2.09] -- confirm before comparing.
mylr = MyLR([0., 0., 0., 0., 0.])

# Example 0: prediction before training.
# Reference output:
# array([[0.99930437], [1.        ], [1.        ]])
untrained_prediction = mylr.predict_(X)
print(untrained_prediction)

# Example 1: cost before training.
# Reference output: 11.513157421577004
untrained_cost = mylr.cost_(X, Y)
print(untrained_cost)

# Example 2: parameters after training.
# Reference output:
# array([[ 1.04565272], [ 0.62555148], [ 0.38387466], [ 0.15622435], [-0.45990099]])
mylr.fit_(X, Y, alpha=1e-4, n_cycle=50000)
print(mylr.thetas)

# Example 3: prediction after training.
# Reference output:
# array([[0.72865802], [0.40550072], [0.45241588]])
trained_prediction = mylr.predict_(X)
print(trained_prediction)

# Example 4: