def Test2(): dataSet = read_dataset.DataSet('./DataSet/Exams_grad.csv') learningRate = 0.5 numofIteration = 500 samples_values_arr, target_feature_arr = dataSet.getDataSetAsNumPy() target_feature_arr = pd.DataFrame(target_feature_arr) logReg = LogisticRegression(samples_values_arr, target_feature_arr) model_weights, regressionLine = logReg.gradientDescent(initlearningRate=learningRate, numofIteration=numofIteration) plt.figure(1) plt.scatter(samples_values_arr.iloc[:,0], samples_values_arr.iloc[:,1], s=10 * target_feature_arr, label='faulty', marker='x') plt.scatter(samples_values_arr.iloc[:,0], samples_values_arr.iloc[:,1], s=10 * (1 - target_feature_arr), label='good', marker='o') plt.xlabel(dataSet.dataSet.columns.values[0]) plt.ylabel(dataSet.dataSet.columns.values[1]) print(regressionLine) plt.plot(samples_values_arr.iloc[:,0], regressionLine, color='r') plt.title(dataSet.data_set_name + ' data set') # print(logReg.square_error) # plt.figure(2) plt.plot(range(numofIteration), logReg.square_error) plt.xlabel('Num of iterations') plt.ylabel('Square Error') plt.title('Cost Function over iteration') plt.legend() plt.show()
def Test1(): dataSet = read_dataset.DataSet('./DataSet/franchise_rest.csv') learningRate = 0.5 numofIteration = 100 samples_values_arr, target_feature_arr = dataSet.getDataSetAsNumPy() linearReg = LinearRegression(samples_values_arr, target_feature_arr) model_weights, regressionLine = linearReg.gradientDescent( initlearningRate=learningRate, numofIteration=numofIteration) plt.figure(1) plt.scatter(samples_values_arr, target_feature_arr, label=dataSet.data_set_name, marker='x') plt.xlabel(dataSet.dataSet.columns.values[0]) plt.ylabel(dataSet.dataSet.columns.values[-1]) plt.plot(samples_values_arr, regressionLine) plt.title(dataSet.data_set_name + ' data set') print(linearReg.square_error) plt.figure(2) plt.plot(range(numofIteration), linearReg.square_error) plt.xlabel('Num of iterations') plt.ylabel('Square Error') plt.title('Cost Function over iteration') plt.legend() plt.show()
def Test3(): dataSet = read_dataset.DataSet('./DataSet/House_Price.csv') learningRate = 1.5 numofIteration = 100 samples_values_arr, target_feature_arr = dataSet.getDataSetAsNumPy() linearReg = LinearRegression(samples_values_arr, target_feature_arr) model_weights, regressionLine = linearReg.gradientDescent( initlearningRate=learningRate, numofIteration=numofIteration) t_stat, p_values, null_hypothesis = linearReg.stat_significance_calc() print('t-statistic :') print(t_stat) print('P_values :') print(p_values) print('significant impact') print(null_hypothesis) fig = plt.figure() ax = Axes3D(fig) ax.scatter3D(samples_values_arr.iloc[:, 0], samples_values_arr.iloc[:, 1], np.transpose(target_feature_arr), marker='x', label="The actual Values") ax.scatter3D(samples_values_arr.iloc[:, 0], samples_values_arr.iloc[:, 1], regressionLine, marker='o', label="The predicated Values") ax.set_xlabel(dataSet.dataSet.columns.values[0]) ax.set_ylabel(dataSet.dataSet.columns.values[1]) ax.set_zlabel(dataSet.dataSet.columns.values[2]) plt.title(dataSet.data_set_name + ' data set') print(linearReg.square_error) # # plt.figure(2) plt.plot(range(numofIteration), linearReg.square_error) plt.xlabel('Num of iterations') plt.ylabel('Square Error') plt.title('Cost Function over iteration') # # plt.legend() plt.show()