コード例 #1
0
    def setUp(self):
        """Build the Model and Regression fixtures used by the test methods."""

        # Model over "./data/simple" with the ".csv" extension; its file list
        # is stored on self.files and reused below.
        self.model1 = Model.Model("./data/simple", ".csv")
        self.files = self.model1.set_files_in_directory()
        self.model2 = Model.Model("./data/student", ".csv")
        # Two more models: one with a different directory string and one with
        # a different extension.
        self.model3 = Model.Model("./datfff", ".csv")
        self.model4 = Model.Model("./data/student", ".txt")
        # NOTE(review): model2 is handed model1's file list (self.files), not
        # a list obtained from model2 itself -- confirm this is intended.
        self.model1out = self.model1.set_dataframes(self.files)
        self.model2out = self.model2.set_dataframes(self.files)
        # NOTE(review): regression2 receives the Model object (self.model2),
        # whereas regression1 receives set_dataframes() output -- confirm.
        self.regression1 = Regression.Regression(self.model1out)
        self.regression2 = Regression.Regression(self.model2)
コード例 #2
0
def main():
    """Run a regression on the CSV data under ``./data/simple`` and report it.

    The data is loaded through ``Model``, wrapped in ``rg.Regression`` and the
    first element of ``split_data()`` is used as training data.  What happens
    next depends on the class name of the object returned by
    ``Regression.run``:

    * ``"UnivariateLR"``: print the two fitted parameters, print the model
      evaluation and plot the history of the first parameter.
    * ``"MultivariateLR"``: print the model evaluation and plot the cost
      history.

    Any other class name results in no further output.
    """
    model = Model.Model("./data/simple", ".csv")
    frames = model.set_dataframes(model.set_files_in_directory())
    regression = rg.Regression(frames)

    training_data = regression.split_data()[0]
    names = regression.get_columnNames(training_data)
    ind, dep = regression.get_data(columns_names=names,
                                   training_data=training_data)

    lr = regression.run(training_data)
    kind = lr.__class__.__name__

    if kind == "UnivariateLR":
        m_fit, b_fit = lr.run()
        print(m_fit, b_fit)
        y_hat = lr.predict(m_fit, b_fit)
        print(lr.evaluate_model(ind, y_hat))
        # Only the first of the two returned histories is plotted.
        m_history, _b_history = lr.get_params_history()
        lr.plot_history_m(m_history)
        return

    if kind == "MultivariateLR":
        coefficients, cost_history = lr.run()
        y_hat = lr.predict(coefficients)
        print(lr.evaluate_model(dep, y_hat))
        lr.plot_cost(cost_history)
コード例 #3
0
def main(Expected1, Expected2, Dispersion1, Dispersion2, Number1, Number2,
         accuracy):
    """Fit a logistic regression two ways and draw both results.

    Data is generated by ``support.gen_data1`` from the six distribution
    arguments.  Weights are then found once with
    ``LogisticRegression.find_weights`` and once by minimising
    ``LogisticRegression.Q`` with SciPy's Nelder-Mead method.  The SciPy
    solution is scaled to unit Euclidean length before being drawn.

    Args:
        Expected1, Expected2, Dispersion1, Dispersion2, Number1, Number2:
            Forwarded unchanged to ``support.gen_data1``.  ``Expected1`` must
            expose ``.shape``; the weight vectors have
            ``Expected1.shape[0] + 1`` entries.
        accuracy: Forwarded unchanged to ``find_weights``.

    Returns:
        The second value returned by ``find_weights``.
    """
    fig = pb.figure()
    data, axes = support.gen_data1(Expected1, Expected2, Dispersion1,
                                   Dispersion2, Number1, Number2, fig)
    l_regression = reg.LogisticRegression()
    l_regression.fit(data)

    n_weights = Expected1.shape[0] + 1

    weights_by_grad, N = l_regression.find_weights(np.zeros(n_weights),
                                                   accuracy)

    weights_by_scipy = minimize(l_regression.Q,
                                np.zeros(n_weights),
                                method='nelder-mead')

    # Scale the SciPy solution to unit Euclidean length.  np.linalg.norm
    # avoids the builtin reduce(), which does not exist on Python 3.
    weights_by_scipy.x /= np.linalg.norm(weights_by_scipy.x)

    support.draw(weights_by_grad, 'black', axes)
    support.draw(weights_by_scipy.x, 'yellow', axes)
    pb.show()
    return N
コード例 #4
0
def main():
    """Compare three Bayes classifiers and plot the training data twice.

    ``GaussBayes``, ``GaussNB`` and ``GenBayes`` (in ``"Bernoulli"`` mode) are
    fitted on the training split from ``reg.trainAndTestData()`` and scored on
    the test split with ``accuracy``.  Two scatter plots of the first two
    training columns are then shown: one coloured by ``y_train`` and one
    coloured by the ``GaussBayes`` predictions for the training data.

    Returns:
        A one-row ``DataFrame`` (index ``0``) with the columns
        ``"Gauss Bayes"``, ``"Gauss Naive Bayes"`` and ``"Bernoulli"`` holding
        the three accuracies.
    """
    X_train, y_train, X_test, y_test = reg.trainAndTestData()

    # Gauss Bayes: also predict on the training data for the second plot.
    gauss_bayes = GaussBayes()
    gauss_bayes.fit(X_train, y_train)
    test_pred_gb = gauss_bayes.predict(X_test)
    y_hat = gauss_bayes.predict(X_train)
    acc_gb = accuracy(test_pred_gb, y_test)

    # Gauss Naive Bayes
    gauss_nb = GaussNB()
    gauss_nb.fit(X_train, y_train)
    acc_nb = accuracy(gauss_nb.predict(X_test), y_test)

    # GenBayes in "Bernoulli" mode
    bernoulli = GenBayes()
    bernoulli.fit(X_train, y_train, "Bernoulli")
    acc_bern = accuracy(bernoulli.predict(X_test, "Bernoulli"), y_test)

    labels = ["Gauss Bayes", "Gauss Naive Bayes", "Bernoulli"]
    df = pd.DataFrame(dict(zip(labels, [acc_gb, acc_nb, acc_bern])),
                      columns=labels,
                      index=[0])

    # First figure is coloured by y_train, second by y_hat; everything else
    # about the two figures is identical.
    for colours in (y_train, y_hat):
        fig = plt.figure(figsize=(15, 10))
        ax = fig.add_subplot(111)
        cax = ax.scatter(X_train[:, 0],
                         X_train[:, 1],
                         c=colours,
                         cmap='tab20c')
        plt.xlabel("sqrt_ft")
        plt.ylabel("price_per_sqft")
        plt.title("HOA based on sqrt_ft and price_per_sqft")
        fig.colorbar(cax)
        plt.show()

    return df
コード例 #5
0
ファイル: GUI.py プロジェクト: AyatsujiP/AKIHA
    def __init__(self, master=None):
        """Initialise the frame, build its widgets and load the data tables.

        Args:
            master: Parent object.  Its ``ver`` attribute is read
                unconditionally, so the default of ``None`` raises
                ``AttributeError``.
        """
        self.ver = master.ver
        self.myFont = settings.FONT
        self.picDir = settings.PICTURE_DIR
        # Text accumulators: ranking text and the text produced from the
        # regression result.
        self.ansText = u"Ranking:\n"
        self.seihekiText = u""

        tkinter.Frame.__init__(self, master)
        # Tk variable holding the current answer; starts as True.
        self.ans = tkinter.BooleanVar()
        self.ans.set(True)
        self.ansDialog = False
        self.array = []
        self.ansArray = []
        self.nextText = []
        # Widgets are created and laid out before the data tables are read.
        self.pack()
        self.makeWidget()
        self.alignWidget()
        self.idolsContainer = ac.IdolsContainer()

        # Table read from SORT_FILE_NAME is shuffled; its names are handed to
        # setNameArray().  The table from SUGGEST_FILE_NAME is kept as-is.
        self.tmpCont = MergeSort.readTable(settings.SORT_FILE_NAME)
        self.tmpCont.shuffle()
        self.sugCont = MergeSort.readTable(settings.SUGGEST_FILE_NAME)
        self.nameArray = self.tmpCont.returnNameArray()
        self.setNameArray(self.nameArray)
        self.reg = Regression.RegressionClass()
        self.sugRet = False
コード例 #6
0
 def data_shape_test(self):
     """Check that ``linearRegression`` raises for X and Y of unequal shape.

     The call must raise an exception whose text contains
     ``"X and Y must have equal shape!"``.  No return values are checked:
     when the call raises, nothing is returned, so names unpacked from the
     call would be unbound after the ``with`` block.

     NOTE(review): ``X`` and ``Y`` are not defined in this method; presumably
     they are module-level fixtures -- confirm.
     NOTE(review): the name does not start with ``test``, so unittest's
     default loader will not collect this method -- confirm how it is run.
     """
     with self.assertRaises(Exception) as context:
         Regression.linearRegression(X, Y)
     # Compare against the message text; an exception instance does not
     # support the ``in`` operator on Python 3.
     self.assertIn("X and Y must have equal shape!", str(context.exception))
コード例 #7
0
 def __init__(self,
              dtype="float64",
              learning_rate=0.01,
              iters=300,
              normalize=False,
              copy_X=True,
              method='normal',
              alpha=0.1,
              batch_size=32,
              tolerance=1e-07,
              is_shuffle=True,
              random_state=42,
              metric='mse'):
     """Initialise the instance by delegating to ``Regression.__init__``.

     Every parameter is passed through unchanged; this method adds no state
     of its own.  The defaults shown in the signature are the ones this class
     supplies to ``Regression.__init__``.
     """
     # Arguments are forwarded positionally in declaration order, so this
     # list must stay in step with the parameter order of
     # Regression.__init__.
     Regression.__init__(self, dtype, learning_rate, iters, normalize,
                         copy_X, method, alpha, batch_size, tolerance,
                         is_shuffle, random_state, metric)
コード例 #8
0
 def empty_test(self):
     """Check that ``linearRegression`` raises for empty X or Y.

     The call must raise an exception whose text contains
     ``"X or Y should not be Empty!"``.  No return values are checked: when
     the call raises, nothing is returned, so names unpacked from the call
     would be unbound after the ``with`` block.

     NOTE(review): ``X`` and ``Y`` are not defined in this method; presumably
     they are module-level fixtures -- confirm.
     NOTE(review): the name does not start with ``test``, so unittest's
     default loader will not collect this method -- confirm how it is run.
     """
     with self.assertRaises(Exception) as context:
         Regression.linearRegression(X, Y)
     # Compare against the message text; an exception instance does not
     # support the ``in`` operator on Python 3.
     self.assertIn("X or Y should not be Empty!", str(context.exception))
コード例 #9
0
 def parameter_type_test(self):
     """Check that ``linearRegression`` raises for unsupported argument types.

     The call must raise an exception whose text contains
     ``"X and Y must be numpy arrays or python lists!"``.  No return values
     are checked: when the call raises, nothing is returned, so names
     unpacked from the call would be unbound after the ``with`` block.

     NOTE(review): ``X`` and ``Y`` are not defined in this method; presumably
     they are module-level fixtures -- confirm.
     NOTE(review): the name does not start with ``test``, so unittest's
     default loader will not collect this method -- confirm how it is run.
     """
     with self.assertRaises(Exception) as context:
         Regression.linearRegression(X, Y)
     # Compare against the message text; an exception instance does not
     # support the ``in`` operator on Python 3.
     self.assertIn("X and Y must be numpy arrays or python lists!",
                   str(context.exception))
コード例 #10
0
    def get_model(model, type, params):
        """Build a decision-tree or random-forest estimator and print it.

        Args:
            model: ``'class'`` selects an estimator from ``Classifier``; any
                other value selects one from ``Regression``.
            type: ``'tree'`` selects the decision tree; any other value
                selects the random forest.
            params: ``None`` to construct the estimator with its defaults, or
                a mapping providing ``'max_depth'`` and
                ``'min_samples_split'`` (trees) plus ``'max_features'`` and
                ``'n_trees'`` (forests).

        Returns:
            The constructed estimator.

        Raises:
            KeyError: If ``params`` is given but lacks a required key.
        """
        use_classifier = model == 'class'

        # The `type` argument selects tree vs. forest for both families.  The
        # regression family used to consult the global `args.type` instead,
        # which ignored the value the caller passed in.
        if type == 'tree':
            estimator_cls = (Classifier.DecisionTreeClassifier
                             if use_classifier else
                             Regression.DecisionTreeRegressor)
            param_keys = ('max_depth', 'min_samples_split')
        else:
            estimator_cls = (Classifier.RandomForestClassifier
                             if use_classifier else
                             Regression.RandomForestRegressor)
            param_keys = ('max_depth', 'max_features', 'n_trees',
                          'min_samples_split')

        if params is None:
            kwargs = {}
        else:
            kwargs = {key: params[key] for key in param_keys}

        model = estimator_cls(**kwargs)
        print(model)
        return model
コード例 #11
0
ファイル: GUI.py プロジェクト: AyatsujiP/AKIHA
    def nextCommand(self):
        """
        Change the state of the GUI after an answer has been given.

        The current answer is recorded and the merge sort is asked what to do
        next.  If it returns two entries, their images are shown.  Otherwise
        (sorting has finished) the ranking is logged, the regression is run
        on the sorted container, and the result windows are opened.
        """
        self.ansArray.append(self.ans.get())

        # Decide which two people are compared next.
        self.nextText = MergeSort.mergeWithoutRecWithAns(
            self.array, self.ansArray)

        # Two people came back: display their images and stop here.
        if len(self.nextText) == 2:
            self.imageConfig()
            return

        # Otherwise (the sort is finished) append the ranking to the text.
        ranking_lines = [
            u"\tNo. %d:\t%s\n" % (rank, name)
            for rank, name in enumerate(self.nextText, start=1)
        ]
        self.ansText = self.ansText + u"".join(ranking_lines)

        # Strip the trailing newline and save the result to the log.
        logging.info(self.ansText.rstrip(u"\n"))

        # The merge-sorted result is returned, so store it in the new
        # container in that same order.
        for sorted_name in self.nextText:
            sorted_idol = self.tmpCont.returnIdolByName(sorted_name)
            self.idolsContainer.appendIdol(sorted_idol)
        self.nextButton.configure(state=tkinter.DISABLED)

        # Register the container with the regression instance.
        self.reg.register(self.idolsContainer.returnContainer())
        self.reg.normalizeCoef()

        # Run the regression analysis.
        regression_result = self.reg.regression()
        self.seihekiText = Regression.seihekiChecker(regression_result)

        # Write the coefficients to the log.
        logging.info(self.seihekiText)

        suggestion_pool = self.sugCont.returnContainer()
        self.sugText = self.reg.returnPredict(suggestion_pool)

        # Show the message window.
        self.messageWindow()

        # Open the new window.
        if self.ansDialog == True:
            window_options = dict(master=self,
                                  picDir=self.picDir,
                                  sugCont=self.sugCont,
                                  sugText=self.sugText,
                                  myFont=self.myFont,
                                  addText=self.addText,
                                  seihekiText=self.seihekiText)
            self.sugWindow = SugWindow(**window_options)
            self.sugWindow.mainloop()
コード例 #12
0
def cross_validation(X, y, n_folds, method='ols', lambda_=0.01):
    """Average train/validation scores of ``Regression`` over shuffled K folds.

    For every fold of a ``KFold(n_splits=n_folds, random_state=0,
    shuffle=True)`` split, a fresh ``Regression(method, lambda_)`` is fitted
    on the training rows and evaluated on both the training and the
    validation rows.

    Args:
        X: Array indexed by row along its first axis.
        y: Targets; flattened with ``np.ravel`` when it has more than one
            dimension.
        n_folds: Number of folds.
        method: Forwarded to ``Regression``.
        lambda_: Forwarded to ``Regression``.

    Returns:
        Tuple ``(mse_train, mse_test, r2_train, r2_test, b_train, b_test,
        var_train, var_test)`` where each entry is the mean over the folds;
        ``b_*`` comes from ``bias`` and ``var_*`` is ``np.var`` of the
        predictions.
    """
    if len(y.shape) > 1:
        y = np.ravel(y)

    splitter = KFold(n_splits=n_folds, random_state=0, shuffle=True)

    # One row per fold; column 0 holds the training score, column 1 the
    # validation score.
    mse = np.zeros((n_folds, 2))
    r2 = np.zeros((n_folds, 2))
    b = np.zeros((n_folds, 2))
    var = np.zeros((n_folds, 2))

    for fold, (train_index, val_index) in enumerate(splitter.split(X)):
        model = Regression(method, lambda_)
        model.fit(X[train_index], y[train_index])

        # predict() leaves its result on model.y_pred.
        model.predict(X[train_index])
        y_pred_train = model.y_pred
        model.predict(X[val_index])
        y_pred_test = model.y_pred

        scored_pairs = ((y[train_index], y_pred_train),
                        (y[val_index], y_pred_test))
        for column, (y_true, y_pred) in enumerate(scored_pairs):
            mse[fold, column] = mean_squared_error(y_true, y_pred)
            r2[fold, column] = r2_score(y_true, y_pred)
            b[fold, column] = bias(y_true, y_pred)
            var[fold, column] = np.var(y_pred)

    averaged = []
    for table in (mse, r2, b, var):
        averaged.append(np.mean(table[:, 0]))
        averaged.append(np.mean(table[:, 1]))
    return tuple(averaged)
コード例 #13
0
def analyze_regression(x1, x2, y, method='ols', n_folds=5, data_name='data'):
    """Score polynomial fits of degree 1..20 and write the results to CSV.

    For every regularisation value and every degree a design matrix is built
    with ``create_design_matrix(x1, x2, deg=deg)``.  With ``n_folds > 1`` the
    scores come from ``cross_validation``; otherwise a single ``Regression``
    is fitted on all data and only the training scores are filled in (the
    test scores are ``None``).

    The table is printed and saved as
    ``error_scores_<data_name>_<method>.csv``, with ``_cv`` inserted before
    the extension when ``n_folds > 1``.

    Args:
        x1, x2: Forwarded to ``create_design_matrix``.
        y: Targets.
        method: Forwarded to ``Regression`` / ``cross_validation``.  For
            ``'ols'`` the only regularisation value is ``0``; otherwise nine
            log-spaced values from 1e-3 to 1e3 are used.
        n_folds: Number of cross-validation folds; values ``<= 1`` disable
            cross-validation.
        data_name: Used only in the output file name.
    """
    max_degree = 20
    n_lambdas = 9
    columns = ['degree', 'lambda', 'MSE_train', 'MSE_test', 'R2_train',
               'R2_test', 'bias_train', 'bias_test', 'var_train', 'var_test']

    lambdas = [0] if method == 'ols' else np.logspace(-3, 3, n_lambdas)

    filename = 'error_scores_' + data_name + '_' + method
    if n_folds > 1:
        filename += '_cv'

    # Rows are collected in a plain list and turned into a DataFrame once.
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.0.
    rows = []
    for lambda_ in lambdas:
        for deg in range(1, max_degree + 1):
            X = create_design_matrix(x1, x2, deg=deg)

            if n_folds > 1:
                (mse_train, mse_test, r2_train, r2_test, bias_train,
                 bias_test, var_train, var_test) = cross_validation(
                     X, y, n_folds, method, lambda_)
            else:
                model = Regression(method, lambda_=lambda_)
                model.fit(X, y)
                model.predict(X)
                mse_train = mean_squared_error(model.y, model.y_pred)
                r2_train = r2_score(model.y, model.y_pred)
                bias_train = bias(model.y, model.y_pred)
                var_train = np.var(model.y_pred)
                # Without cross-validation there is no held-out data.
                mse_test = None
                r2_test = None
                bias_test = None
                var_test = None

            rows.append({'degree': deg,
                         'lambda': lambda_,
                         'MSE_train': mse_train,
                         'MSE_test': mse_test,
                         'R2_train': r2_train,
                         'R2_test': r2_test,
                         'bias_train': bias_train,
                         'bias_test': bias_test,
                         'var_train': var_train,
                         'var_test': var_test})

    error_scores = pd.DataFrame(rows, columns=columns)

    print(error_scores)
    error_scores.to_csv(filename + '.csv')
コード例 #14
0
def kFoldErrorChoose(x,y,maxOrder,k):
    """Pick the polynomial order with the lowest mean k-fold error.

    The data ``[x, y]`` is split once with ``kSplit`` into ``k`` partitions.
    For each order from 1 to ``maxOrder`` every partition is used in turn as
    test data, and the mean of the resulting ``meanSquaredError`` values is
    recorded.

    Returns:
        Tuple ``(lowest mean error, order that achieved it)``.
    """
    partitions = kSplit([x, y], k)
    mean_errors = []
    for order in range(1, maxOrder + 1):
        total = 0
        for held_out in range(k):
            # Work on a shallow copy so the pop does not alter `partitions`.
            remaining = copy.copy(partitions)
            test_part = remaining.pop(held_out)
            # NOTE(review): only the first remaining partition is used for
            # training, not all of them -- confirm this is intended.
            train_part = remaining[0]
            fitted = Regression.polyTrain(train_part[0], train_part[1], order)
            total += meanSquaredError(test_part[0], test_part[1], fitted)
        mean_errors.append(total / (k * 1.0))
    # Orders start at 1 while list positions start at 0, hence the +1.
    return min(mean_errors), (argmin(mean_errors) + 1)
コード例 #15
0
ファイル: app.py プロジェクト: dtapian94/numericalMethods
 def build(self):
     """Create the screen manager, register every screen and return it.

     Returns:
         The ``ScreenManager`` holding the main menu and the five method
         screens.
     """
     root = ScreenManager()
     root.transition = SwapTransition()
     root.add_widget(MainMenu())
     # Each method screen is given the manager itself (screenManager=root)
     # and the name it is registered under.
     root.add_widget(
         bm.BracketMethods(screenManager=root, name='bracket_methods'))
     root.add_widget(om.OpenMethods(screenManager=root,
                                    name='open_methods'))
     root.add_widget(
         soe.SystemOfEquations(screenManager=root, name='system_equations'))
     root.add_widget(
         ip.Interpolation(screenManager=root, name='interpolation'))
     root.add_widget(rg.Regression(screenManager=root, name='regression'))
     return root
コード例 #16
0
ファイル: MainUI.py プロジェクト: picuzzo2/CSCAMP
def on_click():
    """Validate the nine input fields, save them to CSV and show the result.

    The values of ``var1`` .. ``var9`` are read.  If any of them fails
    ``is_float`` an error box is shown and nothing else happens.  Otherwise
    the values are converted to ``float`` and written, below a header row, to
    ``collectedData.csv`` (overwriting it); ``Regression.predicted()`` is then
    called and the first element of its result is shown in an info box.
    """
    column_names = [
        'fixed acidity', 'volatile acidity', 'citric acid',
        'residual sugar', 'chlorides', 'free sulfur dioxide',
        'total sulfur dioxide', 'sulphates', 'alcohol'
    ]
    # Read in the same order as the CSV columns above.
    raw_values = [
        var1.get(),
        var2.get(),
        var3.get(),
        var4.get(),
        var5.get(),
        var6.get(),
        var7.get(),
        var8.get(),
        var9.get(),
    ]

    if any(is_float(value) != True for value in raw_values):
        messagebox.showerror("Error", "Float or Integer only")
        return

    data = [float(value) for value in raw_values]
    with open('collectedData.csv', 'w', newline='') as f:
        thewriter = csv.writer(f)
        thewriter.writerow(column_names)
        thewriter.writerow(data)

    # NOTE(review): predicted() takes no arguments; presumably it reads the
    # CSV written above -- confirm.
    predicted = Regression.predicted()
    messagebox.showinfo("Quality:", predicted[0])
コード例 #17
0
ファイル: __init__.py プロジェクト: miemieyanglove/MLStudy
# Regression.lwlr(dataMat[0], dataMat, labelMat, 1.0)

# yHat = Regression.lwlrTest(dataMat, dataMat, labelMat, 0.01)

# Regression.plotLwlr(dataMat, labelMat, yHat)
'''

# Reduce coefficient
'''
# dataMat, labelMat = Regression.loadDataSet("E:/TestDatas/MachineLearningInAction/Ch08/abalone.txt")

# ridgeWeights = Regression.ridgeTest(dataMat, labelMat)

# returnMat = Regression.stageWise(dataMat, labelMat, 0.005, 1000)

# Regression.plotParamTrend(returnMat)
'''

# LEGO data set: load it, run 10-fold cross-validation, then stage-wise
# regression.

# Regression.legoDataCollect("E:/TestDatas/MachineLearningInAction/Ch08/lego/")

# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists.
dataArr, labelArr = Regression.loadDataSet("E:/TestDatas/MachineLearningInAction/Ch08/lego/legoData.txt")

# ws = Regression.legoStandRegres(dataArr, labelArr)

Regression.crossValidation(dataArr, labelArr, 10)

# NOTE(review): the result of stageWise() is stored under the name
# `ridgeWeights` although no ridge routine is called here -- confirm naming.
ridgeWeights = Regression.stageWise(dataArr, labelArr)

コード例 #18
0
ファイル: Demo.py プロジェクト: lmc2179/Polynomial-Regression
	raise Exception("Invalid command line argument")


# In the following, D is the data set: D[0] holds all the x values and D[1]
# the matching y values.

# Choose the polynomial order by k-fold cross-validation; 10 and 5 are passed
# in the maxOrder and k positions.
# NOTE(review): the order is chosen before the noise is added below --
# confirm this is intended.
error,order = CV.kFoldErrorChoose(D[0],D[1],10,5)

# Graph the points on the base polynomial
Graph.lineColor(D[0],D[1],'red')

# Add Gaussian noise to the data outputs
D[1] = Data.addGaussianNoise(D[1],1.0/2000)

# Graph them as points in blue
Graph.pointsSimple(D[0],D[1])

# Estimate the coefficients of the polynomial with best order
fit = Regression.polyTrain(D[0],D[1],order)

# Get the function's estimates for the training x values
z = [fit(i) for i in D[0]]

# Graph the points
Graph.lineColor(D[0],z,'g')

# Show the plot
Graph.show()

# Only reported when the script was started without command line arguments.
# A single-argument print(...) call behaves the same on Python 2 and 3.
if len(sys.argv) == 1:
	print("True function was an order " + str(trueOrder) + " polynomial, fit with order " + str(order))
コード例 #19
0
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

# Load the data set and hold out 20% of it as a test split (fixed seed).
# NOTE(review): datasets.load_boston() was removed in scikit-learn 1.2 --
# confirm the pinned scikit-learn version.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
# Initialize the linear regression model
LRModel = reg.LinearRegression()

# Initialize the ridge regression model and set its alpha
RRModel = reg.RidgeRegression()
RRModel.set_params(alpha=alpha)

# Put both models into a list
models = [LRModel, RRModel]

# Initialize an empty list to store the scores of the models
score = []

# Fit each model on the training split, record its score on the test split
# and print its parameters
for model in models:
    model.fit(X_train, y_train)
    score.append(model.score(X_test, y_test))
    print(model.params)

#print the computed scores for the different models in nice format
コード例 #20
0
# Column indices to drop from the data array (column 8 holds the car names)
cols_rmv = [8]

# Data split as fractions: (training, validation)
size = [.75, .25]

split_selection = list()

# One entry per run; every entry is the same `size` list object.
runs = 15
for x in range(0, runs):
    split_selection.append(size)

print('Number of Runs: ', runs)
print("Data Split: ", split_selection[0])

# Get the data using the data cleaner.
# Returns a 2D array where rows are observations and columns
# are attributes of a specific observation.
data_array = DataCleaner.data_cleaner("CarData.txt")

# Run the linear regression.
# Arguments are:
#               data_array: The data array created with DataCleaner
#               imputation: The user's choice of imputation
#               cont_dis: The array that represents which cols/attributes are continuous or discrete(0,1)
#               cols_rmv: The columns the user would like to be removed from the data set; here it is the car_name
#               bad data signal: This will be used to determine if and what data points are missing
#               split_selection: Array controlling how many tests are run and the split between test and validation sets
# NOTE(review): the call also passes a literal 0 between the bad-data signal
# ('?') and split_selection that the list above does not describe -- confirm
# its meaning against perform_regression.
# NOTE(review): `imputation` and `cont_dis` are not defined in this part of
# the script; presumably they are set earlier -- confirm.
Regression.perform_regression(list(data_array), imputation, cont_dis, cols_rmv,
                              '?', 0, split_selection)
コード例 #21
0
# plt.axvline(x= (noms(w_0)/(1000*2*np.pi))+(noms(w_pl)/(1000*2*np.pi)) , color='r', linestyle='--', label=r'Omega 0')
# plt.axvline(x= (noms(w_0)/(1000*2*np.pi))+(noms(w_mi)/(1000*2*np.pi)) , color='g', linestyle='--', label=r'Omega 0')
# Finish and save the plot of U_c/U_er against frequency, then clear the
# figure for the next plot.
plt.ylabel(r'$U_c/U_er $')
plt.xlabel(r'$v \:/\: \si{\kilo\hertz}$')
plt.legend(loc='best')
plt.tight_layout(pad=0, h_pad=1.08, w_pad=1.08)
plt.savefig('build/U_gegen_v.pdf')
plt.clf()

# Zoom: index of the largest ratio U_c/U_er
Indem_max = np.argmax(U_c_ges/U_er_ges)

# Rising edge: linear fit through the five points at indices
# Indem_max-6 .. Indem_max-2
params = ucurve_fit(reg.reg_linear, noms(fre_ges[Indem_max-6:Indem_max-1]) ,  noms(U_c_ges[Indem_max-6:Indem_max-1]/U_er_ges[Indem_max-6:Indem_max-1]))
t_plot = np.linspace(32.5, 33.95, 2)
# NOTE(review): in this non-raw string '\t' is a tab character, so the label
# is not the LaTeX it appears to be -- confirm the intended label text.
plt.plot(t_plot, reg.reg_linear(t_plot, *noms(params)), 'b-', label='$Fit_\t{1}$')


# Solve (max ratio)/sqrt(2) = params[0]*x + params[1] for x.
# Assumes reg_linear(x, m, b) = m*x + b -- TODO confirm.
X = np.array([((np.amax(U_c_ges/U_er_ges))/unp.sqrt(2) - params[1]) / params[0]])

print('X')
print(X)

# Falling edge: linear fit through the five points at indices
# Indem_max+1 .. Indem_max+5
params = ucurve_fit(reg.reg_linear, noms(fre_ges[Indem_max+1:Indem_max+6]) , noms(U_c_ges[Indem_max+1:Indem_max+6]/U_er_ges[Indem_max+1:Indem_max+6]))
t_plot = np.linspace(33.95, 35, 2)
# NOTE(review): same '\t' tab-escape issue as in the label above.
plt.plot(t_plot, reg.reg_linear(t_plot, *noms(params)), 'y-', label='$Fit_\t{2}$')

# Same computation as for X, using the falling-edge fit.
Y = np.array([((np.amax(U_c_ges/U_er_ges)/unp.sqrt(2)) - params[1]) / params[0]])
print('Y')
print(Y)
コード例 #22
0
#
# plt.plot(t_plot, reg.reg_linear(t_plot, *noms(params)), 'b-', label='Fit')
# plt.xlim(t_plot[0], t_plot[-1])
# # plt.xlabel(r'$t \:/\: \si{\milli\second}$')
# # plt.ylabel(r'$U \:/\: \si{\kilo\volt}$')
# plt.legend(loc='best')
# plt.tight_layout(pad=0, h_pad=1.08, w_pad=1.08)
# plt.savefig('build/test-plot.pdf')

# Read-off values for the cutoff frequencies, and their conversion
f_gr = 9
f_gr1 = 5.60
f_gr2 = 14.73
f_gr3 = 18.15

# Convert each read-off value with exp(reg_linear(value, m, b)).  f_gr uses
# the (m1, b1) fit, the other three use the (m2, b2) fit.
f_gr = np.exp(reg.reg_linear(f_gr, noms(m1), noms(b1)))
f_gr1 = np.exp(reg.reg_linear(f_gr1, noms(m2), noms(b2)))
f_gr2 = np.exp(reg.reg_linear(f_gr2, noms(m2), noms(b2)))
f_gr3 = np.exp(reg.reg_linear(f_gr3, noms(m2), noms(b2)))

# Write the results to the build directory; the frequencies are scaled by
# 1e-3 and labelled as kilohertz.
write('build/Z_w_gr.tex', make_SI(Wellenwiderstand(2*np.pi*f_gr), r'\ohm', figures=0))
write('build/f_mess.tex', make_SI(f_gr*1e-3, r'\kilo\hertz', 'e-3',figures=1))
write('build/f1_mess.tex', make_SI(f_gr1*1e-3, r'\kilo\hertz', 'e-3',figures=1))
write('build/f2_mess.tex', make_SI(f_gr2*1e-3, r'\kilo\hertz', 'e-3',figures=1))
write('build/f3_mess.tex', make_SI(f_gr3*1e-3, r'\kilo\hertz', 'e-3',figures=1))

# Theoretical values of the cutoff frequencies
w_th = 2 / np.sqrt(L*C1)
w1_th = np.sqrt(2/(L*C1))
w2_th = np.sqrt(2/(L*C2))
w3_th = np.sqrt( 2/L * (C1+C2)/(C1*C2) )
コード例 #23
0
    'build/Tabelle_Verdampfungskurve.tex',
    [],
    [r'$T \:/\: \si{\kelvin}$',
    r'$p \:/\: \si{\bar}$',
    r'$\frac{1}{T} \:/\: 10^{-3}\si{\per\kelvin}$',
    r'$\ln{(p/\si{\pascal})}$']))

# Fit of the vaporization curve: linear fit of ln(p1) against 1/T1
params = ucurve_fit(reg.reg_linear, 1/T1, np.log(p1), p0=[-1, 1])
m1, b1 = params
write('build/m1.tex', make_SI(m1, r'\kelvin', '', 1))   # 1 significant figure
write('build/b1.tex', make_SI(b1, r'', '', 1))   # 1 significant figure

# Plot ln(p) vs 1/T  -> vaporization curve
T_plot = np.linspace(np.amin(1/T1), np.amax(1/T1), 100)
plt.plot(T_plot*1e3, reg.reg_linear(T_plot, *noms(params)), 'b-', label='Fit')
plt.plot(1/T1*1e3, np.log(p1), '.r', label='Messdaten')
# Widen the x range on both sides by (plotted span) / (number of points).
plt.xlim(1e3*(T_plot[0]-1/np.size(T1)*(T_plot[-1]-T_plot[0])), 1e3*(T_plot[-1]+1/np.size(T1)*(T_plot[-1]-T_plot[0])))
plt.xlabel(r'$T^{-1} \:/\: 10^{-3}\si{\per\kelvin}$')
plt.ylabel(r'$\ln(p / \si{\pascal})$')
plt.legend(loc='best')
plt.tight_layout(pad=0, h_pad=1.08, w_pad=1.08)
plt.savefig('build/Verdampfungskurve.pdf')

# Molar gas constant with its uncertainty, then L1 = -R * m1 from the slope
# of the fit above.
R = const.physical_constants["molar gas constant"]      # value, unit, error
R_unc = ufloat(R[0],R[2])
write('build/R.tex', make_SI(R_unc, r'\joule\per\mol\per\kelvin'))
L1 = -R_unc * m1
write('build/L.tex', make_SI(L1*1e-3, r'\kilo\joule\per\mol', '', 1))      # one significant figure

#####################################################################################################################################################
コード例 #24
0
########DEFINE COLLECTION FIELDS##########
print('DATA_COLLECTION_BEGIN')
# Period lists used when building the collection fields.
inputPeriods = [5, 10, 20, 50, 100, 200]
pastReturnPeriods = [1, 2, 5, 10, 20, 50, 100]
retPeriods = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50]
# Shared input fields: the adjusted close, and an SMA(100, ...) over the
# adjusted volume.
adjClose = ifld.AdjClose()
longVolume = ifld.SMA(100, ifld.AdjVolume())
# Filled by the code that follows.
collectionFields = []

#import random
#randomSymbols = random.sample(list(stockData.index.get_level_values('Symbol').unique()),2)
#stockData = stockData[stockData.index.get_level_values('Symbol').isin(randomSymbols)]

linRegressions = []
for period in inputPeriods:
    linearReg = reg.Regression(period, adjClose)
    linRegressions.append(linearReg)
    collectionFields.extend(linearReg.getRegFieldsList())
    sdPeriod = ifld.SD(period, ifld.PcntChange(1, False, adjClose),
                       'SD_PCNT_' + str(period))
    rollingMin = ifld.RollingMin(period, adjClose)
    rollingMax = ifld.RollingMax(period, adjClose)
    minDuration = ifld.ExtremeDuration(period, adjClose, False,
                                       'Min_Duration_' + str(period))
    maxDuration = ifld.ExtremeDuration(period, adjClose, True,
                                       'Max_Duration_' + str(period))
    minDurationLag = ifld.Lag(minDuration, 1,
                              'Min_Duration_' + str(period) + '_Lag')
    maxDurationLag = ifld.Lag(maxDuration, 1,
                              'Max_Duration_' + str(period) + '_Lag')
    retracedFromHigh = ifld.Divide(ifld.RetracementPcnt(period, True),
コード例 #25
0
from pylab import *
from numpy import *
from Regression import *

Reg = Regression()

# Load the data; each row has p predictor columns followed by one response
# column.
data = loadtxt('Q1.data')
p = 13

# Shuffle the rows, then compute the index that splits off the first 66%.
data = data.reshape(-1,p+1)
# A real list is required: shuffle() works in place, and a Python 3 range
# object cannot be modified.
order = list(range(shape(data)[0]))
random.shuffle(order)
data = data[order,:]
split = int(len(data)*.66)

# Scale every column (response included) by its standard deviation, taken
# from the diagonal of the covariance matrix.
covX = cov(transpose(data))
sdX = sqrt(diag(covX))
for i in range(p+1):
    data[:,i] = data[:,i]/sdX[i]


traindata = data[0:split,:]
testdata = data[split:len(data),:]

# Split off the response (column p) for both parts.
ytrain = traindata[:,p]
ytrain = transpose(matrix(ytrain))
N = len(ytrain)
ytest = testdata[:,p]
コード例 #26
0
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

# Load the data set and hold out 20% of it as a test split (fixed seed).
# NOTE(review): datasets.load_boston() was removed in scikit-learn 1.2 --
# confirm the pinned scikit-learn version.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
rdg_regress = reg.RidgeRegression()
rdg_regress.set_params(alpha=alpha)
models = [reg.LinearRegression(), rdg_regress]

# Both dictionaries are keyed by the model's class name.
model_scores = {}
model_params = {}

for model in models:
    model.fit(X_train, y_train)
    model_name = model.__class__.__name__
    # Score once and reuse the value for both the table and the message.
    test_score = model.score(X_test, y_test)
    model_scores[model_name] = test_score
    model_params[model_name] = model.get_params()
    print("The model is : {}. The R-square value in the test dataset is : {}.".
          format(model_name, test_score))

# The best model is the one with the highest test score.
best_model = max(model_scores, key=model_scores.get)
print("The best model is : {} \nParameters are : \n{}".format(
    best_model, model_params[best_model]))
コード例 #27
0
# write('build/D2.tex', make_SI(D2*1e5, r'\kilogram\square\meter\per\square\second', 'e-5', figures=1))

# Method 1: linear fit of y = 4*pi^2*(Theta_Kugel+Theta_Aufhaengung)/T^2
# against B; the two fit parameters are named m and D.
y = 4*np.pi**2*(Theta_Kugel+Theta_Aufhaengung)/T**2
params = ucurve_fit(reg.reg_linear, B, y)         # linear fit
m, D = params
write('build/m.tex', make_SI(m*1e3, r'\ampere\square\meter', 'e-3', figures=1))
write('build/D.tex', make_SI(D*1e5, r'\kilogram\square\meter\per\square\second', 'e-5', figures=1))
# D = 4*(np.pi**2)*(Theta_Kugel+Theta_Aufhaengung)/(T**2)
# Method 2: compute m point by point using D_ohneB, then take the mean and
# MeanError of the nominal values.
m_th = 1/B * (4*(np.pi**2) * (Theta_Kugel+Theta_Aufhaengung) / T**2 - D_ohneB)
m_th_unc = ufloat(np.mean(noms(m_th)), MeanError(noms(m_th)))
write('build/m_th.tex', make_SI(m_th_unc*1e3, r'\ampere\square\meter', 'e-3', figures=1))
# print(m_th)

t_plot = np.linspace(np.amin(B), np.amax(B), 100)
#
# Both lines are plotted with x scaled by 1e3 and y scaled by 1e5, matching
# the axis labels below.
plt.plot(t_plot*1e3, reg.reg_linear(t_plot, *noms(params))*1e5, 'b-', label='Methode 1')
plt.plot(t_plot*1e3, reg.reg_linear(t_plot, np.mean(noms(m_th)), np.mean(noms(D)))*1e5, 'g-', label='Methode 2')
# plt.plot(B * 1e3, noms(y)*1e5, 'rx', label='Messdaten')
plt.errorbar(B * 1e3, noms(y) * 1e5, fmt='r.', yerr=stds(y) * 1e5, label='Messdaten')
## plt.xscale('log')    # logarithmic x axis
# Widen the x range on both sides by (plotted span) / (number of points).
plt.xlim((t_plot[0]-1/np.size(B)*(t_plot[-1]-t_plot[0]))*1e3, (t_plot[-1]+1/np.size(B)*(t_plot[-1]-t_plot[0]))*1e3)
plt.xlabel(r'$B \:/\: \SI{e-3}{\tesla}$')
plt.ylabel(r'$\frac{4\pi^2 \Theta_\text{Gesamt}}{T^2} \:/\: \SI{e-5}{\kilogram\square\meter\per\square\second}$')
plt.legend(loc='best')
plt.tight_layout(pad=0, h_pad=1.08, w_pad=1.08)
plt.savefig('build/zeta.pdf')

# Compute the Earth's magnetic field
B_Erde = (D_mitB-D_ohneB)/m
write('build/B_Erde.tex', make_SI(B_Erde*1e6, r'\micro\tesla', figures=1))
コード例 #28
0
print(stds(Falldauer_unc))
# Plot the measured values with error bars against 1/T_2 (x scaled by 1e3)
# plt.plot(1/T_2*1e3, noms(eta_gr_b_log), 'rx', label='Messdaten')
# plt.plot(t * 1e3, U * 1e3, 'rx', label='Messdaten')
plt.errorbar(1/T_2*1e3, noms(eta_gr_b_log), yerr=stds(eta_gr_b_log),fmt='r.', label='Messdaten')
# plt.xscale('log')    # logarithmic x axis
# plt.xlim(t_plot[0] * 1e3, t_plot[-1] * 1e3)

# The fit line is drawn over a fixed x range instead of the data range (the
# data-range variant is kept commented out below).
# t_plot = np.linspace(np.amin(1/T_2), np.amax(1/T_2), 10)
t_plot = np.linspace(0.003, 0.00345, 10)
# plt.xlim(t_plot[0]-1/np.size(T_2)*(t_plot[-1]-t_plot[0]), t_plot[-1]+1/np.size(T_2)*(t_plot[-1]-t_plot[0]))
#
# NOTE(review): the label printed is 'Max' but the values are the minimum and
# the maximum of 1/T_2.
print('Max')
print(np.amin(1/T_2), np.amax(1/T_2))

# NOTE(review): `params` is not assigned in this part of the script;
# presumably it comes from a fit performed earlier -- confirm.
plt.plot(t_plot * 1e3, reg.reg_linear(t_plot, *noms(params)), 'b-', label='Fit')

plt.xlabel(r'$\frac{1}{T} \:/\: \SI{e-3}{\per\kelvin}$')
plt.ylabel(r'$\text{ln}\left(\frac{\eta}{\si{\kilogram\meter\per\second}}\right)$')
plt.legend(loc='best')
plt.tight_layout(pad=0, h_pad=1.08, w_pad=1.08)
plt.savefig('build/Plot1.pdf')

# Write the table of T and Falldauer_roh, skipping the first entry of each.
print(Falldauer_roh[1])
write('build/Tabelle_b_1.tex', make_table([T[1:], Falldauer_roh[1:]],[0, 0]))
# FULLTABLE
write('build/Tabelle_b_1_texformat.tex', make_full_table(
    'Messdaten Falldauer in Abhängigkeit der Temperatur.',
    'table:b_1',
    'build/Tabelle_b_1.tex',
    [],              # Hier aufpassen: diese Zahlen bezeichnen diejenigen resultierenden Spaltennummern,
コード例 #29
0
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt

# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2, so this
# script needs an older scikit-learn -- confirm the pinned version.
dataset = datasets.load_boston()
# 80/20 train/test split with a fixed random seed.
X_train, X_test, y_train, y_test = train_test_split(dataset['data'], 
                                                    dataset['target'], 
                                                    test_size=0.2, 
                                                    random_state=42)


# Sweep alpha from 0.05 in steps of 0.09 (stop value 1 excluded) and record
# the ridge model's test-set score for each value.
alpha_list = np.arange(0.05,1,0.09)
score_list = []
for a in alpha_list:
    #model = reg.RidgeRegression(alpha)
    # The model is constructed with 0.1 and then immediately re-configured to
    # the alpha under test via set_params.
    model = reg.RidgeRegression(0.1)
    model.set_params(alpha = a)
    
    model.fit(X_train, y_train)
    # y_predict is not used in the visible part of this script.
    y_predict = model.predict(X_test)
    score = model.score(X_test,y_test)
    score_list.append(score)
    
plt.plot(alpha_list,score_list,label = 'Ridge Regression')

# Second sweep over the same alphas with reg.LinearRegression. The loop
# variable `alpha` is not passed to the model in the visible body, so every
# iteration fits and scores an identically configured model.
score_list_l = []
for alpha in alpha_list:
    model = reg.LinearRegression()
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    score = model.score(X_test,y_test)
コード例 #30
0
# Dashed black horizontal reference line at y=85, labelled '85%'.
plt.axhline(y=85, color='k', linestyle='--', label='85%')
# One x tick per integer from 1 to `features`.
plt.xticks(np.arange(1, features + 1, 1))

plt.xlabel('Number of Components')
plt.ylabel('Variance Explained')
plt.legend()

plt.show()

#####################

#####################
## Linear Regression
#####################

linear = R.LinearRegression()
X_train, X_test, y_train, y_test = data.getSplitData()
# NOTE(review): train() receives the split data, and fit() is then called on
# X and y, which are defined outside this excerpt -- confirm that both calls
# are intended.
linear.train(features,
             X_train,
             X_test,
             y_train,
             y_test,
             n_jobs=1,
             verbose=True,
             startIndex=1)
linear.fit(X, y)
#func = linear.function(columnNames=['D','E', 'F', 'G', 'L', 'P', 'U', 'AA', 'AB', 'AD'], featureStartIndex = 3)
#func = linear.function(columnNames=['D','E', 'F', 'G', 'P','W','X','Y','AA', 'AB', 'AD'], featureStartIndex = 3)

linear.function(columnNames=[
    feature_columns[letter - ord('A')]
コード例 #31
0
    # 2x2 grid of line plots on figure 1, one shortData column per panel.
    plt.figure(1)
    plt.subplot(221)
    plt.plot(shortData['beta0'])
    plt.title('Beta 0')
    plt.subplot(222)
    plt.plot(shortData['CNBrepo'])
    plt.title('CNB repo rate')
    plt.subplot(223)
    plt.plot(shortData['y10Yforecast'])
    plt.title('10Y Yield forecast')
    plt.subplot(224)
    # NOTE(review): this panel plots 'PriborSpread' but is titled
    # 'Implied forward 1Y', and the OLS formula below uses 'ImpFwd1Y' --
    # confirm which column is meant here.
    plt.plot(shortData['PriborSpread'])
    plt.title('Implied forward 1Y')
    plt.show()

    # Model specification passed to Regression.EstimateOLS as a formula string:
    # beta0 on the left, y10Yforecast, ImpFwd1Y and CNBrepo on the right.
    ols = Regression.EstimateOLS(shortData,
                                 'beta0 ~ y10Yforecast + ImpFwd1Y + CNBrepo')

# Always-on section (the `if True:` acts as a manual on/off switch).
if True:
    # Differences of `data` from '2011-02-01' onwards; the [1:len(data)] slice
    # skips the first row (presumably a pandas object, whose first diff() row
    # is NaN -- TODO confirm).
    diffsdata = data['2011-02-01':].diff()[1:len(data)]
    #print(diffsdata)
    # Third positional argument is False; its meaning is defined by
    # Regression.EstimateOLS, which is not visible here.
    ols = Regression.EstimateOLS(diffsdata, 'beta0 ~ y10Yforecast + CNBrepo',
                                 False)
    #print(ols.summary())
    #plt.figure(1)
    #plt.plot(diffsdata['beta0'])
    #plt.plot(ols.fittedvalues)
    #plt.show()
    # NOTE(review): the `.ix` indexer was removed in pandas 1.0; on current
    # pandas this line needs `.iloc`/`.loc` -- confirm whether index 0 is
    # meant positionally before changing it.
    shortData['b0_fitted'] = Regression.getFittedLevels(
        ols.fittedvalues, shortData.ix[0, 'beta0'], 'FittedBeta0')
    plt.figure(1)
    plt.plot(shortData['beta0'])
コード例 #32
0
def regres(filename):
    """Return the prediction for ``filename`` as text without enclosing brackets.

    Builds a ``Regression.Regres`` from ``filename``, calls ``predictValue(0)``
    on it, converts the result with ``str()`` and strips any leading or
    trailing ``[`` / ``]`` characters.
    """
    prediction = Regression.Regres(filename).predictValue(0)
    return str(prediction).strip('[]')
コード例 #33
0
from sklearn import datasets
from sklearn.model_selection import train_test_split
#import regression classes
import Regression as Reg

# Load the Boston data and hold out 20% of it for scoring (fixed seed).
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'], dataset['target'], test_size=0.2, random_state=42)

# Candidates: a LinearRegression and a RidgeRegression from the Reg module;
# only the ridge model is given an alpha.
alpha = 0.1
models = [Reg.LinearRegression(), Reg.RidgeRegression()]
linreg, ridreg = models
ridreg.set_params(alpha=alpha)

# Fit every candidate on the training split and report its test-set score.
model_scores = []
for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    model_scores.append(score)
    print(type(model).__name__ + " has R^2 score of: " + str(score))

# The first model holding the highest score wins; show its parameters.
best_model = max(zip(model_scores, models), key=lambda pair: pair[0])[1]
print("The best model is " + type(best_model).__name__)
print("And params for the best model are: ")
print(best_model.get_params())
コード例 #34
0
def squaredErrorChoose(x,y,maxOrder):
	"""Train one polynomial per order and report the smallest error found.

	For every order from 1 to ``maxOrder`` (inclusive) a model is trained with
	``Regression.polyTrain(x, y, order)`` and scored with
	``meanSquaredError(x, y, model)``.

	Returns a tuple ``(smallest_error, order)`` where ``order`` is
	``argmin(errors) + 1``, i.e. the 1-based order belonging to that error.
	"""
	errors = [
		meanSquaredError(x, y, Regression.polyTrain(x, y, order))
		for order in range(1, maxOrder + 1)
	]
	return min(errors), (argmin(errors) + 1)
コード例 #35
0
# Sandbox.py
# Ashish D'Souza and Stephen Brown
# July 26th, 2018
#    _____       __       _____ __          ____        __            ____               _           __
#   / ___/____ _/ /____  / / (_) /____     / __ \____ _/ /_____ _    / __ \_________    (_)__  _____/ /_
#   \__ \/ __ `/ __/ _ \/ / / / __/ _ \   / / / / __ `/ __/ __ `/   / /_/ / ___/ __ \  / / _ \/ ___/ __/
#  ___/ / /_/ / /_/  __/ / / / /_/  __/  / /_/ / /_/ / /_/ /_/ /   / ____/ /  / /_/ / / /  __/ /__/ /_
# /____/\__,_/\__/\___/_/_/_/\__/\___/  /_____/\__,_/\__/\__,_/   /_/   /_/   \____/_/ /\___/\___/\__/
#                                                                                 /___/

import netCDF4
import Regression

# One "emissions" slice per monthly file; the Lat/Lon variables are taken
# from whichever file was opened last.
data = []
lat_array = []
lon_array = []
for month in range(1, 13):
    # File names carry a two-digit month followed by "01", e.g. ...20100101.nc.
    month_tag = str(month).zfill(2)
    dataset = netCDF4.Dataset(
        "C:/Users/skillsusa/Downloads/CH4/CH4_flux_2010" + month_tag + "01.nc",
        "r")
    data.append(dataset.variables["emissions"][0])
    lat_array, lon_array = dataset.variables["Lat"], dataset.variables["Lon"]

# Hand the collected slices and coordinate variables to Regression.predict,
# which receives the output HTML path as its last argument.
Regression.predict(
    data, lat_array, lon_array, 13, 3, [0.5, 0.667],
    "C:/Users/skillsusa/Downloads/Map.html")
コード例 #36
0
# Problem 2 -- Model Scoring -- for Homework 3 of CS107
# Author: Max Li

from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

# Load the Boston data and hold out 20% of it for scoring (fixed seed).
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'], dataset['target'], test_size=0.2, random_state=42)

# A LinearRegression and a RidgeRegression from the reg module; only the
# ridge model is given an alpha (0.1).
models = [reg.LinearRegression(), reg.RidgeRegression()]
linear_model, ridge_model = models
ridge_model.set_params(alpha=0.1)
scores = []

# Fit each model, then print its test-set score followed by its parameters.
for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    scores.append(score)
    print("R-squared: " + str(score))
    print(model.get_params())
コード例 #37
0
##model_performance.py
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import Regression as myReg

# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2, so this
# script needs an older scikit-learn -- confirm the pinned version.
dataset = datasets.load_boston()
# 80/20 train/test split with a fixed random seed.
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

# `alpha` is not referenced again in the visible part of the script; the
# literal 0.1 is passed to set_params below instead.
alpha = 0.1
olsreg = myReg.LinearRegression()
rigreg = myReg.RidgeRegression()

rigreg.set_params(alpha=0.1)

models = [olsreg, rigreg]

# Ten alphas, log-spaced from 1e-2 to 1e1, with one score slot per alpha for
# each of the two models.
alpha_array = np.logspace(-2, 1, 10)
score_array_ols = np.zeros(alpha_array.shape)
score_array_rig = np.zeros(alpha_array.shape)

cnt = 0
for alpha_i in alpha_array:
    for model in models:
        # NOTE(review): set_params(alpha=...) is called on both models,
        # including the LinearRegression instance -- confirm that it accepts
        # an `alpha` parameter.
        model.set_params(alpha=alpha_i)
        model.fit(X_train, y_train)
コード例 #38
0
def _referencePoints(values, noOfPoints):
    """Return ``noOfPoints - 1`` x values stepping upward from ``min(values)``.

    The step is ``(max(values) - min(values)) / noOfPoints``; each value is
    produced once.
    """
    val = min(values)
    step = (max(values) - val) / noOfPoints
    points = []
    for _ in range(1, noOfPoints):
        points.append(val)
        val += step
    return points


def fun():
    """Fit and evaluate a single-feature regression of happiness on GDP.

    Steps, in order:

    1. Load the 'Economy..GDP.per.Capita.' and 'Happiness.Score' columns from
       ``date.txt`` in the current working directory.
    2. Split the samples 80/20 into train and test sets by random index.
    3. Normalise the inputs with ``stdNorm`` and plot the data.
    4. Fit ``Regression.MySGDRegression`` on the training inputs.
    5. Plot the fitted line against the training data and the test data.
    6. Print the mean squared test error, computed manually and with
       ``mean_squared_error``.

    Returns nothing; all results are printed or plotted.
    """
    # NOTE(review): the split below draws from np.random; confirm that `seed`
    # is NumPy's seeding function, otherwise the split is not reproducible.
    seed(1)
    crtDir = os.getcwd()
    filePath = os.path.join(crtDir, 'date.txt')

    inputs, outputs = loadDataSingleFeature(filePath,
                                            'Economy..GDP.per.Capita.',
                                            'Happiness.Score')
    print('in:  ', inputs[:5])
    print('out: ', outputs[:5])

    # Random 80/20 split by index. A set gives O(1) membership checks instead
    # of scanning the NumPy array once per index.
    indexes = list(range(len(inputs)))
    trainSample = np.random.choice(indexes,
                                   int(0.8 * len(inputs)),
                                   replace=False)
    trainIndexSet = set(trainSample.tolist())
    testSample = [i for i in indexes if i not in trainIndexSet]

    trainInputs = [inputs[i] for i in trainSample]
    testInputs = [inputs[i] for i in testSample]

    trainOutputs = [outputs[i] for i in trainSample]
    testOutputs = [outputs[i] for i in testSample]

    norm = stdNorm()
    # First call runs on all inputs and its result is discarded (presumably it
    # lets the normaliser record statistics of the full set -- TODO confirm
    # against stdNorm).
    norm.statisticalNormalisation(trainInputs + testInputs)
    trainInputs = norm.statisticalNormalisation(trainInputs)
    testInputs = norm.statisticalNormalisation(testInputs)

    plotDataHistogram(trainInputs + testInputs, 'Capita GDP')

    plotDataHistogram(trainOutputs + testOutputs, 'Happiness score')

    plotData2D(trainInputs + testInputs, trainOutputs + testOutputs)

    # fit() expects a matrix of noSamples x noFeatures, hence one-element rows.
    regressor = Regression.MySGDRegression()
    regressor.fit([[el] for el in trainInputs], trainOutputs)

    w0, w1 = regressor.intercept_, regressor.coef_[0]

    noOfPoints = 50

    # Fitted line over the range of the training inputs.
    xrefTrain = _referencePoints(trainInputs, noOfPoints)
    yrefTrain = [w0 + w1 * el for el in xrefTrain]
    plot2DModel(trainInputs, trainOutputs, xrefTrain, yrefTrain)

    computedTestOutputs = regressor.predict([[el] for el in testInputs])

    # Fitted line over the range of the test inputs. The y values must be
    # recomputed from these x values; reusing the training-range y values
    # would pair each x with the prediction for a different point.
    xrefTest = _referencePoints(testInputs, noOfPoints)
    yrefTest = [w0 + w1 * el for el in xrefTest]
    plot2DModel(testInputs, testOutputs, xrefTest,
                yrefTest)  # "predictions vs real test data"

    # Mean squared difference between the predictions and the real outputs.
    squaredSum = sum(
        ((t1 - t2)**2 for t1, t2 in zip(computedTestOutputs, testOutputs)),
        0.0)
    error = squaredSum / len(testOutputs)
    print("prediction error (manual): ", error)

    error = mean_squared_error(testOutputs, computedTestOutputs)
    print("prediction error (tool): ", error)
コード例 #39
0
    meanY = float(Y.mean().values)
    X = dataset.drop(columns=['ERP', 'PRP', 'vendor name', 'model name'])
    # # Separation between train dataset and test dataset with train_frac
    index_separation = int(data_lenght * train_frac)
    Xtrain = X.iloc[:index_separation]
    Ytrain = Y.iloc[:index_separation]
    Xtest = X.iloc[index_separation:]
    Ytest = Y.iloc[index_separation:]

    return Xtrain, Ytrain, Xtest, Ytest, meanY


# Preparing the values and initiating the regression class
# ----------------------------------------------------------
X, Y, Xtest, Ytest, meanY = prepareValues(verbose=True)
# The name `Regression` is bound to an rd.Regression instance here and is
# used as that instance for the rest of the script.
Regression = rd.Regression(X, Y, verbose=True, unified=False)

# # Training the model with X and Y sets
# # -------------------------------------
print(tc.WARNING + "--> Training our regression model..." + tc.ENDC)
Regression.train_model()
print(tc.OKGREEN + "  Training phase of the model finished!" + tc.ENDC)
print(tc.OKGREEN + "  Output model of the training (beta) :" + tc.ENDC)
print(Regression.beta)

# # Testing the model with the held-out Xtest and Ytest sets
# # ----------------------------------------------------------
print(tc.WARNING + "--> Testing the model with the last 20% of the dataset!" +
      tc.ENDC)
average_error = Regression.test_model(Xtest, Ytest)
print(tc.OKGREEN + "  Average error :" + tc.ENDC, average_error)