def trainer(epoo=5):
    # initialize number of epochs
    epochs = epoo

    for epo in range(epochs):
        # create loss placeholder
        iterm_loss = 0

        # iterate through the data
        for img, labels in traindownloader:
            img, labels = img.to(device), labels.to(device)

            # clear the gradients
            optimin.zero_grad()

            # compute the forward pass
            output = model(img)

            # compute loss
            loss = criterion(output, labels)

            # store the loss
            iterm_loss += loss.item()

            # compute backward pass
            loss.backward()

            # update weights
            optimin.step()
        else:
            print2(f"Epoch {epo+1}/{epochs} "
                   f"Training loss: {iterm_loss/len(traindownloader):.4f}")
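# trainer() relies on several globals that are defined elsewhere in these scripts
# (model, criterion, optimin, traindownloader, device, print2). A minimal sketch of
# one possible setup is given below; the architecture and hyperparameters are
# illustrative assumptions, not the original configuration.
import torch as tch
from torch import nn, optim

device = tch.device("cuda" if tch.cuda.is_available() else "cpu")

# hypothetical classifier for 28x28 images (flattened to 784 inputs, 10 classes)
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 10),
                      nn.LogSoftmax(dim=1)).to(device)

criterion = nn.NLLLoss()
optimin = optim.SGD(model.parameters(), lr=0.003)

# With these in place (and a DataLoader bound to traindownloader, as in the MNIST
# scripts below), trainer(5) runs five passes over the data and prints the mean
# training loss per epoch.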
def strcbreak(data, breakpoint):
    import statsmodels.api as sm
    from scipy import stats as st

    mr = data
    before = mr.loc[:breakpoint]
    after = mr.loc[breakpoint:]
    # print2("#"*20, before.tail())

    kmr = np.ones([mr.shape[0]])
    kkb = np.ones([before.shape[0]])
    kka = np.ones([after.shape[0]])

    mr_intercept = sm.add_constant(kmr)
    before_with_intercept = sm.add_constant(kkb)
    after_with_intercept = sm.add_constant(kka)

    # # Fit OLS regressions to the total period
    # result = sm.OLS(mr, mr_intercept).fit()
    #
    # # Retrieve the sum-of-squared residuals
    # ssr_total = result.ssr
    #
    # # Fit OLS regressions to each sub-period
    # r_b = sm.OLS(before, before_with_intercept).fit()
    # r_a = sm.OLS(after, after_with_intercept).fit()
    #
    # # Retrieve the sum-of-squared residuals
    # ssr_before = r_b.ssr
    # ssr_after = r_a.ssr

    # Fit a robust (Huber) regression to the total period
    result = sm.RLM(mr, mr_intercept, M=sm.robust.norms.HuberT()).fit()

    # Retrieve the root of the sum-of-squared residuals
    ssr_total = np.sqrt(np.power(result.resid, 2).sum())

    # Fit robust regressions to each sub-period
    r_b = sm.RLM(before, before_with_intercept, M=sm.robust.norms.HuberT()).fit()
    r_a = sm.RLM(after, after_with_intercept, M=sm.robust.norms.HuberT()).fit()

    # Get the root of the sum-of-squared residuals for both regressions
    ssr_before = np.sqrt(np.power(r_b.resid, 2).sum())
    ssr_after = np.sqrt(np.power(r_a.resid, 2).sum())

    # Compute and display the Chow test statistic
    d_f = 1
    df2 = 2 * d_f
    numerator = (ssr_total - (ssr_before + ssr_after)) / d_f
    denominator = (ssr_before + ssr_after) / (mr.shape[0] / 2 - df2)
    print("Chow test statistic: ", numerator / denominator)

    f = st.f.ppf(q=1 - 0.01, dfn=d_f, dfd=(mr.shape[0] / 2 - df2))
    print2(f"F critical point: {f}")
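# A minimal usage sketch for strcbreak(), assuming numpy (as np) and print2 (from
# printdescribe) are already in scope as in the surrounding scripts. The series and the
# break date below are synthetic, purely for illustration: the mean shifts upward
# halfway through the sample, which is the kind of structural break the test detects.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.date_range("2018-01-01", periods=500, freq="D")
returns = pd.Series(np.r_[rng.normal(0.000, 0.01, 250),
                          rng.normal(0.003, 0.01, 250)], index=idx)

strcbreak(returns, breakpoint="2018-09-08")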
def return_var(prices, probb):
    # expected price of each scenario: probability-weighted average of its prices
    r = [np.dot(np.array(i[0]), np.array(i[1])) for i in zip(prices, probb)]

    # standard deviation of each scenario around its expected price
    t = [
        np.sqrt(
            np.dot(np.array(i[2]), np.subtract(np.array(i[0]), np.array(i[1]))**2))
        for i in zip(prices, r, probb)
    ]
    print2(r, t)
    return r, t
def check__nulls(df):
    """
    Test and report number of NAs in each column of the input data frame

    :param df: pandas.DataFrame
    :return: None
    """
    for col in df.columns:
        _nans = np.sum(df[col].isnull())
        if _nans > 0:
            print(f'{_nans} NaNs in column {col}')

    print2(f'New shape of {get__name(df)}: {df.shape}')
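# A quick usage sketch for check__nulls() on a tiny synthetic frame, assuming numpy,
# pandas, print2 and the get__name() helper from these scripts are in scope; the demo
# frame below is purely illustrative.
import numpy as np
import pandas as pd

demo = pd.DataFrame({"a": [1.0, np.nan, 3.0],
                     "b": [np.nan, np.nan, 1.0],
                     "c": [1.0, 2.0, 3.0]})

check__nulls(demo)   # expected report: 1 NaN in column a, 2 NaNs in column b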
def getload_decade(start=1920, end=1929, extension='prn'):
    """Specify the starting year of the decade, e.g. 1900, 2010, 2009."""
    webaddress = f'https://www.nyse.com/publicdocs/nyse/data/Daily_Share_Volume_{start}-{end}.{extension}'

    try:
        link = requests.get(webaddress)
        print2(link.status_code)

        if link.status_code == 404:
            raise ValueError("File not found")
        else:
            if extension == "prn":
                data = pd.read_csv(webaddress, sep=' ', parse_dates=['Date'],
                                   engine='python').iloc[2:, 0:2]
                print2(data.head(), data.columns)
                data.loc[:, " Stock U.S Gov't"] = pd.to_numeric(
                    data.loc[:, " Stock U.S Gov't"], errors='coerce')
                data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
                data.columns = ['Date', 'Volume']
                print2(f"Successfully downloaded {start}-{end}")
                return data
            else:
                data = pd.read_csv(webaddress)
                data.iloc[:, 0] = data.iloc[:, 0].apply(lambda x: str(x).strip(' '))
                data = data.iloc[:, 0].str.split(' ', 1, expand=True)
                data.columns = ['Date', 'Volume']
                data.loc[:, "Volume"] = pd.to_numeric(data.loc[:, "Volume"],
                                                      errors='coerce')
                data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
                print2(f"Successfully downloaded {start}-{end}")
                return data

    except Exception:
        print2("There was an issue with the download.\n"
               "You may need a different date range or file extension.\n"
               "Check out https://www.nyse.com/data/transactions-statistics-data-library")
def load_data(start=1920, end=1929, extension="prn"):
    # build the path to the local copy of the file
    path = os.path.join(path33, "Data",
                        f"Daily_Share_Volume_{start}-{end}.{extension}")
    # path = os.path.join(path33, f"Daily_Share_Volume_{start}-{end}.{extension}")

    if extension == "prn":
        data = pd.read_csv(path, sep=' ', parse_dates=['Date'],
                           engine='python').iloc[2:, 0:2]
        print2(data.head(), data.columns)
        data.loc[:, " Stock U.S Gov't"] = pd.to_numeric(
            data.loc[:, " Stock U.S Gov't"], errors='coerce')
        data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
        data.columns = ['Date', 'Volume']
        print2(f"Successfully loaded {start}-{end}")
        return data
    else:
        data = pd.read_csv(path)
        data.iloc[:, 0] = data.iloc[:, 0].apply(lambda x: str(x).strip(' '))
        data = data.iloc[:, 0].str.split(' ', 1, expand=True)
        data.columns = ['Date', 'Volume']
        data.loc[:, "Volume"] = pd.to_numeric(data.loc[:, "Volume"],
                                              errors='coerce')
        data.Date = pd.to_datetime(data.Date, format='%Y%m%d', errors="coerce")
        print2(f"Successfully loaded {start}-{end}")
        return data
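# A minimal usage sketch for load_data(), assuming path33 points at a project folder
# that contains a Data/ subdirectory with the NYSE volume files saved by
# getload_decade() above. The path below is an illustrative placeholder, not the
# original location.
import os
import pandas as pd
from printdescribe import print2

path33 = r"D:\PythonDataScience"          # assumed project root
volumes_1920s = load_data(1920, 1929, "prn")
print2(volumes_1920s.head(), volumes_1920s.dtypes)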
class Patients:
    def __init__(self, name, id, gender):
        self.name = name
        self.id = id
        self.gender = gender

    def __eq__(self, other):
        return self.name == other.name and self.id == other.id\
            and type(self) == type(other)
        # return self.name == other.name and self.id == other.id\
        #     and isinstance(other, Patients)


class Staff:
    def __init__(self, name, id, gender):
        self.name = name
        self.id = id
        self.gender = gender

    def __eq__(self, other):
        return self.name == other.name and self.id == other.id\
            and isinstance(other, Staff)


patient1 = Patients("Charles", 459234, "Male")
patient2 = Patients("Charles", 876323, "Male")
patient3 = Patients("Marylene", 459234, "Female")
patient4 = Patients("Charles", 459234, "Male")
patient5 = Staff("Charles", 459234, "Male")

print2(patient1 == patient2, patient3 == patient1, patient1 == patient4)
print2("$" * 20)
print2(patient1 == patient5, patient5 == patient1)
import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

from printdescribe import print2

nn = np.array([-8.0, -3.0, 0.0, 6.5, 9.0, 45.5]).reshape(-1, 1)
cc = np.array([-1.5, -1.0, -0.5, 1.5, 2.0, 2.5]).reshape(-1, 1)

gm = GaussianMixture(n_components=2, covariance_type='full')
gmm = gm.fit(nn)
pred = gmm.fit_predict(nn)
logprob = gmm.score_samples(nn)
responsibilities = gmm.predict_proba(nn)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]

print2(nn, logprob, responsibilities, pdf, pdf_individual)
print2(np.round(gmm.weights_, 2), np.round(gmm.means_, 2),
       np.round(gmm.covariances_, 2))
print2(np.round(gmm.precisions_, 2), np.round(gmm.precisions_cholesky_, 2))
print2(np.round(pred, 2))
print2(gmm.score(nn))

print("######################################################")
print("This is for [-1.5, -1.0, -0.5, 1.5, 2.0, 2.5] data")

gcc = gm.fit(cc)
pred = gcc.fit_predict(cc)
logprob = gcc.score_samples(cc)
responsibilities = gcc.predict_proba(cc)
pdf = np.exp(logprob)
pdf_individual = responsibilities * pdf[:, np.newaxis]
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from printdescribe import print2, describe2, changepath

# import excel sheets
path = r"D:\Wqu_FinEngr\Portfolio Theory and Asset Pricing\GroupWork"

with changepath(path):
    data = pd.read_excel("GWP_PTAP_Data_2010.10.08.xlsx", skiprows=1, nrows=13,
                         sheet_name='10 SPDRs and S&P 500', index_col=0)

describe2(data)
print2(data)

df_return = data.pct_change().dropna()
print2(df_return)

# df_activeReturn = df_return.sub(df_return.iloc[:,-1], axis=0).drop(['SP_500'], axis=1)
df_activeReturn = df_return.sub(df_return['S&P 500'], axis=0).drop(['S&P 500'], axis=1)
print2(df_activeReturn)

tracking_error = df_activeReturn.std()
mate_ = np.sqrt((df_activeReturn**2).sum() / df_activeReturn.shape[0])
print2(tracking_error, mate_)

# for col in df_return.columns[:-1]:
#     plt.figure(figsize=[10, 8])
# My two other ETFs are
# 1. Vanguard S&P 500 ETF (VOO)
# 2. iShares Core S&P 500 ETF (IVV)
etfs_tickers = ["IVV", "SPY", "VOO", "^GSPC"]

# using 2 years of data from January 01, 2018 to December 31, 2019
starttime = datetime.datetime(2018, 1, 1)
endtime = datetime.datetime(2019, 12, 31)

# get only the closing prices
etfs = pdr.get_data_yahoo(etfs_tickers, starttime, endtime)['Close']
etfs.columns = ["iShares", "SPDR", "Vanguard", "S&P500"]

# print out dataset head
print2(etfs.head())

# compute simple returns
etfs_return = etfs.pct_change().dropna()
# etfs_return.fillna(0, inplace=True)
returns2 = round(etfs_return * 100, 3)
print2(etfs_return, returns2)

# compute active returns
eft_index = etfs_return["S&P500"]
ppp = returns2.sub(returns2.iloc[:, -1], axis=0).drop(['S&P500'], axis=1)
ppp2 = returns2.sub(returns2['S&P500'], axis=0).drop(['S&P500'], axis=1)
etfs_activeR = etfs_return.sub([eft_index, eft_index, eft_index, eft_index],
                               axis='columns')
etfs_activeR.drop("S&P500", axis=1, inplace=True)
import torch

from printdescribe import print2, describe2, changepath
from pytorchFunctions import sigmoid_activation

# set the seed
torch.manual_seed(90)

# create features
features = torch.randn((1, 10))

# define sizes of layers
input_size = features.shape[1]
n_hiddenlayers = 4
n_output = 1

# create weights
feature_weights = torch.randn((input_size, n_hiddenlayers))
hiddenlayer_weights = torch.randn((n_hiddenlayers, n_output))

# create biases
feature_bias = torch.randn((1, n_hiddenlayers))
hiddenlayer_bias = torch.randn((1, n_output))

# y = f2(f1(xW1)W2)
hiddenlayer_output = sigmoid_activation(
    torch.mm(features, feature_weights) + feature_bias)
y = sigmoid_activation(
    torch.mm(hiddenlayer_output, hiddenlayer_weights) + hiddenlayer_bias)

print2(y)
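# sigmoid_activation is imported from the local pytorchFunctions module, which is not
# shown in this section. A minimal sketch of what it presumably does (the standard
# logistic function applied element-wise) is given here as an assumption:
import torch

def sigmoid_activation(x):
    """Element-wise logistic sigmoid: 1 / (1 + exp(-x))."""
    return 1 / (1 + torch.exp(-x))

# torch.sigmoid(x) gives the same result; the hand-rolled version just makes the
# formula explicit for these exercises.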
pp_labels = ["JPMorgan Chase", "Goldman Sachs", "BofA Securities",
             "Morgan Stanley", "Citigroup", "Credit Suisse"]

starttime = datetime.datetime(2000, 1, 1)
endtime = datetime.datetime(2019, 10, 1)

# get only the closing prices
assets = pdr.get_data_yahoo(stocklist, starttime, endtime)['Close']

# initialize the weights
weights = [0.2, 0.15, 0.2, 0.15, 0.2, 0.1]

# compute the simple returns
returns = assets.pct_change().dropna()

# visualise the data
print2(assets.head(), returns.head())
describe2(assets, returns)

# Calculate individual mean daily returns
meanDailyReturns = returns.mean()

# compute portfolio returns
portfolioReturn = returns.dot(weights)

# compute portfolio value for a $1 investment
portfolioValue = (1 + portfolioReturn).cumprod()
print2(portfolioReturn, meanDailyReturns, portfolioValue)

# Define new weights for the portfolio
weights = np.array([0.2, 0.2, 0.2, 0.1, 0.15, 0.15])
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import scipy.stats

from printdescribe import print2

plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = 10, 8
plt.rcParams["axes.facecolor"] = "0.92"
show = plt.show

# Obtaining stock data for Microsoft and benchmark data
start_date = '2013-01-01'
end_date = '2019-12-31'
assets = ['MSFT', 'FDN', 'JPM', 'XLF']
datasets = dr.DataReader(assets, data_source='yahoo',
                         start=start_date, end=end_date)
print2(datasets['Adj Close'].head())

# matplotlib.rcParams['figure.figsize'] = [15, 7]
plt.plot(datasets['Adj Close'])
plt.ylabel('Price')
plt.legend(assets)
plt.grid()
show()

# Obtaining the mean and standard deviation of the assets
means = datasets['Adj Close'].mean()
stddevs = datasets['Adj Close'].std()
print2(means, stddevs)
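# coint_johansen is imported above but not used in the excerpt shown here. A minimal
# sketch of how it could be applied to these price series follows; the deterministic
# term (constant, det_order=0) and one lagged difference are illustrative assumptions,
# not the original settings.
prices = datasets['Adj Close'].dropna()
johansen_result = coint_johansen(prices, det_order=0, k_ar_diff=1)

# trace statistics versus their 90% / 95% / 99% critical values
print2(johansen_result.lr1, johansen_result.cvt)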
                       parse_dates=True, index_col="Date")
# datasets = pd.read_csv("assets.csv", compression='gzip', index_col=0)
# datasets, dataset2 = pd.read_csv(["assets.csv","assets2.csv"])

dataset2.rename(columns={"Adj Close": "SPX"}, inplace=True)
dataset3.drop(columns=["^GSPC"], inplace=True)
# # df.set_index('Date', inplace=True)
# print2(dataset2.head())
# datasets.reindex(dataset2.index)

alldata = pd.concat([dataset3, dataset2], axis=1)
data2 = alldata.copy()
data2 = data2.loc[:"2013-12-20", :]
print2(data2.iloc[:, :5].tail(), data2.shape)

# tt = "https://dumbstockapi.com/stock?format=tickers-only&exchange=NYSE"
# pp = pd.read_json(tt)
# pp = list(pp.values.ravel())

# download data and view
# data2 = dr.DataReader(pp, data_source='yahoo', start=start_date)['Adj Close']
# print2(f"Asset Adjusted Closing Prices shape: {data2.shape}", data2.iloc[:,:10].head())

# drop columns with NaN
data2.dropna(axis=1)
print(data2.iloc[:, :5].head())

# clean the datasets, remove NaN smartly
# Get a summary view of NaNs
oo = data2.isnull().sum()
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = 10, 8
plt.rcParams["axes.facecolor"] = "0.92"

np.random.seed(42)

# create matrices
A = np.linspace(1, 9, 9).reshape(-1, 3)
B = np.arange(10, 26).reshape(-1, 4)

# PLU decomposition; used for square matrices
P, L, U = lu(A)
Pb, Lb, Ub = lu(B)

# print results
print2(P, L, U)
print2(Pb, Lb, Ub)

# recombine the triangular factor matrices
A_ = P @ L @ U
B_ = Pb @ Lb @ Ub

# print results
print2(A_, B_)

#########################################################################################
#########################################################################################

# QR decomposition works for any matrix

# create matrices
A2 = np.linspace(1, 35, 35).reshape(-1, 5)
B2 = np.arange(10, 34).reshape(-1, 4)
#!/usr/bin/env python
import torch

from printdescribe import print2, describe2, changepath
from pytorchFunctions import sigmoid_activation

# set the seed
torch.manual_seed(90)

# generate features vector
features = torch.randn((1, 10))

# generate weights
weights = torch.randn_like(features)

# generate bias
bias = torch.randn((1, 1))

# compute the prediction
prob = sigmoid_activation(torch.sum(features * weights) + bias)
# also can do sigmoid_activation((features * weights).sum() + bias)
prob2 = sigmoid_activation(torch.mm(features, weights.view(-1, 1)) + bias)

if __name__ == "__main__":
    print2(prob, prob2)

## y = f2(f1(xW1)W2)
"~/.pytch/MNIST_data/", download=True, train=True, transform=transformer, ) traindownloader = tch.utils.data.DataLoader(traindata, batch_size=64, shuffle=True) # create an iterator to read the dataset iterloader = iter(traindownloader) img, labels = iterloader.next() # if __name__ == "__main__": print2(type(img), type(labels), img.shape, labels.shape) # display the image plt.imshow(img[1].numpy().squeeze(), cmap="Greys") plt.show() # Flatten the 2D images to 1D images flat1d_img = img.view(img.shape[0], -1) # create model parameters input_size = flat1d_img.shape[1] n_hiddenlayers = 256 n_output = 10 # create weights feature_weights = tch.randn((input_size, n_hiddenlayers))
#                                            timeout=30, session=None, interval='day',
#                                            span='year').read().reset_index()
# dw = durbin_watson(pd.to_numeric(apple.close_price).pct_change().dropna().values)
# print2(f'DW_Statistics: {dw}')

# Define start and end dates
starttime = '2018-01-01'
endtime = '2019-01-01'

# Download Apple stock prices
apple = pdr.get_data_yahoo('AAPL', starttime, endtime)

# Compute the Durbin-Watson statistic on daily returns
dw = durbin_watson(pd.to_numeric(apple.Close).pct_change().dropna().values)
print2(f'DW_Statistics: {dw}')

# Get Nasdaq tickers
tickers = pdr.nasdaq_trader.get_nasdaq_symbols(retry_count=3, timeout=300, pause=None)
etfs = tickers.loc[tickers.ETF == True, :]
symbols = etfs.sample(75).index.tolist()
print2(etfs.head(), etfs.shape, symbols)

# packet = pdr.robinhood.RobinhoodHistoricalReader(symbols, retry_count=3, pause=0.1,
#                                                  timeout=30, session=None, interval='day',
#                                                  span='year')
# data = packet.read().reset_index()
# pivot = data.loc[:, ['symbol', 'begins_at', 'close_price']].drop_duplicates().pivot(
#     index='begins_at', columns='symbol', values='close_price')
# download test dataset
testdata = datasets.FashionMNIST(
    "~/.pytch/F_MNIST_data/",
    download=True,
    train=False,
    transform=transformer,
)
testdownloader = tch.utils.data.DataLoader(testdata, batch_size=64, shuffle=True)

# view the images
img, label = next(iter(traindownloader))
helper.imshow(img[10, :])
print2(label[10])
plt.show()

# define new classifier class
class MyNeuroNetwork(nn.Module):
    _inputs = 784
    _neuron1 = 128
    _neuron2 = 64
    _neuron3 = 32
    _output = 10

    def __init__(self):
        super().__init__()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import seaborn as sns

from printdescribe import print2

# synthetic gene-expression data: 100 genes, 6 wild-type and 6 knock-out samples
genes = ["gene" + str(i) for i in range(1, 101)]
wt = ["wt" + str(i) for i in range(1, 7)]
ko = ["ko" + str(i) for i in range(1, 7)]

data = pd.DataFrame(columns=[*wt, *ko], index=genes)
print2(wt, ko)
print2(data.head())

n = 2
for gene in data.index:
    data.loc[gene, :"wt6"] = np.random.poisson(lam=np.random.randint(10, 100), size=6)
    np.random.seed(90 + n)
    data.loc[gene, "ko1":] = np.random.poisson(lam=np.random.randint(10, 100), size=6)
    n += 5

# centre and scale each gene across samples (samples become rows after the transpose)
scaled_data = preprocessing.scale(data.T)
scaled_data[:20]
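# PCA and Pipeline are imported above but not used in the excerpt shown here. A minimal
# sketch of how the scaled samples-by-genes matrix could be fed to PCA follows; the
# scree-plot styling is an illustrative choice, not the original code.
pca = PCA()
pca_coords = pca.fit_transform(scaled_data)

# percentage of variance captured by each principal component
per_var = np.round(pca.explained_variance_ratio_ * 100, 1)
print2(per_var)

# simple scree plot
plt.bar(range(1, len(per_var) + 1), per_var)
plt.xlabel("Principal component")
plt.ylabel("Percent of explained variance")
plt.show()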
def findIntersection(fun1, fun2, x0):
    return fsolve(lambda x: fun1(x) - fun2(x), x0)


country_list = ['USA', 'GBR', 'MEX', 'CAN', 'ZAF', 'NGA']
startdate = '1970'
enddate = '2019'
crisis_year = pd.to_datetime('1987-01-01')

gdp = wb.download(indicator='NY.GDP.PCAP.KD', country=country_list,
                  start=pd.to_datetime(startdate, yearfirst=True),
                  end=pd.to_datetime(enddate, yearfirst=True))\
        .reset_index().dropna().iloc[::-1, :]

print2(gdp.shape, gdp.head(), gdp.info(), gdp.country.value_counts(dropna=False))

gdp2 = gdp.copy()
gdp2['year'] = pd.to_datetime(gdp2['year'])
gdp2.set_index('year', inplace=True)
gdp2.loc[:, "NY.GDP.PCAP.KD"] = gdp2.groupby('country')["NY.GDP.PCAP.KD"]\
    .apply(lambda x: pd.Series(x).interpolate())

gdp2.groupby(['country'])['NY.GDP.PCAP.KD'].plot()
plt.axvline(crisis_year, color="black")
plt.legend()
plt.show()

print2(gdp2.info())

gdp3 = gdp.copy()
import os
import sys
import shutil

import matplotlib.pyplot as plt
import numpy as np
# import tensorflow as tf
from ipywidgets import interact

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from printdescribe import print2, changepath
from datetime import datetime

print2(" ")

path22 = r"D:\PythonDataScience"
sys.path.insert(0, path22)
import input_data

path2 = r"D:\Wqu_FinEngr\Machine Learning in Finance\CourseMaterials\Module5\WQU_MLiF_Module5_Notebooks\ML M5 Notebooks (updated)"

with changepath(path2):
    print2(os.getcwd())
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

m, n = mnist.train.images.shape
number_to_show = 100
warnings.filterwarnings('ignore')

# instantiate start date
start_date = '2013-01-01'
# end_date = '2020-02-29'

# Download daily Amazon stock adjusted close prices and indexes
assets = ['AMZN', "^GSPC", "^DJI", "^IXIC", "^RUT", "CL=F"]
datasets = dr.DataReader(assets, data_source='yahoo', start=start_date)["Adj Close"]
datasets.tail()

# Name the columns
col = ["Amazon", "Sp500", "Dow20", "Nasdaq", "R2000", "Crude20"]
datasets.columns = col
print2(datasets.head())

datasets.iloc[:, ~datasets.columns.isin(["Dow20", "Nasdaq"])].plot(figsize=(10, 5))

data = datasets.copy()
data['close'] = data["Amazon"]

# compute moving averages
fast_window = 20
slow_window = 50

data['fast_mavg'] = data['close'].rolling(window=fast_window,
                                          min_periods=fast_window,
                                          center=False).mean()
data['slow_mavg'] = data['close'].rolling(window=slow_window,
                                          min_periods=slow_window,
                                          center=False).mean()
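# A minimal sketch of turning the two moving averages into a crossover signal: long (1)
# when the fast average is above the slow one, flat (0) otherwise. This continuation is
# an illustrative assumption, not part of the original notebook, and assumes numpy (np)
# and print2 are in scope as in the other scripts here.
data['signal'] = np.where(data['fast_mavg'] > data['slow_mavg'], 1, 0)

# trades happen where the signal changes
data['trade'] = data['signal'].diff()
print2(data[['close', 'fast_mavg', 'slow_mavg', 'signal']].dropna().tail())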
# %matplotlib inline
# %config InlineBackend.figure_format = "retina"

import numpy as np
import matplotlib.pyplot as plt
import helper
import torch as tch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

from printdescribe import print2, describe2, changepath

plt.style.use("dark_background")

print2(tch.cuda.is_available())

# transformer to transform and normalize
transformer = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, ), (0.5, ))])

traindata = datasets.MNIST(
    "~/.pytch/MNIST_data/",
    download=True,
    train=True,
    transform=transformer,
)
traindownloader = tch.utils.data.DataLoader(traindata, batch_size=64, shuffle=True)
#!/usr/bin/env python
import numpy as np
import pandas as pd

from printdescribe import print2, describe2, changepath

# two possible prices per scenario, with their probabilities
S0 = [[120, 90], [140, 90], [130, 100]]
prob = [[0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]


def return_var(prices, probb):
    # expected price of each scenario: probability-weighted average of its prices
    r = [np.dot(np.array(i[0]), np.array(i[1])) for i in zip(prices, probb)]

    # standard deviation of each scenario around its expected price
    t = [
        np.sqrt(
            np.dot(np.array(i[2]), np.subtract(np.array(i[0]), np.array(i[1]))**2))
        for i in zip(prices, r, probb)
    ]
    print2(r, t)
    return r, t


vars = return_var(S0, prob)
print2(S0, prob, vars)
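# A quick worked check of the first scenario above: prices [120, 90] with probabilities
# [0.5, 0.5] give an expected price of 0.5*120 + 0.5*90 = 105 and a standard deviation of
# sqrt(0.5*(120-105)**2 + 0.5*(90-105)**2) = 15, which should match the first entries of
# the two lists returned by return_var().
expected0, std0 = vars[0][0], vars[1][0]
assert abs(expected0 - 105) < 1e-9 and abs(std0 - 15) < 1e-9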
# select the Average Value Weighted Returns -- Daily (1220 rows x 100 cols)
portfolios100 = portfolios100[0]
factors5 = factors5[0]

# Checking for missing values in the portfolios dataset
print(portfolios100[portfolios100.iloc[:, 0] > 98.0].sum().sum(),
      portfolios100.isnull().sum().sum())

# Checking for missing values in the factors dataset
print(factors5[portfolios100.iloc[:, 0] > 98.0].sum().sum(),
      factors5.isnull().sum().sum())

portfolios100.iloc[:, 90:100].head()
print2(portfolios100.shape, factors5.head(), factors5.shape)

# # 1a. Visually analyze the covariance between various factors and identify
# #     the variance explained in the principal components of these factors.
#
# ## 1b. Next, consider the ACF and PACF of the process and its square.

# pd.melt(factors5.add(1).cumprod().reset_index(),
#         id_vars=["Date"]).hvplot.line(x='Date', y='value', by='variable')

# compute the covariance matrix of the factors
factors_cov = factors5.cov()
factors_cov

plt.figure(figsize=[10, 6])
# Visualize the covariance matrix using a heatmap
    # print(pv_fcfe)
    comp1 = sum(pv_fcfe)
    # print(comp1)

    comp2_ = comp2(round(FCFE[-1], 2), listt[2], listt[3], n, one)
    # print(comp2_)

    mp_ggm = comp1 + comp2_
    return round(mp_ggm, 2)


Year = ["2008", "2009", "2010", "2011", "2012", "2013"]
FCFE_growth = [0.18, 0.18, 0.16, 0.12, 0.11, 0.06]
equity_discount_rate = 0.125
fcfe2007 = 2.0
g = 0.06

print(multi_ggm22(FCFE_growth, fcfe2007, g, equity_discount_rate))

initial = 5000
mmm = []
for i in range(40):
    final = initial * 1.05
    mmm.append(final)
    final += 5000
    initial = final
    if i > 20:
        initial -= 5000

print2(final, mmm, mmm[-1] / 5000)
# initialise the GMM means to the class means of the training data
classifier.means_init = np.array([X_train[y_train == i].mean(axis=0)
                                  for i in range(n_classes)])

# define colors and markers
markers = ["*", "o", "+"]
# colors = ["r", "y", "k"]
colors = ['navy', 'turquoise', 'darkorange']
col = ['r*', 'yo', 'k+']
labels = y_test

# Fit to data and predict using pipelined scaling and the GMM classifier
gmm = make_pipeline(StandardScaler(), classifier)
gmm.fit(X_train)

pred_train = gmm.predict(X_train)
train_accuracy = np.mean(pred_train.ravel() == y_train.ravel()) * 100
print2(f"Train accuracy: {np.round(train_accuracy, 2)}")

pred_test = gmm.predict(X_test)
test_accuracy = np.mean(pred_test.ravel() == y_test.ravel()) * 100
print2(f"Test accuracy: {np.round(test_accuracy, 2)}")

for n, color in enumerate(colors):
    # plot the original data
    data = iris.data[iris.target == n]
    plt.scatter(data[:, 0], data[:, 1], s=2.8, color=color,
                label=iris.target_names[n])

    # plot the test data
    data = X_test[y_test == n]
    plt.plot(data[:, 0], data[:, 1], 'x', color=color)
#!/usr/bin/env python
import numpy as np

from printdescribe import print2, describe2, changepath

nlist = [0.06, 0.07, 0.08, 0.09, 0.10]


def forward_rate(spotList, tstar, t):
    """The forward_rate function calculates the forward rate, e.g. f(T*, T),
    given yearly spot rates.

    Inputs:
        spotList (list): yearly spot rates
        tstar (int): time until initiation of the rate
        t (int): time to maturity from tstar

    Output:
        result (float): forward rate in percent
    """
    nume = pow(1 + spotList[t + tstar - 1], t + tstar)
    denom = pow(1 + spotList[tstar - 1], tstar)
    result = pow((nume / denom), 1 / t) - 1
    return round(result * 100, 2)


print2(forward_rate(nlist, 1, 2))
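# A quick worked check of the call above: with spot rates s1 = 6%, s2 = 7%, s3 = 8%, the
# 2-year forward rate starting in 1 year is
#   f(1, 2) = ((1.08**3 / 1.06) ** (1/2)) - 1 ≈ 0.0901,
# so forward_rate(nlist, 1, 2) should print about 9.01 (in percent).
assert forward_rate(nlist, 1, 2) == round(((1.08**3 / 1.06) ** 0.5 - 1) * 100, 2)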
import numpy as np
import pandas as pd

from printdescribe import print2

# download the dataset
url2 = "https://archive.ics.uci.edu/ml/machine-learning-databases/00529/diabetes_data_upload.csv"
df = pd.read_csv(url2)

# explore the dataset
print2(df.shape, df.dtypes, df.columns, df.info(), df.describe())

# Explore categorical features
for col in [x for x in df.select_dtypes(include=['object'])]:
    print2(df[col].value_counts(ascending=True, dropna=False))

# Add NaNs to the dataset
num = df.Age.max()
for col in [x for x in df.select_dtypes(include=['object'])]:
    numm = num - 3
    df[col] = np.where(df.Age > numm, np.nan, df[col])
    num = numm

# view the new dataset
print2(df.isnull().sum())
print(df.info())

# explore the categorical features again
for col in [x for x in df.select_dtypes(include=['object'])]:
    print2(df[col].value_counts(ascending=True, dropna=False))

# drop NaN rows