def capm(start_date, end_date, ticker1, ticker2, risk_free=None):
    """Estimate CAPM alpha/beta of ``ticker1`` against market proxy ``ticker2``.

    Downloads daily prices from Yahoo Finance, computes monthly log returns,
    estimates beta both from the covariance matrix and from a linear
    regression, plots the fitted CAPM line, and prints the CAPM expected
    annual return.

    Parameters
    ----------
    start_date, end_date : passed through to the Yahoo data reader.
    ticker1 : stock ticker whose beta is estimated.
    ticker2 : market-index ticker used as the benchmark.
    risk_free : annual risk-free rate. When ``None`` (the default) the
        function falls back to the module-level ``risk_free_rate`` global
        that the original code read implicitly, preserving old behaviour.
    """
    # The original body referenced a bare `risk_free_rate` global; expose it
    # as a parameter so new callers don't depend on hidden module state.
    rf = risk_free_rate if risk_free is None else risk_free

    # get the data from Yahoo Finance
    stock1 = pdr.get_data_yahoo(ticker1, start_date, end_date)
    stock2 = pdr.get_data_yahoo(ticker2, start_date, end_date)

    # we prefer monthly returns instead of daily returns (less noise)
    return_stock1 = stock1.resample('M').last()
    return_stock2 = stock2.resample('M').last()

    # creating a DataFrame from the data - Adjusted Closing Price is used as usual
    data = pd.DataFrame(
        {
            's_adjclose': return_stock1['Adj Close'],
            'm_adjclose': return_stock2['Adj Close']
        },
        index=return_stock1.index)

    # natural logarithm of the returns
    data[['s_returns', 'm_returns']] = np.log(
        data[['s_adjclose', 'm_adjclose']] /
        data[['s_adjclose', 'm_adjclose']].shift(1))

    # no need for NaN/missing values so let's get rid of them
    data = data.dropna()

    # covariance matrix: the diagonal items are the variances,
    # off-diagonals are the covariances; the matrix is symmetric
    covmat = np.cov(data["s_returns"], data["m_returns"])
    print(covmat)

    # beta = cov(stock, market) / var(market)
    beta = covmat[0, 1] / covmat[1, 1]
    print("Beta from formula:", beta)

    # linear regression of stock returns on market returns - slope is beta
    beta, alpha = np.polyfit(data["m_returns"], data['s_returns'], deg=1)
    print("Beta from regression:", beta)

    # plot the data points and the fitted CAPM line
    fig, axis = plt.subplots(1, figsize=(20, 10))
    axis.scatter(data["m_returns"], data['s_returns'], label="Data points")
    axis.plot(data["m_returns"], beta * data["m_returns"] + alpha,
              color='red', label="CAPM Line")
    plt.title('Capital Asset Pricing Model, finding alphas and betas')
    plt.xlabel('Market return $R_m$', fontsize=18)
    plt.ylabel('Stock return $R_a$')
    plt.text(0.08, 0.05, r'$R_a = \beta * R_m + \alpha$', fontsize=18)
    plt.legend()
    plt.grid(True)
    plt.show()

    # CAPM: E[R] = rf + beta * (E[R_m] - rf); mean monthly return * 12 annualizes
    expected_return = rf + beta * (data["m_returns"].mean() * 12 - rf)
    print("Expected return:", expected_return)
def get_data(t1, t2):
    """Load two tickers, align them on their shared index, and add
    generic ``t1``/``t2`` alias columns so downstream code does not need
    to know the actual ticker symbols."""
    frames = [load_ticker(t1), load_ticker(t2)]
    merged = pd.concat(frames, axis=1).dropna()
    # ticker-agnostic aliases for prices and precomputed returns
    merged['t1'] = merged[t1]
    merged['t2'] = merged[t2]
    merged['t1_returns'] = merged[t1 + '_returns']
    merged['t2_returns'] = merged[t2 + '_returns']
    return merged
def capm(start, end, ticker1, ticker2, risk_free=None):
    """Estimate CAPM beta of ``ticker1`` against market proxy ``ticker2``
    and print the CAPM expected annual return.

    Parameters
    ----------
    start, end : passed through to the Yahoo data reader.
    ticker1 : stock ticker whose beta is estimated.
    ticker2 : market-index ticker used as the benchmark.
    risk_free : annual risk-free rate. When ``None`` (the default) the
        function falls back to the module-level ``risk_free_return``
        global that the original code read implicitly, preserving old
        behaviour.
    """
    # The original body referenced a bare `risk_free_return` global; expose
    # it as a parameter so new callers don't depend on hidden module state.
    rf = risk_free_return if risk_free is None else risk_free

    # Get stock data
    stock1 = pdr.get_data_yahoo(ticker1, start, end)
    # Get market data
    stock2 = pdr.get_data_yahoo(ticker2, start, end)

    # Resample for monthly data
    return_s1 = stock1.resample('M').last()
    return_s2 = stock2.resample('M').last()

    # Create a dataframe with the adjusted close
    data = pd.DataFrame(
        {
            's_adjclose': return_s1['Adj Close'],
            'm_adjclose': return_s2['Adj Close']
        },
        index=return_s1.index)

    # Calc the stock and market returns as log(price_n / price_{n-1})
    data[['s_returns', 'm_returns']] = np.log(
        data[['s_adjclose', 'm_adjclose']] /
        data[['s_adjclose', 'm_adjclose']].shift(1))

    # Drop null values
    data = data.dropna()

    # Generate covariance matrix
    covmat = np.cov(data["s_returns"], data["m_returns"])

    # Calc beta from matrix: cov(stock, market) / var(market)
    beta = covmat[0, 1] / covmat[1, 1]
    print("Beta from formula: ", beta)

    # Calc beta from regression (slope is beta, intercept is alpha)
    beta, alpha = np.polyfit(data["m_returns"], data["s_returns"], deg=1)
    print("Beta from regression: ", beta)

    # Calc expected return; mean monthly market return * 12 annualizes
    expected_return = rf + beta * (data["m_returns"].mean() * 12 - rf)
    print("Expected Return: ", expected_return)
ax.lines[0].set_alpha(0.3) #%% # example: Visualizing Seattle Bycicle Counts data = pd.read_csv('data/Fremont_Bridge.csv', index_col = 'Date', parse_dates =True) #%% print(data.head()) print(data.columns) #%% data.columns = ['Total', 'East', 'West'] #%% data.dropna().describe() #%% # Visualizing the data data.plot() plt.ylabel('Hourly Bicycle Count') #%% resample data to a coarser grid weekly = data.resample('W').sum() weekly.plot(style = [':', '--', '-']) plt.ylabel('Weekly bicycle count') #%% # aggregating data with rolling mean daily = data.resample('D').sum() daily.rolling(30, center = True).sum().plot(style = [':', '--', '-'])
# value counts over the raw values, unsorted
# NOTE(review): `obj` is defined in an earlier cell outside this chunk.
pd.value_counts(obj.values, sort=False)
# boolean membership mask
mask = obj.isin(['b', 'c'])
mask
obj[mask]
data = DataFrame({'Qu1': [1, 3, 4, 3, 4],
                  'Qu2': [2, 3, 1, 2, 3],
                  'Qu3': [1, 5, 2, 4, 4]})
data
# BUG FIX: `pd.value.counts` does not exist (AttributeError);
# the function is `pd.value_counts`.
result = data.apply(pd.value_counts).fillna(0)
result
# missing-data handling examples
string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data
string_data.isnull()
string_data[0] = None  # None is also treated as missing
string_data.isnull()
from numpy import nan as NA
data = Series([1, NA, 3.5, NA, 7])
data.dropna()
data[data.notnull()]
data = DataFrame([[1., 6.5, 3.], [1., NA, NA], [NA, NA, NA], [NA, 6.5, 3]])
cleaned = data.dropna()  # drops any row containing a NaN
data
cleaned
cleaned = data.dropna(how='all')  # drops only all-NaN rows
cleaned
data[4] = NA
data
data.dropna(axis=1, how='all')
df = DataFrame(np.random.randn(7, 3))
# BUG FIX: `.ix` was removed in pandas 1.0; with the default integer labels
# `.loc` slices the same rows (label-based, end-inclusive).
df.loc[:4, 1] = NA
df.loc[:2, 2] = NA
df
df.dropna(thresh=3)  # keep rows with at least 3 non-NaN values
# NOTE(review): this chunk begins mid-statement — the line below is the tail
# of a read call (presumably pd.read_csv) whose opening is outside this view.
'/Users/xiangliu/Desktop/CSC560 Data/pollution_us_2000_2016.csv')
data.shape
# In[3]:
data.head(3)
# In[4]:
# inspect which rows have a missing 'CO AQI' value
le = data['CO AQI']
le[le.isnull()]
# In[5]:
# Look into each value: drop all rows with missing data, then persist
# the cleaned dataset to disk
data = data.dropna()
data.to_csv("/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv", index=True, sep=',')
# In[6]:
# reload the cleaned file to continue from the persisted copy
data = pd.read_csv('/Users/xiangliu/Desktop/CSC560 Data/pollution_AQI.csv')
data.shape
# In[7]:
data.head(3)
# In[8]:
def initialize(startdate, enddate, traintestchangedate, listticker):
    """Download adjusted closing prices for the given tickers and split
    them chronologically into a train set (before the change date) and a
    test set (on or after it)."""
    prices = yf.download(listticker, start=startdate, end=enddate)['Adj Close']
    # remove rows where any ticker has a missing price
    prices.dropna(axis=0, inplace=True)
    # chronological split: strictly-before goes to train, the rest to test
    train = prices[prices.index < traintestchangedate]
    test = prices[prices.index >= traintestchangedate]
    return train, test
# value counting and membership examples
# NOTE(review): `obj` is defined in an earlier cell outside this chunk.
obj.value_counts()
pd.value_counts(obj.values, sort=False)
mask = obj.isin(['b', 'c'])
obj[mask]
data = DataFrame({
    'Qu1': [1, 3, 4, 3, 4],
    'Qu2': [2, 3, 1, 2, 3],
    'Qu3': [1, 5, 2, 4, 4]
})
# per-column value counts, missing combinations filled with 0
data.apply(pd.value_counts).fillna(0)
# missing-data handling examples
string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data.isnull()
data = Series([1, NA, 3.5, NA, 7])
data.dropna()
data[data.notnull()]
data = DataFrame([[1, 6.5, 3], [1, NA, NA], [NA, NA, NA], [NA, 6.5, 3]])
data.dropna()
data.dropna(how='all')
data[4] = NA
data.dropna(axis=1, how='all')
df = DataFrame(np.random.randn(7, 3))
# NOTE(review): `.ix` was removed in pandas 1.0 — these assignments need
# `.loc` (equivalent here with the default integer labels).
df.ix[:4, 1] = NA
df.ix[:2, 2] = NA
df.dropna(thresh=3)
df.fillna(0)
df.fillna({1: 0.5})  # per-column fill values keyed by column label
# NOTE(review): this statement is cut off at the chunk boundary — the
# MultiIndex list continues outside this view.
data = Series(np.random.randn(10), index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
'LIN.DE', 'LXS.DE', 'MRK.DE', 'MUV2.DE', 'RWE.DE', 'SAP.DE', 'SDF.DE', 'SIE.DE', 'TKA.DE', 'VOW3.DE', '^GDAXI', ] #DAX30指数各个股票的代码以及德国30指数代码 data = pd.DataFrame() for sym in symbols: #获取数据 data[sym] = web.DataReader(sym, data_source='google')['Close'] data = data.dropna() #丢弃缺失值 dax = pd.DataFrame(data.pop('^GDAXI')) #将指数数据单独拿出来,采用pop在获取的时候已经从原来的地方删除这一列数据了 scale_function = lambda x: (x - x.mean()) / x.std() pca = KernelPCA().fit( data.apply(scale_function)) #这里用到了apply函数,做PCA前我们要对数据做标准化 get_we = lambda x: x / x.sum() print(get_we(pca.lambdas_))[:10] pca = KernelPCA(n_components=1).fit(data.apply(scale_function)) dax['PCA_1'] = pca.transform(data) dax.apply(scale_function).plot(figsize=(8, 4)) pca = KernelPCA(n_components=5).fit(data.apply(scale_function)) weights = get_we(pca.lambdas_) dax['PCA_5'] = np.dot(pca.transform(data), weights) plt.figure(figsize=(8, 4))
# NOTE(review): `SP500` is loaded in an earlier cell outside this chunk.
SP500.columns = ['GSPC.O']
raw = SP500
symbol = ['GSPC.O']
data = (pd.DataFrame(raw[symbol]).dropna())
# short and long simple-moving-average windows (in trading days)
SMA1 = 42
SMA2 = 252
data['SMA1'] = data[symbol].rolling(SMA1).mean()
data['SMA2'] = data[symbol].rolling(SMA2).mean()
data.plot(figsize=(10, 6))
# the first SMA2-1 rows have no long SMA yet; drop them
data.dropna(inplace=True)
# position signal: long (+1) when the short SMA is above the long SMA,
# short (-1) otherwise
data['Position'] = np.where(data['SMA1'] > data['SMA2'], 1, -1)
data.tail()
ax = data.plot(secondary_y='Position', figsize=(10, 6))
ax.get_legend().set_bbox_to_anchor((0.25, 0.85))
# daily log returns of the underlying
data['Returns'] = np.log(data[symbol] / data[symbol].shift(1))
# strategy return: yesterday's position applied to today's return
# (shift(1) avoids look-ahead bias)
data['Strategy'] = data['Position'].shift(1) * data['Returns']
data.round(4).head()
data.dropna(inplace=True)