from Data import YahooData
import numpy as np
import pandas as pd
from Data import Utility

# Load the stock universe; remember the distinct tickers before the frame is
# re-indexed by ticker so the downloaded tables can be joined onto it.
universe = Utility.get_stock_universe('stock_universe.csv')
u_tick = universe['Tick'].unique().tolist()
universe.set_index('Tick', inplace=True)

# Download every input table for the whole universe.
returns = YahooData.get_returns(u_tick)
ev_ebitda = YahooData.get_ev_ebitda(u_tick)
ocf_ev = YahooData.get_ocf_ev(u_tick)
ratios = YahooData.get_ratios(
    u_tick, ['Price', 'PS', 'PB', 'PE', '50ma', '200ma']
)
comp_info = YahooData.get_sector_industry(u_tick)

# Assemble one wide frame (index-aligned joins) and turn the 'N/A'
# placeholder strings into real missing values.
df = (
    universe
    .join(comp_info)
    .join(ratios)
    .join(ev_ebitda)
    .join(ocf_ev)
    .join(returns)
    .replace('N/A', np.nan)
)

# Ratio of the 50-day to the 200-day moving average.
df['ma_ratio'] = df['50ma'].astype(float).div(df['200ma'].astype(float))

# Ascending ranks: the largest value receives the highest rank.
df['ebitda_ev_rank'] = df['ebitda_ev'].astype(float).rank()
df['ocf_ev_rank'] = df['ocf_ev'].astype(float).rank()

# Descending ranks: the smallest multiple receives the highest rank.
df['PS_rank'] = df['PS'].astype(float).rank(ascending=False)
df['PB_rank'] = df['PB'].astype(float).rank(ascending=False)
df['PE_rank'] = df['PE'].astype(float).rank(ascending=False)

# Ascending ranks for the moving-average ratio and the one-year return.
df['ma_ratio_rank'] = df['ma_ratio'].rank()
df['return_rank'] = df['1yr_rtn'].rank()
STAY.U STAY BF.B BF-B BF.A BF-A BRK.B BRK-B LEN.B LEN-B """ universe = Utility.get_stock_universe("stock_universe.csv") u_tick = universe["Tick"].unique().tolist() # comp_MF_data = Utility.get_compustat_data('CQA_MF_data.csv', exchanges=['11', '12', '14']) # comp_MOH_ad_data = Utility.get_compustat_data('CQA_MOH_AD_data.csv', exchanges=['11', '12', '14']) # MOH_result = MOH_calc.Calc(comp_MOH_data, u_tick, comp_MOH_ad_data) # get data from Yahoo returns = YahooData.get_returns(u_tick) values = YahooData.get_ev_ebitda(u_tick) df = universe.join(values) mkt_cap_df = YahooData.get_value(u_tick, "Mkt_cap") """ mkt_cap_df.ix['FLOW'] = 22720 mkt_cap_df.ix['MSG'] = 4450 mkt_cap_df.ix['ALLE'] = 5730 mkt_cap_df.ix['GHC'] = 3370 mkt_cap_df.ix['HME'] = 4370 """ # if 'N/A' in mkt_cap_df['Mkt_cap']: mkt_cap_df[mkt_cap_df["Mkt_cap"] == "N/A"] = np.nan # calculate score based on compustat data
def Calc(PIO_data, tickers=None, details=False):
    """Score each ticker on nine fundamental signals from quarterly data.

    For every ticker the quarters are ordered newest first by ``datadate``.
    Quarters 1-4 form the current trailing year and quarters 5-8 the prior
    one.  Balance-sheet items come from the newest quarter of each year
    (quarter 1 and quarter 5), income items are summed over the year, and
    trailing operating cash flow is fetched with ``YahooData.get_cfo``.
    One point is awarded for each of:

    1. ROA (trailing income / assets) higher than a year earlier
    2. trailing operating cash flow / assets above zero
    3. ROA above zero
    4. trailing income minus trailing cash flow not above zero
    5. long-term debt / assets not higher than a year earlier
    6. current assets / current liabilities higher than a year earlier
    7. shares outstanding not higher than a year earlier
    8. gross margin higher than a year earlier
    9. revenue / assets higher than a year earlier

    A comparison involving NaN awards no point.  A ticker with fewer than
    five quarters has no prior-year row; its current row is then compared
    with itself, so of the year-over-year signals only 5 and 7 can score.

    Args:
        PIO_data: DataFrame of quarterly rows with the columns ``tic``,
            ``datadate``, ``revtq``, ``cogsq``, ``ibq``, ``atq``, ``dlttq``,
            ``actq``, ``lctq`` and ``cshoq``.  It is not modified.
        tickers: Optional collection of tickers; when given, only rows whose
            ``tic`` is in it are scored.  It is passed to
            ``YahooData.get_cfo`` unchanged.
        details: When true, return the per-ticker inputs and ratios instead
            of only the scores.

    Returns:
        With ``details`` false, an integer Series named ``pio_score`` indexed
        by ``tic``.  With ``details`` true, a DataFrame indexed by ``tic``
        holding the current-year row (with ``pio_score``) and, when
        available, the prior-year row of every ticker, ordered by ticker and
        then newest ``datadate`` first, with descriptive column names.
    """
    numeric_cols = ['revtq', 'cogsq', 'ibq', 'atq', 'dlttq', 'actq', 'lctq',
                    'cshoq']
    trail_cols = ['ibq', 'profit', 'revtq']
    trail_names = ['trail_ibq', 'trail_profit', 'trail_rev']

    if tickers is not None:
        raw_data = PIO_data[PIO_data['tic'].isin(tickers)].copy()
    else:
        raw_data = PIO_data.copy()

    # Rows without a ticker cannot be grouped (groupby would drop them too).
    raw_data = raw_data.dropna(subset=['tic'])

    # Missing fundamentals are treated as zero.
    raw_data[numeric_cols] = raw_data[numeric_cols].fillna(0).astype(float)

    # One sort replaces the repeated per-group sorts: newest quarter first
    # within each ticker.  The fresh RangeIndex keeps the boolean masks below
    # aligned even if the caller's index had duplicates.
    data = raw_data.sort_values(
        ['tic', 'datadate'], ascending=[True, False]
    ).reset_index(drop=True)
    data['profit'] = data['revtq'] - data['cogsq']

    # 0 = most recent quarter of a ticker, 1 = the one before, and so on.
    quarter_no = data.groupby('tic').cumcount()
    in_cur_year = quarter_no < 4
    in_pre_year = (quarter_no >= 4) & (quarter_no < 8)

    # Trailing-year sums of income, gross profit and revenue.
    cur_trail = data[in_cur_year].groupby('tic')[trail_cols].sum()
    pre_trail = data[in_pre_year].groupby('tic')[trail_cols].sum()
    cur_trail.columns = trail_names
    pre_trail.columns = trail_names

    # Most recent quarter and the same quarter one year earlier.
    cur_q = data[quarter_no == 0]
    pre_q = data[quarter_no == 4]

    # Expected to be a frame indexed by ticker that carries a 'trail_cfo'
    # column -- TODO confirm against YahooData.
    # NOTE(review): with tickers=None this passes None through; confirm that
    # get_cfo copes with that or derive the list from the data instead.
    trail_cfo = YahooData.get_cfo(tickers)

    # Joining on the 'tic' column keeps 'tic' an ordinary column, so later
    # group-bys never see it as both an index level and a column.
    cur = cur_q.join(cur_trail, on='tic').join(trail_cfo, on='tic')

    # Current-year ratios.
    cur['roa'] = cur['trail_ibq'] / cur['atq']
    cur['cfo'] = cur['trail_cfo'] / cur['atq']
    cur['lever'] = cur['dlttq'] / cur['atq']
    cur['liquid'] = cur['actq'] / cur['lctq']
    cur['eq_offer'] = cur['cshoq']
    cur['margin'] = cur['trail_profit'] / cur['trail_rev']
    cur['turnover'] = cur['trail_rev'] / cur['atq']
    cur['accrual'] = cur['trail_ibq'] - cur['trail_cfo']

    # Prior-year ratios (no cash-flow based columns are computed for it).
    pre = pre_q.join(pre_trail, on='tic')
    pre['roa'] = pre['trail_ibq'] / pre['atq']
    pre['lever'] = pre['dlttq'] / pre['atq']
    pre['liquid'] = pre['actq'] / pre['lctq']
    pre['eq_offer'] = pre['cshoq']
    pre['margin'] = pre['trail_profit'] / pre['trail_rev']
    pre['turnover'] = pre['trail_rev'] / pre['atq']

    data_set = pd.concat([cur, pre])

    # groupby yields the tickers in sorted order, which fixes the row order
    # of the resulting Series.
    scores = {tic: _pio_score(pair) for tic, pair in data_set.groupby('tic')}
    PIO_result = pd.Series(scores, name='pio_score', dtype='int64')
    PIO_result.index.name = 'tic'

    if not details:
        return PIO_result

    PIO_score = cur.set_index('tic').join(PIO_result)
    detailed_result = pd.concat([PIO_score, pre.set_index('tic')])
    detailed_result['ticker'] = detailed_result.index.tolist()
    detailed_result = detailed_result.sort_values(
        ['ticker', 'datadate'], ascending=[True, False]
    )
    detailed_result = detailed_result[[
        'datadate', 'trail_ibq', 'trail_cfo', 'trail_profit', 'trail_rev',
        'dlttq', 'atq', 'actq', 'lctq', 'cshoq', 'roa', 'cfo', 'accrual',
        'lever', 'liquid', 'eq_offer', 'margin', 'turnover', 'pio_score',
    ]]
    detailed_result.columns = [
        'datadate', 'trail_ibq', 'trail_cfo', 'trail_profit', 'trail_rev',
        'long_term_debt', 'total_assets', 'current_assets',
        'current_liability', 'shares_outstanding', 'roa', 'cfo', 'accrual',
        'lever', 'liquid', 'eq_offer', 'margin', 'turnover', 'pio_score',
    ]
    return detailed_result


def _pio_score(pair):
    """Return the 0-9 score for one ticker's current and prior-year rows."""
    pair = pair.sort_values('datadate', ascending=False)
    # Scalars from the newest and oldest row: comparing them never trips
    # over mismatched index labels, and with a single row both are that row.
    now = pair.iloc[0]
    then = pair.iloc[-1]
    signals = (
        now['roa'] > then['roa'],
        now['cfo'] > 0,
        now['roa'] > 0,
        now['accrual'] <= 0,
        now['lever'] <= then['lever'],
        now['liquid'] > then['liquid'],
        now['eq_offer'] <= then['eq_offer'],
        now['margin'] > then['margin'],
        now['turnover'] > then['turnover'],
    )
    return sum(1 for passed in signals if bool(passed))