def BackTest_1_Beta(self):
    """Backtest the beta-based risk-budgeting strategy over the backtest window."""
    d = self.Dates[0]
    n_rebalances = int(self.Periods_Diff(self.Dates[0], self.Dates[1]) / self.Frequency)
    for _ in xrange(n_rebalances):
        # Start of the historical estimation window
        d_f = d - BMonthBegin() * self.Histo_Length
        # Benchmark index returns and constituent returns over the window
        bnch = self.Benchmark.Extract_Index_Returns([d_f, d])
        returns = self.Benchmark.Extract_Returns([d_f, d], d - MonthEnd())
        assets = self.Benchmark.Extract_Compo(d - MonthEnd())
        std_Index = np.std(bnch)
        beta = np.empty(len(assets))
        for j in xrange(len(beta)):
            # NOTE(review): divides cov by the index std, not its variance —
            # confirm this is the intended "beta" definition
            beta[j] = bnch.iloc[:, 0].cov(returns.iloc[:, j]) / std_Index
        self.RB_Weighted_1_risk(beta)
        # Out-of-sample returns until the next rebalancing date
        bckData = self.Benchmark.Extract_Returns(
            [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
            d - MonthEnd()).loc[:, assets]
        self.Compute_Performance(bckData)
        self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
        d = (d + BMonthEnd() * self.Frequency) + BDay()
def Test_Sarima_M2(self, c, params, frec='M'):
    """Compute the monthly MAAPE for variable ``c`` from a fitted SARIMA result.

    params is the fitted results object; frec is the pandas grouping frequency.
    Returns [MAAPE_train, MAAPE_test]; 999999999 flags "no valid observations".
    """
    results = params
    pred = results.get_prediction(
        start=pd.to_datetime(self.begin_train + MonthEnd(1)), dynamic=False)
    pred_uc = results.get_forecast(steps=self.test_tomonth)
    train = pred.predicted_mean
    train.rename('forecast_train', inplace=True)
    test = pred_uc.predicted_mean
    test.rename('forecast_test', inplace=True)
    test_obs = self.Mts1_test[c]
    train_obs = self.Mts1_train[c][self.begin_train + MonthEnd(1):]
    # After the merge: column 0 = forecast, column 1 = observed
    result_test = pd.merge(test, test_obs, how='left',
                           left_index=True, right_index=True)
    result_train = pd.merge(train, train_obs, how='left',
                            left_index=True, right_index=True)
    result_test = result_test.groupby(pd.Grouper(freq=frec)).sum()
    result_train = result_train.groupby(pd.Grouper(freq=frec)).sum()

    def _maape(frame):
        # MAAPE = mean(arctan(|obs - forecast| / obs)); NaN terms are skipped.
        total = 0.0
        valid = 0
        for k in range(frame.shape[0]):
            forecast = frame.iat[k, 0]
            obs = frame.iat[k, 1]
            term = math.atan(math.fabs(obs - forecast) / obs)
            if not np.isnan(term):
                total += term
                valid += 1
        # Sentinel when every term was NaN
        return total / valid if valid else 999999999

    # BUG FIX: the train loop previously divided by the forecast (iat[i, 0])
    # while its NaN check and the test loop divided by the observation
    # (iat[, 1]); both splits now use the observation as denominator.
    return [_maape(result_train), _maape(result_test)]
def create_first_production_first_payment_and_last_payment_date(df):
    """Add First Production / First Payment / Last Payment date columns to df."""
    # Placeholder datetime columns; overwritten unconditionally below
    df['First Payment Date'] = pd.to_datetime(0)
    df['Last Payment Date'] = pd.to_datetime(0)
    # First production: month-end one month after the in-service date
    df['First Production Date'] = (pd.to_datetime(df['InService Date'])
                                   + pd.DateOffset(months=1) + MonthEnd(0))
    # Default: payments start with first production; PPA contracts fixed up below
    df['First Payment Date'] = df['First Production Date']
    # Last payment is 300 months after the first
    df['Last Payment Date'] = (df['First Payment Date']
                               + pd.DateOffset(months=300) + MonthEnd(0))
    ppa_inservice = get_contract_type(
        df, ['PPA', 'PPA-EZ', 'EZ PPA-Connect']).loc[:, 'InService Date']
    # NOTE(review): ppa_inservice is the raw column — assumes it is already
    # datetime-typed upstream; confirm before relying on the offset arithmetic
    df.loc[ppa_inservice.index, 'First Payment Date'] = (
        ppa_inservice + pd.DateOffset(months=2) + MonthEnd(0))
    df.loc[ppa_inservice.index, 'Last Payment Date'] = (
        ppa_inservice + pd.DateOffset(months=301) + MonthEnd(0))
    return df
def GetDefaultAnnouncementDate(report_periods, adjusted=False):
    """Infer report announcement dates from report periods per disclosure rules.

    A quarter-q report is announced q month-ends after the period end, except
    Q3 which is treated like Q1 (one month). With adjusted=True the dates are
    pushed out a further 12 month-ends (post-adjustment filings).

    Parameters
    ----------
    report_periods : str or list of str
        Report period end dates.
    adjusted : bool
        Whether to return the adjusted-report announcement dates.

    Returns
    -------
    Timestamp or DatetimeIndex
    """
    from pandas.tseries.offsets import MonthEnd
    periods = pd.to_datetime(report_periods)
    quarters = periods.quarter
    if isinstance(quarters, int):
        # Scalar input: Q3 behaves like Q1
        quarters = 1 if quarters == 3 else quarters
    else:
        quarters = np.where(quarters == 3, 1, quarters)
    announcements = periods + MonthEnd(1) * quarters
    return announcements + MonthEnd(12) if adjusted else announcements
def _reduce(price_data, period, term, region):
    """Keep only the period-start or period-end rows of ``price_data``.

    period: 'weekly' or 'monthly'; term: 'start' or 'end'; region selects the
    trading calendar used for the monthly business-day anchors.
    """
    calendar = build_trading_calendar(region)
    if term == 'start':
        if period == 'weekly':
            # BUG FIX: the Monday mask was computed but its result discarded,
            # leaving the frame unfiltered; keep Mondays (dayofweek == 0).
            price_data = price_data[price_data.index.dayofweek == 0]
        if period == 'monthly':
            cbm_begin = CustomBusinessMonthBegin(calendar=calendar)
            first = price_data.index[0] + MonthBegin()
            last = price_data.index[-1] + MonthBegin()
            month_starts = pd.date_range(first, last, freq=cbm_begin)
            price_data['pmap'] = [1 if idx in month_starts else 0
                                  for idx in price_data.index]
            price_data = price_data[price_data['pmap'] == 1].drop('pmap', axis=1)
    if term == 'end':
        if period == 'weekly':
            # BUG FIX: same discarded-result bug; keep Fridays (dayofweek == 4).
            price_data = price_data[price_data.index.dayofweek == 4]
        if period == 'monthly':
            cbm_end = CustomBusinessMonthEnd(calendar=calendar)
            first = price_data.index[0] + MonthEnd()
            last = price_data.index[-1] + MonthEnd()
            month_ends = pd.date_range(first, last, freq=cbm_end)
            price_data['pmap'] = [1 if idx in month_ends else 0
                                  for idx in price_data.index]
            price_data = price_data[price_data['pmap'] == 1].drop('pmap', axis=1)
    return price_data
def resample_index(dat, to_freq):
    """Return a copy of ``dat`` whose index is snapped to calendar period ends.

    e.g. 20180808 -> 20180831, 20180809 -> 20180831.

    Notes
    -----
    1. A copy is returned, so the caller's frame keeps its original index.
    2. Real trading dates are masked: everything becomes the natural period
       end (month end, Saturday for weeks, or year end).

    :param dat: DataFrame/Series with a DatetimeIndex
    :param to_freq: 'M', 'W' or 'Y'
    :return: copy of dat with the snapped index
    """
    out = dat.copy()
    idx = out.index
    if to_freq == 'M':
        # Already at month end  <=>  +MonthEnd then -MonthEnd is a round trip
        out.index = idx.where(idx == ((idx + MonthEnd()) - MonthEnd()),
                              idx + MonthEnd())
    elif to_freq == 'W':
        anchor = 5  # 0-6 = Monday..Sunday, so weeks are anchored on Saturday
        out.index = idx.where(idx == ((idx + Week(weekday=anchor)) - Week()),
                              idx + Week(weekday=anchor))
    elif to_freq == 'Y':
        out.index = idx.where(idx == ((idx + YearEnd()) - YearEnd()),
                              idx + YearEnd())
    return out
def format_date(curr_date, prev_date, freq):
    """Map a raw period label to its report date.

    freq '6': quarterly label ('YYYY/_q') -> quarter-end date.
    freq '8': monthly label ('YYYY/MM')   -> month-end date.
    freq '9': mid-month release; a label equal to the previous one falls back
              to the month-end date instead of the 15th.
    """
    if freq == '6':  # Quarterly: map the quarter number to its first month
        quarter = int(curr_date[6:])
        starts = {1: '01/01', 2: '04/01', 3: '07/01'}
        curr_date = curr_date[:5] + starts.get(quarter, '10/01')
        report_date = datetime.strptime(curr_date, "%Y/%m/%d") + QuarterEnd(1)
    elif freq == '8':  # Monthly
        curr_date = curr_date + '/01'
        report_date = datetime.strptime(curr_date, "%Y/%m/%d") + MonthEnd(1)
    elif freq == '9':  # First occurrence -> the 15th; repeat -> month end
        if not prev_date or prev_date != curr_date:
            curr_date = curr_date + '/15'
            report_date = datetime.strptime(curr_date, "%Y/%m/%d")
        else:
            curr_date = curr_date + '/01'
            report_date = datetime.strptime(curr_date, "%Y/%m/%d") + MonthEnd(1)
    return report_date
def GetEndDateList(data, freq, trim_end=False):
    """Sorted unique period-end dates covering ``data.index``.

    freq: 'M' (month end), 'W' (Saturday) or 'Y' (year end).
    trim_end drops the final — possibly incomplete — period; mainly useful
    when resampling daily data.
    """
    idx = data.index
    if freq == 'M':
        # A date already at month end survives the +MonthEnd/-MonthEnd round trip
        date_list = idx.where(idx == ((idx + MonthEnd()) - MonthEnd()),
                              idx + MonthEnd())
    elif freq == 'W':
        anchor = 5  # 0-6 = Monday..Sunday; anchor weeks on Saturday
        date_list = idx.where(idx == ((idx + Week(weekday=anchor)) - Week()),
                              idx + Week(weekday=anchor))
    elif freq == 'Y':
        date_list = idx.where(idx == ((idx + YearEnd()) - YearEnd()),
                              idx + YearEnd())
    ends = sorted(set(date_list))
    return ends[:-1] if trim_end else ends
def calculate_factors(self, prices, dividends, assets, ROE, marketcap):
    """Prepare the security panels and compute the HXL investment/profit factors."""
    # Snap every panel's column labels to calendar month-ends so they align
    for panel in (prices, dividends, assets, ROE, marketcap):
        panel.columns = panel.columns + MonthEnd(0)
    # Align the remaining panels to the price columns
    dividends, assets, ROE = self._padronize_columns(
        prices.columns, dividends, assets, ROE)
    # Base information
    self.securities = {
        'assets': assets,
        'ROE': ROE,
        'price': prices,
        'marketcap': marketcap,
        'dividends': dividends
    }
    # Derived info, returns, benchmarks and classifications
    self.securities = self._get_IA_info(self.securities)
    self.securities = self._get_return(self.securities)
    self.securities = self._get_benchmarks(self.securities)
    self.securities['sizecls'] = self._get_sizecls(self.securities)
    self.securities['iacls'] = self._get_iacls(self.securities)
    self.securities['ROEcls'] = self._get_ROEcls(self.securities)
    self.securities['cls'] = (self.securities['sizecls']
                              + self.securities['iacls']
                              + self.securities['ROEcls'])
    # Final factor series
    self.HXLInvestment = self.get_investment()
    self.HXLProfit = self.get_profit()
def prep_FF_monthly(path):
    """Load monthly Fama-French factors plus momentum; return (FF, RF).

    FF has DATE, Mkt-RF, SMB, HML and momentum; RF holds DATE and the
    risk-free rate. All factor columns are converted from percent to decimal.
    """
    # Three-factor file: dates arrive as YYYYMM integers
    ff3 = pd.read_csv(path + '\\Data\\F-F_Research_Data_Factors.csv')
    ff3['DATE'] = pd.to_datetime(ff3['DATE'].astype(str), format='%Y%m')
    ff3['DATE'] = ff3['DATE'] + MonthEnd(0)  # snap to month end
    factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RF']
    ff3[factor_cols] = ff3[factor_cols].div(100)  # percent -> decimal
    # Momentum file: dates arrive as YYYY-MM-DD strings (Excel put them at
    # the first of the month, hence the month-end snap)
    ff_mom = pd.read_csv(path + '\\Data\\F-F_Momentum_Factor_monthly.csv')
    ff_mom['DATE'] = pd.to_datetime(ff_mom['DATE'].astype(str), format='%Y-%m-%d')
    ff_mom['DATE'] = ff_mom['DATE'] + MonthEnd(0)
    ff = ff3.merge(ff_mom, on='DATE')
    rf = ff[['DATE', 'RF']]  # keep the risk-free rate separately
    ff.drop('RF', axis=1, inplace=True)
    return ff, rf
def dayMoveOutFast():
    """Print and return the earliest possible cancellation date (German notice rules)."""
    now = datetime.now()
    first_of_month = now.replace(day=1)
    # Is one of the first three days of this month a Sunday?
    has_sunday = any((first_of_month + timedelta(days=n)).weekday() == 6
                     for n in (0, 1, 2))
    # First relevant business day of the month
    fbdom = first_of_month + timedelta(days=3 if has_sunday else 2)
    # Two months notice if that day has not passed yet, otherwise three
    shift = 2 if fbdom >= now else 3
    target = now + relativedelta(months=+shift)
    dmo = pd.to_datetime(target, format='%Y-%m-%d') + MonthEnd(1)
    dmo = datetime.strftime(dmo, "%d.%m.%Y")
    output = "Ihr schnellstmöglicher Kündigungstermin ist der " + dmo + "."
    print(output)
    return output
def GetEndDateList(data, freq, trim_end=False):
    """Unique ascending period-end dates for ``data.index``.

    Used when resampling daily data to a lower frequency; trim_end=True drops
    the last, potentially incomplete, period end.

    :param data: frame/series with a DatetimeIndex
    :param freq: 'M', 'W' or 'Y'
    :param trim_end: drop the final period end
    :return: sorted list of Timestamps
    """
    index = data.index
    if freq == 'M':
        snapped = index.where(index == ((index + MonthEnd()) - MonthEnd()),
                              index + MonthEnd())
    elif freq == 'W':
        saturday = 5  # weekday numbering: 0 = Monday ... 6 = Sunday
        snapped = index.where(index == ((index + Week(weekday=saturday)) - Week()),
                              index + Week(weekday=saturday))
    elif freq == 'Y':
        snapped = index.where(index == ((index + YearEnd()) - YearEnd()),
                              index + YearEnd())
    if trim_end:
        return sorted(set(snapped))[:-1]
    return sorted(set(snapped))
def crsp_block(upload_type, conn):
    """Build the monthly CRSP panel with delisting-adjusted returns and market equity.

    upload_type: 'from_wrds' queries WRDS through ``conn``; 'from_file' reads
    local pickles. Returns the merged frame sorted by jdate/permco/me.
    """
    if upload_type == 'from_wrds':
        # Market data.
        # BUG FIX: the shares-outstanding column was garbled ('a.shro$^ut'),
        # which made the query invalid; it is 'a.shrout' (used below as 'me').
        query_crsp = """
                      select a.permno, a.permco, a.date, b.shrcd, b.exchcd,
                      a.ret, a.retx, a.shrout, a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date between '01/01/1959' and '12/31/2017'
                      and b.exchcd between 1 and 3
                      """
        query_dlret = """
                       select permno, dlret, dlstdt
                       from crsp.msedelist
                       """
        try:
            crsp_m = conn.raw_sql(query_crsp)
            dlret = conn.raw_sql(query_dlret)
        except Exception:  # e.g. OperationalError on a stale session: reconnect once
            conn = wrds.Connection()
            crsp_m = conn.raw_sql(query_crsp)
            dlret = conn.raw_sql(query_dlret)
    elif upload_type == 'from_file':
        crsp_m = pd.read_pickle('data/crsp_m_modified.pkl')
        if bool(np.isin('Unnamed: 0', crsp_m.columns)):
            crsp_m = crsp_m.drop('Unnamed: 0', axis=1)
        dlret = pd.read_pickle('data/dlret.pkl')
        if bool(np.isin('Unnamed: 0', dlret.columns)):
            dlret = dlret.drop('Unnamed: 0', axis=1)
    # Identifier/code columns as ints
    id_cols = ['permco', 'permno', 'shrcd', 'exchcd']
    crsp_m[id_cols] = crsp_m[id_cols].astype(int)
    # Month-end alignment date used as the merge key
    crsp_m['date'] = pd.to_datetime(crsp_m['date'])
    crsp_m['jdate'] = crsp_m['date'] + MonthEnd(0)
    # Delisting returns, aligned the same way
    dlret.permno = dlret.permno.astype(int)
    dlret['dlstdt'] = pd.to_datetime(dlret['dlstdt'])
    dlret['jdate'] = dlret['dlstdt'] + MonthEnd(0)
    crsp = pd.merge(crsp_m, dlret, how='left', on=['permno', 'jdate'])
    crsp['dlret'] = crsp['dlret'].fillna(0)
    crsp['ret'] = crsp['ret'].fillna(0)
    # Delisting-adjusted total return
    crsp['retadj'] = (1 + crsp['ret']) * (1 + crsp['dlret']) - 1
    crsp['me'] = crsp['prc'].abs() * crsp['shrout']  # market equity
    crsp = crsp.drop(['dlret', 'dlstdt', 'shrout'], axis=1)
    crsp = crsp.sort_values(by=['jdate', 'permco', 'me'])
    return crsp
def meses_3():
    """Month-end dates of the next three calendar months, as a 3-element list."""
    today = pd.to_datetime('today').replace(hour=0, minute=0, second=0,
                                            microsecond=0, nanosecond=0)
    month_start = today.replace(day=1)
    # First of month + n months, then rolled forward to that month's end
    return [month_start + relativedelta(months=n) + MonthEnd(1)
            for n in (1, 2, 3)]
def create_first_payment_and_last_payment_date(df):
    """Add 'First Payment Date' and 'Last Payment Date' columns to ``df``."""
    # First payment: month-end one month after the in-service date
    inservice = pd.to_datetime(df['InService Date'])
    df['First Payment Date'] = inservice + pd.DateOffset(months=1) + MonthEnd(0)
    # Last payment: the 300th monthly payment (299 months after the first)
    df['Last Payment Date'] = (df['First Payment Date']
                               + pd.DateOffset(months=299) + MonthEnd(0))
    return df
def test(policy, market, optimizer, scheduler, test_begin):
    """Run the test phase episode-by-episode; return (test_log, test_port)."""
    global date
    print('Test Begins')
    test_log = pd.DataFrame()
    test_port = pd.DataFrame()
    # Back up one month so next_state() can advance to test_begin itself
    date = test_begin - MonthEnd(1)
    for _ in count(1):
        # Run one (non-training) episode
        stock_code, weights, avg_ret, cum_ret, sr, dl = run_episode(
            policy, market, optimizer, scheduler, train_flag=False)
        # Episode start and end labels
        ep_start_date = datetime.strftime(date - MonthEnd(11), '%Y-%m')
        ep_end_date = datetime.strftime(date, '%Y-%m')
        # Per-episode portfolio snapshot
        snapshot = pd.DataFrame()
        snapshot['Stock_Codes'] = stock_code
        snapshot['Weights'] = weights
        snapshot['Invest_Date'] = dl
        snapshot.set_index('Invest_Date', inplace=True)
        test_port = pd.concat([test_port, snapshot], axis=0)
        del snapshot
        # Per-episode metrics
        period = f'{ep_start_date}:{ep_end_date}'
        test_log.at[period, 'Average_Return'] = avg_ret
        test_log.at[period, 'Cumulative_Return'] = cum_ret
        test_log.at[period, 'Sharpe_Ratio'] = sr
        # Stop when fewer than 12 + 1 months remain after the current date
        stop_date = market.last_date() - relativedelta(
            months=13) + relativedelta(day=31)
        if date > stop_date:
            print(f'last episode end date: {datetime.strftime(date, "%Y-%m")}')
            break
    test_log.index.name = 'Investment Period'
    return test_log, test_port
def to_monthend(dt):
    """Return the calendar month-end date for an int date (or list of them).

    dt may be YYYY (maps to December 31), YYYYMM, or YYYYMMDD; lists are
    mapped element-wise. The result is an int in YYYYMMDD form.
    """
    if is_list_like(dt):
        return [to_monthend(item) for item in dt]
    if dt <= 9999:  # bare year
        stamp = datetime.datetime(year=dt, month=12, day=1)
    elif dt <= 999999:  # year + month
        stamp = datetime.datetime(year=dt // 100, month=dt % 100, day=1)
    else:  # full date
        stamp = pd.to_datetime(str(dt))
    return int((stamp + MonthEnd(0)).strftime('%Y%m%d'))
def get_mom_ind(delay, signal):
    """Equal-weighted industry momentum quintile portfolios.

    delay/signal control the lag and lookback (in months) of the momentum
    signal. Uses the module-level ``stocks_month`` panel and ``prod`` helper.
    """
    # Keep firms with positive equity and unadjusted price of at least $5
    good_firm = (stocks_month['TOT_EQUITY'] > 0) & \
                (stocks_month['PRICE_UNADJUSTED'] >= 5)
    sub = stocks_month.loc[good_firm, ['DATE', 'RETURN', 'INDUSTRY']]
    # Equal-weighted industry returns
    ind = sub.groupby(['DATE', 'INDUSTRY'])['RETURN'].mean().reset_index()
    # Number of firms per (date, industry) cluster
    ind['n'] = sub.groupby(['DATE', 'INDUSTRY'])['RETURN'].transform('count')
    ind.sort_values(['INDUSTRY', 'DATE'], inplace=True)
    # An observation is valid only if its lagged date is the expected month
    ind['valid'] = ind['DATE'].shift(signal + delay) + dt.timedelta(
        days=7) + MonthEnd(signal + delay) == ind['DATE'] + MonthEnd(0)
    # Rolling compounded momentum, lagged by (1 + delay)
    ind['MOM'] = ind.groupby('INDUSTRY')['RETURN'].transform(
        lambda x: x.shift(1 + delay).rolling(signal).apply(prod))
    ind.loc[ind['valid'] == False, 'MOM'] = np.nan
    # Drop undefined momentum and clusters with fewer than 5 firms
    ind = ind.loc[ind['MOM'].notna() & (ind['n'] > 4), :]
    ind.drop(['valid', 'n'], axis=1, inplace=True)
    # Quintiles; +1 avoids zero-indexing confusion
    ind['quintile'] = 1 + ind[['DATE', 'MOM']].groupby('DATE')['MOM'].transform(
        lambda x: pd.qcut(x, 5, duplicates='raise', labels=False))
    # Equal-weighted return within each quintile, quintiles as columns
    q_ret = ind.groupby(['DATE', 'quintile'])['RETURN'].mean().reset_index()
    ind_5 = q_ret.pivot(index='DATE', columns='quintile',
                        values='RETURN').reset_index()
    # Winners minus losers
    ind_5['Q5-Q1'] = ind_5[len(ind_5.columns) - 1] - ind_5[1]
    return ind_5
def calc_weighted_rtn(stock, column, wt, monthly=True):
    """Weighted average returns per (DATE, ``column``) group.

    wt: 'ew' (equal), 'vw' (lagged market equity) or 'ivw' (inverse of it).
    monthly toggles the month-vs-day gap check used to validate the lag.
    Returns (weighted RETURN series, group-size series), row-aligned to stock.
    """
    # Deep copy so the caller's frame is never mutated
    data = copy.deepcopy(stock)
    if (wt == 'vw') | (wt == 'ivw'):
        # Market equity and its one-observation lag per instrument
        data['ME'] = data['TOTAL_SHARES'] * data['PRICE_UNADJUSTED']
        data.sort_values(['DW_INSTRUMENT_ID', 'DATE'], inplace=True)
        lagged_me = data.groupby('DW_INSTRUMENT_ID')['ME'].shift(1)
        data['wt'] = lagged_me if wt == 'vw' else 1 / lagged_me
        # The lag is only valid if the previous observation is one period back
        if monthly == True:
            data['valid'] = data['DATE'].shift(1) + dt.timedelta(
                days=7) + MonthEnd(0) == data['DATE'] + MonthEnd(0)
        else:
            data['valid'] = data['DATE'].shift(1) + dt.timedelta(
                days=7) == data['DATE']
        data.loc[data['valid'] == False, 'wt'] = np.nan
        data.drop(['ME', 'valid'], axis=1, inplace=True)
    else:
        # Equal weighting: unit weight wherever a return exists
        data.loc[data['RETURN'].notna(), 'wt'] = 1
    # Normalize weights within each (DATE, column) group
    data['sum'] = data.groupby(['DATE', column])['wt'].transform('sum')
    data['wt'] = data['wt'] / data['sum']
    # Weighted group return, broadcast back to every row
    data['RETURN'] = data['RETURN'] * data['wt']
    data['RETURN'] = data.groupby(['DATE', column])['RETURN'].transform('sum')
    data['N*'] = data.groupby(['DATE', column])['DW_INSTRUMENT_ID'].transform('count')
    return data['RETURN'], data['N*']
def make_monthly(df: pd.DataFrame, method='ffill', add_endpoints: list = None) -> pd.DataFrame:
    """Resample ``df`` to calendar month-end frequency.

    method is passed to ``asfreq`` for filling gaps. add_endpoints is an
    optional [extend_start, extend_end] pair of booleans: when set, the first
    and/or last index label is moved one month out before resampling so the
    boundary months are included. Note: with add_endpoints the caller's
    ``df`` index is modified in place (set_index(..., inplace=True)).
    """
    if add_endpoints:
        ind = df.index.to_series()
        # BUG FIX: ind[0] / ind[-1] on a datetime-indexed Series is
        # label-based lookup in modern pandas (positional int fallback is
        # deprecated/removed); use .iloc for positional access.
        if add_endpoints[0]:
            ind.iloc[0] = ind.iloc[0] - MonthEnd(1)
        if add_endpoints[1]:
            ind.iloc[-1] = ind.iloc[-1] + MonthEnd(1)
        df.set_index(ind, inplace=True)
    df = df.asfreq(freq='1M', method=method, how="e")
    # Snap labels onto exact month-ends
    df.index += MonthEnd(0)
    return df
def plot_SARIMA_M(self, c, params, frec='M', date_init='2019-01-01'):
    """Plot the monthly SARIMA fit/forecast for variable ``c``.

    frec is the pandas grouping frequency (D/W/M/Y...). params[0] is the
    fitted SARIMA results object. Returns [out_test, out_train].
    """
    results = params[0]
    pred = results.get_prediction(
        start=pd.to_datetime(self.begin_train + MonthEnd(1)), dynamic=False)
    pred_uc = results.get_forecast(steps=self.test_tomonth)
    train = pred.predicted_mean
    train.rename('forecast_train', inplace=True)
    test = pred_uc.predicted_mean
    test.rename('forecast_test', inplace=True)
    # Observed series aligned with each forecast segment
    test_obs = self.Mts2_test[c]
    train_obs = self.Mts2_train[c][self.begin_train + MonthEnd(1):]
    result_test = pd.merge(test, test_obs, how='left',
                           left_index=True, right_index=True)
    result_train = pd.merge(train, train_obs, how='left',
                            left_index=True, right_index=True)
    # Aggregate to the requested frequency before plotting
    out_test = result_test.groupby(pd.Grouper(freq=frec)).sum()
    out_train = result_train.groupby(pd.Grouper(freq=frec)).sum()
    ax = out_train.plot(kind='line', y='D fix', color='g',
                        label='Real demand', title='SARIMA forecast', grid=True)
    out_train.plot(kind='line', y='forecast_train', color='b', ax=ax,
                   label='Forecast Train')
    out_test.plot(kind='line', y='forecast_test', color='r', ax=ax,
                  label='Forecast test')
    out_test.plot(kind='line', y='D fix', color='g', ax=ax, legend=False)
    return [out_test, out_train]
def date_11(): from pandas.tseries.offsets import Day,MonthEnd now=datetime(2011,11,17) print now+3*Day() print now+MonthEnd() print now+MonthEnd(2) offset=MonthEnd() print offset.rollforward(now) print offset.rollback(now) ts=Series(np.random.randn(20),index=pd.date_range('1/15/2000',periods=20,freq='4d')) print ts.groupby(offset.rollforward).mean() print ts.resample('M',how='mean')
def random_state(self, train_begin, train_end, seed_num):
    """Draw a random eligible date and return it with its 12-month encoded state.

    Eligible dates need 12 months of history before them and 11 months of
    data after them inside [train_begin, train_end]. seed_num is currently
    unused (seeding is commented out upstream).
    """
    eligible = [day for day in self.datelist
                if train_begin + MonthEnd(12) <= day <= train_end - MonthEnd(11)]
    chosen = np.random.choice(eligible)
    return chosen, self.encoder_12m(chosen)
def BackTest_EqualWeight(self):
    """Backtest the equally-weighted portfolio over the backtest window."""
    d = self.Dates[0]
    n_rebalances = int(self.Periods_Diff(self.Dates[0], self.Dates[1]) / self.Frequency)
    for _ in xrange(n_rebalances):
        # Current benchmark composition and 1/N weights
        assets = self.Benchmark.Extract_Compo(d - MonthEnd())
        self.Equal_Weighted(len(assets))
        # Out-of-sample returns until the next rebalancing date
        bckData = self.Benchmark.Extract_Returns(
            [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
            d - MonthEnd()).loc[:, assets]
        self.Compute_Performance(bckData)
        self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
        d = (d + BMonthEnd() * self.Frequency) + BDay()
def test_month_range_union_tz_dateutil(self, sort): from pandas._libs.tslibs.timezones import dateutil_gettz tz = dateutil_gettz("US/Eastern") early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) late_start = datetime(2011, 3, 1) late_end = datetime(2011, 5, 1) early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) early_dr.union(late_dr, sort=sort)
def test_month_range_union_tz_pytz(self, sort): from pytz import timezone tz = timezone("US/Eastern") early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) late_start = datetime(2011, 3, 1) late_end = datetime(2011, 5, 1) early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) early_dr.union(late_dr, sort=sort)
def set_current(self):
    """Refresh self.newest_date with the latest complete period per frequency.

    Keys: 'M' (last month end), 'Q' (last quarter end), 'D' (yesterday),
    'Y' (last year end), 'H' (last half-year end).
    """
    today = datetime.date.today()
    month_end, quarter_end = MonthEnd(), QuarterEnd()
    self.newest_date['M'] = month_end.rollback(today)
    self.newest_date['Q'] = quarter_end.rollback(today)
    self.newest_date['D'] = today - timedelta(days=1)
    self.newest_date['Y'] = YearEnd().rollback(today)
    # Half-year ends are June 30 and December 31
    first_half = datetime.date(today.year, 6, 30)
    second_half = datetime.date(today.year, 12, 31)
    if today < first_half:
        self.newest_date['H'] = datetime.date(today.year - 1, 12, 31)
    elif today < second_half:
        self.newest_date['H'] = first_half
    else:
        self.newest_date['H'] = second_half
def BackTest_MinVar(self):
    """Backtest the minimum-variance portfolio over the backtest window."""
    d = self.Dates[0]
    n_rebalances = int(self.Periods_Diff(self.Dates[0], self.Dates[1]) / self.Frequency)
    for _ in xrange(n_rebalances):
        # Covariance matrix estimated over the trailing history window
        d_f = d - BMonthBegin() * self.Histo_Length
        vcov, assets = self.Benchmark.Extract_VCov_Matrix([d_f, d],
                                                          d - MonthEnd())
        self.MinVariance(vcov, 0.001)
        # Out-of-sample returns until the next rebalancing date
        bckData = self.Benchmark.Extract_Returns(
            [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
            d - MonthEnd()).loc[:, assets]
        self.Compute_Performance(bckData)
        self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
        d = (d + BMonthEnd() * self.Frequency) + BDay()
def gen_dr(df_row: pd.DataFrame) -> pd.date_range:
    """Build the daily date range covered by the current batch (本批次).

    The period fields are [year, start month, start 旬, end month, end 旬],
    where 旬 labels ten-day periods: 上旬 (1st-10th), 中旬 (11th-20th),
    下旬 (21st-end), 整月 (whole month). Neighbouring batches shift the
    boundary by half a 旬 to avoid overlaps.
    """
    year = int(df_row['本批次周期'][0])
    start_month = int(df_row['本批次周期'][1])
    start_label = df_row['本批次周期'][2]
    # Start day from the ten-day-period label
    if start_label == u'上旬':
        start_day = 1
    elif start_label == u'中旬':
        start_day = 11
    elif start_label == u'整月':
        start_day = 1
    else:
        start_day = 16
        # Previous batch already covered up to the 20th
        if df_row['上一批次周期'][4] == '中旬':
            start_day = 21
    start_date = date(year, start_month, start_day)
    # End day from the ten-day-period label
    end_year = int(df_row['本批次周期'][0])
    end_month = int(df_row['本批次周期'][3])
    end_label = df_row['本批次周期'][4]
    if end_label == u'中旬':
        end_day = 20
    elif end_label == u'下旬' or end_label == u'整月':
        end_day = (date(end_year, end_month, 1) + MonthEnd(1)).day
    else:
        end_day = 15
        # Next batch starts on the 11th
        if df_row['下一批次周期'][2] == '中旬':
            end_day = 10
    end_date = date(end_year, end_month, end_day)
    return pd.date_range(start_date, end_date)
def merging_with_macros(processed_companies, processed_macro_from_daily,
                        processed_macro_from_monthly: DataFrame) -> DataFrame:
    """Merge macro variables into the company panel.

    Contemporaneous macro variables join on the company date directly;
    the production/trade variables join with a one-month lag.
    """
    macro = processed_macro_from_daily.merge(processed_macro_from_monthly,
                                             how="left", left_index=True,
                                             right_index=True)
    lagging_variables = [
        LOG_EXPORT, LOG_IMPORT, LOG_INDUSTRY_PRODUCTION_US,
        LOG_INDUSTRY_PRODUCTION_EURO, LOG_INDUSTRY_PRODUCTION_KOR
    ]
    contemporaneous_variables = list(
        set(list(macro.columns)) - set(lagging_variables))
    macro_now = macro[contemporaneous_variables].copy(deep=True)
    macro_lag = macro[lagging_variables].copy(deep=True)
    # Contemporaneous merge on the company date
    merged = processed_companies.merge(macro_now, how='left',
                                       left_on="date", right_index=True)
    # Re-key the lagging variables one month-end earlier before merging
    macro_lag['mdate_lag'] = macro_lag.index + MonthEnd(-1)
    macro_lag = macro_lag.reset_index().set_index('mdate_lag').copy(deep=True)
    macro_lag = macro_lag.drop(columns='date').copy(deep=True)
    return merged.merge(macro_lag, how='left', left_on="date",
                        right_index=True)
def process_geracao_folha(pfunc: DataFrame, ppessoa: DataFrame,
                          pparam: DataFrame, psecao: DataFrame,
                          pfhstaft: DataFrame) -> DataFrame:
    """Build the "Gerou Folha" (payroll generated) staging frame.

    Leave-of-absence rules (translated from the original Portuguese notes):
    - Absences of 15 days or fewer are excluded.
    - Absences longer than 15 days count only for competence months they
      cover entirely.
    - Open-ended absences count for every month from their start onward.
    Returns columns ano, mes, cpf, cnpj, gerou_folha.
    """
    return (
        pfunc
        .merge(ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner')
        .merge(psecao, left_on=['codcoligada', 'codsecao'],
               right_on=['codcoligada', 'codigo'], how='inner')
        # Drop terminated/withdrawn situations
        .query('codsituacao != "E" & codsituacao != "W" & codsituacao != "R"')
        .merge(pparam, left_on=['codcoligada'], right_on=['codcoligada'],
               how='inner')
        .merge(pfhstaft, left_on=['codcoligada', 'chapa'],
               right_on=['codcoligada', 'chapa'], how='inner')
        .query('tipoafastamento != "N"')
        .assign(_dtinicio=lambda df: df['dtinicio'])
        .assign(_dtfinal=lambda df: df['dtfinal'])
        # NOTE(review): the key '_dtinicial' does not match the '_dtinicio'
        # column created above, so open-ended START dates are never replaced
        # by the sentinel — looks like a typo, but replacing a start with a
        # 2099 date may also be unintended; confirm before changing.
        .replace({'_dtfinal': '0001-01-01T00:00:00.000Z',
                  '_dtinicial': '0001-01-01T00:00:00.000Z'},
                 '2099-12-31T23:59:59.000Z')
        .assign(_dtinicio=lambda df: to_datetime(
            df['_dtinicio'], format='%Y-%m-%dT%H:%M:%S.%f').dt.tz_convert(None))
        .assign(_dtfinal=lambda df: to_datetime(
            df['_dtfinal'], format='%Y-%m-%dT%H:%M:%S.%f').dt.tz_convert(None))
        # Last day of the competence month (mescomp/anocomp)
        .assign(_dtcomp_endmonth=lambda df: to_datetime(
            df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str),
            format='%m-%Y') + MonthEnd(0))
        # Keep absences spanning the competence month, longer than 15 days
        .query(
            '(_dtinicio.dt.month <= mescomp & _dtinicio.dt.year <= anocomp)'
            ' & (_dtfinal.dt.month >= mescomp & _dtfinal.dt.year >= anocomp)'
            ' & (_dtfinal - _dtinicio).dt.days > 15'
            ' & abs((_dtcomp_endmonth - _dtinicio).dt.days) > 15'
            ' & _dtcomp_endmonth <= _dtfinal'
        )
        # Flag rows whose absence starts or ends in the competence month
        .assign(gerou_folha=lambda df:
                ((df['_dtinicio'].dt.month == df['mescomp'])
                 & (df['_dtinicio'].dt.year == df['anocomp']))
                | ((df['_dtfinal'].dt.month == df['mescomp'])
                   & (df['_dtfinal'].dt.year == df['anocomp'])))
        # Normalize the CNPJ by stripping punctuation
        .assign(cnpj=lambda df: df['cgc'].str.replace(r'\.|\/|\-', ''))
        [['anocomp', 'mescomp', 'cpf', 'cnpj', 'gerou_folha']]
        .rename({'anocomp': 'ano', 'mescomp': 'mes'}, axis=1)
    )
def slide7(): from pandas.tseries.offsets import Hour, Minute hour = Hour() print hour four_hours = Hour(4) print four_hours print pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h') print Hour(2) + Minute(30) print pd.date_range('1/1/2000', periods=10, freq='1h30min') ts = Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M')) print ts print ts.shift(2) print ts.shift(-2) print '2 M' print ts.shift(2, freq='M') print '3 D' print ts.shift(3, freq='D') print '1 3D' print ts.shift(1, freq='3D') print '1 90T' print ts.shift(1, freq='90T') print 'shifting dates with offsets' from pandas.tseries.offsets import Day, MonthEnd now = datetime(2011, 11, 17) print now + 3 * Day() print now + MonthEnd() print now + MonthEnd(2) offset = MonthEnd() print offset print offset.rollforward(now) print offset.rollback(now) ts = Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d')) print ts.groupby(offset.rollforward).mean()
def timestamp_rollforward_rollback(): """ How to role the date forward (end of time) or backward (beg of time) """ now = datetime(2014, 4, 15) print "Current time is:", now now = now + 3 * Day() print "Adding 3 days to now:", now offset = MonthEnd() now = offset.rollforward(now) print "Rolling foward to last day of the month", now offset = MonthBegin() now = offset.rollback(now) print "Rolling foward to first day of the month", now ts = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000', periods=20, freq='4d')) print "Original Time Series is:\n", ts offset = YearBegin() ts = ts.groupby(offset.rollforward).mean() print "Time Series after rolling forward\n", ts
# Demo: shifting a monthly Series, anchored offsets, and period conversion.
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
print(ts)
print(ts.shift(2))
print(ts / ts.shift(1) - 1)  # one-period percent change
print(ts.shift(2, freq='M'))  # shift timestamps, not data
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='3D'))

# Anchored offsets added to a plain datetime roll to the matching boundary
now = datetime(2011, 11, 17)
print(now + 3 * Day())
print(now + MonthEnd())
print(now + MonthEnd(2))
offset = MonthEnd()
print(offset.rollforward(now))
print(offset.rollback(now))

# rollforward as a groupby key is equivalent to a monthly resample
ts = Series(np.random.randn(20),
            index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
print(ts.groupby(offset.rollforward).mean())
print(ts.resample('M').mean())

# Timestamp index -> PeriodIndex
rng = pd.date_range('1/1/2000', periods=3, freq='M')
ts = Series(np.random.randn(3), index=rng)
pts = ts.to_period()
print(ts)
print(pts)
) ts ts.shift(2) ts.shift(-2) ts/ts.shift(1)-1 ts.shift(2,freq='M') ts.shift(3,freq='D') ts.shift(1,freq='3D') ts.shift(1,freq='90T') from pandas.tseries.offsets import Day, MonthEnd now = datetime(2011,11,17) now+3*Day() now+MonthEnd() now+MonthEnd(2) now+MonthEnd(3) offset =MonthEnd() offset.rollforward(now) offset.rollback(now) ts=Series(np.random.randn(20),index=pd.date_range('1/15/2000',periods=20,freq='4d') ) ts.groupby(offset.rollforward).mean() ts.resample('M',how='mean') ts.resample('M').mean() import pytz pytz.common_timezones[-5:] tz=pytz.timezone('US/Eastern') tz rng=pd.date_range('3/9/2-12 9:30',periods=6,freq='D') ts=Series(np.random.randn(len(rng)),index=rng) ts print(ts.index.tz)
# 通常是用shift来计算一个时间序列或者多个时间序列中百分比的变化 print ts / ts.shift(1) - 1 # 只是移动shift并不会修改索引, 可以指定频率来对时间戳移动, 而不是移动数据,然后产生NaN print ts.shift(2, freq='M') print ts.shift(3, freq='D') print ts.shift(1, freq='3D') print ts.shift(1, freq='90T') # 通过偏移量对日期进行位移 now = datetime(2011, 11, 17) print now + 3 * Day() # 可以加锚点偏移量, 比如MonthEnd, 在第一次会将原日期前滚(未来)到符合频率规则的下一个日期 print now + MonthEnd() print now + MonthEnd(2) # 对于锚点偏移量的rollforward和rollback方法,可以直接指定前滚(未来)/后滚(以前) offset = MonthEnd() # 前滚 print offset.rollforward(now) # 后滚 print offset.rollback(now) # 可以结合groupby使用这两个滚动方法 ts = Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d')) print ts.groupby(offset.rollforward).mean() # 等价于 print ts.resample('M', how='mean')