Example #1
    def BackTest_1_Beta(self):
        d = self.Dates[0]
        for _ in range(
                int(
                    self.Periods_Diff(self.Dates[0], self.Dates[1]) /
                    self.Frequency)):
            # Compute the date for historical data extract
            d_f = d - BMonthBegin() * self.Histo_Length
            # Extract the Benchmark index returns
            bnch = self.Benchmark.Extract_Index_Returns([d_f, d])
            returns = self.Benchmark.Extract_Returns([d_f, d], d - MonthEnd())
            assets = self.Benchmark.Extract_Compo(d - MonthEnd())
            # Estimate each constituent's beta against the benchmark index
            var_index = bnch.iloc[:, 0].var()
            beta = np.empty(len(assets))
            for j in range(len(beta)):
                # Pairwise covariance drops missing values in the estimation window
                beta[j] = bnch.iloc[:, 0].cov(returns.iloc[:, j]) / var_index

            self.RB_Weighted_1_risk(beta)

            bckData = self.Benchmark.Extract_Returns(
                [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
                d - MonthEnd()).loc[:, assets]
            self.Compute_Performance(bckData)
            self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
            d = (d + BMonthEnd() * self.Frequency) + BDay()
Example #2
 def Test_Sarima_M2(self, c, params, frec='M'):
     # Compute the monthly MAAPE for the variable
     results = params
     pred = results.get_prediction(start=pd.to_datetime(self.begin_train +
                                                        MonthEnd(1)),
                                   dynamic=False)
     pred_uc = results.get_forecast(steps=self.test_tomonth)
     train = pred.predicted_mean
     train.rename('forecast_train', inplace=True)
     test = pred_uc.predicted_mean
     test.rename('forecast_test', inplace=True)
     #both=train.add(test,fill_value=0)
     test_obs = self.Mts1_test[c]
     train_obs = self.Mts1_train[c][self.begin_train + MonthEnd(1):]
     result_test = pd.merge(test,
                            test_obs,
                            how='left',
                            left_index=True,
                            right_index=True)
     result_train = pd.merge(train,
                             train_obs,
                             how='left',
                             left_index=True,
                             right_index=True)
     result_test = result_test.groupby(pd.Grouper(freq=frec)).sum()
     result_train = result_train.groupby(pd.Grouper(freq=frec)).sum()
     MAAPE_train = 0
     MAAPE_test = 0
     z = 0
     for i in range(result_train.shape[0]):
         if not np.isnan(
                 math.atan(
                     math.fabs(result_train.iat[i, 1] -
                               result_train.iat[i, 0]) /
                     result_train.iat[i, 1])):
             MAAPE_train += math.atan(
                 math.fabs(result_train.iat[i, 1] - result_train.iat[i, 0])
                 / result_train.iat[i, 1])
             z += 1
     if z != 0:
         MAAPE_train = MAAPE_train / z
     else:
         MAAPE_train = 999999999
     z = 0
     for j in range(result_test.shape[0]):
         if not np.isnan(
                 math.atan(
                     math.fabs(result_test.iat[j, 1] -
                               result_test.iat[j, 0]) /
                     result_test.iat[j, 1])):
             MAAPE_test += math.atan(
                 math.fabs(result_test.iat[j, 1] - result_test.iat[j, 0]) /
                 result_test.iat[j, 1])
             z += 1
     if z != 0:
         MAAPE_test = MAAPE_test / z
     else:
         MAAPE_test = 999999999
     #print('MAAPE train',MAAPE_train, ' \nMAAPE test', MAAPE_test)
     return [MAAPE_train, MAAPE_test]
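For reference, the two loops above implement MAAPE = mean(arctan(|observed - forecast| / observed)) over the rows where the ratio is defined; below is a self-contained, vectorized sketch with purely hypothetical values:

import numpy as np
import pandas as pd

demo = pd.DataFrame({'forecast': [100.0, 90.0, 120.0],
                     'observed': [110.0, 95.0, 100.0]})
err = np.arctan((demo['observed'] - demo['forecast']).abs() / demo['observed'])
maape = err.mean() if err.notna().any() else 999999999  # same fallback sentinel as above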
def create_first_production_first_payment_and_last_payment_date(df):

    # initialize new columns of dataframe
    df['First Payment Date'] = pd.to_datetime(0)
    df['Last Payment Date'] = pd.to_datetime(0)

    # Make first production date: the month end one month after the in-service date
    df['First Production Date'] = pd.to_datetime(
        df['InService Date']) + pd.DateOffset(months=1) + MonthEnd(0)

    # Default the first payment date to the first production date; PPA systems are overridden below
    df['First Payment Date'] = df['First Production Date']

    # Set the last payment date 300 months after the first payment date
    df['Last Payment Date'] = df['First Payment Date'] + pd.DateOffset(
        months=300) + MonthEnd(0)

    df_ppas = get_contract_type(
        df, ['PPA', 'PPA-EZ', 'EZ PPA-Connect']).loc[:, 'InService Date']

    df.loc[df_ppas.index, 'First Payment Date'] = df_ppas + pd.DateOffset(
        months=2) + MonthEnd(0)

    df.loc[df_ppas.index, 'Last Payment Date'] = df_ppas + pd.DateOffset(
        months=301) + MonthEnd(0)

    return df
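For a single, purely illustrative in-service date, the offsets above resolve as follows (a sketch, not part of the original module):

import pandas as pd
from pandas.tseries.offsets import MonthEnd

inservice = pd.to_datetime('2020-01-15')
first_production = inservice + pd.DateOffset(months=1) + MonthEnd(0)        # 2020-02-29
ppa_first_payment = inservice + pd.DateOffset(months=2) + MonthEnd(0)       # 2020-03-31
last_payment = first_production + pd.DateOffset(months=300) + MonthEnd(0)   # 2045-02-28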
Example #4
def GetDefaultAnnouncementDate(report_periods, adjusted=False):
    """
    根据财报披露的时间规则找到公告日期
    
    Parameters:
    -----------
    report_periods: str or list of str
        报告期  
    adjusted: bool
        调整前的财报还是调整后的财报。
    
    Returns:
        DatetimeIndex
    """
    from pandas.tseries.offsets import MonthEnd
    periods = pd.to_datetime(report_periods)
    quarters = periods.quarter
    if isinstance(quarters, int):
        quarters = 1 if quarters == 3 else quarters
    else:
        quarters = np.where(quarters == 3, 1, quarters)
    announcements = periods + MonthEnd(1) * quarters
    if adjusted:
        announcements = announcements + MonthEnd(12)
    return announcements
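A quick sanity check of the disclosure rule with scalar report periods (illustrative dates only; Q3 is treated like Q1, i.e. due one month after the period end):

import pandas as pd
from pandas.tseries.offsets import MonthEnd

print(pd.to_datetime('2020-06-30') + MonthEnd(1) * 2)  # 2020-08-31, two months after the Q2 period end
print(pd.to_datetime('2020-09-30') + MonthEnd(1) * 1)  # 2020-10-31, one month after the Q3 period end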
Example #5
def _reduce(price_data, period, term, region):
    calendar = build_trading_calendar(region)
    
    if term == 'start':
        if period == 'weekly':
            pmap = price_data.index.dayofweek
            price_data = price_data[pmap == 0]
        if period == 'monthly':
            calendar = CustomBusinessMonthBegin(calendar=calendar)
            
            st = price_data.index[0] + MonthBegin()
            end = price_data.index[-1] + MonthBegin()
            pmap = pd.date_range(st,end,freq = calendar)
            price_data['pmap'] = [1 if idx in pmap else 0 for idx in price_data.index]
            price_data = price_data[price_data['pmap'] == 1].drop('pmap', axis = 1)
    
    if term == 'end':
        if period == 'weekly':
            pmap = price_data.index.dayofweek
            price_data = price_data[pmap == 4]
        if period == 'monthly':
            calendar = CustomBusinessMonthEnd(calendar=calendar)
            
            st = price_data.index[0] + MonthEnd()
            end = price_data.index[-1] + MonthEnd()
            pmap = pd.date_range(st,end,freq = calendar)
            price_data['pmap'] = [1 if idx in pmap else 0 for idx in price_data.index]
            price_data = price_data[price_data['pmap'] == 1].drop('pmap', axis = 1)
    
    return price_data
Example #6
def resample_index(dat, to_freq):
    '''
    For example: 20180808 -> 20180831
                 20180809 -> 20180831
    Notes:
        1. This function rewrites the index of the data, so whenever you feed data into it,
           pass a copy() to avoid accidentally mutating the original;
        2. The real trading dates are masked (every date is mapped to the calendar period end).
    :param dat:
    :param to_freq:
    :return:
    '''
    data = dat.copy()
    if to_freq == 'M':
        data.index = data.index.where(
            data.index == ((data.index + MonthEnd()) - MonthEnd()),
            data.index + MonthEnd())
    elif to_freq == 'W':
        # By=lambda x: x.year*100 + x.week  # this approach gives wrong dates when converting to week-end dates
        week_day = 5  # 0-6 correspond to Monday through Sunday
        data.index = data.index.where(
            data.index == ((data.index + Week(weekday=week_day)) - Week()),
            data.index + Week(weekday=week_day))

    elif to_freq == 'Y':
        data.index = data.index.where(
            data.index == ((data.index + YearEnd()) - YearEnd()),
            data.index + YearEnd())
    return data
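A minimal usage sketch for the docstring example above, with hypothetical data (note the copy(), and that the offsets used by resample_index must be imported):

import pandas as pd
from pandas.tseries.offsets import MonthEnd, Week, YearEnd

df = pd.DataFrame({'x': [1, 2, 3]},
                  index=pd.to_datetime(['2018-08-08', '2018-08-09', '2018-08-31']))
print(resample_index(df.copy(), 'M').index)  # all three dates map to 2018-08-31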
Example #7
def format_date(curr_date, prev_date, freq):
    if freq == '6':
        # Quarterly
        if int(curr_date[6:]) == 1:
            curr_date = curr_date[:5] + '01/01'
        elif int(curr_date[6:]) == 2:
            curr_date = curr_date[:5] + '04/01'
        elif int(curr_date[6:]) == 3:
            curr_date = curr_date[:5] + '07/01'
        else:
            curr_date = curr_date[:5] + '10/01'

        report_date = datetime.strptime(curr_date, "%Y/%m/%d") + QuarterEnd(1)
    elif freq == '8':
        curr_date = curr_date + '/01'
        report_date = datetime.strptime(curr_date, "%Y/%m/%d") + MonthEnd(1)
    elif freq == '9':
        if not prev_date or prev_date != curr_date:
            curr_date = curr_date + '/15'
            report_date = datetime.strptime(curr_date, "%Y/%m/%d")
        else:
            curr_date = curr_date + '/01'
            report_date = datetime.strptime(curr_date, "%Y/%m/%d") + MonthEnd(1)

    return report_date
Example #8
def GetEndDateList(data, freq, trim_end=False):
    '''
    trim is mainly intended for daily data
    :param data:
    :param freq:
    :param trim_end:
    :return:
    '''
    if freq=='M':
        date_list=data.index.where(data.index == ((data.index + MonthEnd()) - MonthEnd()),
                                      data.index + MonthEnd())
        #date_list=pd.to_datetime(date_list.astype(str),format='%Y%m')+MonthEnd()
    elif freq=='W':
        week_day = 5  # 0-6 correspond to Monday through Sunday
        date_list = data.index.where(data.index == ((data.index + Week(weekday=week_day)) - Week()),
                                      data.index + Week(weekday=week_day))

        #date_list=pd.to_datetime(date_list.astype(str).str.pad(7,side='right',fillchar='6'),format='%Y%W%w')
    elif freq=='Y':
        date_list = data.index.where(data.index == ((data.index + YearEnd()) - YearEnd()),
                                      data.index + YearEnd())
        #date_list = pd.to_datetime(date_list.astype(str), format='%Y') + YearEnd()
    if trim_end:
        return sorted(set(date_list))[:-1]
    else:
        return sorted(set(date_list))
Example #9
    def calculate_factors(self, prices, dividends, assets, ROE, marketcap):

        # Lining up dates to end of month
        prices.columns = prices.columns + MonthEnd(0)
        dividends.columns = dividends.columns + MonthEnd(0)
        assets.columns = assets.columns + MonthEnd(0)
        ROE.columns = ROE.columns + MonthEnd(0)
        marketcap.columns = marketcap.columns + MonthEnd(0)

        # Standardizing columns
        dividends, assets, ROE = self._padronize_columns(
            prices.columns, dividends, assets, ROE)

        # Basic information
        self.securities = {
            'assets': assets,
            'ROE': ROE,
            'price': prices,
            'marketcap': marketcap,
            'dividends': dividends
        }

        # Gathering info
        self.securities = self._get_IA_info(self.securities)
        self.securities = self._get_return(self.securities)
        self.securities = self._get_benchmarks(self.securities)
        self.securities['sizecls'] = self._get_sizecls(self.securities)
        self.securities['iacls'] = self._get_iacls(self.securities)
        self.securities['ROEcls'] = self._get_ROEcls(self.securities)
        self.securities['cls'] = self.securities['sizecls'] + self.securities[
            'iacls'] + self.securities['ROEcls']

        # Calculating factors
        self.HXLInvestment = self.get_investment()
        self.HXLProfit = self.get_profit()
Example #10
def prep_FF_monthly(path):

    # Load Fama French three factors
    FF3 = pd.read_csv(path + '\\Data\\F-F_Research_Data_Factors.csv')

    # Convert date column to date
    FF3['DATE'] = pd.to_datetime(FF3['DATE'].astype(str), format='%Y%m')

    # Make sure data is at the end of the month
    FF3['DATE'] = FF3['DATE'] + MonthEnd(0)

    # Divide columns by 100
    FF3[['Mkt-RF', 'SMB', 'HML', 'RF']] = FF3[['Mkt-RF', 'SMB', 'HML',
                                               'RF']].div(100)

    # Load faux Fama French monthly momentum
    FF_mom = pd.read_csv(path + '\\Data\\F-F_Momentum_Factor_monthly.csv')

    # Convert date column to datetime
    FF_mom['DATE'] = pd.to_datetime(FF_mom['DATE'].astype(str),
                                    format='%Y-%m-%d')

    # Make sure dates are at month end (Excel moved them to the first of the month)
    FF_mom['DATE'] = FF_mom['DATE'] + MonthEnd(0)

    # Merge Fama French data
    FF = FF3.merge(FF_mom, on='DATE')

    # Save risk-free rate
    RF = FF[['DATE', 'RF']]

    # Drop the risk-free rate from the Fama French dataframe
    FF.drop('RF', axis=1, inplace=True)

    return FF, RF
def dayMoveOutFast():
    sundays = 0
    days = [0, 1, 2]
    date = datetime.now()
    fom = date.replace(day=1)
    for n in days:
        wd = timedelta(days=n)
        bd = fom + wd
        wdCheck = bd.weekday()
        if wdCheck == 6:
            sundays = sundays + 1
    if sundays > 0:
        fbdom = fom + timedelta(days=3)
    else:
        fbdom = fom + timedelta(days=2)
    if fbdom >= date:
        date = date + relativedelta(months=+2)
        dmo = pd.to_datetime(date, format='%Y-%m-%d') + MonthEnd(1)
        dmo = datetime.strftime(dmo, "%d.%m.%Y")
        output = "Ihr schnellstmöglicher Kündigungstermin ist der " + dmo + "."
        print(output)
        return output
    else:
        date = date + relativedelta(months=+3)
        dmo = pd.to_datetime(date, format='%Y-%m-%d') + MonthEnd(1)
        dmo = datetime.strftime(dmo, "%d.%m.%Y")
        output = "Ihr schnellstmöglicher Kündigungstermin ist der " + dmo + "."
        print(output)
        return output
Example #12
def GetEndDateList(data, freq, trim_end=False):
    '''
    trim is mainly used when resampling daily data to a lower frequency
    :param data:
    :param freq:
    :param trim_end:
    :return:
    '''
    if freq == 'M':
        date_list = data.index.where(
            data.index == ((data.index + MonthEnd()) - MonthEnd()),
            data.index + MonthEnd())
    elif freq == 'W':
        week_day = 5  # 0-6 correspond to Monday through Sunday
        date_list = data.index.where(
            data.index == ((data.index + Week(weekday=week_day)) - Week()),
            data.index + Week(weekday=week_day))
    elif freq == 'Y':
        date_list = data.index.where(
            data.index == ((data.index + YearEnd()) - YearEnd()),
            data.index + YearEnd())
    if trim_end:
        return sorted(set(date_list))[:-1]
    else:
        return sorted(set(date_list))
def crsp_block(upload_type, conn):
    if upload_type == 'from_wrds':
        #Market data
        Querry_crsp = """
        select a.permno, a.permco, a.date, b.shrcd, b.exchcd,
        a.ret, a.retx, a.shrout, a.prc
        from crsp.msf as a
        left join crsp.msenames as b
        on a.permno=b.permno
        and b.namedt<=a.date
        and a.date<=b.nameendt
        where a.date between '01/01/1959' and '12/31/2017'
        and b.exchcd between 1 and 3
        """
        Querry_dlret = """
        select permno, dlret, dlstdt 
        from crsp.msedelist
        """

        try:
            crsp_m = conn.raw_sql(Querry_crsp)
            dlret = conn.raw_sql(Querry_dlret)
        except:  # OperationalError:
            conn = wrds.Connection()
            crsp_m = conn.raw_sql(Querry_crsp)
            dlret = conn.raw_sql(Querry_dlret)

    elif upload_type == 'from_file':
        #crsp_m = pd.read_csv('.data/crsp_m.csv.gz', compression='gzip')
        crsp_m = pd.read_pickle('data/crsp_m_modified.pkl')
        if bool(np.isin('Unnamed: 0', crsp_m.columns)):
            crsp_m = crsp_m.drop('Unnamed: 0', axis=1)
        #dlret = pd.read_csv('.data/dlret.csv.gz', compression='gzip')
        dlret = pd.read_pickle('data/dlret.pkl')
        if bool(np.isin('Unnamed: 0', dlret.columns)):
            dlret = dlret.drop('Unnamed: 0', axis=1)

    # change variable format to int
    crsp_m[['permco', 'permno', 'shrcd',
            'exchcd']] = crsp_m[['permco', 'permno', 'shrcd',
                                 'exchcd']].astype(int)
    # Line up date to be end of month
    crsp_m['date'] = pd.to_datetime(crsp_m['date'])
    crsp_m['jdate'] = crsp_m['date'] + MonthEnd(0)

    # add delisting return
    dlret.permno = dlret.permno.astype(int)
    dlret['dlstdt'] = pd.to_datetime(dlret['dlstdt'])
    dlret['jdate'] = dlret['dlstdt'] + MonthEnd(0)

    crsp = pd.merge(crsp_m, dlret, how='left', on=['permno', 'jdate'])
    crsp['dlret'] = crsp['dlret'].fillna(0)
    crsp['ret'] = crsp['ret'].fillna(0)
    crsp['retadj'] = (1 + crsp['ret']) * (1 + crsp['dlret']) - 1
    crsp['me'] = crsp['prc'].abs() * crsp['shrout']  # calculate market equity
    crsp = crsp.drop(['dlret', 'dlstdt', 'shrout'], axis=1)
    crsp = crsp.sort_values(by=['jdate', 'permco', 'me'])

    return crsp
Example #14
def meses_3():
    ini=[0 for r in range(3)]
    hoy= pd.to_datetime('today').replace(hour=0,minute=0,second=0,microsecond=0,nanosecond=0)
    hoy_inicio_mes=hoy.replace(day=1)
    ini[0]= hoy_inicio_mes + relativedelta(months=1) + MonthEnd(1)
    ini[1]= hoy_inicio_mes + relativedelta(months=2) + MonthEnd(1)
    ini[2]= hoy_inicio_mes + relativedelta(months=3)+ MonthEnd(1)
    return ini
def create_first_payment_and_last_payment_date(df):
    # Make first payment date
    df['First Payment Date'] = pd.to_datetime(
        df['InService Date']) + pd.DateOffset(months=1) + MonthEnd(0)

    # Make Last Payment Date 299 months after the first payment (300 payments in total)
    df['Last Payment Date'] = df['First Payment Date'] + pd.DateOffset(
        months=299) + MonthEnd(0)

    return df
Example #16
def test(policy, market, optimizer, scheduler, test_begin):
    global date

    print('Test Begins')

    test_log = pd.DataFrame()
    test_port = pd.DataFrame()

    # subtract 1 month so that we can use next_state method directly for test_begin
    date = test_begin - MonthEnd(1)

    # Initiate test iteration
    for _ in count(1):

        # run episode
        stock_code, weights, avg_ret, cum_ret, sr, dl = run_episode(
            policy, market, optimizer, scheduler, train_flag=False)

        # episode start date and end date
        ep_start_date = datetime.strftime(date - MonthEnd(11), '%Y-%m')
        ep_end_date = datetime.strftime(date, '%Y-%m')

        # Test_port report prep
        temp_port = pd.DataFrame()
        temp_port['Stock_Codes'] = stock_code
        temp_port['Weights'] = weights
        temp_port['Invest_Date'] = dl
        temp_port.set_index('Invest_Date', inplace=True)

        # Concat new report
        test_port = pd.concat([test_port, temp_port], axis=0)
        del temp_port

        # Test_log report prep
        test_log.at[f'{ep_start_date}:{ep_end_date}',
                    'Average_Return'] = avg_ret
        test_log.at[f'{ep_start_date}:{ep_end_date}',
                    'Cumulative_Return'] = cum_ret
        test_log.at[f'{ep_start_date}:{ep_end_date}', 'Sharpe_Ratio'] = sr

        # Calculate episode stop date (subtract 12 months + 1 month)
        stop_date = market.last_date() - relativedelta(
            months=13) + relativedelta(day=31)

        # Stop episode on a specific date
        if date > stop_date:
            print(f'last episode end date: {datetime.strftime(date, "%Y-%m")}')
            break

        # Name index of test_log
        test_log.index.name = 'Investment Period'

    return test_log, test_port
def to_monthend(dt):
    """Return calendar monthend date given an int date or list"""
    if is_list_like(dt):
        return [to_monthend(d) for d in dt]
    if dt <= 9999:
        d = datetime.datetime(year=dt, month=12, day=1) + MonthEnd(0)
    elif dt <= 999999:
        d = datetime.datetime(year=dt // 100, month=dt % 100,
                              day=1) + MonthEnd(0)
    else:
        d = pd.to_datetime(str(dt)) + MonthEnd(0)
    return int(d.strftime('%Y%m%d'))
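For illustration, the three int-date widths handled above map as follows (hypothetical inputs, assuming the imports used by to_monthend are in scope):

print(to_monthend(2023))            # 20231231 (year only -> December month end)
print(to_monthend(202302))          # 20230228 (YYYYMM -> that month's end)
print(to_monthend(20230215))        # 20230228 (YYYYMMDD -> rolled to month end)
print(to_monthend([2023, 202302]))  # list in, list out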
Example #18
def get_mom_ind(delay, signal):

    # Keep firms with positive total equity and an unadjusted price of at least 5 dollars
    good_firm = (stocks_month['TOT_EQUITY'] >
                 0) & (stocks_month['PRICE_UNADJUSTED'] >= 5)
    stocks_sub = stocks_month.loc[good_firm, ['DATE', 'RETURN', 'INDUSTRY']]

    # Compute equal weighted returns
    ind = stocks_sub.groupby(['DATE',
                              'INDUSTRY'])['RETURN'].mean().reset_index()

    # Compute the number of firms in each DATE/INDUSTRY cluster
    ind['n'] = stocks_sub.groupby(['DATE',
                                   'INDUSTRY'])['RETURN'].transform('count')

    # Sort values
    ind.sort_values(['INDUSTRY', 'DATE'], inplace=True)

    # Determine the valid observations
    ind['valid'] = ind['DATE'].shift(signal + delay) + dt.timedelta(
        days=7) + MonthEnd(signal + delay) == ind['DATE'] + MonthEnd(0)

    # Determine momentum
    ind['MOM'] = ind.groupby('INDUSTRY')['RETURN'].transform(
        lambda x: x.shift(1 + delay).rolling(signal).apply(prod))

    # Get rid of the invalid observations
    ind.loc[ind['valid'] == False, 'MOM'] = np.nan

    # Remove observations with undefined momentum or fewer than 5 firms in cluster
    ind = ind.loc[ind['MOM'].notna() & (ind['n'] > 4), :]

    # Drop variables that have done their jobs
    ind.drop(['valid', 'n'], axis=1, inplace=True)

    # Create quintiles; add 1 to avoid zero-indexing confusion
    ind['quintile'] = 1 + ind[[
        'DATE', 'MOM'
    ]].groupby('DATE')['MOM'].transform(
        lambda x: pd.qcut(x, 5, duplicates='raise', labels=False))

    # Calculate equal weighted returns within each quintile
    I = ind.groupby(['DATE', 'quintile'])['RETURN'].mean().reset_index()

    # Make quintiles the columns
    ind_5 = I.pivot(index='DATE', columns='quintile',
                    values='RETURN').reset_index()

    # Construct winners minus losers
    ind_5['Q5-Q1'] = ind_5[len(ind_5.columns) - 1] - ind_5[1]

    return ind_5
Example #19
def calc_weighted_rtn(stock, column, wt, monthly=True):

    # Create a deep copy so we don't modify the original data frame
    data = copy.deepcopy(stock)

    if (wt == 'vw') | (wt == 'ivw'):
        # Create market equity column
        data['ME'] = data['TOTAL_SHARES'] * data['PRICE_UNADJUSTED']

        # Sort values
        data.sort_values(['DW_INSTRUMENT_ID', 'DATE'], inplace=True)

        if wt == 'vw':
            # Shift results; some dates may be more than a month previous
            data['wt'] = data.groupby('DW_INSTRUMENT_ID')['ME'].shift(1)

        else:
            data['wt'] = 1 / data.groupby('DW_INSTRUMENT_ID')['ME'].shift(1)

        # Check if valid
        if monthly == True:
            data['valid'] = data['DATE'].shift(1) + dt.timedelta(
                days=7) + MonthEnd(0) == data['DATE'] + MonthEnd(0)
        else:
            data['valid'] = data['DATE'].shift(1) + dt.timedelta(
                days=7) == data['DATE']

        data.loc[data['valid'] == False, 'wt'] = np.nan

        # Drop ME and valid flag
        data.drop(['ME', 'valid'], axis=1, inplace=True)
    else:
        # If EW all weights are 1
        data.loc[data['RETURN'].notna(), 'wt'] = 1

    # Collect total ME_lag value
    data['sum'] = data.groupby(['DATE', column])['wt'].transform('sum')

    # Divide ME_lag by sum
    data['wt'] = data['wt'] / data['sum']

    # Weight returns
    data['RETURN'] = data['RETURN'] * data['wt']

    # Calculate weighted average
    data['RETURN'] = data.groupby(['DATE', column])['RETURN'].transform('sum')

    # Record number
    data['N*'] = data.groupby(['DATE',
                               column])['DW_INSTRUMENT_ID'].transform('count')

    return data['RETURN'], data['N*']
Example #20
def make_monthly(df: pd.DataFrame,
                 method='ffill',
                 add_endpoints: list = None) -> pd.DataFrame:
    if add_endpoints:
        ind = df.index.to_series()
        if add_endpoints[0]:
            ind[0] = ind[0] - MonthEnd(1)
        if add_endpoints[1]:
            ind[-1] = ind[-1] + MonthEnd(1)
        df.set_index(ind, inplace=True)
    df = df.asfreq(freq='1M', method=method, how="e")
    df.index += MonthEnd(0)
    return df
Example #21
    def plot_SARIMA_M(self,
                      c,
                      params,
                      frec='M',
                      date_init='2019-01-01'
                      ):  # freq can be 'D', 'W', 'Y', etc. (day, week, year)
        # Plot the monthly SARIMA forecast
        results = params[0]
        pred = results.get_prediction(start=pd.to_datetime(self.begin_train +
                                                           MonthEnd(1)),
                                      dynamic=False)
        pred_uc = results.get_forecast(steps=self.test_tomonth)
        train = pred.predicted_mean
        train.rename('forecast_train', inplace=True)
        test = pred_uc.predicted_mean
        test.rename('forecast_test', inplace=True)
        #both=train.add(test,fill_value=0)
        test_obs = self.Mts2_test[c]
        train_obs = self.Mts2_train[c][self.begin_train + MonthEnd(1):]
        result_test = pd.merge(test,
                               test_obs,
                               how='left',
                               left_index=True,
                               right_index=True)
        result_train = pd.merge(train,
                                train_obs,
                                how='left',
                                left_index=True,
                                right_index=True)
        out_test = result_test.groupby(pd.Grouper(freq=frec)).sum()
        out_train = result_train.groupby(pd.Grouper(freq=frec)).sum()

        ax = out_train.plot(kind='line',
                            y='D fix',
                            color='g',
                            label='Real demand',
                            title='SARIMA forecast',
                            grid=True)
        out_train.plot(kind='line',
                       y='forecast_train',
                       color='b',
                       ax=ax,
                       label='Forecast Train')

        out_test.plot(kind='line',
                      y='forecast_test',
                      color='r',
                      ax=ax,
                      label='Forecast test')
        out_test.plot(kind='line', y='D fix', color='g', ax=ax, legend=False)
        return [out_test, out_train]
def date_11():
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts = Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
    print(ts.resample('M').mean())
Example #23
    def random_state(self, train_begin, train_end, seed_num):
        'Draw a random state in the form of a tensor'
        #list of dates that we can choose from
        date_choice = [
            item for item in self.datelist if item >= train_begin +
            MonthEnd(12) and item <= train_end - MonthEnd(11)
        ]

        #Set a seed_number
        #np.random.seed(seed_num)

        #draw random a random date
        random_date = np.random.choice(date_choice)
        return random_date, self.encoder_12m(random_date)
Example #24
 def BackTest_EqualWeight(self):
     d = self.Dates[0]
     for _ in range(
             int(
                 self.Periods_Diff(self.Dates[0], self.Dates[1]) /
                 self.Frequency)):
         assets = self.Benchmark.Extract_Compo(d - MonthEnd())
         self.Equal_Weighted(len(assets))
         bckData = self.Benchmark.Extract_Returns(
             [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
             d - MonthEnd()).loc[:, assets]
         self.Compute_Performance(bckData)
         self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
         d = (d + BMonthEnd() * self.Frequency) + BDay()
Example #25
    def test_month_range_union_tz_dateutil(self, sort):
        from pandas._libs.tslibs.timezones import dateutil_gettz

        tz = dateutil_gettz("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)
Example #26
    def test_month_range_union_tz_pytz(self, sort):
        from pytz import timezone

        tz = timezone("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)
Example #27
 def set_current(self):
     now = datetime.date.today()
     offset_m, offset_q = MonthEnd(), QuarterEnd()
     self.newest_date['M'] = offset_m.rollback(now)
     self.newest_date['Q'] = offset_q.rollback(now)
     self.newest_date['D'] = now - timedelta(days=1)
     self.newest_date['Y'] = YearEnd().rollback(now)
     half1 = datetime.date(now.year, 6, 30)
     half2 = datetime.date(now.year, 12, 31)
     if now < half1:
         self.newest_date['H'] = datetime.date(now.year - 1, 12, 31)
     elif now < half2:
         self.newest_date['H'] = half1
     else:
         self.newest_date['H'] = half2
Example #28
 def BackTest_MinVar(self):
     d = self.Dates[0]
     for _ in range(
             int(
                 self.Periods_Diff(self.Dates[0], self.Dates[1]) /
                 self.Frequency)):
         d_f = d - BMonthBegin() * self.Histo_Length
         vcov, assets = self.Benchmark.Extract_VCov_Matrix([d_f, d],
                                                           d - MonthEnd())
         self.MinVariance(vcov, 0.001)
         bckData = self.Benchmark.Extract_Returns(
             [d + BDay(), (d + BDay()) + BMonthEnd() * self.Frequency],
             d - MonthEnd()).loc[:, assets]
         self.Compute_Performance(bckData)
         self.Wght_Histo[d] = pd.DataFrame(self.Weights, index=assets)
         d = (d + BMonthEnd() * self.Frequency) + BDay()
        def gen_dr(df_row: pd.DataFrame) -> pd.DatetimeIndex:
            start_yr = int(df_row['本批次周期'][0])
            start_mon = int(df_row['本批次周期'][1])

            # start date of this batch
            if df_row['本批次周期'][2] == u'上旬':
                start_d = 1
            elif df_row['本批次周期'][2] == u'中旬':
                start_d = 11
            elif df_row['本批次周期'][2] == u'整月':
                start_d = 1
            else:
                start_d = 16
                if df_row['上一批次周期'][4] == '中旬':
                    start_d = 21
            start_date = date(start_yr, start_mon, start_d)

            # end date of this batch
            end_yr = int(df_row['本批次周期'][0])
            end_mon = int(df_row['本批次周期'][3])
            if df_row['本批次周期'][4] == u'中旬':
                end_d = 20
            elif df_row['本批次周期'][4] == u'下旬' or df_row['本批次周期'][4] == u'整月':
                end_d = (date(end_yr, end_mon, 1) + MonthEnd(1)).day
            else:
                end_d = 15
                if df_row['下一批次周期'][2] == '中旬':
                    end_d = 10
            end_date = date(end_yr, end_mon, end_d)
            dt_range = pd.date_range(start_date, end_date)
            return dt_range
Example #30
def merging_with_macros(processed_companies, processed_macro_from_daily,
                        processed_macro_from_monthly: DataFrame) -> DataFrame:
    macro = processed_macro_from_daily.merge(processed_macro_from_monthly,
                                             how="left",
                                             left_index=True,
                                             right_index=True)

    lagging_variables = [
        LOG_EXPORT, LOG_IMPORT, LOG_INDUSTRY_PRODUCTION_US,
        LOG_INDUSTRY_PRODUCTION_EURO, LOG_INDUSTRY_PRODUCTION_KOR
    ]

    contemporaneous_variables = list(
        set(list(macro.columns)) - set(lagging_variables))

    macro_contemporaneous = macro[contemporaneous_variables].copy(deep=True)
    macro_lagging = macro[lagging_variables].copy(deep=True)

    processed_companies = processed_companies.merge(macro_contemporaneous,
                                                    how='left',
                                                    left_on="date",
                                                    right_index=True)

    macro_lagging['mdate_lag'] = macro_lagging.index + MonthEnd(-1)
    macro_lagging = macro_lagging.reset_index().set_index('mdate_lag').copy(
        deep=True)
    macro_lagging = macro_lagging.drop(columns='date').copy(deep=True)

    # Lagging
    merged_companies = processed_companies.merge(macro_lagging,
                                                 how='left',
                                                 left_on="date",
                                                 right_index=True)

    return merged_companies
Example #31
def process_geracao_folha(pfunc: DataFrame, ppessoa: DataFrame,
                          pparam: DataFrame, psecao: DataFrame,
                          pfhstaft: DataFrame) -> DataFrame:
    """
        TODO: doc string

        Afastamento menor ou igual a 15 dias, não deve ser levado para a staging Gerou Folha.
	        Exemplo 1: Início em 25/06 e término em 30/06 - 5 dias(Não deve constar na Gerou Folha)
	        Exemplo 2: Início em 25/06 e término em 10/07 - 15 dias(Não deve constar na Gerou Folha)
        Afastamento maior que 15 dias deve verificar se contempla o mês inteiro, caso positivo deve ser levado para o Gerou Folha. Caso negativo o mês não deve ser considerado no Gerou Folha.
	        Exemplo 1: Início em 15/05 e término em 10/07 - 56 dias(Deve levar para o gerou folha, apenas a informação referente ao mês 6)
	        Exemplo 2: Início em 15/05 e término em 15/06 - 30 dias(Não deve levar o funcionário para o Gerou Folha)
        Afastamento sem data final, deve verificar se contempla o mês inteiro, caso positivo deve ser levado para o Gerou Folha
	        Exemplo 1: Início em 20/04 e sem datafim(Deve levar para o mês 6 e para os meses seguintes até existir uma datafim)
	        Exemplo 2: Início em 15/04 e sem datafim(Deve levar para o mês 5, 6 e para os meses seguintes até existir uma datafim)
    """
    #  .query('(_dtinicio - _dtfinal).dt.days > 15')

    return (
        pfunc.merge(ppessoa,
                    left_on=['codpessoa'],
                    right_on=['codigo'],
                    how='inner').merge(psecao,
                                       left_on=['codcoligada', 'codsecao'],
                                       right_on=['codcoligada', 'codigo'],
                                       how='inner').
        query('codsituacao != "E" & codsituacao != "W" & codsituacao != "R"').
        merge(pparam,
              left_on=['codcoligada'],
              right_on=['codcoligada'],
              how='inner').merge(
                  pfhstaft,
                  left_on=['codcoligada', 'chapa'],
                  right_on=['codcoligada', 'chapa'],
                  how='inner').query('tipoafastamento != "N"').assign(
                      _dtinicio=lambda df: df['dtinicio']).assign(
                          _dtfinal=lambda df: df['dtfinal']).replace(
                              {
                                  '_dtfinal': '0001-01-01T00:00:00.000Z',
                                  '_dtinicial': '0001-01-01T00:00:00.000Z'
                              }, '2099-12-31T23:59:59.000Z').
        assign(_dtinicio=lambda df: to_datetime(
            df['_dtinicio'], format='%Y-%m-%dT%H:%M:%S.%f'
        ).dt.tz_convert(None)).assign(_dtfinal=lambda df: to_datetime(
            df['_dtfinal'], format='%Y-%m-%dT%H:%M:%S.%f'
        ).dt.tz_convert(None)).assign(_dtcomp_endmonth=lambda df: to_datetime(
            df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str),
            format='%m-%Y'
        ) + MonthEnd(0)).query(
            '(_dtinicio.dt.month <= mescomp & _dtinicio.dt.year <= anocomp) & (_dtfinal.dt.month >= mescomp & _dtfinal.dt.year >= anocomp) & (_dtfinal - _dtinicio).dt.days > 15 & abs((_dtcomp_endmonth - _dtinicio).dt.days) > 15 & _dtcomp_endmonth <= _dtfinal'
        ).assign(gerou_folha=lambda df: ((df['_dtinicio'].dt.month == df[
            'mescomp']) & (df['_dtinicio'].dt.year == df['anocomp'])) |
                 ((df['_dtfinal'].dt.month == df['mescomp']) &
                  (df['_dtfinal'].dt.year == df['anocomp']))).assign(
                      cnpj=lambda df: df['cgc'].str.replace(r'\.|\/|\-', ''))[[
                          'anocomp', 'mescomp', 'cpf', 'cnpj', 'gerou_folha'
                      ]].rename({
                          'anocomp': 'ano',
                          'mescomp': 'mes'
                      }, axis=1))
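A rough scalar check of the leave rules in the docstring, mirroring the 56-day example (hypothetical year; a sketch, separate from the pipeline above):

import pandas as pd
from pandas.tseries.offsets import MonthEnd

inicio, final = pd.to_datetime('2021-05-15'), pd.to_datetime('2021-07-10')
comp_end = pd.to_datetime('2021-06-01') + MonthEnd(0)   # competence month: June
longer_than_15_days = (final - inicio).days > 15        # 56 days
covers_june = (comp_end - inicio).days > 15 and comp_end <= final
print(longer_than_15_days and covers_june)              # True -> only month 6 goes to Gerou Folha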
def slide7():
    from pandas.tseries.offsets import Hour, Minute
    hour = Hour()
    print(hour)
    four_hours = Hour(4)
    print(four_hours)
    print(pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h'))

    print(Hour(2) + Minute(30))
    print(pd.date_range('1/1/2000', periods=10, freq='1h30min'))

    ts = Series(np.random.randn(4),
                index=pd.date_range('1/1/2000', periods=4, freq='M'))
    print(ts)
    print(ts.shift(2))
    print(ts.shift(-2))
    print('2 M')
    print(ts.shift(2, freq='M'))
    print('3 D')
    print(ts.shift(3, freq='D'))
    print('1 3D')
    print(ts.shift(1, freq='3D'))
    print('1 90T')
    print(ts.shift(1, freq='90T'))

    print('shifting dates with offsets')
    from pandas.tseries.offsets import Day, MonthEnd
    now = datetime(2011, 11, 17)
    print(now + 3 * Day())
    print(now + MonthEnd())
    print(now + MonthEnd(2))

    offset = MonthEnd()
    print(offset)
    print(offset.rollforward(now))
    print(offset.rollback(now))

    ts = Series(np.random.randn(20),
                index=pd.date_range('1/15/2000', periods=20, freq='4d'))
    print(ts.groupby(offset.rollforward).mean())
def timestamp_rollforward_rollback():
    """ How to roll a date forward (toward the end of a period) or backward (toward its beginning) """
    now = datetime(2014, 4, 15)
    print("Current time is:", now)
    now = now + 3 * Day()
    print("Adding 3 days to now:", now)

    offset = MonthEnd()
    now = offset.rollforward(now)
    print("Rolling forward to last day of the month", now)

    offset = MonthBegin()
    now = offset.rollback(now)
    print("Rolling back to first day of the month", now)

    ts = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000',
        periods=20, freq='4d'))
    print("Original Time Series is:\n", ts)

    offset = YearBegin()
    ts = ts.groupby(offset.rollforward).mean()
    print("Time Series after rolling forward\n", ts)
Example #34
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
print(ts)
print(ts.shift(2))
print(ts / ts.shift(1) - 1)
print(ts.shift(2, freq='M'))
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='3D'))

now = datetime(2011, 11, 17)
print(now + 3 * Day())
print(now + MonthEnd())
print(now + MonthEnd(2))

offset = MonthEnd()
print(offset.rollforward(now))
print(offset.rollback(now))

ts = Series(np.random.randn(20),
            index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts)
print(ts.groupby(offset.rollforward).mean())
print(ts.resample('M').mean())

rng = pd.date_range('1/1/2000', periods=3, freq='M')
ts = Series(np.random.randn(3), index=rng)
pts = ts.to_period()
print(ts)
print(pts)
Example #35
ts=Series(np.random.randn(4),index=pd.date_range('1/1/2000',periods=4,freq='M'))
ts
ts.shift(2)
ts.shift(-2)
ts/ts.shift(1)-1
ts.shift(2,freq='M')
ts.shift(3,freq='D')
ts.shift(1,freq='3D')
ts.shift(1,freq='90T')
from pandas.tseries.offsets import Day, MonthEnd
now  = datetime(2011,11,17)
now+3*Day()
now+MonthEnd()
now+MonthEnd(2)
now+MonthEnd(3)
offset =MonthEnd()
offset.rollforward(now)
offset.rollback(now)
ts=Series(np.random.randn(20),index=pd.date_range('1/15/2000',periods=20,freq='4d')
)
ts.groupby(offset.rollforward).mean()
ts.resample('M',how='mean')
ts.resample('M').mean()
import pytz
pytz.common_timezones[-5:]
tz=pytz.timezone('US/Eastern')
tz
rng=pd.date_range('3/9/2012 9:30',periods=6,freq='D')
ts=Series(np.random.randn(len(rng)),index=rng)
ts
print(ts.index.tz)
Example #36
# shift is commonly used to compute the percentage change of one or more time series
print(ts / ts.shift(1) - 1)

# A bare shift moves the data but not the index, producing NaN; passing a freq shifts the
# timestamps instead of the data
print(ts.shift(2, freq='M'))
print(ts.shift(3, freq='D'))
print(ts.shift(1, freq='3D'))
print(ts.shift(1, freq='90T'))

# Shift dates with offsets
now = datetime(2011, 11, 17)
print(now + 3 * Day())

# Anchored offsets such as MonthEnd roll the date forward to the next date that conforms
# to the frequency rule on the first addition
print(now + MonthEnd())
print(now + MonthEnd(2))

# Anchored offsets also provide rollforward and rollback to roll a date explicitly
# forward (into the future) or backward (into the past)
offset = MonthEnd()
# roll forward
print(offset.rollforward(now))
# roll back
print(offset.rollback(now))

# The two rolling methods can be combined with groupby
ts = Series(np.random.randn(20), index=pd.date_range('1/15/2000', periods=20, freq='4d'))
print(ts.groupby(offset.rollforward).mean())
# equivalent to
print(ts.resample('M').mean())