Python USFederalHolidayCalendar.holidaysの例、pandas.tseries.holiday.USFederalHolidayCalendar.holidays Pythonの例

コード例 #1

0

ファイルを表示

ファイル: kernels.py プロジェクト: afcarl/gplib

 def calc_holidays(start_date=datetime.datetime(2012, 10, 1),
                   end_date=None):
     """Return shifted day offsets of US federal holidays inside a window.

     A daily 0/1 series is built from the calendar's holidays, sliced to
     ``[start_date, end_date]``, and the positions of the holiday days inside
     that slice are returned with 1 subtracted from each position (so a
     holiday on the first day of the window yields ``-1``).

     Args:
         start_date: First day of the window.
         end_date: Last day of the window. ``None`` means "now", evaluated on
             every call rather than once when the function is defined.

     Returns:
         numpy.ndarray: Integer positions (each minus one) of holiday days.
     """
     if end_date is None:
         end_date = datetime.datetime.now()
     holiday_dates = USFederalHolidayCalendar().holidays()
     flags = pd.Series(np.ones(holiday_dates.shape[0]), index=holiday_dates)
     # Upsample to one row per calendar day; days without a holiday have no
     # observations, so their mean is NaN and becomes 0.0.
     daily = flags.resample("D").mean().fillna(0.0)
     h = daily[start_date:end_date].values
     return np.where(h)[0] - 1

コード例 #2

0

ファイルを表示

ファイル: test_holiday.py プロジェクト: israelzuniga/pandas

    def test_calendar(self):
        """Holidays must match ``self.holiday_list`` for every bound type.

        The same window is passed as the stored bounds, as ``%Y-%m-%d``
        strings and as ``Timestamp`` objects.
        """
        calendar = USFederalHolidayCalendar()
        day_format = "%Y-%m-%d"
        bound_variants = (
            (self.start_date, self.end_date),
            (self.start_date.strftime(day_format),
             self.end_date.strftime(day_format)),
            (Timestamp(self.start_date), Timestamp(self.end_date)),
        )
        results = [calendar.holidays(begin, finish)
                   for begin, finish in bound_variants]

        for found in results:
            self.assertEqual(list(found.to_pydatetime()), self.holiday_list)

コード例 #3

0

ファイルを表示

ファイル: test_holiday.py プロジェクト: xcompass/pandas

    def test_calendar(self):
        """Check the holiday list for three equivalent ways of giving bounds."""
        cal = USFederalHolidayCalendar()
        day_format = '%Y-%m-%d'

        from_stored_bounds = cal.holidays(self.start_date, self.end_date)
        from_strings = cal.holidays(self.start_date.strftime(day_format),
                                    self.end_date.strftime(day_format))
        from_timestamps = cal.holidays(Timestamp(self.start_date),
                                       Timestamp(self.end_date))

        for found in (from_stored_bounds, from_strings, from_timestamps):
            assert list(found.to_pydatetime()) == self.holiday_list

コード例 #4

0

ファイルを表示

ファイル: test_holiday.py プロジェクト: DusanMilunovic/pandas

    def test_calendar(self):
        """The calendar returns ``self.holiday_list`` whatever the bound type."""
        calendar = USFederalHolidayCalendar()
        expected = self.holiday_list
        first_day, last_day = self.start_date, self.end_date

        # Same window expressed as stored bounds, ISO strings and Timestamps.
        plain = calendar.holidays(first_day, last_day)
        as_text = calendar.holidays(
            first_day.strftime('%Y-%m-%d'),
            last_day.strftime('%Y-%m-%d'))
        as_stamps = calendar.holidays(
            Timestamp(first_day),
            Timestamp(last_day))

        assert list(plain.to_pydatetime()) == expected
        assert list(as_text.to_pydatetime()) == expected
        assert list(as_stamps.to_pydatetime()) == expected

コード例 #5

0

ファイルを表示

ファイル: Airline Delays - Data Processing Pipeline.py プロジェクト: stevendleung/Predicting_Airline_Delays

def holiday_column(airline_df, start='2014-01-01', end='2018-12-31'):
    '''Return ``airline_df`` with an integer ``Holiday`` column.

    The column is 1 where ``fl_date`` matches a US federal holiday between
    ``start`` and ``end`` and 0 otherwise. Uses the module-level ``spark``
    session to build the holiday lookup table.
    '''
    # Holiday dates as strings in a single-column pandas frame
    federal_days = USFederalHolidayCalendar().holidays(start, end).to_pydatetime()
    date_strings = pd.DataFrame(federal_days)[0].astype('string')
    holidays_df = pd.DataFrame(date_strings)

    # Load into Spark and parse the strings back into timestamps
    holiday_schema = StructType([StructField('Holiday_Date', StringType())])
    holidays_sc = spark.createDataFrame(holidays_df, holiday_schema)
    parsed_date = to_timestamp(holidays_sc.Holiday_Date, 'yyyy-MM-dd')
    holidays_sc = holidays_sc.select(parsed_date.alias('holiday_date'))

    # Left join keeps every flight; unmatched rows get a null holiday_date
    same_day = (airline_df.fl_date == holidays_sc.holiday_date)
    flagged = airline_df.join(holidays_sc, same_day, 'left')

    # Null / not-null becomes 0 / 1
    matched = f.col('holiday_date').isNotNull()
    flagged = flagged.withColumn("Holiday", matched.cast("integer"))

    # The helper join column is no longer needed
    return flagged.drop(flagged.holiday_date)

コード例 #6

0

ファイルを表示

def pandas_holidays():
    """Print ``True`` if 2014-01-01 is among the 2014 US federal holidays."""
    from pandas.tseries.holiday import USFederalHolidayCalendar
    year_2014 = USFederalHolidayCalendar().holidays(start='2014-01-01',
                                                    end='2014-12-31')
    new_years_day = datetime.datetime(2014, 1, 1)
    if new_years_day in year_2014.to_pydatetime():
        print(True)

コード例 #7

0

ファイルを表示

    def __init__(
            self,
            path: str = None,
            data: pd.DataFrame = None,
            holdings: pd.DataFrame = None,
    ):
        """Initialize the portfolio with holdings and market data.

        Stored ``holdings.feather`` / ``prices.feather`` files under ``path``
        take precedence over the ``holdings`` / ``data`` arguments; the
        arguments are used when the corresponding file does not exist.

        Args:
            path: The name of a directory containing holdings and market
                data. If this is None, ``~/.portfolio/data`` is used.
            data: A DataFrame containing market close data for a set of
                financial instruments over a period of time.
            holdings: A DataFrame containing the number of shares held of the
                set of symbols in data over a time period corresponding to
                that of data.
        """
        # Build the empty frames per call: a DataFrame default is created
        # once at definition time and would be shared by every instance.
        if data is None:
            data = pd.DataFrame()
        if holdings is None:
            holdings = pd.DataFrame()

        if len(data) > 0 and len(holdings) > 0:
            logger.debug("Data and holdings arguments are set.")
            self.data = data
            self.holdings = holdings
        else:
            logger.debug("Data and holdings are not set.")
        if path is None:
            path = Path.home() / ".portfolio" / "data"
        self.path = Path(path)
        if not self.path.is_dir():
            logger.info("%s is not a directory, creating it...", self.path)
            # parents=True: the default location is nested two levels below
            # the home directory and neither level may exist yet.
            self.path.mkdir(parents=True, exist_ok=True)
        self.data_file = self.path / "prices.feather"
        self.holdings_file = self.path / "holdings.feather"
        try:
            # The feather format does not support date indices,
            # so set the date column to be the index.
            self.holdings = pd.read_feather(
                self.holdings_file).set_index("date")
        except FileNotFoundError:
            logger.info("No stored holdings found.")
            self.holdings = holdings
        symbols = self.holdings.columns
        try:
            # The feather format does not support date indices,
            # so set the date column to be the index.
            self.data = pd.read_feather(self.data_file).set_index("date")
        except FileNotFoundError:
            logger.info("Market data is not stored .")
            # Same fallback as for holdings, so self.data always exists.
            self.data = data
        calendar = USFederalHolidayCalendar()
        today = pd.Timestamp.today().normalize()
        previous_business_day = today - BDay(1)
        if len(self.data.index) == 0:
            # No stored prices at all: there is no last row to continue from.
            last_business_day = previous_business_day
        else:
            last_business_day = min(previous_business_day,
                                    self.data.index.max() + BDay(1))
        holidays = calendar.holidays(last_business_day, today)
        # Only refresh when neither today nor the reference business day is
        # present and no federal holiday falls inside that window.
        if (today not in self.data.index
                and last_business_day not in self.data.index
                and holidays.empty):
            self.data = self.get_market_data(
                symbols,
                start=last_business_day,
                end=today,
            )
            self.holdings = self.holdings.reindex(self.data.index,
                                                  method="ffill").dropna()

コード例 #8

0

ファイルを表示

ファイル: data_handler.py プロジェクト: mrktrvr/tasks

def get_us_holidays(df: pd.DataFrame = None) -> pd.DatetimeIndex:
    '''
    Return US federal holidays as a DatetimeIndex.

    holidays = get_us_holidays(df)

    Args:
        df: Optional frame. When given, only holidays between the smallest
            and largest value of ``df.index`` (both inclusive) are returned;
            otherwise every holiday the calendar produces by default.

    Returns:
        The selected holidays.
    '''
    holidays = pd.to_datetime(USFederalHolidayCalendar().holidays())
    if df is None:
        return holidays

    t_min = df.index.min()
    t_max = df.index.max()
    # One vectorised comparison instead of a Python-level loop.
    return holidays[(holidays >= t_min) & (holidays <= t_max)]

コード例 #9

0

ファイルを表示

ファイル: getFeatures.py プロジェクト: w33ab00/IRDM2016

def isHoliday(d):
    """Return 1 if ``d`` equals a US federal holiday of its own year, else 0."""
    year_text = d.strftime("%Y")
    first_day, last_day = year_text + '-01-01', year_text + '-12-31'
    calendar = USFederalHolidayCalendar()
    year_holidays = calendar.holidays(start=first_day, end=last_day).to_pydatetime()
    return 1 if d in year_holidays else 0

コード例 #10

0

ファイルを表示

def get_holiday_list(start_yr, start_mon, start_day, end_yr, end_mon, end_day):
    """Return the US federal holidays between two calendar dates.

    The window runs from ``start_yr-start_mon-start_day`` to
    ``end_yr-end_mon-end_day``. Every component is honoured; earlier code
    ignored the month and day arguments and always used Nov 1 / May 1.

    Returns:
        list: The holiday timestamps inside the window, in calendar order.
    """
    calendar = USFederalHolidayCalendar()
    window_start = datetime.datetime(start_yr, start_mon, start_day)
    window_end = datetime.datetime(end_yr, end_mon, end_day)
    return list(calendar.holidays(window_start, window_end))

コード例 #11

0

ファイルを表示

def holiday_lists():
    """Return the 2021 US federal holidays formatted as ``YYYY/MM/DD``."""
    federal_days = USFederalHolidayCalendar().holidays(start='2021-01-01',
                                                       end='2021-12-31')
    return [day.strftime("%Y/%m/%d") for day in federal_days.to_pydatetime()]

コード例 #12

0

ファイルを表示

ファイル: extract.py プロジェクト: chnhgn/Machine-Learning

 def near_holiday(self):
     """Flag accounts created the day before or after a US federal holiday.

     Reads ``id`` and ``date_account_created`` from ``self.df_raw`` and
     leaves that frame untouched.

     Returns:
         DataFrame with columns ``id`` and boolean ``near_holiday``.
     """
     holidays = USFederalHolidayCalendar().holidays()
     # Work on an explicit copy: assigning into a selection of df_raw
     # triggers SettingWithCopyWarning.
     df1 = self.df_raw[['id', 'date_account_created']].copy()
     created = pd.to_datetime(df1['date_account_created'])
     # Drop any time-of-day part so values compare equal to the holidays.
     created = pd.to_datetime(created.dt.date)
     one_day = pd.Timedelta(days=1)
     adjacent_days = (holidays + one_day).union(holidays - one_day)
     df1['near_holiday'] = created.isin(adjacent_days)
     return df1[['id', 'near_holiday']]

コード例 #13

0

ファイルを表示

ファイル: mianPD.py プロジェクト: ycc1107/pandasStock

 def __init__(self,epsPath,pricePath,daysNeed = 3):
     """Read the EPS and price CSV files and store the holidays between the EPS dates."""
     self.raw_eps = pd.read_csv(epsPath)
     self.price = pd.read_csv(pricePath)
     self.daysNeed = daysNeed
     self.__getEps()
     # presumably __getEps() fills epsBefore / epsAfter - it is not visible here
     earliest = min(self.epsBefore['DATE'],self.epsAfter['DATE'])
     latest = max(self.epsBefore['DATE'],self.epsAfter['DATE'])
     federal_days = USFederalHolidayCalendar().holidays(start=str(earliest),
                                                        end=str(latest))
     self.holidays = federal_days.to_pydatetime()

コード例 #14

0

ファイルを表示

ファイル: stocks.py プロジェクト: pdghawk/systrade

    def get_path(self,
                 initial_values,
                 t_start,
                 t_end,
                 freq,
                 interest_rate,
                 seed=None):
        """Generate stock paths on the market-hours grid between two times.

        Args:
            initial_values: Starting values passed to the path generator.
            t_start: Start of the requested period.
            t_end: End of the requested period.
            freq: Sampling frequency handed to ``pd.date_range``.
            interest_rate: Rate wrapped in a ``SimpleParam`` for the model.
            seed: Seed for numpy's global RNG; the RNG is re-seeded from
                entropy afterwards, also when path generation fails.

        Returns:
            DataFrame indexed by the retained timestamps with one column per
            entry of ``self.stock_names``.

        Raises:
            RuntimeError: If no timestamp survives the market-hours, weekday
                and holiday filters.
        """
        # set up parameters of the pathway model
        covar = stats.corr_to_cov(self.correlation_matrix, self.sigmas)
        covar_param = param.SimpleArrayParam(covar)

        chol = linalg.cholesky(covar, lower=True)
        # NOTE(review): this also zeroes genuinely negative entries of the
        # factor, not just round-off - confirm `abs(chol) < 1e-9` is not meant.
        chol[chol < 1.0e-9] = 0.0
        cholesky_param = param.SimpleArrayParam(chol)

        r_param = param.SimpleParam(interest_rate)

        # create the index of times that are in market hours between
        # requested times
        timeindex = pd.date_range(start=t_start, end=t_end, freq=freq)
        # get the frequency (in seconds) now before removing non-market times
        freq_in_secs = pd.to_timedelta(timeindex.freq,
                                       unit='s').total_seconds()
        # only trading hours
        timeindex = timeindex[timeindex.indexer_between_time('09:30', '16:00')]
        # only weekdays
        timeindex = timeindex[~(timeindex.dayofweek > 4)]
        # remove fed holidays; holidays are midnight timestamps, so compare
        # on the calendar day - the intraday stamps themselves never match
        if len(timeindex) > 0:
            trading_days = timeindex.normalize()
            if trading_days.tz is not None:
                trading_days = trading_days.tz_localize(None)
            cal = USFederalHolidayCalendar()
            hols = cal.holidays(start=trading_days.min(),
                                end=trading_days.max())
            timeindex = timeindex[~trading_days.isin(hols)]

        if len(timeindex) == 0:
            raise RuntimeError('Trying to generate stocks on empty time list')

        # get array of time in yearly units and get stock pathways
        times = np.arange(
            0, len(timeindex)) * freq_in_secs / MARKET_SECONDS_PER_YEAR

        # seed and create the pathway generator object
        np.random.seed(seed=seed)
        try:
            path_maker = path.GeometricDiffusionManyAsset(self.generator,
                                                          r_param,
                                                          covar_param,
                                                          cholesky_param)
            s_paths = path_maker.get_single_timed_path(initial_values, times)
        finally:
            # never leave the global RNG pinned to the caller's seed
            np.random.seed(seed=None)

        # put all data into a pandas Dataframe
        stocks_df = pd.DataFrame(index=timeindex,
                                 data=s_paths,
                                 columns=self.stock_names)
        print("internal : ",
              stocks_df.groupby(stocks_df.index.dayofweek).sum())
        return stocks_df

コード例 #15

0

ファイルを表示

def next_business_day(date):
    """Return the first business day after ``date`` that is not a US federal holiday.

    The result is a plain ``datetime.datetime``.
    """
    federal_days = USFederalHolidayCalendar().holidays()
    candidate = (date + BDay(1)).to_pydatetime()
    # Keep stepping one business day at a time while we land on a holiday.
    while candidate in federal_days:
        candidate = (candidate + BDay(1)).to_pydatetime()
    return candidate

コード例 #16

0

ファイルを表示

ファイル: Fetch_Data_Stock_US_Weekly.py プロジェクト: zhangyunGit/StockRecommendSystem

def judgeOpenDaysInRange(from_date, to_date):
    """Return the business days in the range that are not US federal holidays.

    The result has a ``date`` column and a ``holiday`` column, which is
    ``False`` on every returned row.
    """
    federal_days = USFederalHolidayCalendar().holidays(from_date, to_date)
    business_days = pd.bdate_range(from_date, to_date)

    table = pd.DataFrame()
    table['date'] = business_days
    table['holiday'] = business_days.isin(federal_days)
    return table[~table['holiday']]

コード例 #17

0

ファイルを表示

def create_ts_calendar(df, ts_settings, additional_events=None):
    """
    Build an event calendar covering the dates of a time series frame.

    The calendar holds the US federal holidays, with Christmas and July 4th
    placed on their actual dates rather than the observed ones, plus Black
    Friday and Cyber Monday. It spans the smallest date in ``df`` up to 365
    days past the largest one.

    df: pandas df
    ts_settings: dict
        Parameters for time series project; ``ts_settings['date_col']`` names
        the date column of ``df``
    additional_events: pandas df(optional)
        df of additional events to add to calendar; must have exactly two
        columns (date, event name). It is not modified.

    Returns:
    --------
    Calendar of events: df with string ``Date`` (YYYY-MM-DD) and ``Event``
    columns, de-duplicated and sorted by date

    """
    date_col = ts_settings['date_col']

    # Both retail events are anchored on the 4th Thursday of November.
    black_friday = Holiday(
        "Black Friday",
        month=11,
        day=1,
        offset=[pd.DateOffset(weekday=TH(4)),
                pd.DateOffset(1)])
    cyber_monday = Holiday(
        "Cyber Monday",
        month=11,
        day=1,
        offset=[pd.DateOffset(weekday=TH(4)),
                pd.DateOffset(4)])

    # Build a private rule list. USFederalHolidayCalendar.rules is a class
    # attribute, so editing it in place would leak into every other user of
    # the calendar and grow on each call. The fixed-date holidays are found
    # by month/day because their position in the list varies across pandas
    # versions.
    fixed_date_names = {(12, 25): 'Christmas', (7, 4): 'July 4th'}
    rules = []
    for rule in USFederalHolidayCalendar.rules:
        fixed_name = fixed_date_names.get((rule.month, rule.day))
        if fixed_name is None:
            rules.append(rule)
        else:
            rules.append(Holiday(fixed_name, month=rule.month, day=rule.day))
    rules.extend([black_friday, cyber_monday])
    cal = AbstractHolidayCalendar(name='USFederalWithRetailEvents',
                                  rules=rules)

    named_days = cal.holidays(
        start=pd.to_datetime(df[date_col].min()),
        end=pd.to_datetime(df[date_col].max()) + dt.timedelta(days=365),
        return_name=True,
    )
    events = pd.DataFrame({'Date': named_days.index,
                           'Event': named_days.values})

    if additional_events is not None:
        assert additional_events.shape[
            1] == 2, 'additional events must be a df with 2 columns'
        # Copy so the caller's frame keeps its own column names and dtypes.
        extra = additional_events.copy()
        extra.columns = ['Date', 'Event']
        extra['Date'] = pd.to_datetime(extra['Date'])
        # DataFrame.append was removed in pandas 2.0.
        events = pd.concat([events, extra], ignore_index=True)

    events['Date'] = pd.to_datetime(events['Date']).dt.strftime('%Y-%m-%d')
    return events.drop_duplicates().sort_values(by='Date').reset_index(
        drop=True)

コード例 #18

0

ファイルを表示

class USHoliday(Feature):
    """Feature marking the entries of an index that fall on a US federal holiday."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``Feature`` and create the holiday calendar."""
        super().__init__(**kwargs)
        self.n_periods = 2
        self.cal = USFederalHolidayCalendar()

    def indexer(self, index, column=None):
        """Return a boolean array: True where the date of ``index`` is a holiday.

        Holidays are looked up from two days before the first entry to two
        days after the last one. ``column`` is not used.
        """
        margin = pd.Timedelta('2d')
        window_start = index.min() - margin
        window_end = index.max() + margin
        holiday_days = self.cal.holidays(start=window_start, end=window_end)
        return np.isin(index.date, holiday_days.date)

コード例 #19

0

ファイルを表示

ファイル: TimeSeries.py プロジェクト: HuangYiran/data_mining

def create_date_feature_is_public_holiday(df, date, start, end, country = 'US'):
    """
    Add an ``is_holiday`` column (1/0) to ``df`` and return ``df``.

    A row gets 1 when its value in column ``date`` equals a US federal
    holiday between ``start`` and ``end``. Only US federal holidays are
    supported; ``country`` is not used.
    """
    holiday_dates = USFederalHolidayCalendar().holidays(start=start,
                                                        end=end).to_pydatetime()

    def flag(value):
        if value in holiday_dates:
            return 1
        return 0

    df['is_holiday'] = df[date].map(flag)
    return df

コード例 #20

0

ファイルを表示

def holidays_check(forecast_date):
    ''' Return ``forecast_date`` moved back to the last working day.

        If the date is a US federal holiday or a weekend day, a notice is
        printed and the date is stepped back one day at a time until it is
        neither; otherwise it is returned unchanged. '''

    calendar = USFederalHolidayCalendar()

    def is_day_off(day):
        # Saturday and Sunday have weekday() 5 and 6
        on_holiday = not calendar.holidays(start=day, end=day).empty
        return on_holiday or day.weekday() >= 5

    if not is_day_off(forecast_date):
        return forecast_date

    print(
        '***Notice***\nThe date that you want to predict is a holiday or weekend'
    )
    print('Our programme will choose the last working date')
    while is_day_off(forecast_date):
        forecast_date = forecast_date - timedelta(1)  # move 1 day backward
    print('The forecast date is changed to {}'.format(
        forecast_date.strftime('%m/%d/%Y')))
    return forecast_date

コード例 #21

0

ファイルを表示

ファイル: date_utils.py プロジェクト: theJollySin/ESTA

def find_holidays(base_year):
    ''' Return the US federal holidays of ``base_year`` as ``MM-DD``
        strings, followed by California's Cesar Chavez Day (March 31).
    '''
    year_text = str(base_year)
    first_day = year_text + '-01-01'
    last_day = year_text + '-12-31'
    federal_days = USFederalHolidayCalendar().holidays(start=first_day,
                                                       end=last_day)

    month_days = [stamp.strftime('%m-%d')
                  for stamp in federal_days.to_pydatetime()]
    month_days.append('03-31')
    return month_days

コード例 #22

0

ファイルを表示

ファイル: generateCsvFiltered.py プロジェクト: jeanlks/ARP

def getHolidays(dates):
    """Return a 1/0 list telling which ``dates`` are US federal holidays.

    Only holidays from 2015-01-01 through 2017-12-31 are recognised.
    """
    federal_days = USFederalHolidayCalendar().holidays(
        start='2015-01-01', end='2017-12-31').to_pydatetime()
    return [1 if day in federal_days else 0 for day in dates]

コード例 #23

0

ファイルを表示

ファイル: mianPD.py プロジェクト: ycc1107/pandasStock

 def __init__(self, epsPath, pricePath, daysNeed=3):
     """Load both CSV inputs and keep the federal holidays between the EPS dates.

     ``self.holidays`` ends up as an array of ``datetime`` objects.
     """
     self.raw_eps = pd.read_csv(epsPath)
     self.price = pd.read_csv(pricePath)
     self.daysNeed = daysNeed
     self.__getEps()
     # Window bounds: the smaller / larger of the two EPS dates, as text.
     bounds = {}
     for keyword, pick in (('start', min), ('end', max)):
         bounds[keyword] = str(pick(self.epsBefore['DATE'],
                                    self.epsAfter['DATE']))
     calendar = USFederalHolidayCalendar()
     self.holidays = calendar.holidays(**bounds).to_pydatetime()

コード例 #24

0

ファイルを表示

    def __init__(self, data_df, date):
        """Prepare the lagged, holiday-annotated frame stored as ``self.lm_data``.

        Args:
            data_df: Frame holding at least the columns selected below
                (Date, Hour, Weekday, Month, Load and the temperature /
                humidity readings). It is copied, not modified.
            date: Stored unchanged as ``self.date``.
        """
        # df = pd.read_csv(path, date_parser='Date')
        self.date = date
        df = data_df.copy()
        # df = pd.read_csv(path, date_parser='Date')
        # df = data_df.copy()
        test = df[[
            'Date', 'Hour', 'Weekday', 'Month', 'Load', 'Mean_Temp',
            'Mean_Humi', 'RIV_Temp', 'RIV_Humi', 'LAX_Temp', 'LAX_Humi',
            'USC_Temp', 'USC_Humi', 'WJF_Temp', 'WJF_Humi', 'TRM_Temp',
            'TRM_Humi'
        ]]

        # Natural-log transforms of one temperature column and of the load.
        test.loc[:, 'RIV_Temp_Log'] = np.log(df['RIV_Temp'])

        test.loc[:, 'Load_Log'] = np.log(df['Load'])
        # Lag features: the value 48 rows earlier (NaN for the first 48 rows).
        test['Load_Lag_48'] = test['Load_Log'].shift(48, axis=0)
        # test['Temp_Lag_48'] = test['Mean_Temp'].shift(48, axis=0)
        test['Humi_Lag_48'] = test['Mean_Humi'].shift(48, axis=0)
        test['RIV_Temp_Log_Lag_48'] = test['RIV_Temp_Log'].shift(48, axis=0)

        # test['RIV_Temp_Lag_48']= test['RIV_Temp'].shift(48, axis=0)

        cal = USFederalHolidayCalendar()

        # Named federal holidays from 2014-01-01 up to the current moment.
        holidays = cal.holidays(start='2014-01-01',
                                end=str(datetime.datetime.now()),
                                return_name=True)

        # Turn the name series into a two-column (Date, Holiday) frame.
        holidays = pd.DataFrame(holidays)

        holidays = holidays.reset_index()
        holidays = holidays.rename(columns={'index': "Date", 0: 'Holiday'})
        holidays['Date'] = pd.to_datetime(holidays['Date'])

        test['Date'] = pd.to_datetime(test['Date'])
        # Label-based slice starting at index label 49.
        # NOTE(review): assumes a default integer index; shift(48) leaves only
        # labels 0-47 empty, so label 48 is dropped too - confirm the cutoff.
        lm_data = test.loc[49:len(test), ].merge(holidays,
                                                 how='left',
                                                 on='Date')
        # Rows that matched no holiday get an explicit placeholder name.
        lm_data['Holiday'] = lm_data['Holiday'].fillna("Not Holiday")

        lm_data[["Hour", "Weekday", "Month",
                 "Holiday"]] = lm_data[["Hour", "Weekday", "Month",
                                        "Holiday"]].astype('category')

        # Combine Date and Hour into one timestamp per row.
        DateTime = pd.DataFrame(
            lm_data.apply(lambda line: pd.to_datetime(line['Date']) + datetime.
                          timedelta(hours=line['Hour']),
                          axis=1))
        DateTime.columns = ['DateTime']

        # Final frame: all prepared columns, indexed by the combined timestamp.
        self.lm_data = pd.concat([DateTime, lm_data], axis=1)
        self.lm_data.set_index('DateTime', inplace=True)

コード例 #25

0

ファイルを表示

ファイル: stack.py プロジェクト: happyphonon/Airbnb

def _iso_week(dates):
    """Return the ISO week number for each datetime in the Series ``dates``."""
    try:
        weeks = dates.dt.isocalendar().week
    except AttributeError:
        # pandas without Series.dt.isocalendar() still offers .dt.week
        return dates.dt.week
    # isocalendar() yields a nullable UInt32 column; hand back plain numpy
    # integers (floats when dates are missing), as .dt.week used to.
    return weeks.astype('float64' if weeks.isna().any() else 'int64')


def build_features(data, features):
    """Derive date, holiday-distance and one-hot features.

    Calendar parts are extracted from ``timestamp_first_active`` (its first
    eight characters, parsed as ``%Y%m%d``) and from ``date_account_created``.
    For every US federal holiday between the earliest and latest account
    creation date a ``dac_holiday_diff_<i>`` column holds
    ``holiday_transform`` applied to the signed day difference. The listed
    categorical columns are replaced by dummy columns.

    Args:
        data: Input frame. It is modified in place before being rebound, so
            the caller's object changes as well.
        features: List extended in place with the names of the new columns.

    Returns:
        The frame with all derived columns.
    """
    # Timestamp First Active
    data['timestamp_first_active'] = data['timestamp_first_active'].astype(str)
    data['timestamp_first_active_date'] = data['timestamp_first_active'].str[:8]
    data['timestamp_first_active_date'] = pd.to_datetime(
        data['timestamp_first_active_date'], format='%Y%m%d')
    tfa = data['timestamp_first_active_date']
    data['tfa_month'] = tfa.dt.month
    data['tfa_year'] = tfa.dt.year
    data['tfa_day'] = tfa.dt.day
    data['tfa_dayofyear'] = tfa.dt.dayofyear
    data['tfa_dayofweek'] = tfa.dt.dayofweek
    # Series.dt.week was removed in pandas 2.0
    data['tfa_week'] = _iso_week(tfa)
    data['tfa_quarter'] = tfa.dt.quarter
    features.extend(['tfa_day', 'tfa_month', 'tfa_year', 'tfa_dayofyear',
                     'tfa_dayofweek', 'tfa_week', 'tfa_quarter'])
    # Holiday distances for the first-active date are intentionally not built.

    # Date Account Created
    data['date_account_created'] = pd.to_datetime(data['date_account_created'])
    dac = data['date_account_created']
    data['dac_month'] = dac.dt.month
    data['dac_year'] = dac.dt.year
    data['dac_day'] = dac.dt.day
    data['dac_dayofyear'] = dac.dt.dayofyear
    data['dac_dayofweek'] = dac.dt.dayofweek
    data['dac_week'] = _iso_week(dac)
    data['dac_quarter'] = dac.dt.quarter
    features.extend(['dac_year', 'dac_month', 'dac_day', 'dac_dayofyear',
                     'dac_dayofweek', 'dac_week', 'dac_quarter'])

    # DAC Holidays
    calendar = USFederalHolidayCalendar()
    dac_holidays = calendar.holidays(start=dac.min(), end=dac.max())
    for i, holiday in enumerate(dac_holidays):
        column = 'dac_holiday_diff_' + str(i)
        # Signed number of days between account creation and this holiday.
        data[column] = (dac - holiday).dt.days.map(holiday_transform)
        features.append(column)

    # Days Difference Between TFA and DAC
    data['days_diff'] = (data['date_account_created']
                         - data['timestamp_first_active_date']).dt.days
    features.extend(['days_diff'])
    data.drop(['date_account_created', 'timestamp_first_active',
               'timestamp_first_active_date'], axis=1, inplace=True)

    other_features = ['gender', 'signup_method', 'signup_flow', 'language',
                      'affiliate_channel', 'affiliate_provider',
                      'first_affiliate_tracked', 'signup_app',
                      'first_device_type', 'first_browser']
    for f in other_features:
        # Replace each categorical column by its dummy columns.
        data_dummy = pd.get_dummies(data[f], prefix=f)
        features.extend(data_dummy.columns.values)
        data.drop([f], axis=1, inplace=True)
        data = pd.concat((data, data_dummy), axis=1)
    return data

コード例 #26

0

ファイルを表示

ファイル: model.py プロジェクト: adelina-ie/MLOPS

def predict(dict):
    """
    Load the persisted model, build the feature row described by ``dict``
    and predict with it.

    Args:
        dict: Mapping turned into a one-row DataFrame. The code reads the
            keys ``date``, ``temperature_C``, ``humidity`` and drops
            ``feeling_temperature_C``.

    Returns:
        The model's predictions cast to ``int``. They are also printed.
    """

    folder_selected = folderSelect()
    filename = os.path.join(folder_selected, "model.joblib")
    # os.path.join never returns None, so test for the file itself.
    if not os.path.isfile(filename):
        # NOTE(review): assumes train_and_persist() writes model.joblib into
        # the selected folder - confirm against its implementation.
        train_and_persist()
    with open(filename, "rb") as f:
        model = load(f)
    testDf = pd.DataFrame(dict, index=[0])
    testDf["hr"] = testDf["date"].dt.hour
    testDf["yr"] = testDf["date"].dt.year - testDf["date"].dt.year.min()
    testDf["mnth"] = testDf["date"].dt.month
    testDf["season"] = testDf["date"].map(get_season)
    testDf["weekday"] = testDf["date"].dt.weekday
    # NOTE(review): this converts the day-of-month integer to a datetime;
    # the column is dropped before prediction, so it is left as it was.
    testDf["dteday"] = testDf["date"].dt.day
    testDf["dteday"] = pd.to_datetime(testDf["dteday"])
    cal = USFederalHolidayCalendar()
    holidays = pd.to_datetime(cal.holidays(start="2011-01-01", end="2011-06-30"))
    # Element-wise membership on the calendar day; `Series in DatetimeIndex`
    # raises instead of testing each row.
    on_holiday = pd.to_datetime(testDf["date"]).dt.normalize().isin(holidays)
    testDf["holiday"] = on_holiday.astype(int)
    testDf["workingday"] = (~on_holiday).astype(int)
    t_max, t_min = 50, -8
    # This is for temperature normalization
    testDf["temp"] = (testDf["temperature_C"] - t_min)/(t_max-t_min)
    # We divide humidity by 100 to scale it between 0 and 1
    testDf["hum"] = testDf["humidity"]/100
    testDf = testDf.drop(columns=["temperature_C", "humidity"])
    # Convert the data type to either category or to float
    testDf = generating_new_features(testDf)
    testDf = pd.get_dummies(testDf)
    # Finally start with Machine Learning
    test = testDf.drop(columns=["date", "dteday", "feeling_temperature_C"])
    # Give the single row every column the training matrix has.
    train_X, train_y, test_X, test_y = prepare_train_data()
    train_features = train_X.columns.values
    test = fill_missing_features(test, train_features)
    pred = model.predict(test)
    pred = pred.astype(int)
    print(" Rounded predictions:\n", pred)
    return pred

コード例 #27

0

ファイルを表示

ファイル: helper.py プロジェクト: lynkeib/WebProjects

def DR_Temp_data_cleaning(dataframe):
    '''
    Build the lagged, holiday-annotated modelling frame.

    The ``Date`` column of ``dataframe`` is converted to datetimes in place;
    every other step works on a copy. Log columns are added for the load and
    the six temperature columns, together with 48-row lags of the logs and
    of ``Mean_Humi``. Rows are then joined to the named US federal holidays
    (from 2014-01-01 to now) and indexed by ``Date`` plus ``Hour`` hours.

    Returns:
        DataFrame indexed by ``DateTime``.
    '''
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    base_columns = [
        'Date', 'Hour', 'Weekday', 'Month', 'Load', 'Mean_Temp', 'Mean_Humi',
        'RIV_Temp', 'RIV_Humi', 'LAX_Temp', 'LAX_Humi', 'USC_Temp', 'USC_Humi',
        'WJF_Temp', 'WJF_Humi', 'TRM_Temp', 'TRM_Humi'
    ]
    # Explicit copy: the new columns must not be written into a slice.
    test = dataframe[base_columns].copy()

    temperature_columns = ['RIV_Temp', 'LAX_Temp', 'USC_Temp', 'WJF_Temp',
                           'TRM_Temp', 'Mean_Temp']
    for name in temperature_columns:
        # Each log column comes from its own source column; Mean_Temp_Log
        # was previously computed from TRM_Temp.
        test[name + '_Log'] = np.log(test[name])

    test['Load_Log'] = np.log(test['Load'])
    test['Load_Lag_48'] = test['Load_Log'].shift(48, axis=0)
    test['Humi_Lag_48'] = test['Mean_Humi'].shift(48, axis=0)
    for name in temperature_columns:
        test[name + '_Log_Lag_48'] = test[name + '_Log'].shift(48, axis=0)

    cal = USFederalHolidayCalendar()
    named_days = cal.holidays(start='2014-01-01',
                              end=str(datetime.datetime.now()),
                              return_name=True)
    holidays = pd.DataFrame({'Date': pd.to_datetime(named_days.index),
                             'Holiday': named_days.values})

    # Label-based slice starting at index label 49.
    # NOTE(review): assumes a default integer index; shift(48) leaves only
    # labels 0-47 empty, so label 48 is dropped too - confirm the cutoff.
    lm_data = test.loc[49:len(test), ].merge(holidays, how='left', on='Date')
    lm_data['Holiday'] = lm_data['Holiday'].fillna("Not Holiday")
    lm_data[["Hour", "Weekday", "Month",
             "Holiday"]] = lm_data[["Hour", "Weekday", "Month",
                                    "Holiday"]].astype('category')

    # Vectorised Date + Hour; also works for numpy integers and empty frames.
    hours = pd.to_timedelta(lm_data['Hour'].astype(float), unit='h')
    lm_data.index = pd.DatetimeIndex(pd.to_datetime(lm_data['Date']) + hours,
                                     name='DateTime')

    return lm_data

コード例 #28

0

ファイルを表示

ファイル: trend_finder.py プロジェクト: cduggan1708/stocks

def getStartDate():
    """Return the start date of the look-back window as a ``datetime.date``.

    The window begins 202 business days before yesterday (200 plus slack for
    weekend runs) and is pushed back one further business day for every US
    federal holiday found between that point and yesterday.
    """
    day_before_today = date.today() - timedelta(days=1)
    window_start = day_before_today - BDay(202)

    # every holiday inside the window costs one trading day, so go back
    # by that many additional business days
    federal_days = USFederalHolidayCalendar().holidays(window_start,
                                                       day_before_today)
    shifted_start = window_start - BDay(len(federal_days))

    return shifted_start.date()

コード例 #29

0

ファイルを表示

ファイル: process.py プロジェクト: transitmatters/mbta-covid-recovery-dash

def format_subway_data(path_to_csv_file: str):
    """Summarise weekly peak ridership per route from a ridership CSV.

    The file must provide the columns ``servicedate``, ``route_or_line`` and
    ``sum``. Peak days are weekdays that are not US federal holidays. For
    each ISO week and route the mean of ``sum`` over the peak days is
    rounded and labelled with that week's Monday as found in the data.

    Args:
        path_to_csv_file: Location of the CSV file.

    Returns:
        dict mapping each route to a list of
        ``{"date": <Monday as YYYY-MM-DD>, "riders": <rounded mean>}`` records.
    """
    # read data, convert to datetime
    df = pd.read_csv(path_to_csv_file)
    df["servicedate"] = pd.to_datetime(df["servicedate"])
    service_days = df["servicedate"].dt.normalize()

    # add holidays
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start=service_days.min(), end=service_days.max())

    # mark as holiday and weekday; comparing normalized timestamps avoids
    # the unit-less astype("datetime64") that pandas 2.x rejects
    df["holiday"] = service_days.isin(holidays)
    df["weekday"] = df["servicedate"].dt.dayofweek

    # define peak, add ISO week/year, convert service date back to text
    is_peak = ~df["holiday"] & (df["weekday"] < 5)
    df["peak"] = np.where(is_peak, "peak", "offpeak")
    iso = df["servicedate"].dt.isocalendar()
    df["week"] = iso.week
    df["year"] = iso.year
    df["servicedate"] = df["servicedate"].dt.date.astype(str)

    # Mondays present in the data label their week
    dates = df[df["weekday"] == 0]
    dates = dates[["servicedate", "week", "year"]].drop_duplicates()

    # limit data to just peak, merge back dates
    final = df[df["peak"] == "peak"]
    final = final.groupby(["year", "week", "route_or_line"
                           ])["sum"].mean().round().reset_index()
    final = final.merge(dates, on=["week", "year"], how="left")

    # one list of records per route, in order of first appearance
    output = {}
    for route in final["route_or_line"].unique():
        route_rows = final[final["route_or_line"] == route]
        renamed = route_rows[["servicedate", "sum"]].rename(columns={
            "servicedate": "date",
            "sum": "riders"
        })
        output[route] = renamed.to_dict(orient="records")

    return output

コード例 #30

0

ファイルを表示

ファイル: Finance.py プロジェクト: amtrek/stock-event-repo

def validate(date):
    """Move ``date`` off a US federal holiday.

    Returns:
        tuple: ``(date, adjusted)``. When ``date`` is a holiday it is
        advanced by one custom business day that skips federal holidays and
        ``adjusted`` is True; otherwise it is returned unchanged with False.
    """
    cal = USFederalHolidayCalendar()
    upcoming_holidays = cal.holidays(date)  # holidays from `date` onwards

    if date not in upcoming_holidays:
        return date, False

    # a business-day offset built on the calendar, so holidays are skipped
    holiday_aware_step = CustomBusinessDay(calendar=cal)
    return date + holiday_aware_step, True

コード例 #31

0

ファイルを表示

def add_time_windows(df):
    """Adds time windows as target variable, based on delivery date and time.

    Calculates number of business days (excl weekends, shipment date, federal holidays) for delivery.
    Adds delivery time to number of business days to get time-in-transit.
    Creates window thresholds based on discussion with Jose.
    Lastly assign time windows to shipments based on time-in-transit.

    The columns ``days_in_transit`` and ``days_taken_float`` are added to the
    dataframe that is passed in; the returned dataframe is a filtered copy of
    it that additionally carries ``Y``.

    Args:
        df (pandas dataframe obj): Pandas dataframe that must contain shipment_date, delivery_date, delivery_time.

    Returns:
        pandas dataframe obj: Pandas dataframe with new columns days_in_transit, days_taken_float (time-in-transit),
            Y (target variable i.e. time window)

    """
    print("Adding time windows i.e. target variable...")
    print(f"Starting with {report_df_stats(df)}")
    start_time = time.time()
    # Calculate days in transit (exclude shipment date, holidays, weekends).
    # The holiday window has to reach the latest delivery date, not just the
    # latest shipment date, otherwise holidays that fall while the last
    # shipments are still in transit would be counted as business days.
    start_date = df['shipment_date'].min()
    end_date = max(df['shipment_date'].max(), df['delivery_date'].max())
    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(start_date, end_date).date.tolist()
    shipment_dates = [d.date() for d in df['shipment_date']]
    delivery_dates = [d.date() for d in df['delivery_date']]
    # -1 because we will add transit time
    df['days_in_transit'] = np.busday_count(
        shipment_dates, delivery_dates, holidays=holidays) - 1
    # Convert days in transit/delivery time to days taken (with decimals)
    # e.g. if parcel reaches at 12.00pm on 2nd business day, days taken is 1.5
    seconds_per_day = timedelta(days=1).total_seconds()
    delivery_percentage_of_day = [
        timedelta.total_seconds(d) / seconds_per_day
        for d in df['delivery_time']
    ]
    df['days_taken_float'] = df['days_in_transit'] + delivery_percentage_of_day
    # Keep rows whose days in transit are -1, 0, ..., max_days_to_keep - 1
    # (np.arange excludes its upper bound). The rest are rare occurrences.
    max_days_to_keep = 5
    keep = df['days_in_transit'].isin(np.arange(-1, max_days_to_keep))
    # Copy so that adding 'Y' below writes to an independent frame instead of
    # a slice of the caller's dataframe.
    df = df[keep].copy()

    # Assign time windows
    time_window_thresholds = create_time_window_thresholds()
    tqdm.pandas(desc="Assign time window")
    df['Y'] = df.progress_apply(lambda x: assign_time_window(
        x['days_taken_float'], time_window_thresholds),
                                axis=1)

    print(f"Ending with {report_df_stats(df)}.")
    utilities.print_elapsed_time(start_time)
    return df

コード例 #32

0

ファイルを表示

ファイル: builder.py プロジェクト: yihengli/chitaxi

 def assign_workdays(self, data, start, end, speed=False):
     """Run ``self._workday_process`` over ``data`` with the federal holidays.

     Args:
         data: Dataframe to process.
         start: Start of the holiday lookup range.
         end: End of the holiday lookup range.
         speed: When true, split ``data`` into row chunks and process them
             in a multiprocessing pool, one worker per CPU.

     Returns:
         The result of ``self._workday_process``; in the parallel path the
         per-chunk results concatenated with ``pd.concat``.
     """
     cal = USFederalHolidayCalendar()
     holidays = cal.holidays(start=start, end=end)
     # Nothing to parallelise for an empty frame; pd.concat would also
     # reject an empty list of results.
     if not speed or data.shape[0] == 0:
         return self._workday_process(data, holidays)

     num_process = mp.cpu_count()
     # Keep at least one row per chunk: with fewer rows than CPUs the
     # integer division gives 0, which is not a valid range() step.
     chunk_size = max(1, data.shape[0] // num_process)
     chunks = [data.loc[data.index[i: i + chunk_size]]
               for i in range(0, data.shape[0], chunk_size)]
     pools = mp.Pool(num_process)
     try:
         res = pools.map(
             partial(self._workday_process, holidays=holidays), chunks)
     finally:
         # Always release the worker processes, even if a chunk failed.
         pools.close()
         pools.join()
     return pd.concat(res)

コード例 #33

0

ファイルを表示

def validate(date):
    """Move ``date`` off a US federal holiday onto the next business day.

    Args:
        date: ``datetime`` or ``pandas.Timestamp`` to check; any time of day
            it carries is preserved.

    Returns:
        tuple: ``(date, adjusted)`` -- the possibly shifted date and a flag
        that is ``True`` when the input fell on a federal holiday and was
        shifted.
    """
    from pandas import Timestamp

    cal = USFederalHolidayCalendar()
    # The holiday index holds midnight timestamps, so test the calendar day
    # of `date`; querying just that day keeps the lookup small.
    holiday_day = Timestamp(date).normalize()
    is_holiday = holiday_day in cal.holidays(start=holiday_day,
                                             end=holiday_day)

    if not is_holiday:
        return date, False

    # Business-day offset that skips federal holidays in addition to
    # weekends (used instead of a plain BDay).
    date += CustomBusinessDay(calendar=cal)
    return date, True

コード例 #34

0

ファイルを表示

ファイル: schedule_it.py プロジェクト: bgithub1/scheduleit

 def __init__(self,hour_to_wait_for,avoid_holidays=None,weekdays_to_avoid=None,logfile_path=None,logging_level=None):
     """Store the scheduling settings and load the federal holidays.

     Args:
         hour_to_wait_for: Stored as-is on ``self.hour_to_wait_for``.
         avoid_holidays: ``'true'``/``'false'`` text (any case) or a bool;
             ``None`` means ``True``.
         weekdays_to_avoid: Comma-separated weekday numbers; ``None`` means
             ``'5,6'``.
         logfile_path: Log file path; ``None`` means ``'logfile.log'``.
         logging_level: Logging level name; ``None`` means ``'INFO'``.
     """
     self.logger = init_root_logger(
         'logfile.log' if logfile_path is None else logfile_path,
         'INFO' if logging_level is None else logging_level)

     self.hour_to_wait_for = hour_to_wait_for
     wta = '5,6' if weekdays_to_avoid is None else weekdays_to_avoid
     self.days_to_go_back_to_sleep = [int(s) for s in wta.split(',')]
     # str() lets callers pass a real bool as well as 'true'/'false' text.
     self.avoid_holidays = (True if avoid_holidays is None
                            else str(avoid_holidays).lower() == 'true')

     # Get all of the holidays from January 1 of the current year through
     # January 1 of the next year (both ends included).
     cal = USFederalHolidayCalendar()
     y1 = dt.datetime.now().year
     y2 = y1 + 1
     self.holidays = cal.holidays(start=str(y1)+'-01-01', end=str(y2)+'-01-01').to_pydatetime()

コード例 #35

0

ファイルを表示

def _fix_new_dates(df):
    """
        Year, month, hour, weekday: Come from the index
        holiday: Comes from a Pandas module called "USFederalHolidayCalendar"
        working day: True if is not a holiday and it is between monday to friday
        season, weathersit, temp, atemp, hum, windspeed: take the same record from an hour ago
        casual, registered: Mean of the samples that correspond to the same hour and week day
        cnt: sum of casual and registered
    """
    ### Get holidays in the specific period of time
    cal = USFederalHolidayCalendar()
    holidays = cal.holidays(start='2011-01-01',
                            end='2012-12-31').to_pydatetime()

    nan_index = df[df.dteday.isna()].index

    for index in nan_index:
        df.loc[index,
               ['yr', 'mnth', 'hr']] = (0 if int(index.year) == 2011 else 1,
                                        index.month, index.hour)
        df.loc[index, 'weekday'] = index.dayofweek
        df.loc[index, 'workingday'] = int(index.dayofweek in [0, 1, 2, 3, 4]
                                          and index not in holidays)
        df.loc[index, 'holiday'] = int(index in holidays)

        sub_index = index

        while np.isnan(df.loc[sub_index, 'instant']):
            sub_index = sub_index - pd.Timedelta(1, unit='hr')
            if not np.isnan(df.loc[sub_index, 'instant']):
                df.loc[index, [
                    'weathersit', 'season', 'temp', 'atemp', 'hum', 'windspeed'
                ]] = df.loc[sub_index, [
                    'weathersit', 'season', 'temp', 'atemp', 'hum', 'windspeed'
                ]]

        df.loc[index, 'casual'] = round(
            df[(df['hr'] == df.loc[index, 'hr'])
               & (df['weekday'] == df.loc[index, 'weekday'])]['casual'].mean())
        df.loc[index, 'registered'] = round(
            df[(df['hr'] == df.loc[index, 'hr'])
               & (df['weekday'] == df.loc[index,
                                          'weekday'])]['registered'].mean())
        df.loc[index,
               'cnt'] = df.loc[index, 'casual'] + df.loc[index, 'registered']

    return df

コード例 #36

0

ファイルを表示

def solar_calcs(hourly_ac_list, tariff):
	"""Total hourly AC output and price it against a time-of-use tariff.

	Entry ``i`` of ``hourly_ac_list`` is treated as hour ``i`` counted from
	midnight on January 1 of the module-level ``YEAR``. Weekends and US
	federal holidays are billed at the off-peak rate all day; other days use
	the off-peak / peak / part-peak hours of the current season. The season
	starts as winter and switches on the dates in ``tariff.seasons``.

	Args:
		hourly_ac_list: Sequence of hourly AC output values.
		tariff: Object exposing the ``seasons``, ``rates`` and ``hours``
			mappings read below.

	Returns:
		tuple: ``(total_ac_output / 1000, annual_kwh_solar_revenue / 1000)``.
	"""
	year_start = datetime.datetime(YEAR, 1, 1)
	total_ac_output = 0
	annual_kwh_solar_revenue = 0
	season = "winter"
	cal = USFederalHolidayCalendar()
	# A set makes the per-hour holiday test a constant-time lookup.
	holidays = set(cal.holidays(start=str(YEAR) + '-01-01', end=str(YEAR) + '-12-31').to_pydatetime())
	for i, ac_output in enumerate(hourly_ac_list):
		total_ac_output += ac_output
		# Derive the day and hour from the sample position with real calendar
		# arithmetic so leap years (February 29) are handled correctly.
		day_offset, hour = divmod(i, 24)
		date = year_start + datetime.timedelta(days=day_offset)
		if date == tariff.seasons['Summer Start']:
			season = "summer"
		if date == tariff.seasons['Winter Start']:
			season = "winter"
		# `season` is always "winter" or "summer", so it can be substituted
		# straight into the tariff keys.
		if date.weekday() >= 5 or date in holidays:
			rate = tariff.rates['kWh Off Peak (%s)' % season]
		elif hour < int(tariff.hours['Off Peak Morning Start (%s)' % season]) or hour >= int(tariff.hours['Off Peak Evening End (%s)' % season]):
			rate = tariff.rates['kWh Off Peak (%s)' % season]
		elif hour >= int(tariff.hours['Peak Start (%s)' % season]) and hour < int(tariff.hours['Peak End (%s)' % season]):
			rate = tariff.rates['kWh Peak (%s)' % season]
		else:
			rate = tariff.rates['kWh Part Peak (%s)' % season]
		annual_kwh_solar_revenue += float(rate) * ac_output
	return total_ac_output/1000, annual_kwh_solar_revenue/1000

コード例 #37

0

ファイルを表示

def create_date_features(df, start_date, end_date):
    """Add calendar features for the taxi pickup and dropoff timestamps.

    For each of ``tpep_dropoff_datetime`` and ``tpep_pickup_datetime`` (ISO
    strings with microseconds) the columns ``<name>_years``, ``_months``,
    ``_days``, ``_hours``, ``_minutes``, ``_seconds``, ``_day_of_week`` and
    ``_is_holiday`` are added to ``df``, followed by ``duration``: the trip
    length in minutes, rounded.

    Args:
        df: Dataframe holding the two timestamp columns; modified in place.
        start_date: Start of the US federal holiday lookup range.
        end_date: End of the US federal holiday lookup range.

    Returns:
        The same dataframe with the new columns.
    """
    dt_format = '%Y-%m-%dT%H:%M:%S.%f'
    # US federal holidays in the requested range, as midnight datetimes
    us_holidays = USFederalHolidayCalendar().holidays(
        start=start_date, end=end_date).to_pydatetime()

    parsed = {}
    for feature_name in ("tpep_dropoff_datetime", "tpep_pickup_datetime"):
        # Parse the whole column first, then derive one feature per list
        stamps = [datetime.strptime(raw, dt_format)
                  for raw in df[feature_name]]
        parsed[feature_name] = stamps

        holiday_flags = [
            1 if datetime(ts.year, ts.month, ts.day) in us_holidays else 0
            for ts in stamps
        ]

        df[feature_name + "_years"] = [ts.year for ts in stamps]
        df[feature_name + "_months"] = [ts.month for ts in stamps]
        df[feature_name + "_days"] = [ts.day for ts in stamps]
        df[feature_name + "_hours"] = [ts.hour for ts in stamps]
        df[feature_name + "_minutes"] = [ts.minute for ts in stamps]
        df[feature_name + "_seconds"] = [ts.second for ts in stamps]
        df[feature_name + "_day_of_week"] = [ts.weekday() for ts in stamps]
        df[feature_name + "_is_holiday"] = holiday_flags

    # Trip length in whole minutes
    trip_pairs = zip(parsed["tpep_dropoff_datetime"],
                     parsed["tpep_pickup_datetime"])
    df["duration"] = [
        round((dropoff - pickup).total_seconds() / 60.0)
        for dropoff, pickup in trip_pairs
    ]
    return df

コード例 #38

0

ファイルを表示

ファイル: test_calendar.py プロジェクト: Itay4/pandas

def test_calendar(transform):
    """The 2012 federal holidays are returned for any accepted bound type.

    ``transform`` converts the ``datetime`` range bounds into the form
    handed to ``holidays``.
    """
    first_day, last_day = datetime(2012, 1, 1), datetime(2012, 12, 31)

    observed = USFederalHolidayCalendar().holidays(transform(first_day),
                                                   transform(last_day))

    # (month, day) of every holiday expected in 2012, in calendar order
    month_day_pairs = [
        (1, 2), (1, 16), (2, 20), (5, 28), (7, 4),
        (9, 3), (10, 8), (11, 12), (11, 22), (12, 25),
    ]
    expected = [datetime(2012, month, day) for month, day in month_day_pairs]

    assert list(observed.to_pydatetime()) == expected

コード例 #39

0

ファイルを表示

ファイル: config.py プロジェクト: schwallie2/alive_rescue

def get_first_bday_of_month(mnth=None, yr=None):
    '''
    Return the first business day of the current month if no variables provided
    Return the first business day of the month and year provided if variables provided

    Weekends and US federal holidays are not business days. If either
    argument is missing, both are taken from the current date.

    Tests:
    In [188]: config.get_first_bday_of_month(12,2015)
    Out[188]: datetime.date(2015, 12, 1)

    In [189]: config.get_first_bday_of_month(11,2015)
    Out[189]: datetime.date(2015, 11, 2)

    In [190]: config.get_first_bday_of_month(10,2015)
    Out[190]: datetime.date(2015, 10, 1)

    In [191]: config.get_first_bday_of_month(1,2016)
    Out[191]: datetime.date(2016, 1, 4)

    In [192]: config.get_first_bday_of_month(8,2015)
    Out[192]: datetime.date(2015, 8, 3)
    :param mnth: month number (1-12), or None for the current month
    :param yr: four-digit year, or None for the current year
    :return: datetime.date of the first business day of that month
    '''
    from calendar import monthrange
    from pandas.tseries.holiday import USFederalHolidayCalendar
    from pandas.tseries.offsets import CustomBusinessDay
    if yr is None or mnth is None:
        # Read the clock once so year and month always belong to the same
        # instant (pd.datetime no longer exists in current pandas).
        now = pd.Timestamp.now()
        yr, mnth = now.year, now.month
    # Step back to the month before the requested one ...
    if mnth == 1:
        prev_yr, prev_mnth = yr - 1, 12
    else:
        prev_yr, prev_mnth = yr, mnth - 1
    # ... and take its last calendar day; one business day after that is the
    # first business day of the requested month.
    last_dom = monthrange(prev_yr, prev_mnth)[1]
    end_last = pd.Timestamp(year=prev_yr, month=prev_mnth, day=last_dom)
    cal = USFederalHolidayCalendar()
    window = pd.Timedelta(days=60)
    holidays = cal.holidays(start=end_last - window,
                            end=end_last + window).to_pydatetime()
    bday_cus = CustomBusinessDay(holidays=holidays)
    return (end_last + bday_cus).date()

コード例 #40

0

ファイルを表示

ファイル: create_classifier.py プロジェクト: gnatman/TapPy

        days_since_school_year_start = (date_of_interest - beginning_range).days
    #for 2015/2016 School year
    beginning_range = datetime.date(2015, 8, 31)
    end_range = datetime.date(2016, 8, 29)
    if beginning_range <= date_of_interest <= end_range:
        days_since_school_year_start = (date_of_interest - beginning_range).days
    datekey_list.append(weather_datekey)
    days_since_school_year_start_list.append(days_since_school_year_start)
days_since_school_year_start_db = {'days_since_school_year_start' : pd.Series(days_since_school_year_start_list, index=datekey_list)}
days_since_school_year_start_df = pd.DataFrame(days_since_school_year_start_db)



# Account for major US federal holidays: flag each weather date key
# (an integer/string of the form YYYYMMDD) with 1 on a holiday, else 0.
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start='2013-01-01', end='2016-12-31').to_pydatetime()
# `holidays` holds datetime.datetime values, which never compare equal to the
# datetime.date built below, so compare against plain dates instead.
holiday_dates = set(holiday.date() for holiday in holidays)
datekey_list = []
holidays_list = []
for weather_datekey in weather_df.datekey:
    year = int(str(weather_datekey)[:4])
    month = int(str(weather_datekey)[4:6])
    day = int(str(weather_datekey)[6:])
    date_of_interest = datetime.date(year, month, day)
    holidays_list.append(1 if date_of_interest in holiday_dates else 0)
    datekey_list.append(weather_datekey)
holidays_db = {'holidays' : pd.Series(holidays_list, index=datekey_list)}
holidays_df = pd.DataFrame(holidays_db)

コード例 #41

0

ファイルを表示

ファイル: divvy_data_preprocessing.py プロジェクト: mgiangreco/divvy

# Feature engineering on `merged_master`, which is mutated step by step below.
# NOTE(review): the `.index.month` / `.index.date` accesses assume the frame
# has a datetime index -- confirm against the code that builds it.

# Add a dummy variable for weekend days: 1 when day_of_week is 5 or 6, else 0.
merged_master['weekend_day'] = np.where((merged_master['day_of_week']==5)|(merged_master['day_of_week']==6),1,0)

# Drop the day_of_week field now that the weekend flag has been derived.
merged_master.drop('day_of_week', axis=1, inplace=True)

# Add a temporary month column, expand it into month_<n> dummy columns,
# append those to the frame and drop the temporary column again.
merged_master['month'] = merged_master.index.month
dummy_df = pd.get_dummies(merged_master['month'], prefix='month')
merged_master = pd.concat([merged_master,dummy_df], axis=1)
merged_master.drop('month', axis=1, inplace=True)

# Add a holiday column: 1 when the row's calendar date is a US federal
# holiday, else 0. A temporary `date` column (time of day stripped, converted
# back to datetimes) is used for the comparison and then removed.
merged_master['date'] = merged_master.index.date
merged_master['date'] = pd.to_datetime(merged_master['date'])
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start=merged_master.date.min(), end=merged_master.date.max())
merged_master['holiday'] = merged_master['date'].isin(holidays).astype(int)
merged_master.drop('date', axis=1, inplace=True)

# Add a column with the whole days elapsed since the earliest date in the index.
merged_master['days_since_first_date'] = (merged_master.index.date - (merged_master.index.date.min())).astype('timedelta64[D]').astype(int)

# Drop any rows with null values.
merged_master.dropna(axis=0, inplace=True)

# Rename from_station_id to station_id; because the copy is added before the
# original is dropped, station_id ends up as the last column.
merged_master['station_id'] = merged_master['from_station_id']
merged_master.drop(['from_station_id'], axis=1, inplace=True)

コード例 #42

0

ファイルを表示

ファイル: matplotlib_funcs.py プロジェクト: tesskbot/moveloot_public

def make_regression_features_oct(df):
    '''Create features for regression given a dataframe

    Reads the ``dayssincestart``, ``dayofweek``, ``dayofmonth`` and
    ``datetime`` columns of ``df`` and returns a dataframe of regressors:
    the raw day count, day-of-week and day-of-month dummies, week-of-month
    flags, weekend / Tue-Wed / Fri-Sat flags, US federal holiday and
    three-day-weekend flags, and the week x day-group interaction terms.

    The derived flag series are built with a default 0..n-1 index, so ``df``
    is expected to carry the same default index for the columns to line up.
    '''

    from pandas.tseries.holiday import USFederalHolidayCalendar

    x_dayssincestart = df.dayssincestart

    x_dayofweek = pd.get_dummies(df.dayofweek, prefix='dayofweek')
    x_dayofmonth = pd.get_dummies(df.dayofmonth, prefix='dayofmonth')

    # Week-of-month buckets
    x_week1 = pd.Series([1 if day < 8 else 0 for day in df.dayofmonth],name='week1')
    x_week2 = pd.Series([1 if (day >= 8 and day < 16) else 0 for day in df.dayofmonth],name='week2')
    x_week3 = pd.Series([1 if (day >= 16 and day < 23) else 0 for day in df.dayofmonth],name='week3')
    x_week4 = pd.Series([1 if day >= 23 else 0 for day in df.dayofmonth],name='week4')

    # Day-of-week groups (Monday == 0)
    x_isweekend = pd.Series([1 if (day == 5 or day == 6) else 0 for day in df.dayofweek],name='isweekend')
    x_istueswed = pd.Series([1 if (day == 1 or day == 2) else 0 for day in df.dayofweek],name='istueswed')
    x_isfrisat = pd.Series([1 if (day == 4 or day == 5) else 0 for day in df.dayofweek],name='isfrisat')

    # Get holidays; the set is built once instead of once per row
    calendar = USFederalHolidayCalendar()
    holidays = calendar.holidays(start=df.datetime.min(), end=df.datetime.max())
    holiday_stamps = set(holidays)
    x_isholiday = pd.Series([1 if day in holiday_stamps else 0 for day in df.datetime],name='x_isholiday')

    # Find 3-day weekends: three consecutive rows that are each a weekend day
    # or a holiday. The scan starts at the first row so a long weekend at the
    # very beginning of the data is flagged too.
    off_days = (x_isweekend + x_isholiday).values
    wknd_flags = np.zeros(len(x_isholiday))
    for i in range(len(off_days) - 2):
        if off_days[i] == 1 and off_days[i+1] == 1 and off_days[i+2] == 1:
            wknd_flags[i:i+3] = 1
    x_isholidaywknd = pd.Series(wknd_flags,name='x_isholidaywknd')

    # Interaction terms: week of month x day group
    x_week1tueswed = x_week1 & x_istueswed
    x_week2tueswed = x_week2 & x_istueswed
    x_week3tueswed = x_week3 & x_istueswed
    x_week4tueswed = x_week4 & x_istueswed

    x_week1frisat = x_week1 & x_isfrisat
    x_week2frisat = x_week2 & x_isfrisat
    x_week3frisat = x_week3 & x_isfrisat
    x_week4frisat = x_week4 & x_isfrisat

    x_week1wknd = x_week1 & x_isweekend
    x_week2wknd = x_week2 & x_isweekend
    x_week3wknd = x_week3 & x_isweekend
    x_week4wknd = x_week4 & x_isweekend

    X_vars = pd.concat([x_dayssincestart,x_dayofweek,x_dayofmonth,
                       x_week1,x_week2,x_week3,x_week4,
                       x_isweekend,x_istueswed,x_isfrisat,
                       x_isholiday,x_isholidaywknd,
                       x_week1tueswed,x_week2tueswed,x_week3tueswed,x_week4tueswed,
                       x_week1frisat,x_week2frisat,x_week3frisat,x_week4frisat,
                       x_week1wknd,x_week2wknd,x_week3wknd,x_week4wknd],
                       axis=1)

    return X_vars