def __date_control_quarter(x):
    # Split a datetime-indexed series into contiguous runs (a gap of 95 days
    # or more starts a new run) and return the quarter of each run's first
    # and last date.
    dates = x.dropna().index.to_series()
    start = dates.diff() < offset.Day(95)
    end = dates.shift(-1) - dates < offset.Day(95)
    start = dates[~start].values
    end = dates[~end].values
    start = pd.PeriodIndex(start, freq='Q')
    end = pd.PeriodIndex(end, freq='Q')
    return list(zip(start, end))
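
# A minimal usage sketch (an addition, not part of the original source),
# assuming __date_control_quarter is a module-level helper and the aliases
# `import pandas as pd` and `import pandas.tseries.offsets as offset` are in
# scope: a daily series with a gap longer than 95 days splits into two runs,
# and the function reports each run's opening and closing quarter.
idx = pd.date_range('2019-01-01', '2019-03-31', freq='D').append(
    pd.date_range('2019-10-01', '2019-12-31', freq='D'))
s = pd.Series(1.0, index=idx)
print(__date_control_quarter(s))
# roughly: [(Period('2019Q1'), Period('2019Q1')),
#           (Period('2019Q4'), Period('2019Q4'))]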
def test_get_freq_code(self):
    # freqstr
    self.assertEqual(frequencies.get_freq_code('A'),
                     (frequencies.get_freq('A'), 1))
    self.assertEqual(frequencies.get_freq_code('3D'),
                     (frequencies.get_freq('D'), 3))
    self.assertEqual(frequencies.get_freq_code('-2M'),
                     (frequencies.get_freq('M'), -2))

    # tuple
    self.assertEqual(frequencies.get_freq_code(('D', 1)),
                     (frequencies.get_freq('D'), 1))
    self.assertEqual(frequencies.get_freq_code(('A', 3)),
                     (frequencies.get_freq('A'), 3))
    self.assertEqual(frequencies.get_freq_code(('M', -2)),
                     (frequencies.get_freq('M'), -2))

    # numeric tuple
    self.assertEqual(frequencies.get_freq_code((1000, 1)), (1000, 1))

    # offsets
    self.assertEqual(frequencies.get_freq_code(offsets.Day()),
                     (frequencies.get_freq('D'), 1))
    self.assertEqual(frequencies.get_freq_code(offsets.Day(3)),
                     (frequencies.get_freq('D'), 3))
    self.assertEqual(frequencies.get_freq_code(offsets.Day(-2)),
                     (frequencies.get_freq('D'), -2))
    self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd()),
                     (frequencies.get_freq('M'), 1))
    self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(3)),
                     (frequencies.get_freq('M'), 3))
    self.assertEqual(frequencies.get_freq_code(offsets.MonthEnd(-2)),
                     (frequencies.get_freq('M'), -2))
    self.assertEqual(frequencies.get_freq_code(offsets.Week()),
                     (frequencies.get_freq('W'), 1))
    self.assertEqual(frequencies.get_freq_code(offsets.Week(3)),
                     (frequencies.get_freq('W'), 3))
    self.assertEqual(frequencies.get_freq_code(offsets.Week(-2)),
                     (frequencies.get_freq('W'), -2))

    # Monday is weekday=0
    self.assertEqual(frequencies.get_freq_code(offsets.Week(weekday=1)),
                     (frequencies.get_freq('W-TUE'), 1))
    self.assertEqual(frequencies.get_freq_code(offsets.Week(3, weekday=0)),
                     (frequencies.get_freq('W-MON'), 3))
    self.assertEqual(frequencies.get_freq_code(offsets.Week(-2, weekday=4)),
                     (frequencies.get_freq('W-FRI'), -2))
def test_get_freq_code(self):
    # frequency str
    assert (frequencies.get_freq_code('A') ==
            (frequencies.get_freq('A'), 1))
    assert (frequencies.get_freq_code('3D') ==
            (frequencies.get_freq('D'), 3))
    assert (frequencies.get_freq_code('-2M') ==
            (frequencies.get_freq('M'), -2))

    # tuple
    assert (frequencies.get_freq_code(('D', 1)) ==
            (frequencies.get_freq('D'), 1))
    assert (frequencies.get_freq_code(('A', 3)) ==
            (frequencies.get_freq('A'), 3))
    assert (frequencies.get_freq_code(('M', -2)) ==
            (frequencies.get_freq('M'), -2))

    # numeric tuple
    assert frequencies.get_freq_code((1000, 1)) == (1000, 1)

    # offsets
    assert (frequencies.get_freq_code(offsets.Day()) ==
            (frequencies.get_freq('D'), 1))
    assert (frequencies.get_freq_code(offsets.Day(3)) ==
            (frequencies.get_freq('D'), 3))
    assert (frequencies.get_freq_code(offsets.Day(-2)) ==
            (frequencies.get_freq('D'), -2))
    assert (frequencies.get_freq_code(offsets.MonthEnd()) ==
            (frequencies.get_freq('M'), 1))
    assert (frequencies.get_freq_code(offsets.MonthEnd(3)) ==
            (frequencies.get_freq('M'), 3))
    assert (frequencies.get_freq_code(offsets.MonthEnd(-2)) ==
            (frequencies.get_freq('M'), -2))
    assert (frequencies.get_freq_code(offsets.Week()) ==
            (frequencies.get_freq('W'), 1))
    assert (frequencies.get_freq_code(offsets.Week(3)) ==
            (frequencies.get_freq('W'), 3))
    assert (frequencies.get_freq_code(offsets.Week(-2)) ==
            (frequencies.get_freq('W'), -2))

    # Monday is weekday=0
    assert (frequencies.get_freq_code(offsets.Week(weekday=1)) ==
            (frequencies.get_freq('W-TUE'), 1))
    assert (frequencies.get_freq_code(offsets.Week(3, weekday=0)) ==
            (frequencies.get_freq('W-MON'), 3))
    assert (frequencies.get_freq_code(offsets.Week(-2, weekday=4)) ==
            (frequencies.get_freq('W-FRI'), -2))
def test_construct_timestamp_preserve_original_frequency(self):
    # GH 22311
    with tm.assert_produces_warning(FutureWarning,
                                    match="The 'freq' argument"):
        result = Timestamp(Timestamp("2010-08-08", freq="D")).freq
    expected = offsets.Day()
    assert result == expected
def test_with_local_timezone_pytz(self):
    # see gh-5430
    local_timezone = pytz.timezone('America/Los_Angeles')

    start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                     tzinfo=pytz.utc)
    # 1 day later
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                   tzinfo=pytz.utc)

    index = pd.date_range(start, end, freq='H')

    series = Series(1, index=index)
    series = series.tz_convert(local_timezone)
    result = series.resample('D', kind='period').mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    expected_index = (pd.period_range(start=start, end=end, freq='D') -
                      offsets.Day())
    expected = Series(1, index=expected_index)
    assert_series_equal(result, expected)
def regress_em(df):
    ret_list = []
    # note: the original passed the format string to infer_datetime_format,
    # which expects a boolean; format= is the correct keyword
    predict_start = pd.to_datetime("2017-03-18", format='%Y-%m-%d')
    predict_end = pd.to_datetime("2017-04-22", format='%Y-%m-%d')
    train_end = predict_start - offsets.Day(1)
    quarter_start = train_end - offsets.Week(13)
    year_start = train_end - offsets.Week(52)

    predict_df = take_df_by_period(df, predict_start, predict_end)
    quarter_df = take_df_by_period(df, quarter_start, train_end).dropna(
        axis=0, subset=["visitors_nan"])
    year_df = take_df_by_period(df, year_start, train_end).dropna(
        axis=0, subset=["visitors_nan"])
    if predict_df.empty or quarter_df.empty:
        return ret_list

    li1 = linear_model.LinearRegression()
    r1 = linear_model.Ridge(alpha=0.1)
    r2 = linear_model.Ridge(alpha=0.5)
    r3 = linear_model.Ridge(alpha=1.0)
    l1 = linear_model.Lasso(alpha=0.1)
    l2 = linear_model.Lasso(alpha=0.5)
    l3 = linear_model.Lasso(alpha=1.0)
    # h1 = linear_model.HuberRegressor()
    models = [li1, r1, r2, r3, l1, l2, l3]

    quarter_y_pred_list = do_regression(quarter_df, predict_df, models)
    year_y_pred_list = do_regression(year_df, predict_df, models)

    name_list = ["li1", "r1", "r2", "r3", "l1", "l2", "l3"]
    temp_df = pd.DataFrame(index=predict_df.index)
    for i in range(7):
        col_name = "q_regress_" + name_list[i]
        temp_df[col_name] = quarter_y_pred_list[i]
        col_name = "y_regress_" + name_list[i]
        temp_df[col_name] = year_y_pred_list[i]
    ret_list.append(temp_df)
    return ret_list
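
# `take_df_by_period` and `do_regression` are not defined in this snippet.
# A hypothetical sketch of what `do_regression` might look like, assuming the
# target column is "visitors_nan" and all other columns are numeric features
# (both assumptions, not confirmed by the original source):
def do_regression(train_df, predict_df, models):
    feature_cols = [c for c in train_df.columns if c != "visitors_nan"]
    X_train, y_train = train_df[feature_cols], train_df["visitors_nan"]
    X_pred = predict_df[feature_cols]
    # fit each model on the training window and predict the target period
    return [m.fit(X_train, y_train).predict(X_pred) for m in models]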
def range_datetime(datetime_start, datetime_end, timeskip=None):
    """Build datetime generator over successive time steps."""
    if timeskip is None:
        timeskip = offsets.Day(1)
    while datetime_start <= datetime_end:
        yield datetime_start
        datetime_start += timeskip
def test_valid(self):
    df = self.regular

    # not a valid freq
    msg = "passed window foobar is not compatible with a datetimelike index"
    with pytest.raises(ValueError, match=msg):
        df.rolling(window="foobar")

    # not a datetimelike index
    msg = "window must be an integer"
    with pytest.raises(ValueError, match=msg):
        df.reset_index().rolling(window="foobar")

    # non-fixed freqs
    msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
    for freq in ["2MS", offsets.MonthBegin(2)]:
        with pytest.raises(ValueError, match=msg):
            df.rolling(window=freq)

    for freq in ["1D", offsets.Day(2), "2ms"]:
        df.rolling(window=freq)

    # non-integer min_periods
    msg = (r"local variable 'minp' referenced before assignment|"
           "min_periods must be an integer")
    for minp in [1.0, "foo", np.array([1, 2, 3])]:
        with pytest.raises(ValueError, match=msg):
            df.rolling(window="1D", min_periods=minp)

    # center is not implemented
    msg = "center is not implemented for datetimelike and offset based windows"
    with pytest.raises(NotImplementedError, match=msg):
        df.rolling(window="1D", center=True)
def _construct_bt_dt_index(self):
    """
    Construct the t0 date index that runs from t0 to T.

    Takes the weights index and prepends it with either the last available
    date from the price index before the first weight date (when prices
    start earlier than the weights), or an "artificial" datetime one
    frequency step before t1.
    """
    dt_t0_tmp = self.price_date_index.copy()
    # where is the first weight date
    first_weight_date = self.trading_dt_index[0]
    if dt_t0_tmp[0] < first_weight_date:
        # prices start before the first weight date, so the bt index starts
        # at the price date closest before the first weight date t1; that
        # date becomes the initialization date t0
        initialization_date_index = dt_t0_tmp.get_loc(first_weight_date) - 1
        dates_t0_index = dt_t0_tmp[initialization_date_index:]
    else:
        freq = self.frequency
        if freq == 'B':
            initialization_date = dt_t0_tmp[0] - time_offset.BDay(1)
        elif freq == 'D':
            initialization_date = dt_t0_tmp[0] - time_offset.Day(1)
        elif freq == 'min':
            initialization_date = dt_t0_tmp[0] - time_offset.Minute(1)
        elif freq == 'H':
            initialization_date = dt_t0_tmp[0] - time_offset.Hour(1)
        else:
            assert freq == 'S', 'unsupported frequency: %s' % freq
            initialization_date = dt_t0_tmp[0] - time_offset.Second(1)
        # prepend the index with an "artificial" first datetime; the
        # interval is chosen to match the frequency of the price index
        dates_t0_index = dt_t0_tmp.append(
            pd.DatetimeIndex([initialization_date])).sort_values()
    return dates_t0_index
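
# A minimal standalone sketch (not from the original source) of the prepend
# branch above, assuming a daily price index that starts on the first weight
# date: one artificial 'D' step is prepended so the backtest has a t0 row.
import pandas as pd
import pandas.tseries.offsets as time_offset

price_dates = pd.date_range('2020-01-02', periods=5, freq='D')
t0 = price_dates[0] - time_offset.Day(1)
bt_index = price_dates.append(pd.DatetimeIndex([t0])).sort_values()
print(bt_index[0])  # 2020-01-01 00:00:00, the artificial initialization date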
def test_valid(self):
    df = self.regular

    # not a valid freq
    with pytest.raises(ValueError):
        df.rolling(window="foobar")

    # not a datetimelike index
    with pytest.raises(ValueError):
        df.reset_index().rolling(window="foobar")

    # non-fixed freqs
    for freq in ["2MS", offsets.MonthBegin(2)]:
        with pytest.raises(ValueError):
            df.rolling(window=freq)

    for freq in ["1D", offsets.Day(2), "2ms"]:
        df.rolling(window=freq)

    # non-integer min_periods
    for minp in [1.0, "foo", np.array([1, 2, 3])]:
        with pytest.raises(ValueError):
            df.rolling(window="1D", min_periods=minp)

    # center is not implemented
    with pytest.raises(NotImplementedError):
        df.rolling(window="1D", center=True)
def test_overflow_offset_raises(self):
    # xref https://github.com/statsmodels/statsmodels/issues/3374
    # ends up multiplying really large numbers which overflow
    stamp = Timestamp("2017-01-13 00:00:00", freq="D")
    offset_overflow = 20169940 * offsets.Day(1)
    msg = ("the add operation between "
           r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
           "will overflow")

    with pytest.raises(OverflowError, match=msg):
        stamp + offset_overflow

    with pytest.raises(OverflowError, match=msg):
        offset_overflow + stamp

    with pytest.raises(OverflowError, match=msg):
        stamp - offset_overflow

    # xref https://github.com/pandas-dev/pandas/issues/14080
    # used to crash, so check for proper overflow exception
    stamp = Timestamp("2000/1/1")
    offset_overflow = to_offset("D") * 100 ** 25

    with pytest.raises(OverflowError, match=msg):
        stamp + offset_overflow

    with pytest.raises(OverflowError, match=msg):
        offset_overflow + stamp

    with pytest.raises(OverflowError, match=msg):
        stamp - offset_overflow
def get_dates_range(self, scale='auto', start=None, end=None,
                    date_max='2010-01-01'):
    '''
    Returns a list of dates sampled according to the specified parameters.

    :param scale: {'auto', 'maximum', 'daily', 'weekly', 'monthly',
        'quarterly', 'yearly'}
        Scale specifies the sampling intervals.
        'auto' will heuristically choose a scale for quick processing.
    :param start: First date that will be included.
    :param end: Last date that will be included.
    '''
    if scale not in ['auto', 'maximum', 'daily', 'weekly', 'monthly',
                     'quarterly', 'yearly']:
        raise ValueError('Incorrect scale: %s' % scale)
    start = Timestamp(start or self._start.min() or date_max)
    # FIXME: start != start is true for NaN objects... is NaT the same?
    start = Timestamp(date_max) if repr(start) == 'NaT' else start
    end = Timestamp(end or max(Timestamp(self._end.max()),
                               self._start.max()))
    # FIXME: end != end ?
    end = datetime.utcnow() if repr(end) == 'NaT' else end
    start = start if self.check_in_bounds(start) else self._lbound
    end = end if self.check_in_bounds(end) else self._rbound
    if scale == 'auto':
        scale = self._auto_select_scale(start, end)
    if scale == 'maximum':
        start_dts = list(self._start.dropna().values)
        end_dts = list(self._end.dropna().values)
        dts = map(Timestamp, set(start_dts + end_dts))
        # materialize so callers get a list, not a lazy filter object
        dts = [ts for ts in dts
               if self.check_in_bounds(ts) and start <= ts <= end]
        return dts

    freq = dict(daily='D', weekly='W', monthly='M', quarterly='3M',
                yearly='12M')
    offset = dict(daily=off.Day(n=0), weekly=off.Week(),
                  monthly=off.MonthEnd(), quarterly=off.QuarterEnd(),
                  yearly=off.YearEnd())
    # for some reason, weekly date range gives one week less:
    end_ = end + off.Week() if scale == 'weekly' else end
    ret = list(pd.date_range(start + offset[scale], end_, freq=freq[scale]))
    ret = [dt for dt in ret if dt <= end]
    ret = [start] + ret if ret and start < ret[0] else ret
    ret = ret + [end] if ret and end > ret[-1] else ret
    ret = [ts for ts in ret if self.check_in_bounds(ts)]
    return ret
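
# A minimal sketch (an illustration, not from the original source) of the
# sampling idea used by get_dates_range: anchor the start to a period
# boundary with an offset, then sample with pd.date_range at the matching
# frequency.
import pandas as pd
import pandas.tseries.offsets as off

start, end = pd.Timestamp('2019-01-15'), pd.Timestamp('2019-06-10')
monthly = pd.date_range(start + off.MonthEnd(), end, freq='M')
print(list(monthly))  # month ends from 2019-01-31 through 2019-05-31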
def test_get_rule_month():
    result = frequencies._get_rule_month('W')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.Week())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('D')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.Day())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('Q')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=12))
    assert (result == 'DEC')

    result = frequencies._get_rule_month('Q-JAN')
    assert (result == 'JAN')
    result = frequencies._get_rule_month(offsets.QuarterEnd(startingMonth=1))
    assert (result == 'JAN')

    result = frequencies._get_rule_month('A-DEC')
    assert (result == 'DEC')
    result = frequencies._get_rule_month(offsets.YearEnd())
    assert (result == 'DEC')

    result = frequencies._get_rule_month('A-MAY')
    assert (result == 'MAY')
    result = frequencies._get_rule_month(offsets.YearEnd(month=5))
    assert (result == 'MAY')
def test_with_local_timezone_dateutil(self):
    # see gh-5430
    local_timezone = "dateutil/America/Los_Angeles"

    start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
                     tzinfo=dateutil.tz.tzutc())
    # 1 day later
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
                   tzinfo=dateutil.tz.tzutc())

    index = pd.date_range(start, end, freq="H", name="idx")

    series = Series(1, index=index)
    series = series.tz_convert(local_timezone)
    result = series.resample("D", kind="period").mean()

    # Create the expected series
    # Index is moved back a day with the timezone conversion from UTC to
    # Pacific
    expected_index = (pd.period_range(start=start, end=end, freq="D",
                                      name="idx") - offsets.Day())
    expected = Series(1, index=expected_index)
    assert_series_equal(result, expected)
def next_day(self):
    # advance the simulation clock by one day
    self.t = offsets.Day(1).apply(self.t)
    price = list(self.hist['Close'].loc[
        self.hist.index.values == np.datetime64(self.t)])
    if len(price) > 0:
        self.stock_price_t = price[0]
    else:
        # no quote for this date (e.g. weekend or market holiday)
        self.stock_price_t = 'None'
def test_delta_to_tick():
    delta = timedelta(3)
    tick = delta_to_tick(delta)
    assert tick == offsets.Day(3)

    td = Timedelta(nanoseconds=5)
    tick = delta_to_tick(td)
    assert tick == Nano(5)
def test_overflow_offset(self):
    # xref https://github.com/statsmodels/statsmodels/issues/3374
    # ends up multiplying really large numbers which overflow
    stamp = Timestamp('2017-01-13 00:00:00', freq='D')
    offset = 20169940 * offsets.Day(1)

    with pytest.raises(OverflowError):
        stamp + offset

    with pytest.raises(OverflowError):
        offset + stamp

    with pytest.raises(OverflowError):
        stamp - offset
def test_roll_date_object(self):
    offset = self._offset()

    dt = date(2012, 9, 15)

    result = offset.rollback(dt)
    assert result == datetime(2012, 9, 14)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 9, 17)

    offset = offsets.Day()
    result = offset.rollback(dt)
    assert result == datetime(2012, 9, 15)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 9, 15)
def test_roll_date_object(self):
    offset = CBMonthEnd()

    dt = date(2012, 9, 15)

    result = offset.rollback(dt)
    assert result == datetime(2012, 8, 31)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 9, 28)

    offset = offsets.Day()
    result = offset.rollback(dt)
    assert result == datetime(2012, 9, 15)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 9, 15)
def test_roll_date_object(self):
    offset = CBMonthBegin()

    dt = date(2012, 9, 15)

    result = offset.rollback(dt)
    assert result == datetime(2012, 9, 3)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 10, 1)

    offset = offsets.Day()
    result = offset.rollback(dt)
    assert result == datetime(2012, 9, 15)

    result = offset.rollforward(dt)
    assert result == datetime(2012, 9, 15)
def data_move_test():
    s = pd.Series(np.random.randn(6),
                  index=pd.date_range('1/1/2019', periods=6, freq='M'))
    print('original data \r\n', s)
    # plain shifts (the data moves, producing missing values)
    print('data shifted later \r\n', s.shift(2))
    print('data shifted earlier \r\n', s.shift(-2))
    print('later shift with the freq parameter: shifts by frequency, so the '
          'timestamps move instead of the data \r\n', s.shift(2, freq='M'))
    print('earlier shift with the freq parameter\r\n', s.shift(-2, freq='D'))

    now = datetime.today()
    print('datetime today:\r\n', now)
    print('datetime offset by 3 days\r\n', now + 3 * offset.Day())
    print('datetime offset to the end of this month\r\n',
          now + offset.MonthEnd())
    print('datetime offset to the month end two months out\r\n',
          now + offset.MonthEnd(2))
    print('rollforward: roll forward to the end of this month \r\n',
          offset.MonthEnd().rollforward(now))
    print('rollback: roll back to the end of last month\r\n',
          offset.MonthEnd().rollback(now))
    print('roll Series timestamps forward to month end\r\n',
          s.groupby(offset.MonthEnd().rollforward).count())
def test_range_datetime():
    datetime_start = datetime.datetime(2019, 1, 15)
    datetime_end = datetime.datetime(2019, 1, 20)
    range_lst_no_offset = [
        datetime.datetime(2019, 1, 15, 0, 0),
        datetime.datetime(2019, 1, 16, 0, 0),
        datetime.datetime(2019, 1, 17, 0, 0),
        datetime.datetime(2019, 1, 18, 0, 0),
        datetime.datetime(2019, 1, 19, 0, 0),
        datetime.datetime(2019, 1, 20, 0, 0),
    ]
    range_lst_w_offset = [
        datetime.datetime(2019, 1, 15, 0, 0),
        datetime.datetime(2019, 1, 17, 0, 0),
        datetime.datetime(2019, 1, 19, 0, 0),
    ]

    drange = utils.range_datetime(datetime_start, datetime_end)
    assert range_lst_no_offset == list(drange)

    drange = utils.range_datetime(datetime_start, datetime_end,
                                  offsets.Day(2))
    assert range_lst_w_offset == list(drange)
def test_to_offset_pd_timedelta(self):
    # Tests for #9064
    td = Timedelta(days=1, seconds=1)
    result = frequencies.to_offset(td)
    expected = offsets.Second(86401)
    assert (expected == result)

    td = Timedelta(days=-1, seconds=1)
    result = frequencies.to_offset(td)
    expected = offsets.Second(-86399)
    assert (expected == result)

    td = Timedelta(hours=1, minutes=10)
    result = frequencies.to_offset(td)
    expected = offsets.Minute(70)
    assert (expected == result)

    td = Timedelta(hours=1, minutes=-10)
    result = frequencies.to_offset(td)
    expected = offsets.Minute(50)
    assert (expected == result)

    td = Timedelta(weeks=1)
    result = frequencies.to_offset(td)
    expected = offsets.Day(7)
    assert (expected == result)

    td1 = Timedelta(hours=1)
    result1 = frequencies.to_offset(td1)
    result2 = frequencies.to_offset('60min')
    assert (result1 == result2)

    td = Timedelta(microseconds=1)
    result = frequencies.to_offset(td)
    expected = offsets.Micro(1)
    assert (expected == result)

    td = Timedelta(microseconds=0)
    pytest.raises(ValueError, lambda: frequencies.to_offset(td))
    # Frequency string.
    ("A", (get_freq("A"), 1)),
    ("3D", (get_freq("D"), 3)),
    ("-2M", (get_freq("M"), -2)),

    # Tuple.
    (("D", 1), (get_freq("D"), 1)),
    (("A", 3), (get_freq("A"), 3)),
    (("M", -2), (get_freq("M"), -2)),
    ((5, "T"), (FreqGroup.FR_MIN, 5)),

    # Numeric Tuple.
    ((1000, 1), (1000, 1)),

    # Offsets.
    (offsets.Day(), (get_freq("D"), 1)),
    (offsets.Day(3), (get_freq("D"), 3)),
    (offsets.Day(-2), (get_freq("D"), -2)),
    (offsets.MonthEnd(), (get_freq("M"), 1)),
    (offsets.MonthEnd(3), (get_freq("M"), 3)),
    (offsets.MonthEnd(-2), (get_freq("M"), -2)),
    (offsets.Week(), (get_freq("W"), 1)),
    (offsets.Week(3), (get_freq("W"), 3)),
    (offsets.Week(-2), (get_freq("W"), -2)),
    (offsets.Hour(), (FreqGroup.FR_HR, 1)),

    # Monday is weekday=0.
    (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)),
    (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)),
    (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)),
])
def test_construct_timestamp_preserve_original_frequency(self):
    # GH 22311
    result = Timestamp(Timestamp("2010-08-08", freq="D")).freq
    expected = offsets.Day()
    assert result == expected
import logging.config

import pandas as pd
import pandas.tseries.offsets as offsets

import lessonData
import lineNotify
import annualData as ad

# logger setup
logging.config.fileConfig('logging.conf')
logger = logging.getLogger()

# fetch tomorrow's lesson changes
data_5e_tomorrow = pd.read_csv('tomorrow.csv')

# fetch the annual event schedule
annual_data = ad.get_data()

# get tomorrow's date (pd.datetime is deprecated; use pd.Timestamp)
tomorrow = (pd.Timestamp.today() + offsets.Day()).normalize()

# get tomorrow's scheduled events
annual_tomorrow = ad.search_for_date(annual_data, tomorrow)

# build the messages
msgs = []
for index, d in data_5e_tomorrow.iterrows():
    msgs.append(lessonData.create_tweet(d))
# print(msgs)
for index, d in annual_tomorrow.iterrows():
    msgs.append(ad.create_tweet(d))

# post to LINE Notify
ln = lineNotify.LineNotify()
price['s_rate'] = price['s_rate'].fillna(1)
price['a_rate'] = 1.0
# Yahoo's exact formula is unknown; some tickers show discrepancies, and it
# does not appear to be just a matter of rounding digits.
for i in reversed(range(len(price) - 1)):
    # price['a_rate'][i] = np.round(price['a_rate'][i + 1] / price['s_rate'][i + 1], 6)
    price['a_rate'][i] = price['a_rate'][i + 1] / price['s_rate'][i + 1]
price['CalcClose'] = np.round(price['Close'] * price['a_rate'], 2)


# In[ ]:

# show the period around each split date
for date in info.index:
    print(date.date())
    display(price[date + offsets.Day(-10):date + offsets.Day(10)])


# In[ ]:

price


# In[ ]:

price.to_csv('calc_1491.csv')


# In[ ]:

price['2014-07-01':'2014-07-30']
import sys

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import pandas.tseries.offsets as offsets
import statsmodels.api as sm

data_file = sys.argv[1]
item_name = sys.argv[2]
type = sys.argv[3]  # level spec for UnobservedComponents (shadows the builtin)
dest_file = sys.argv[4]
season = int(sys.argv[5]) if len(sys.argv) > 5 else None

df = pd.read_csv(data_file, parse_dates=['lastdate'])
ds = df.groupby(['lastdate'])[item_name].sum().astype('float')

r = sm.tsa.UnobservedComponents(ds, type, seasonal=season).fit()
print(r.summary())

fig, ax = plt.subplots()
plt.plot(ds)
start_date = max(ds.index)
plt.plot(r.predict(start_date, start_date + offsets.Day(350)))
ax.xaxis.set_major_locator(mdates.YearLocator())
plt.savefig(dest_file)
def test_delta_to_tick():
    delta = timedelta(3)
    tick = offsets._delta_to_tick(delta)
    assert (tick == offsets.Day(3))
def close_fluxnet(insitu_df, offset=15):
    """
    Applies the FLUXNET methodology of closing the energy balance (EBC_CF
    Method 1) using a moving window of +/- 15 days.

    Conservatively, the default includes the +/- 15 day windows. Correction
    factors (closure ratios) falling more than 1.5 x the interquartile range
    beyond the 25th and 75th percentiles are filtered. The window parameter
    can be changed to emulate EBC_CF Method 2 or Method 3 with minor changes
    to the time window.
    See: https://fluxnet.fluxdata.org/data/fluxnet2015-dataset/data-processing/

    INPUT:
    insitu_df | dataframe including columns
        Rn (W/m2) labeled 'insitu_Rn'; G (W/m2) labeled 'insitu_GHF';
        LE (W/m2) labeled 'insitu_LE'; H (W/m2) labeled 'insitu_SHF'
    offset | the moving window range in units of days

    OUTPUT:
    insitu_df | added columns for the EBC_CF energy balance closure ratio
        at the 50th, 25th, and 75th percentiles
    """
    try:
        flag_day_lim = []
        insitu_cr_25 = []
        insitu_cr_50 = []
        insitu_cr_75 = []
        delta = offsets.Day(offset)
        for t0 in insitu_df.index:
            ss_1 = insitu_df[t0 - delta:t0 + delta]
            hour = ss_1.index.hour
            selector = (22 < hour) | (hour < 3) | ((10 <= hour) & (hour < 15))
            ss_2 = ss_1[selector]
            if (ss_2['insitu_GHF'].isna().sum() / len(ss_2.index)) > 0.2:
                cr_ss2 = ss_2.insitu_Rn / (ss_2.insitu_SHF + ss_2.insitu_LE)
            else:
                cr_ss2 = ((ss_2.insitu_Rn - ss_2.insitu_GHF) /
                          (ss_2.insitu_SHF + ss_2.insitu_LE))
            ds_sort = sorted(cr_ss2)
            q1, q3 = np.percentile(ds_sort, [25, 75])
            iqr = q3 - q1
            # computing thresholds for closure quality filtering
            lower_bound = q1 - (1.5 * iqr)
            upper_bound = q3 + (1.5 * iqr)
            # filtering data
            cr_ss2[cr_ss2 > upper_bound] = np.nan
            cr_ss2[cr_ss2 < lower_bound] = np.nan
            cr_ss2 = cr_ss2[~np.isnan(cr_ss2)]
            ds_sort2 = sorted(cr_ss2)
            # computing closure ratios
            cr_q1, cr_q3 = np.percentile(ds_sort2, [25, 75])
            cr_med = np.percentile(ds_sort2, [50])[0]
            insitu_cr_25.append(cr_q1)
            insitu_cr_50.append(cr_med)
            insitu_cr_75.append(cr_q3)
            inst_flag = len(cr_ss2) < 100  # less than 5 days of data points
            flag_day_lim.append(inst_flag)

        insitu_df['insitu_cr25'] = np.array(insitu_cr_25)
        insitu_df['insitu_cr50'] = np.array(insitu_cr_50)
        insitu_df['insitu_cr75'] = np.array(insitu_cr_75)

        # These thresholds can be implemented on top of the FLUXNET2015
        # processing; use element-wise | rather than `or`, which is invalid
        # on Series
        EBC_lower_thresh = 0.5
        EBC_upper_thresh = 1.5
        insitu_df.loc[(insitu_df['insitu_cr50'] > EBC_upper_thresh) |
                      (insitu_df['insitu_cr50'] < EBC_lower_thresh),
                      'insitu_cr50'] = np.nan
        insitu_df.loc[np.isnan(insitu_df.insitu_cr50), 'insitu_cr25'] = np.nan
        insitu_df.loc[np.isnan(insitu_df.insitu_cr50), 'insitu_cr75'] = np.nan

        # Correcting LE and SHF observations with closure ratios;
        # filtering data for unrealistic fluxes less than 0
        insitu_df['insitu_LE_flux50'] = (insitu_df.insitu_LE_raw *
                                         insitu_df.insitu_cr50)
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux50'] = np.nan
        insitu_df['insitu_LE_flux25'] = (insitu_df.insitu_LE_raw *
                                         insitu_df.insitu_cr25)
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux25'] = np.nan
        insitu_df['insitu_LE_flux75'] = (insitu_df.insitu_LE_raw *
                                         insitu_df.insitu_cr75)
        insitu_df.loc[insitu_df['insitu_LE_flux50'] < 0,
                      'insitu_LE_flux75'] = np.nan
        insitu_df['insitu_SHF_flux50'] = (insitu_df.insitu_SHF *
                                          insitu_df.insitu_cr50)
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_SHF_flux50'] = np.nan
        insitu_df['insitu_H_flux25'] = (insitu_df.insitu_SHF *
                                        insitu_df.insitu_cr25)
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_H_flux25'] = np.nan
        insitu_df['insitu_H_flux75'] = (insitu_df.insitu_SHF *
                                        insitu_df.insitu_cr75)
        insitu_df.loc[insitu_df['insitu_SHF_flux50'] < 0,
                      'insitu_H_flux75'] = np.nan

        # Creating a flag when there are less than 5 days of data
        insitu_df['ebc_1_flag'] = flag_day_lim
    except Exception:
        # If the tower data cannot be closed due to missing data, apply a
        # range of artificial closure rates. These are only applied when net
        # radiation or sensible heat flux observations are not available.
        insitu_df['insitu_LE_1.1'] = insitu_df['insitu_LE_raw'].apply(
            lambda x: x * 1.1)
        insitu_df['insitu_LE_1.3'] = insitu_df['insitu_LE_raw'].apply(
            lambda x: x * 1.3)
        insitu_df['insitu_LE_1.5'] = insitu_df['insitu_LE_raw'].apply(
            lambda x: x * 1.5)
    return insitu_df
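
# A minimal usage sketch for close_fluxnet (an addition, not part of the
# original source), assuming half-hourly tower data with the column names
# the function expects; the values below are synthetic, not real flux
# observations.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
idx = pd.date_range('2020-06-01', '2020-07-31', freq='30T')
demo = pd.DataFrame({
    'insitu_Rn': rng.normal(300, 50, len(idx)),
    'insitu_GHF': rng.normal(30, 10, len(idx)),
    'insitu_SHF': rng.normal(120, 30, len(idx)),
    'insitu_LE': rng.normal(150, 30, len(idx)),
}, index=idx)
demo['insitu_LE_raw'] = demo['insitu_LE']
closed = close_fluxnet(demo, offset=15)
print(closed[['insitu_cr25', 'insitu_cr50', 'insitu_cr75']].describe())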