# ---------------------------------------------------------------------------
# Input data
# ---------------------------------------------------------------------------
path = "elspot-prices_2019_hourly_eur.xls"
# Read the Elspot price file and pass the result through the
# daylight-savings clean-up step of the datapreparation module.
df = datapreparation.removeDaylightSavings(
    datapreparation.read_ElspotPrices(path))

# Price series of the selected market area.
market_area = "SE1"
historic_prices = df[market_area]

# Prediction method; methods = ["Perfect", "SameHourLastWeek"]
prediction_method = "SameHourLastWeek"

# ---------------------------------------------------------------------------
# Plant parameters
# ---------------------------------------------------------------------------
Pmax = 100        # Maximum power deviation in MW
Emax = 48 * Pmax  # Maximum energy deviation in MWh
E0 = 0            # Energy difference at t = t_start

# ---------------------------------------------------------------------------
# Simulation parameters (times are in UTC+0)
# ---------------------------------------------------------------------------
t_horizon = 7 * 24  # Time horizon for optimization (in h)
t_start = pd.Timestamp("2019-10-22 00:00:00")
t_end = pd.Timestamp("2019-12-21 23:00:00")  # Simulate two months of operation

# Positional order: historic electricity prices, plant parameters
# (E0, Emax, Pmax), prediction settings, simulation times.
results = runScenario(historic_prices, E0, Emax, Pmax, prediction_method,
                      t_start, t_end, t_horizon)
print(results)
def test_unstack_nan_index(self):
    """Check ``unstack`` on (Multi)Indexes whose levels contain NaN/NaT.

    Covers the scenarios referenced as GH7466, GH7403, GH7401, GH4862 and
    GH9497 in the inline comments below.
    """
    # GH7466
    # NaN is the only value that differs from itself; it is rendered as a
    # single blank so that it can be matched against the ' .y' style labels.
    cast = lambda val: '{0:1}'.format('' if val != val else val)
    nan = np.nan

    def verify(df):
        # Every non-null cell holds a '.'-joined label; it must consist of
        # exactly the row-index and column-index entries of that cell.
        mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
        rows, cols = df.notna().values.nonzero()
        for i, j in zip(rows, cols):
            left = sorted(df.iloc[i, j].split('.'))
            right = mk_list(df.index[i]) + mk_list(df.columns[j])
            right = sorted(list(map(cast, right)))
            assert left == right

    df = DataFrame({
        'jim': ['a', 'b', nan, 'd'],
        'joe': ['w', 'x', 'y', 'z'],
        'jolie': ['a.w', 'b.x', ' .y', 'd.z']
    })

    left = df.set_index(['jim', 'joe']).unstack()['jolie']
    right = df.set_index(['joe', 'jim']).unstack()['jolie'].T
    assert_frame_equal(left, right)

    for idx in itertools.permutations(df.columns[:2]):
        mi = df.set_index(list(idx))
        for lev in range(2):
            udf = mi.unstack(level=lev)
            assert udf.notna().values.sum() == len(df)
            verify(udf['jolie'])

    df = DataFrame({
        '1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + ['c'] * 3 + ['e'] * 2 +
               ['b'] * 5,
        '2nd': ['y'] * 2 + ['w'] * 3 + [nan] * 3 + ['z'] * 4 + [nan] * 3 +
               ['x'] * 3 + [nan] * 2,
        '3rd': [
            67, 39, 53, 72, 57, 80, 31, 18, 11, 30, 59, 50, 62, 59, 76, 52,
            14, 53, 60, 51
        ]
    })

    df['4th'], df['5th'] = \
        df.apply(lambda r: '.'.join(map(cast, r)), axis=1), \
        df.apply(lambda r: '.'.join(map(cast, r.iloc[::-1])), axis=1)

    for idx in itertools.permutations(['1st', '2nd', '3rd']):
        mi = df.set_index(list(idx))
        for lev in range(3):
            udf = mi.unstack(level=lev)
            assert udf.notna().values.sum() == 2 * len(df)
            for col in ['4th', '5th']:
                verify(udf[col])

    # GH7403
    df = pd.DataFrame({
        'A': list('aaaabbbb'),
        'B': range(8),
        'C': range(8)
    })
    # Cast explicitly instead of relying on the silent int -> float upcast
    # that assigning NaN into an integer column would otherwise trigger.
    df = df.astype({'B': 'float'})
    df.iloc[3, 1] = nan
    left = df.set_index(['A', 'B']).unstack(0)

    vals = [[3, 0, 1, 2, nan, nan, nan, nan],
            [nan, nan, nan, nan, 4, 5, 6, 7]]
    vals = list(map(list, zip(*vals)))
    idx = Index([nan, 0, 1, 2, 4, 5, 6, 7], name='B')
    cols = MultiIndex(levels=[['C'], ['a', 'b']],
                      codes=[[0, 0], [0, 1]],
                      names=[None, 'A'])

    right = DataFrame(vals, columns=cols, index=idx)
    assert_frame_equal(left, right)

    df = DataFrame({
        'A': list('aaaabbbb'),
        'B': list(range(4)) * 2,
        'C': range(8)
    })
    df = df.astype({'B': 'float'})
    df.iloc[2, 1] = nan
    left = df.set_index(['A', 'B']).unstack(0)

    vals = [[2, nan], [0, 4], [1, 5], [nan, 6], [3, 7]]
    cols = MultiIndex(levels=[['C'], ['a', 'b']],
                      codes=[[0, 0], [0, 1]],
                      names=[None, 'A'])
    idx = Index([nan, 0, 1, 2, 3], name='B')
    right = DataFrame(vals, columns=cols, index=idx)
    assert_frame_equal(left, right)

    df = pd.DataFrame({
        'A': list('aaaabbbb'),
        'B': list(range(4)) * 2,
        'C': range(8)
    })
    df = df.astype({'B': 'float'})
    df.iloc[3, 1] = nan
    left = df.set_index(['A', 'B']).unstack(0)

    vals = [[3, nan], [0, 4], [1, 5], [2, 6], [nan, 7]]
    cols = MultiIndex(levels=[['C'], ['a', 'b']],
                      codes=[[0, 0], [0, 1]],
                      names=[None, 'A'])
    idx = Index([nan, 0, 1, 2, 3], name='B')
    right = DataFrame(vals, columns=cols, index=idx)
    assert_frame_equal(left, right)

    # GH7401
    df = pd.DataFrame({
        'A': list('aaaaabbbbb'),
        'B': (date_range('2012-01-01', periods=5).tolist() * 2),
        'C': np.arange(10)
    })

    df.iloc[3, 1] = nan
    left = df.set_index(['A', 'B']).unstack()

    vals = np.array([[3, 0, 1, 2, nan, 4], [nan, 5, 6, 7, 8, 9]])
    idx = Index(['a', 'b'], name='A')
    # Code -1 in the second level marks the missing (null) date label.
    cols = MultiIndex(levels=[['C'], date_range('2012-01-01', periods=5)],
                      codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
                      names=[None, 'B'])

    right = DataFrame(vals, columns=cols, index=idx)
    assert_frame_equal(left, right)

    # GH4862
    vals = [['Hg', nan, nan, 680585148],
            ['U', 0.0, nan, 680585148],
            ['Pb', 7.07e-06, nan, 680585148],
            ['Sn', 2.3614e-05, 0.0133, 680607017],
            ['Ag', 0.0, 0.0133, 680607017],
            ['Hg', -0.00015, 0.0133, 680607017]]
    df = DataFrame(vals,
                   columns=['agent', 'change', 'dosage', 's_id'],
                   index=[17263, 17264, 17265, 17266, 17267, 17268])

    left = df.copy().set_index(['s_id', 'dosage', 'agent']).unstack()

    vals = [[nan, nan, 7.07e-06, nan, 0.0],
            [0.0, -0.00015, nan, 2.3614e-05, nan]]

    idx = MultiIndex(levels=[[680585148, 680607017], [0.0133]],
                     codes=[[0, 1], [-1, 0]],
                     names=['s_id', 'dosage'])

    cols = MultiIndex(levels=[['change'], ['Ag', 'Hg', 'Pb', 'Sn', 'U']],
                      codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]],
                      names=[None, 'agent'])

    right = DataFrame(vals, columns=cols, index=idx)
    assert_frame_equal(left, right)

    left = df.loc[17264:].copy().set_index(['s_id', 'dosage', 'agent'])
    assert_frame_equal(left.unstack(), right)

    # GH9497 - multiple unstack with nulls
    df = DataFrame({
        '1st': [1, 2, 1, 2, 1, 2],
        '2nd': pd.date_range('2014-02-01', periods=6, freq='D'),
        'jim': 100 + np.arange(6),
        'joe': (np.random.randn(6) * 10).round(2)
    })

    df['3rd'] = df['2nd'] - pd.Timestamp('2014-02-02')
    df.loc[1, '2nd'] = df.loc[3, '2nd'] = nan
    df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan

    left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd'])
    assert left.notna().values.sum() == 2 * len(df)

    for col in ['jim', 'joe']:
        for _, r in df.iterrows():
            key = r['1st'], (col, r['2nd'], r['3rd'])
            assert r[col] == left.loc[key]
def test_real_apisession_get_forecast_values(real_session):
    """Forecast values for a 12-hour UTC window must come back as a Series."""
    # NOTE(review): the first argument is presumably a forecast identifier;
    # confirm against get_forecast_values' signature.
    target_id = 'f8dd49fa-23e2-48a0-862b-ba0af6dec276'
    window = (
        pd.Timestamp('2019-04-15T00:00:00Z'),
        pd.Timestamp('2019-04-15T12:00:00Z'),
    )
    values = real_session.get_forecast_values(target_id, *window)
    assert isinstance(values, pd.Series)
"https":"https://10.42.32.29:8080"} e = entsoe.EntsoePandasClient(api_key=TOKEN, proxies = PROXY, retry_count=20, retry_delay=30) start_year = 2015 end_year = 2019 domains = ["BE","FR","ES","DE","PL","PT","CZ","GB","IT","CH","NL","HU","AT","SK"] quarterhour = ["BE","DE","AT","NL","HU"] halfhour = ["GB"] df_dic = {} for year in range(start_year, end_year): start = pd.Timestamp(year=year, month=1, day=1, tz='Europe/Brussels') end = pd.Timestamp(year=year+1, month=1, day=1, tz='Europe/Brussels') df_dic[year] = {} for country in domains: print("Querying yearly load from %s to %s for country %s" %(start.strftime('%d-%m-%Y'),end.strftime('%d-%m-%Y'),country)) s = e.query_load(country_code=country, start=start, end=end) if s is not None: df_dic[year][country] = s df_dic[year] = pd.concat(df_dic[year]) result = pd.concat(df_dic).reset_index() result.columns = ["year","country","time","load"] #result["year"] = [n.year for n in result.time] result.load = result.load/1000/1000 #CONVERTING MW TO TW result.loc[result.country.isin(quarterhour), "load"] = result.loc[result.country.isin(quarterhour),"load"]/4. result.loc[result.country.isin(halfhour),"load"] = result.loc[result.country.isin(halfhour),"load"]/2.
def setData(self, index, value, role=Qt.DisplayRole):
    """Set the value to the index position depending on Qt::ItemDataRole and data type of the column

    The value is first coerced according to the dtype of the target column
    (clamped for integer columns, cast for float and bool columns, parsed
    into a ``pandas.Timestamp`` for date columns, untouched for object
    columns) and only then written into the underlying data frame.

    Args:
        index (QtCore.QModelIndex): Index to define column and row.
        value (object): new value.
        role (Qt::ItemDataRole): Use this role to specify what you want to do.

    Raises:
        TypeError: If the column dtype is not one of the handled dtypes.
        Exception: If the value for a date column could not be converted
            into a ``pandas.Timestamp``.

    Returns:
        True if value is changed. Calls layoutChanged after update.
        False if value is not different from original value.

    """
    if not index.isValid() or not self.editable:
        return False

    # Same comparison as before (``!=``), only turned into a guard clause.
    if not (value != index.data(role)):
        return False

    row = self._dataFrame.index[index.row()]
    col = self._dataFrame.columns[index.column()]
    columnDtype = self._dataFrame[col].dtype

    # Coerce the value *before* announcing a layout change, so a failed
    # conversion cannot leave layoutAboutToBeChanged without its matching
    # layoutChanged.
    if columnDtype == object:
        pass

    elif columnDtype in self._intDtypes:
        # Clamp to the representable range of the integer dtype.
        dtypeInfo = numpy.iinfo(columnDtype)
        if value < dtypeInfo.min:
            value = dtypeInfo.min
        elif value > dtypeInfo.max:
            value = dtypeInfo.max

    elif columnDtype in self._floatDtypes:
        value = numpy.float64(value).astype(columnDtype)

    elif columnDtype in self._boolDtypes:
        value = numpy.bool_(value)

    elif columnDtype in self._dateDtypes:
        # Convert the given value to a compatible datetime object.
        if isinstance(value, QtCore.QDateTime):
            value = value.toString(self.timestampFormat)
        try:
            value = pandas.Timestamp(value)
        except Exception:
            raise Exception(
                "Can't convert '{0}' into a datetime".format(value))
    else:
        raise TypeError("try to set unhandled data type")

    self.layoutAboutToBeChanged.emit()
    try:
        # DataFrame.set_value no longer exists in current pandas; ``.at``
        # is the label-based scalar setter that replaces it.
        self._dataFrame.at[row, col] = value
    finally:
        # Always balance the signal pair, even if the write fails.
        self.layoutChanged.emit()
    return True
def test_minutely_fetcher(self):
    """Run a minute-mode algorithm that records a fetched CSV signal.

    The CSV is served from a stubbed URL; the recorded ``aapl_signal``
    values of all minutely performance packets are then compared against
    the expected per-day values (see the comment block further down).
    """
    self.responses.add(
        self.responses.GET,
        'https://fake.urls.com/aapl_minute_csv_data.csv',
        body=AAPL_MINUTE_CSV_DATA,
        content_type='text/csv',
    )

    sim_params = factory.create_simulation_parameters(
        start=pd.Timestamp("2006-01-03", tz='UTC'),
        end=pd.Timestamp("2006-01-10", tz='UTC'),
        emission_rate="minute",
        data_frequency="minute")

    test_algo = TradingAlgorithm(
        script="""
from zipline.api import fetch_csv, record, sid

def initialize(context):
    fetch_csv('https://fake.urls.com/aapl_minute_csv_data.csv')

def handle_data(context, data):
    record(aapl_signal=data.current(sid(24), "signal"))
""", sim_params=sim_params, data_frequency="minute", env=self.env)

    # manually setting data portal and getting generator because we need
    # the minutely emission packets here.  TradingAlgorithm.run() only
    # returns daily packets.
    test_algo.data_portal = FetcherDataPortal(self.env,
                                              self.trading_calendar)
    gen = test_algo.get_generator()
    perf_packets = list(gen)

    signal = [
        result["minute_perf"]["recorded_vars"]["aapl_signal"]
        for result in perf_packets
        if "minute_perf" in result
    ]

    self.assertEqual(6 * 390, len(signal))

    # csv data is:
    # symbol,date,signal
    # aapl,1/4/06 5:31AM, 1
    # aapl,1/4/06 11:30AM, 2
    # aapl,1/5/06 5:31AM, 1
    # aapl,1/5/06 11:30AM, 3
    # aapl,1/9/06 5:31AM, 1
    # aapl,1/9/06 11:30AM, 4 for dates 1/3 to 1/10

    # 2 signals per day, only last signal is taken. So we expect
    # 390 bars of signal NaN on 1/3
    # 390 bars of signal 2 on 1/4
    # 390 bars of signal 3 on 1/5
    # 390 bars of signal 3 on 1/6 (forward filled)
    # 390 bars of signal 4 on 1/9
    # 390 bars of signal 4 on 1/10 (forward filled)

    # np.nan is the canonical spelling; the np.NaN alias is gone in NumPy 2.
    np.testing.assert_array_equal([np.nan] * 390, signal[0:390])
    np.testing.assert_array_equal([2] * 390, signal[390:780])
    np.testing.assert_array_equal([3] * 780, signal[780:1560])
    np.testing.assert_array_equal([4] * 780, signal[1560:])
def test_days_to_weeks(fn=days_to_weeks):
    """Check ``fn`` against hand-computed weekly OHLC frames.

    Builds 28 days of daily open/high/low/close prices for three random
    tickers and the expected weekly frames (indexed by the five Sundays from
    2018-10-14 to 2018-11-11), then delegates the comparison to
    ``assert_output``.

    Args:
        fn: Function under test; defaults to ``days_to_weeks``.
    """
    tickers = generate_random_tickers(3)
    dates = pd.date_range('10/10/2018', periods=28, freq='D')
    # The ``freq`` keyword of pd.Timestamp was removed in pandas 2.0; it is
    # not needed here because these values are only used as index labels.
    resampled_dates = [
        pd.Timestamp('2018-10-14 00:00:00'),
        pd.Timestamp('2018-10-21 00:00:00'),
        pd.Timestamp('2018-10-28 00:00:00'),
        pd.Timestamp('2018-11-04 00:00:00'),
        pd.Timestamp('2018-11-11 00:00:00')
    ]

    fn_inputs = {
        'open_prices': pd.DataFrame(
            [[24, 21, 43], [14, 22, 41], [29, 23, 44], [44, 14, 13],
             [31, 28, 34], [36, 49, 27], [48, 20, 46], [48, 37, 27],
             [16, 42, 22], [23, 36, 32], [13, 31, 28], [23, 33, 18],
             [14, 47, 45], [28, 21, 31], [31, 36, 40], [19, 25, 46],
             [30, 46, 48], [19, 34, 35], [24, 13, 24], [48, 15, 39],
             [16, 34, 14], [37, 30, 28], [34, 24, 20], [17, 15, 38],
             [44, 15, 22], [24, 36, 28], [12, 41, 49], [24, 27, 14]],
            dates, tickers),
        'high_prices': pd.DataFrame(
            [[48, 48, 43], [42, 49, 47], [45, 47, 48], [48, 46, 48],
             [49, 49, 46], [40, 49, 49], [49, 44, 49], [49, 46, 48],
             [46, 49, 49], [49, 47, 47], [45, 49, 46], [45, 49, 49],
             [49, 48, 48], [48, 49, 49], [49, 49, 48], [48, 48, 49],
             [48, 47, 48], [47, 49, 49], [47, 49, 49], [48, 49, 48],
             [49, 49, 47], [48, 47, 48], [47, 48, 47], [49, 49, 45],
             [49, 49, 49], [47, 46, 48], [47, 47, 49], [49, 49, 46]],
            dates, tickers),
        'low_prices': pd.DataFrame(
            [[12, 12, 13], [12, 14, 15], [13, 14, 12], [14, 14, 13],
             [12, 12, 14], [12, 12, 12], [12, 12, 12], [13, 12, 13],
             [12, 12, 13], [14, 12, 14], [12, 12, 12], [13, 14, 16],
             [14, 13, 13], [13, 14, 12], [14, 12, 14], [15, 12, 13],
             [12, 12, 12], [12, 13, 15], [14, 12, 12], [12, 12, 12],
             [12, 14, 13], [12, 12, 13], [13, 14, 15], [12, 12, 12],
             [12, 14, 12], [12, 12, 13], [12, 12, 12], [16, 12, 14]],
            dates, tickers),
        'close_prices': pd.DataFrame(
            [[27, 45, 15], [40, 49, 40], [25, 26, 36], [26, 36, 19],
             [25, 34, 46], [22, 39, 45], [40, 14, 17], [42, 46, 33],
             [35, 41, 49], [14, 24, 31], [41, 18, 13], [36, 27, 18],
             [16, 16, 45], [37, 24, 16], [43, 40, 28], [39, 29, 45],
             [38, 20, 43], [44, 13, 34], [23, 17, 47], [25, 14, 38],
             [48, 44, 23], [37, 24, 33], [40, 28, 17], [31, 12, 44],
             [29, 40, 49], [18, 30, 13], [27, 16, 47], [31, 32, 14]],
            dates, tickers)
    }
    fn_correct_outputs = OrderedDict([
        ('open_prices_weekly', pd.DataFrame(
            [[24, 21, 43], [36, 49, 27], [14, 47, 45], [48, 15, 39],
             [12, 41, 49]],
            resampled_dates, tickers)),
        ('high_prices_weekly', pd.DataFrame(
            [[49, 49, 48], [49, 49, 49], [49, 49, 49], [49, 49, 49],
             [49, 49, 49]],
            resampled_dates, tickers)),
        ('low_prices_weekly', pd.DataFrame(
            [[12, 12, 12], [12, 12, 12], [12, 12, 12], [12, 12, 12],
             [12, 12, 12]],
            resampled_dates, tickers)),
        ('close_prices_weekly', pd.DataFrame(
            [[25, 34, 46], [36, 27, 18], [23, 17, 47], [18, 30, 13],
             [31, 32, 14]],
            resampled_dates, tickers))
    ])

    assert_output(fn, fn_inputs, fn_correct_outputs)
def test_fast_unique_multiple_unsortable_runtimewarning(self):
    """``fast_unique_multiple`` must emit RuntimeWarning for str + Timestamp input."""
    # A single array mixing a string with a Timestamp.
    mixed = np.array(["foo", pd.Timestamp("2000")])
    arrays = [mixed]
    with tm.assert_produces_warning(RuntimeWarning):
        lib.fast_unique_multiple(arrays, sort=None)
def test_setitem_clears_freq(self):
    """Assigning an element of a daily tz-aware DatetimeArray resets ``freq``."""
    daily_index = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
    a = DatetimeArray(daily_index)

    replacement = pd.Timestamp("2000", tz="US/Central")
    a[0] = replacement

    assert a.freq is None
def write():
    """Render the "Model Performance" page of the SugarTime app.

    Shows an explanatory text, lets the user pick a date/time inside the
    test set with a slider, plots the test-set glucose values around that
    time and finally plots the model forecast against the actual values.
    """
    st.markdown(
        """
        # SugarTime
        ### Model Performance
        This page lets you visualize how the model performs on data that it
        hasn't seen yet.
        """
    )
    # NOTE(review): ``beta_expander`` is the pre-1.0 Streamlit name of
    # ``st.expander`` - confirm which Streamlit version this app targets.
    with st.beta_expander("CLICK HERE to expand discussion"):
        st.markdown(
            """
            The dataset is split into two sets: a training set and a testing set.
            The model has been trained on the training set, and we can use the
            model to perform inference on data from the testing set here.

            The time series model is auto-regressive with exogenous variables (ARX).
            The base algorithm used in such a model can be any regression algorithm;
            here I currently use a support vector machine.
            The full model actually consists of several models, each individually
            fit to a different lag of the target variable.
            In other words, there is one model fit to the glucose data at time *t+1*,
            another fit to the glucose data at time *t+2*, another at *t+3*, etc.,
            all the way up to the selected horizon of the model (which defaults
            to 12 steps of 5 minutes each, i.e., one hour).
            Each model represents the best performing model after optimizing
            the time-series design hyperparameters (e.g., order of the
            *endogenous* or *target* variable, order of the *exogenous* variables,
            and/or delay of the exogenous variables) at that time step.

            Note that this model has essentially learned to revert to the mean.
            Since there is considerable autocorrelation in data from continuous
            glucose monitors, inference becomes less acurrate as the inference
            step gets farther away from the current time *t*. Here, instead of
            relying on the exogenous variables (i.e., carbohydrates and insulin),
            the model does a better job by increasingly bringing the predicted
            value back to the mean, which for this patient is a blood glucose level
            of approximately 100 mg/dL. This is obviously not what we want the
            model to learn. But I have yet to find an estimator/algorithm that
            doesn't converge on this strategy to some extent, which suggests
            that these two exogenous variables are simply not predictive enough
            to account for significant variance beyond the autoregressive
            component of this model.
            """
        )
    st.markdown(
        """
        *Instructions:*
        Use the slider to select a time within the test set. The model will
        use the data up to that point to generate a forecast for the next hour.
        ***
        """
    )
    st.markdown("# Select date/time to show forecast.")

    # load patient data and fit model
    vm = load_saved_model()
    patient = vm.patient

    # make datetime selection slider
    x_index = patient.Xtest.index
    start_time = st.slider(
        "Move the slider to select the forecast date/time",
        min_value=x_index[40].to_pydatetime(),
        max_value=x_index[-40].to_pydatetime(),
        value=x_index[45].to_pydatetime(),
        step=timedelta(minutes=60),
        format="MM/DD/YY - hh:mm",
    )

    # plot glucose values for the test set
    fig = plot_test_set(patient, start_time)
    st.plotly_chart(fig)

    # plot performance of model
    st.markdown("# Show forecast vs actual")
    # Position of the selected time within the test index.
    start_time_index = (x_index == pd.Timestamp(start_time)).argmax()
    nsteps = vm.horizon
    ypred = vm.multioutput_forecast(patient.Xtest[:start_time_index],
                                    patient.ytest[:start_time_index])
    # One index entry per forecast step, 5 minutes apart.  "5min" is the
    # non-deprecated spelling of the former "5T" alias.
    idx = pd.date_range(
        start=start_time,
        end=start_time + timedelta(minutes=5 * (len(ypred) - 1)),
        freq="5min",
    )
    ypred = pd.DataFrame(ypred, columns=["ypred"], index=idx)
    fig = core.plot_forecast(
        patient.ytest[(start_time_index - 40):(start_time_index + nsteps)],
        ypred,
        return_flag=True,
    )
    # Month/day/year, consistent with the slider's "MM/DD/YY" format (the
    # previous "%m/%d/%m" printed the month twice and never the year).
    start_time_text = start_time.strftime("%m/%d/%y %H:%M")
    fig.update_layout(
        title={
            "text": "start time: " + start_time_text,
            "y": 0.88,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        })
    st.plotly_chart(fig)
def __call__(self, times, locations):
    """Evaluate ``self._call_`` on a times-by-locations grid.

    ``times`` entries are converted to ``pd.Timestamp`` and arranged as a
    column (n, 1); ``locations`` is arranged as a row (1, m). The result of
    ``self._call_`` is returned cast to ``float32``.
    """
    stamps = list(map(pd.Timestamp, times))
    time_column = np.asarray(stamps).reshape((-1, 1))
    location_row = np.asarray(locations).reshape((1, -1))
    raw = self._call_(time_column, location_row)
    return raw.astype(np.float32)
], "name_to_label": { "spring_no_effects": NO_VACCINATIONS + NO_SEASONALITY + NO_RAPID_TESTS, "spring_without_rapid_tests_and_no_vaccinations": NO_VACCINATIONS + WITH_SEASONALITY + NO_RAPID_TESTS, # just seasonality "spring_without_rapid_tests_without_seasonality": WITH_VACCINATIONS + NO_SEASONALITY + NO_RAPID_TESTS, # just vaccinations "spring_without_vaccinations_without_seasonality": NO_VACCINATIONS + NO_SEASONALITY + WITH_RAPID_TESTS, # just rapid tests "spring_baseline": WITH_VACCINATIONS + WITH_SEASONALITY + WITH_RAPID_TESTS, }, "colors": [BLUE, RED, ORANGE, GREEN, PURPLE], "plot_start": pd.Timestamp("2021-01-15"), }, "one_off_and_combined": { "title": "The Effect of Each Channel on {outcome} Separately", "scenarios": [ "spring_baseline", "spring_no_effects", "spring_without_seasonality", "spring_without_vaccines", "spring_without_rapid_tests", ], "name_to_label": { "spring_no_effects": NO_VACCINATIONS + NO_SEASONALITY + NO_RAPID_TESTS, "spring_without_seasonality": WITH_VACCINATIONS + NO_SEASONALITY + WITH_RAPID_TESTS,
class TestGenRangeGeneration:
    """Tests for ``generate_range`` plus two ``pd.date_range`` corner cases."""

    def test_generate(self):
        # A BDay offset object and the "B" alias must generate the same values.
        via_object = list(generate_range(START, END, offset=BDay()))
        via_alias = list(generate_range(START, END, offset="B"))
        assert via_object == via_alias

    def test_generate_cday(self):
        # Same check for CDay and its "C" alias.
        via_object = list(generate_range(START, END, offset=CDay()))
        via_alias = list(generate_range(START, END, offset="C"))
        assert via_object == via_alias

    def test_1(self):
        # start + periods, no explicit offset.
        generated = list(generate_range(start=datetime(2009, 3, 25), periods=2))
        assert generated == [datetime(2009, 3, 25), datetime(2009, 3, 26)]

    def test_2(self):
        # start + end, no explicit offset.
        first = datetime(2008, 1, 1)
        last = datetime(2008, 1, 3)
        generated = list(generate_range(start=first, end=last))
        assert generated == [
            datetime(2008, 1, 1),
            datetime(2008, 1, 2),
            datetime(2008, 1, 3),
        ]

    def test_3(self):
        # NOTE(review): both dates fall on a weekend, so presumably the default
        # offset is a business day and nothing is generated - confirm.
        generated = list(
            generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6))
        )
        assert generated == []

    def test_precision_finer_than_offset(self):
        # GH#9907
        # The seconds component of ``start`` must be carried into every
        # generated value even though the frequency is much coarser.
        quarterly = pd.date_range(
            start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q"
        )
        weekly = pd.date_range(
            start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W"
        )

        expected_quarterly = DatetimeIndex(
            [
                "2015-06-30 00:00:03",
                "2015-09-30 00:00:03",
                "2015-12-31 00:00:03",
                "2016-03-31 00:00:03",
            ],
            dtype="datetime64[ns]",
            freq="Q-DEC",
            tz=None,
        )
        expected_weekly = DatetimeIndex(
            [
                "2015-04-19 00:00:03",
                "2015-04-26 00:00:03",
                "2015-05-03 00:00:03",
                "2015-05-10 00:00:03",
                "2015-05-17 00:00:03",
                "2015-05-24 00:00:03",
                "2015-05-31 00:00:03",
                "2015-06-07 00:00:03",
                "2015-06-14 00:00:03",
                "2015-06-21 00:00:03",
            ],
            dtype="datetime64[ns]",
            freq="W-SUN",
            tz=None,
        )

        tm.assert_index_equal(quarterly, expected_quarterly)
        tm.assert_index_equal(weekly, expected_weekly)

    # Shared inputs for the parametrization below (evaluated in class scope).
    dt1, dt2 = "2017-01-01", "2017-01-01"
    tz1, tz2 = "US/Eastern", "Europe/London"

    @pytest.mark.parametrize(
        "start,end",
        [
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)),
            (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)),
            (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)),
        ],
    )
    def test_mismatching_tz_raises_err(self, start, end):
        # issue 18488
        msg = "Start and end cannot both be tz-aware with different timezones"
        # Checked once with the default frequency and once with BDay.
        with pytest.raises(TypeError, match=msg):
            pd.date_range(start, end)
        with pytest.raises(TypeError, match=msg):
            pd.date_range(start, end, freq=BDay())
def test_arithmetic_overflow(self):
    """Adding an oversized offset to a Timestamp must raise OverflowError."""
    n_days = 13 * 19999
    base_date = '1700-01-01'

    # Operands are built inside the ``raises`` blocks on purpose: any
    # OverflowError raised while constructing them counts as well.
    with pytest.raises(OverflowError):
        stamp = pd.Timestamp(base_date)
        stamp + pd.Timedelta(n_days, unit='D')

    with pytest.raises(OverflowError):
        stamp = pd.Timestamp(base_date)
        stamp + timedelta(days=n_days)
def get_datetime():
    """Return the fixed timestamp 2015-01-27 (midnight) in UTC."""
    fixed_day = "2015-01-27"
    options = {"tz": 'UTC'}
    return pd.Timestamp(fixed_day, **options)
def __init__(self, indir=None, outdir=None, theDate=None, infile='trades.csv',
             inputType='DAS', infile2='positions.csv', mydevel=False):
    '''
    Creates the required path and field names to run the program. Raises value error if the
    input file cannot be located. If mydevel is True, the default locations change.

    :params indir: The location of the input file. Default is (cwd)/data.
    :params outdir The name of the output directory. Default is (cwd)/out.
    :params theDate: A Datetime object or timestamp of the date of the transactions in the
                     input file. Will be used if the input file lacks dates. Defaults to today.
    :params infile: The name of the input file. Defaults to 'trades.csv'.
    :params inputType: One of DAS, IB_HTML, or IB_CVS. Either IB input file should be an
                       activity statement with the tables: Trades, Open Positions and Account
                       Information.
    :params infile2: This is the positions file. Required for DAS Trader Pro only and only if
                     positions are held before or after this input file's trades. If missing,
                     the program will ask for the information. Defaults to 'positions.csv'
    :raise ValueError: If theDate is not a valid time.
    :raise NameError: If the infile is not located.
    '''
    if theDate:
        try:
            parsed = pd.Timestamp(theDate)
            # "NaT"-like input parses without error but cannot be formatted
            # with strftime below; reject it here with the documented error.
            if pd.isnull(parsed):
                raise ValueError("the value parses to NaT (not a time)")
        except (ValueError, TypeError) as ex:
            # TypeError covers inputs pd.Timestamp cannot interpret at all;
            # callers are promised a ValueError for any invalid date.
            msg = f"\n\nTheDate ({theDate}) must be a valid timestamp or string.\n"
            msg += "Leave it blank to accept today's date\n"
            msg += ex.__str__() + "\n"
            print(msg)
            raise ValueError(msg) from ex
        theDate = parsed
    else:
        # NOTE: this is a datetime.date, whereas an explicit theDate yields a
        # pandas Timestamp; both provide the strftime used below.
        theDate = dt.date.today()

    assert inputType in JournalFiles.inputType.values()

    self.inputType = inputType
    self.theDate = theDate
    self.monthformat = "_%Y%m_%B"
    self.dayformat = "_%m%d_%A"
    self.root = os.getcwd()
    self.indir = indir if indir else os.path.join(self.root, 'data/')
    self.outdir = outdir if outdir else os.path.join(self.root, 'out/')
    self.infile = infile if infile else 'trades.csv'
    self.infile2 = infile2
    self.inpathfile2 = None
    # Output name: input file stem followed by weekday and month/day.
    self.outfile = os.path.splitext(
        self.infile)[0] + self.theDate.strftime("%A_%m%d.xlsx")

    if not mydevel:
        self.inpathfile = os.path.join(self.indir, self.infile)
        self.outpathfile = os.path.join(self.outdir, self.outfile)
        if self.infile2:
            self.inpathfile2 = os.path.join(self.indir, self.infile2)
    else:
        self.setMyParams(indir, outdir)

    if self.inpathfile2 and not os.path.exists(self.inpathfile2):
        # Fail or succeed quietly here
        self.infile2 = None
        self.inpathfile2 = None

    self._checkPaths()
class OrderTrackerUnitTests(unittest.TestCase): start: pd.Timestamp = pd.Timestamp("2019-01-01", tz="UTC") end: pd.Timestamp = pd.Timestamp("2019-01-01 01:00:00", tz="UTC") start_timestamp: float = start.timestamp() end_timestamp: float = end.timestamp() clock_tick_size = 10 @classmethod def setUpClass(cls): cls.ev_loop = asyncio.get_event_loop() cls.trading_pair = "COINALPHA-HBOT" cls.limit_orders: List[LimitOrder] = [ LimitOrder(client_order_id=f"LIMIT//-{i}-{int(time.time()*1e6)}", trading_pair=cls.trading_pair, is_buy=True if i % 2 == 0 else False, base_currency=cls.trading_pair.split("-")[0], quote_currency=cls.trading_pair.split("-")[1], price=Decimal(f"{100 - i}") if i % 2 == 0 else Decimal(f"{100 + i}"), quantity=Decimal(f"{10 * (i + 1)}"), creation_timestamp=int(time.time() * 1e6)) for i in range(20) ] cls.market_orders: List[MarketOrder] = [ MarketOrder(order_id=f"MARKET//-{i}-{int(time.time()*1e3)}", trading_pair=cls.trading_pair, is_buy=True if i % 2 == 0 else False, base_asset=cls.trading_pair.split("-")[0], quote_asset=cls.trading_pair.split("-")[1], amount=float(f"{10 * (i + 1)}"), timestamp=time.time()) for i in range(20) ] cls.market: MockPaperExchange = MockPaperExchange() cls.market_info: MarketTradingPairTuple = MarketTradingPairTuple( cls.market, cls.trading_pair, *cls.trading_pair.split("-")) def setUp(self): self.order_tracker: OrderTracker = OrderTracker() self.clock: Clock = Clock(ClockMode.BACKTEST, self.clock_tick_size, self.start_timestamp, self.end_timestamp) self.clock.add_iterator(self.order_tracker) self.clock.backtest_til(self.start_timestamp) @staticmethod def simulate_place_order(order_tracker: OrderTracker, order: Union[LimitOrder, MarketOrder], market_info: MarketTradingPairTuple): """ Simulates an order being succesfully placed. 
""" if isinstance(order, LimitOrder): order_tracker.add_create_order_pending(order.client_order_id) order_tracker.start_tracking_limit_order( market_pair=market_info, order_id=order.client_order_id, is_buy=order.is_buy, price=order.price, quantity=order.quantity) else: order_tracker.add_create_order_pending(order.order_id) order_tracker.start_tracking_market_order(market_pair=market_info, order_id=order.order_id, is_buy=order.is_buy, quantity=order.amount) @staticmethod def simulate_order_created(order_tracker: OrderTracker, order: Union[LimitOrder, MarketOrder]): order_id = order.client_order_id if isinstance( order, LimitOrder) else order.order_id order_tracker.remove_create_order_pending(order_id) @staticmethod def simulate_stop_tracking_order(order_tracker: OrderTracker, order: Union[LimitOrder, MarketOrder], market_info: MarketTradingPairTuple): """ Simulates an order being cancelled or filled completely. """ if isinstance(order, LimitOrder): order_tracker.stop_tracking_limit_order( market_pair=market_info, order_id=order.client_order_id, ) else: order_tracker.stop_tracking_market_order(market_pair=market_info, order_id=order.order_id) @staticmethod def simulate_cancel_order(order_tracker: OrderTracker, order: Union[LimitOrder, MarketOrder]): """ Simulates order being cancelled. 
""" order_id = order.client_order_id if isinstance( order, LimitOrder) else order.order_id if order_id: order_tracker.check_and_track_cancel(order_id) def test_active_limit_orders(self): # Check initial output self.assertTrue(len(self.order_tracker.active_limit_orders) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.active_limit_orders) == len( self.limit_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.limit_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) self.assertTrue( len(self.order_tracker.active_limit_orders) == len(self.limit_orders) - 1) def test_shadow_limit_orders(self): # Check initial output self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.shadow_limit_orders) == len( self.limit_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.limit_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) self.assertTrue( len(self.order_tracker.shadow_limit_orders) == len(self.limit_orders) - 1) def test_market_pair_to_active_orders(self): # Check initial output self.assertTrue( len(self.order_tracker.market_pair_to_active_orders) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.market_pair_to_active_orders[ self.market_info]) == len(self.limit_orders)) def test_active_bids(self): # Check initial 
output self.assertTrue(len(self.order_tracker.active_bids) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.active_bids) == len(self.limit_orders) / 2) def test_active_asks(self): # Check initial output self.assertTrue(len(self.order_tracker.active_asks) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.active_asks) == len(self.limit_orders) / 2) def test_tracked_limit_orders(self): # Check initial output self.assertTrue(len(self.order_tracker.tracked_limit_orders) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.tracked_limit_orders) == len( self.limit_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.limit_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) # Note: This includes all orders(open, cancelled, filled, partially filled). 
# Hence it should not differ from initial list of orders self.assertTrue( len(self.order_tracker.tracked_limit_orders) == len( self.limit_orders)) def test_tracked_limit_orders_data_frame(self): # Check initial output self.assertTrue( len(self.order_tracker.tracked_limit_orders_data_frame) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.tracked_limit_orders_data_frame) == len( self.limit_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.limit_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) # Note: This includes all orders(open, cancelled, filled, partially filled). # Hence it should not differ from initial list of orders self.assertTrue( len(self.order_tracker.tracked_limit_orders_data_frame) == len( self.limit_orders)) def test_tracked_market_orders(self): # Check initial output self.assertTrue(len(self.order_tracker.tracked_market_orders) == 0) # Simulate orders being placed and tracked for order in self.market_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.tracked_market_orders) == len( self.market_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.market_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) # Note: This includes all orders(open, cancelled, filled, partially filled). 
# Hence it should not differ from initial list of orders self.assertTrue( len(self.order_tracker.tracked_market_orders) == len( self.market_orders)) def test_tracked_market_order_data_frame(self): # Check initial output self.assertTrue( len(self.order_tracker.tracked_market_orders_data_frame) == 0) # Simulate orders being placed and tracked for order in self.market_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) self.assertTrue( len(self.order_tracker.tracked_market_orders_data_frame) == len( self.market_orders)) # Simulates order cancellation request being sent to exchange order_to_cancel = self.market_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) # Note: This includes all orders(open, cancelled, filled, partially filled). # Hence it should not differ from initial list of orders self.assertTrue( len(self.order_tracker.tracked_market_orders_data_frame) == len( self.market_orders)) def test_in_flight_cancels(self): # Check initial output self.assertTrue(len(self.order_tracker.in_flight_cancels) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) # Simulates order cancellation request being sent to exchange order_to_cancel = self.limit_orders[0] self.simulate_cancel_order(self.order_tracker, order_to_cancel) self.assertTrue(len(self.order_tracker.in_flight_cancels) == 1) def test_in_flight_pending_created(self): # Check initial output self.assertTrue(len(self.order_tracker.in_flight_pending_created) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.assertTrue( len(self.order_tracker.in_flight_pending_created) == len( self.limit_orders)) for order in self.limit_orders: 
self.simulate_order_created(self.order_tracker, order) self.assertTrue(len(self.order_tracker.in_flight_pending_created) == 0) def test_get_limit_orders(self): # Check initial output self.assertTrue( len(list(self.order_tracker.get_limit_orders().values())) == 0) # Simulate orders being placed and tracked for order in self.limit_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.assertTrue( len(self.order_tracker.get_limit_orders()[self.market_info].keys()) == len(self.limit_orders)) def test_get_market_orders(self): # Check initial output self.assertTrue( len(list(self.order_tracker.get_market_orders().values())) == 0) # Simulate orders being placed and tracked for order in self.market_orders: self.simulate_place_order(self.order_tracker, order, self.market_info) self.assertTrue( len(self.order_tracker.get_market_orders()[ self.market_info].keys()) == len(self.market_orders)) def test_get_shadow_limit_orders(self): # Check initial output self.assertTrue(self.market_info not in self.order_tracker.get_shadow_limit_orders()) # Simulates order being placed and tracked order: LimitOrder = self.limit_orders[0] self.simulate_place_order(self.order_tracker, order, self.market_info) # Compare order details and output other_order = self.order_tracker.get_shadow_limit_orders()[ self.market_info][order.client_order_id] self.assertEqual(order.trading_pair, other_order.trading_pair) self.assertEqual(order.price, other_order.price) self.assertEqual(order.quantity, other_order.quantity) self.assertEqual(order.is_buy, other_order.is_buy) # Simulate order being cancelled self.simulate_cancel_order(self.order_tracker, order) self.simulate_stop_tracking_order(self.order_tracker, order, self.market_info) # Check that order is not yet removed from shadow_limit_orders other_order = self.order_tracker.get_shadow_limit_orders()[ self.market_info][order.client_order_id] self.assertEqual(order.trading_pair, other_order.trading_pair) 
self.assertEqual(order.price, other_order.price) self.assertEqual(order.quantity, other_order.quantity) self.assertEqual(order.is_buy, other_order.is_buy) # Simulates current_timestamp > SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION self.clock.backtest_til( self.start_timestamp + OrderTracker.SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION + 1) self.order_tracker.check_and_cleanup_shadow_records() # Check that check_and_cleanup_shadow_records clears shadow_limit_orders self.assertTrue(self.market_info not in self.order_tracker.get_shadow_limit_orders()) def test_has_in_flight_cancel(self): # Check initial output self.assertFalse( self.order_tracker.has_in_flight_cancel("ORDER_ID_DO_NOT_EXIST")) # Simulates order being placed and tracked order: LimitOrder = self.limit_orders[0] self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) # Order not yet cancelled. self.assertFalse( self.order_tracker.has_in_flight_cancel(order.client_order_id)) # Simulate order being cancelled self.simulate_cancel_order(self.order_tracker, order) # Order inflight cancel timestamp has not yet expired self.assertTrue( self.order_tracker.has_in_flight_cancel(order.client_order_id)) # Simulate in-flight cancel has expired self.clock.backtest_til(self.start_timestamp + OrderTracker.CANCEL_EXPIRY_DURATION + 1) self.assertFalse( self.order_tracker.has_in_flight_cancel(order.client_order_id)) # Simulates order being placed and tracked order: LimitOrder = self.limit_orders[0] self.simulate_place_order(self.order_tracker, order, self.market_info) self.simulate_order_created(self.order_tracker, order) # Simulate order being cancelled and no longer tracked self.simulate_cancel_order(self.order_tracker, order) self.simulate_stop_tracking_order(self.order_tracker, order, self.market_info) # Check that once the order is no longer tracker, it will no longer have a pending cancel self.assertFalse( 
self.order_tracker.has_in_flight_cancel(order.client_order_id)) def test_get_market_pair_from_order_id(self): # Initial validation order: LimitOrder = self.limit_orders[0] self.assertNotEqual( self.market_info, self.order_tracker.get_market_pair_from_order_id( order.client_order_id)) # Simulate order being placed and tracked self.simulate_place_order(self.order_tracker, order, self.market_info) self.assertEqual( self.market_info, self.order_tracker.get_market_pair_from_order_id( order.client_order_id)) def test_get_shadow_market_pair_from_order_id(self): # Simulate order being placed and tracked order: LimitOrder = self.limit_orders[0] self.assertNotEqual( self.market_info, self.order_tracker.get_shadow_market_pair_from_order_id( order.client_order_id)) self.simulate_place_order(self.order_tracker, order, self.market_info) self.assertEqual( self.market_info, self.order_tracker.get_shadow_market_pair_from_order_id( order.client_order_id)) def test_get_limit_order(self): # Initial validation order: LimitOrder = self.limit_orders[0] # Order not yet placed self.assertNotEqual( order, self.order_tracker.get_limit_order(self.market_info, order.client_order_id)) # Simulate order being placed and tracked self.simulate_place_order(self.order_tracker, order, self.market_info) # Unrecognized Order self.assertNotEqual( order, self.order_tracker.get_limit_order(self.market_info, "UNRECOGNIZED_ORDER")) # Matching Order other_order = self.order_tracker.get_limit_order( self.market_info, order.client_order_id) self.assertEqual(order.trading_pair, other_order.trading_pair) self.assertEqual(order.price, other_order.price) self.assertEqual(order.quantity, other_order.quantity) self.assertEqual(order.is_buy, other_order.is_buy) def test_get_market_order(self): # Initial validation order: MarketOrder = MarketOrder( order_id=f"MARKET//-{self.clock.current_timestamp}", trading_pair=self.trading_pair, is_buy=True, base_asset=self.trading_pair.split("-")[0], 
quote_asset=self.trading_pair.split("-")[1], amount=float(10), timestamp=self.clock.current_timestamp) # Order not yet placed self.assertNotEqual( order, self.order_tracker.get_market_order(self.market_info, order.order_id)) # Simulate order being placed and tracked self.simulate_place_order(self.order_tracker, order, self.market_info) # Unrecognized Order self.assertNotEqual( order, self.order_tracker.get_market_order(self.market_info, "UNRECOGNIZED_ORDER")) # Matching Order self.assertEqual( str(order), str( self.order_tracker.get_market_order(self.market_info, order.order_id))) def test_get_shadow_limit_order(self): # Initial validation order: LimitOrder = self.limit_orders[0] # Order not yet placed self.assertNotEqual( order, self.order_tracker.get_shadow_limit_order(order.client_order_id)) # Simulate order being placed and tracked self.simulate_place_order(self.order_tracker, order, self.market_info) # Unrecognized Order self.assertNotEqual( order, self.order_tracker.get_shadow_limit_order("UNRECOGNIZED_ORDER")) # Matching Order shadow_order = self.order_tracker.get_shadow_limit_order( order.client_order_id) self.assertEqual(order.trading_pair, shadow_order.trading_pair) self.assertEqual(order.price, shadow_order.price) self.assertEqual(order.quantity, shadow_order.quantity) self.assertEqual(order.is_buy, shadow_order.is_buy) # Simulate order cancel self.simulate_cancel_order(self.order_tracker, order) self.assertNotEqual( order, self.order_tracker.get_shadow_limit_order(order.client_order_id)) def test_check_and_cleanup_shadow_records(self): order: LimitOrder = self.limit_orders[0] # Simulate order being placed and tracked self.simulate_place_order(self.order_tracker, order, self.market_info) # Check for shadow_tracked_limit_order self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 1) # Simulate order cancel and stop tracking order self.simulate_cancel_order(self.order_tracker, order) self.simulate_stop_tracking_order(self.order_tracker, order, 
self.market_info) # Check for shadow_tracked_limit_order self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 1) # Simulates current_timestamp > SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION self.clock.backtest_til( self.start_timestamp + OrderTracker.SHADOW_MAKER_ORDER_KEEP_ALIVE_DURATION + 1) self.order_tracker.check_and_cleanup_shadow_records() # Check that check_and_cleanup_shadow_records clears shadow_limit_orders self.assertTrue(len(self.order_tracker.shadow_limit_orders) == 0)
# Third-party imports import numpy as np import pandas as pd import pytest # First-party imports from gluonts.model.forecast import QuantileForecast, SampleForecast QUANTILES = np.arange(1, 100) / 100 SAMPLES = np.arange(101).reshape(101, 1) / 100 START_DATE = pd.Timestamp(2017, 1, 1, 12) FREQ = '1D' FORECASTS = { 'QuantileForecast': QuantileForecast( forecast_arrays=QUANTILES.reshape(-1, 1), start_date=START_DATE, forecast_keys=np.array(QUANTILES, str), freq=FREQ, ), 'SampleForecast': SampleForecast( samples=SAMPLES, start_date=START_DATE, freq=FREQ ), } @pytest.mark.parametrize("name", FORECASTS.keys()) def test_Forecast(name): forecast = FORECASTS[name] def percentile(value):
def make_equity_info(cls):
    """Return the equity metadata table as a DataFrame.

    Each row is keyed by an integer asset id (presumably a sid) and carries
    the columns ``start_date``, ``end_date``, ``symbol``, ``asset_type`` and
    ``exchange``.  Every asset starts on 2006-01-01 UTC, is an ``'equity'``
    and trades on ``'nasdaq'``; only the symbol and the end date vary.
    """
    listed_from = pd.Timestamp('2006-01-01', tz='UTC')

    # (asset id, symbol, end date) -- kept in the original insertion order so
    # the mapping handed to ``from_dict`` is identical to the literal form.
    listings = (
        (24, 'AAPL', '2007-01-01'),
        (3766, 'IBM', '2007-01-01'),
        (5061, 'MSFT', '2007-01-01'),
        (14848, 'YHOO', '2007-01-01'),
        (25317, 'DELL', '2007-01-01'),
        (13, 'NFLX', '2010-01-01'),
    )

    records = {}
    for asset_id, ticker, last_day in listings:
        records[asset_id] = {
            'start_date': listed_from,
            'end_date': pd.Timestamp(last_day, tz='UTC'),
            'symbol': ticker,
            'asset_type': 'equity',
            'exchange': 'nasdaq',
        }

    return pd.DataFrame.from_dict(records, orient='index')
df_wiki_china.columns = ['Place', 'Start date', 'End date'] df_wiki_china['url'] = wiki_china_links[1:] df_wiki_china['update'] = pd.to_datetime(wiki_china_dates[1:], format='%d %B %Y') df_wiki_china['Country'] = 'China' df_wiki_china['Level'] = 'City' df_wiki_china['Confirmed'] = True df_wiki = pd.concat((df_wiki_inter, df_wiki_china), sort=False) df_wiki.to_csv('wiki_lockdown_dates.csv', index=False) df_aura = pd.read_csv('aura_lockdown_dates.csv') df_aura['update'] = pd.to_datetime(df_aura['update'], format='%Y-%m-%d') df_quar = pd.concat((df_wiki_inter, df_wiki_china, df_aura), sort=False) df_quar['update'] = df_quar['update'].fillna(pd.Timestamp('2000/11/12 13:35')) df_quar = df_quar.sort_values('update') df_quar = df_quar.drop_duplicates(['Country', 'Place'], keep='last') df_quar = df_quar.dropna(subset=['Start date']) df_quar.loc[df_quar['Place'] == df_quar['Country'], 'Place'] = np.nan print('not in arewe') print(set(df_quar['Country']) - set(df_arewe_ls['Country'])) print('not in ours') print(set(df_arewe_ls['Country']) - set(df_quar['Country'])) df_quar.to_csv('deploy/lockdown_dates.csv', index=False) df_quar.to_csv('history/lockdown_dates_%s.csv' % (pd.datetime.now().strftime('%d-%m-%y')), index=False) df_quar_old = pd.read_csv(
#coding:utf-8
from threading import Timer
import time
import os
import csv
import tushare as ts
import numpy as np
import pandas as pd
import datetime
from jqdatasdk import *
from pathlib import Path

# Module-wide names declared up front.  NOTE: a ``global`` statement at module
# level has no effect on binding; it only lists the names other code is
# presumably expected to share.
global today, todaytime, yesterday, yesterdaytime, tommow, tommowtime, pandastime, cwf, alltradeday, homefolder

# Capture the current date, plus the neighbouring days, both as ``date``
# objects and as naive ``datetime`` objects at midnight.
pandastime = pd.Timestamp("2017-6-19 9:13:45")
today = datetime.date.today()
todaytime = datetime.datetime.combine(today, datetime.time.min)
tommow = today + datetime.timedelta(days=1)
tommowtime = datetime.datetime.combine(tommow, datetime.time.min)
yesterday = today - datetime.timedelta(days=1)
yesterdaytime = datetime.datetime.combine(yesterday, datetime.time.min)

# Earliest timestamps used for JoinQuant data, one per bar frequency
# (month, week, day, 60/30/15/5/1 minute).
jqmonthheadtime = datetime.datetime(2005, 2, 1)
jqweekheadtime = datetime.datetime(2005, 1, 15)
jqdayheadtime = datetime.datetime(2005, 1, 5)
jq60mheadtime = datetime.datetime(2005, 1, 4, hour=10, minute=31)
jq30mheadtime = datetime.datetime(2005, 1, 4, hour=10, minute=1)
jq15mheadtime = datetime.datetime(2005, 1, 4, hour=9, minute=46)
jq5mheadtime = datetime.datetime(2005, 1, 4, hour=9, minute=36)
jq1mheadtime = datetime.datetime(2005, 1, 4, hour=9, minute=31)
def describe_cluster(cluster_df, features, weight_column, oven_refills):
    """
    Summarise one cluster as a flat Series of weighted statistics.

    Rows whose ``HT_VOLTAGE_BREAKDOWN`` flag is non-zero are left out of the
    per-feature statistics.  The duration and breakdown-rate entries are
    computed from the whole of ``cluster_df``.

    Parameters
    ----------
    cluster_df : DataFrame
        All data points that belong to the cluster being described.
    features : list of source features
        Columns for which the per-feature statistics are produced.
    weight_column : string
        Name of the column used to weight data points, typically
        `datapoint_duration` (``ProcessingFeatures.DATAPOINT_DURATION``).
    oven_refills : list of timestamp
        End of the oven refill periods.

    Returns
    -------
    Series or None
        ``None`` when no non-breakdown row exists.  Otherwise a Series with
        a two-level index holding, for each entry of `features`:

        1. mean
        2. std
        3. std% (absolute std in percent of mean)
        4. avg_dev (weighted mean absolute deviation from the mean)
        5. min
        6. 25% (lower quartile)
        7. median
        8. 75% (upper quartile)
        9. max

        followed once by:

        10. DENSITY/count (number of non-breakdown data points)
        11. DURATION/in_hours (summed datapoint duration, in hours)
        12. DURATION/longest_in_hours (second value returned by
            ``get_cluster_duration``, divided by 3600)
        13. DURATION/num_splits (third value returned by
            ``get_cluster_duration``)
        14. REFILL/index (position of the last entry of `oven_refills` that
            is strictly earlier than the first value returned by
            ``get_cluster_duration``; ``None`` if there is none)
        15. REFILL/delta_in_hours (hours between that refill and that start
            value; ``-1`` if there is none)
        16. num_breakdowns/per_hour (distinct positive
            ``HT_SPARKS_COUNTER`` values divided by DURATION/in_hours)
    """
    per_feature_stats = (
        "mean", "std", "std%", "avg_dev", "min", "25%", "median", "75%", "max",
    )
    labels = [(feature, stat) for feature in features for stat in per_feature_stats]
    labels.extend(
        [
            ("DENSITY", "count"),
            ("DURATION", "in_hours"),
            ("DURATION", "longest_in_hours"),
            ("DURATION", "num_splits"),
            ("REFILL", "index"),
            ("REFILL", "delta_in_hours"),
            ("num_breakdowns", "per_hour"),
        ]
    )
    index = pd.MultiIndex.from_tuples(labels)

    # Only rows outside a voltage breakdown feed the weighted statistics.
    no_breakdown = cluster_df[ProcessingFeatures.HT_VOLTAGE_BREAKDOWN] == 0
    data = cluster_df.loc[no_breakdown, features].values
    # TODO maybe only include non breakdown here???
    weights = cluster_df.loc[no_breakdown, weight_column].values

    if data.size == 0:
        return None

    weighted = DescrStatsW(data, weights, ddof=1)
    mean = np.array(weighted.mean)
    std = np.array(weighted.std)
    # One row per requested quantile: min, 25%, median, 75%, max.
    quantiles = weighted.quantile([0, 0.25, 0.5, 0.75, 1], return_pandas=False)
    avg_dev = np.dot(weights, np.absolute(data - mean)) / np.sum(weights)

    # Durations are stored in seconds; everything reported is in hours.
    total_hours = cluster_df[ProcessingFeatures.DATAPOINT_DURATION].sum() / 3600
    longest_start, longest_seconds, num_splits = get_cluster_duration(
        cluster_df, weight_column
    )
    longest_hours = longest_seconds / 3600

    # Scan the refills from the last to the first and keep the position of
    # the first one found that lies strictly before ``longest_start``.
    closest_refill = next(
        (
            position
            for position, refill in reversed(list(enumerate(oven_refills)))
            if longest_start > refill
        ),
        None,
    )
    if closest_refill is None:
        refill_delta = -1
    else:
        elapsed = pd.Timestamp(longest_start) - oven_refills[closest_refill]
        refill_delta = elapsed.total_seconds() / 3600

    description = []
    for column in range(len(features)):
        description.append(mean[column])
        description.append(std[column])
        description.append(np.abs(std[column] / mean[column]) * 100)
        description.append(avg_dev[column])
        description.extend(quantiles[row][column] for row in range(5))

    description.extend(
        [
            len(data),
            total_hours,
            longest_hours,
            num_splits,
            closest_refill,
            refill_delta,
        ]
    )

    sparks = cluster_df[ProcessingFeatures.HT_SPARKS_COUNTER]
    distinct_sparks = cluster_df.loc[
        sparks > 0, ProcessingFeatures.HT_SPARKS_COUNTER
    ].nunique()
    description.append(distinct_sparks / total_hours)

    return pd.Series(description, index=index)
def _to_timestamp(d): return pd.Timestamp(d).replace(hour=0, minute=0, second=0, microsecond=0)
import statsmodels.api as sm
__all__=['predict']


# Function section
def predict(X, Y, x):
    '''
    @note: fit an ordinary least squares regression of Y on X and return the
           value predicted for x
    @X: independent variable
    @Y: dependent variable
    @x: x value for which the prediction is requested
    '''
    exog = np.array(X)
    endog = np.array(Y)
    design = sm.add_constant(exog)
    ols_model = sm.OLS(endog, design)
    fitted = ols_model.fit()
    # Put a 1 in front of the query values, mirroring the constant column
    # that ``add_constant`` attached to the training data.
    query = np.hstack(([1], np.array(x)))
    return ols_model.predict(fitted.params, query)


# Data section
p = pd.read_csv('../data/margin_m.csv', index_col=0)
p.index = [pd.Timestamp(str(label)) for label in p.index]
# Drop invalid values: non-positive cells become NaN and any row holding one
# is removed.
d = p[p > 0].dropna(how="any")
# Keep the last 12 rows (the original note says "the most recent year", so
# the data is presumably monthly).
d = d.iloc[-12:]
print(predict(d['commodity_pre'], d['margin'], [1]))

if __name__ == "__main__":
    import ols
    print(help(ols))
def _create_dataframe():
    """Load ``data.csv`` from ``RESOURCE_DIR`` indexed by its timestamps.

    The ``timestamp`` column is converted value by value with
    ``pd.Timestamp`` and then becomes the index of the returned DataFrame.
    """
    csv_path = os.path.join(RESOURCE_DIR, 'data.csv')
    frame = pd.read_csv(csv_path)
    parsed = frame['timestamp'].apply(pd.Timestamp)
    frame['timestamp'] = parsed
    return frame.set_index('timestamp')
class TestSparseArray: def setup_method(self, method): self.arr_data = np.array( [np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) def test_constructor_dtype(self): arr = SparseArray([np.nan, 1, 2, np.nan]) assert arr.dtype == SparseDtype(np.float64, np.nan) assert arr.dtype.subtype == np.float64 assert np.isnan(arr.fill_value) arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) assert arr.dtype == SparseDtype(np.float64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=np.float64) assert arr.dtype == SparseDtype(np.float64, np.nan) assert np.isnan(arr.fill_value) arr = SparseArray([0, 1, 2, 4], dtype=np.int64) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], dtype=None) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) assert arr.dtype == SparseDtype(np.int64, 0) assert arr.fill_value == 0 def test_constructor_dtype_str(self): result = SparseArray([1, 2, 3], dtype="int") expected = SparseArray([1, 2, 3], dtype=int) tm.assert_sp_array_equal(result, expected) def test_constructor_sparse_dtype(self): result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1)) expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64) tm.assert_sp_array_equal(result, expected) assert result.sp_values.dtype == np.dtype("int64") def test_constructor_sparse_dtype_str(self): result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]") expected = SparseArray([1, 0, 0, 1], dtype=np.int32) tm.assert_sp_array_equal(result, expected) assert result.sp_values.dtype == np.dtype("int32") def test_constructor_object_dtype(self): # GH 11856 arr = SparseArray(["A", "A", np.nan, "B"], 
dtype=object) assert arr.dtype == SparseDtype(object) assert np.isnan(arr.fill_value) arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A") assert arr.dtype == SparseDtype(object, "A") assert arr.fill_value == "A" # GH 17574 data = [False, 0, 100.0, 0.0] arr = SparseArray(data, dtype=object, fill_value=False) assert arr.dtype == SparseDtype(object, False) assert arr.fill_value is False arr_expected = np.array(data, dtype=object) it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) assert np.fromiter(it, dtype=np.bool_).all() @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) def test_constructor_na_dtype(self, dtype): with pytest.raises(ValueError, match="Cannot convert"): SparseArray([0, 1, np.nan], dtype=dtype) def test_constructor_warns_when_losing_timezone(self): # GH#32501 warn when losing timezone inforamtion dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]")) with tm.assert_produces_warning(UserWarning): result = SparseArray(dti) tm.assert_sp_array_equal(result, expected) with tm.assert_produces_warning(UserWarning): result = SparseArray(pd.Series(dti)) tm.assert_sp_array_equal(result, expected) def test_constructor_spindex_dtype(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) # XXX: Behavior change: specifying SparseIndex no longer changes the # fill_value expected = SparseArray([0, 1, 2, 0], kind="integer") tm.assert_sp_array_equal(arr, expected) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray( data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=np.int64, fill_value=0, ) exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64) exp = SparseArray([0, 1, 2, 0], 
fill_value=0, dtype=np.int64) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray( data=[1, 2, 3], sparse_index=IntIndex(4, [1, 2, 3]), dtype=None, fill_value=0, ) exp = SparseArray([0, 1, 2, 3], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])]) def test_constructor_spindex_dtype_scalar(self, sparse_index): # scalar input arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) exp = SparseArray([1], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) exp = SparseArray([1], dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 def test_constructor_spindex_dtype_scalar_broadcasts(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None) exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == SparseDtype(np.int64) assert arr.fill_value == 0 @pytest.mark.parametrize( "data, fill_value", [ (np.array([1, 2]), 0), (np.array([1.0, 2.0]), np.nan), ([True, False], False), ([pd.Timestamp("2017-01-01")], pd.NaT), ], ) def test_constructor_inferred_fill_value(self, data, fill_value): result = SparseArray(data).fill_value if pd.isna(fill_value): assert pd.isna(result) else: assert result == fill_value @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) @pytest.mark.parametrize("size", [0, 10]) @td.skip_if_no_scipy def test_from_spmatrix(self, size, format): import scipy.sparse mat = scipy.sparse.random(size, 1, density=0.5, format=format) result = SparseArray.from_spmatrix(mat) result = np.asarray(result) expected = mat.toarray().ravel() tm.assert_numpy_array_equal(result, 
expected) @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) @td.skip_if_no_scipy def test_from_spmatrix_including_explicit_zero(self, format): import scipy.sparse mat = scipy.sparse.random(10, 1, density=0.5, format=format) mat.data[0] = 0 result = SparseArray.from_spmatrix(mat) result = np.asarray(result) expected = mat.toarray().ravel() tm.assert_numpy_array_equal(result, expected) @td.skip_if_no_scipy def test_from_spmatrix_raises(self): import scipy.sparse mat = scipy.sparse.eye(5, 4, format="csc") with pytest.raises(ValueError, match="not '4'"): SparseArray.from_spmatrix(mat) @pytest.mark.parametrize( "scalar,dtype", [ (False, SparseDtype(bool, False)), (0.0, SparseDtype("float64", 0)), (1, SparseDtype("int64", 1)), ("z", SparseDtype("object", "z")), ], ) def test_scalar_with_index_infer_dtype(self, scalar, dtype): # GH 19163 arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) tm.assert_sp_array_equal(arr, exp) assert arr.dtype == dtype assert exp.dtype == dtype def test_get_item(self): assert np.isnan(self.arr[1]) assert self.arr[2] == 1 assert self.arr[7] == 5 assert self.zarr[0] == 0 assert self.zarr[2] == 1 assert self.zarr[7] == 5 errmsg = re.compile("bounds") with pytest.raises(IndexError, match=errmsg): self.arr[11] with pytest.raises(IndexError, match=errmsg): self.arr[-11] assert self.arr[-1] == self.arr[len(self.arr) - 1] def test_take_scalar_raises(self): msg = "'indices' must be an array, not a scalar '2'." 
with pytest.raises(ValueError, match=msg): self.arr.take(2) def test_take(self): exp = SparseArray(np.take(self.arr_data, [2, 3])) tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) exp = SparseArray(np.take(self.arr_data, [0, 1, 2])) tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp) def test_take_all_empty(self): a = pd.array([0, 0], dtype=SparseDtype("int64")) result = a.take([0, 1], allow_fill=True, fill_value=np.nan) tm.assert_sp_array_equal(a, result) def test_take_fill_value(self): data = np.array([1, np.nan, 0, 3, 0]) sparse = SparseArray(data, fill_value=0) exp = SparseArray(np.take(data, [0]), fill_value=0) tm.assert_sp_array_equal(sparse.take([0]), exp) exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0) tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp) def test_take_negative(self): exp = SparseArray(np.take(self.arr_data, [-1])) tm.assert_sp_array_equal(self.arr.take([-1]), exp) exp = SparseArray(np.take(self.arr_data, [-4, -3, -2])) tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) @pytest.mark.parametrize("fill_value", [0, None, np.nan]) def test_shift_fill_value(self, fill_value): # GH #24128 sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0) res = sparse.shift(1, fill_value=fill_value) if isna(fill_value): fill_value = res.dtype.na_value exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0) tm.assert_sp_array_equal(res, exp) def test_bad_take(self): with pytest.raises(IndexError, match="bounds"): self.arr.take([11]) def test_take_filling(self): # similar tests as GH 12631 sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) result = sparse.take(np.array([1, 0, -1])) expected = SparseArray([np.nan, np.nan, 4]) tm.assert_sp_array_equal(result, expected) # XXX: test change: fill_value=True -> allow_fill=True result = sparse.take(np.array([1, 0, -1]), allow_fill=True) expected = SparseArray([np.nan, np.nan, np.nan]) tm.assert_sp_array_equal(result, expected) # allow_fill=False result = 
sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = SparseArray([np.nan, np.nan, 4]) tm.assert_sp_array_equal(result, expected) msg = "Invalid value in 'indices'" with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) msg = "out of bounds value in 'indices'" with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, -6])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5]), allow_fill=True) def test_take_filling_fill_value(self): # same tests as GH 12631 sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) result = sparse.take(np.array([1, 0, -1])) expected = SparseArray([0, np.nan, 4], fill_value=0) tm.assert_sp_array_equal(result, expected) # fill_value result = sparse.take(np.array([1, 0, -1]), allow_fill=True) # XXX: behavior change. # the old way of filling self.fill_value doesn't follow EA rules. # It's supposed to be self.dtype.na_value (nan in this case) expected = SparseArray([0, np.nan, np.nan], fill_value=0) tm.assert_sp_array_equal(result, expected) # allow_fill=False result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = SparseArray([0, np.nan, 4], fill_value=0) tm.assert_sp_array_equal(result, expected) msg = "Invalid value in 'indices'." 
with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) msg = "out of bounds value in 'indices'" with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, -6])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5]), fill_value=True) def test_take_filling_all_nan(self): sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan]) # XXX: did the default kind from take change? result = sparse.take(np.array([1, 0, -1])) expected = SparseArray([np.nan, np.nan, np.nan], kind="block") tm.assert_sp_array_equal(result, expected) result = sparse.take(np.array([1, 0, -1]), fill_value=True) expected = SparseArray([np.nan, np.nan, np.nan], kind="block") tm.assert_sp_array_equal(result, expected) msg = "out of bounds value in 'indices'" with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, -6])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5])) with pytest.raises(IndexError, match=msg): sparse.take(np.array([1, 5]), fill_value=True) def test_set_item(self): def setitem(): self.arr[5] = 3 def setslice(): self.arr[1:5] = 2 with pytest.raises(TypeError, match="assignment via setitem"): setitem() with pytest.raises(TypeError, match="assignment via setitem"): setslice() def test_constructor_from_too_large_array(self): with pytest.raises(TypeError, match="expected dimension <= 1 data"): SparseArray(np.arange(10).reshape((2, 5))) def test_constructor_from_sparse(self): res = SparseArray(self.zarr) assert res.fill_value == 0 tm.assert_almost_equal(res.sp_values, self.zarr.sp_values) def test_constructor_copy(self): cp = SparseArray(self.arr, copy=True) cp.sp_values[:3] = 0 assert not (self.arr.sp_values[:3] == 0).any() not_copy = SparseArray(self.arr) not_copy.sp_values[:3] = 0 assert (self.arr.sp_values[:3] == 0).all() 
def test_constructor_bool(self):
    """Bool data with fill_value=False keeps only the True entries as sp_values."""
    # GH 10648
    data = np.array([False, False, True, True, False, False])
    arr = SparseArray(data, fill_value=False, dtype=bool)

    assert arr.dtype == SparseDtype(bool)
    tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
    # Behavior change: np.asarray densifies.
    # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
    tm.assert_numpy_array_equal(arr.sp_index.indices,
                                np.array([2, 3], np.int32))

    dense = arr.to_dense()
    assert dense.dtype == bool
    tm.assert_numpy_array_equal(dense, data)


def test_constructor_bool_fill_value(self):
    """Bool arrays get a falsy fill_value unless one is passed explicitly."""
    arr = SparseArray([True, False, True], dtype=None)
    assert arr.dtype == SparseDtype(np.bool_)
    assert not arr.fill_value

    arr = SparseArray([True, False, True], dtype=np.bool_)
    assert arr.dtype == SparseDtype(np.bool_)
    assert not arr.fill_value

    arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
    assert arr.dtype == SparseDtype(np.bool_, True)
    assert arr.fill_value


def test_constructor_float32(self):
    """float32 input keeps float32 for the dtype, sp_values and dense form."""
    # GH 10648
    data = np.array([1.0, np.nan, 3], dtype=np.float32)
    arr = SparseArray(data, dtype=np.float32)

    assert arr.dtype == SparseDtype(np.float32)
    tm.assert_numpy_array_equal(arr.sp_values,
                                np.array([1, 3], dtype=np.float32))
    # Behavior change: np.asarray densifies.
    # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
    tm.assert_numpy_array_equal(arr.sp_index.indices,
                                np.array([0, 2], dtype=np.int32))

    dense = arr.to_dense()
    assert dense.dtype == np.float32
    tm.assert_numpy_array_equal(dense, data)


def test_astype(self):
    """astype accepts a dtype string or a SparseDtype; NaN -> int64 raises."""
    # float -> float
    arr = SparseArray([None, None, 0, 2])
    result = arr.astype("Sparse[float32]")
    expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
    tm.assert_sp_array_equal(result, expected)

    # Same subtype family, but an explicit fill_value of 0 in the target dtype.
    dtype = SparseDtype("float64", fill_value=0)
    result = arr.astype(dtype)
    expected = SparseArray._simple_new(
        np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype)
    tm.assert_sp_array_equal(result, expected)

    # float -> int with an integer fill_value.
    dtype = SparseDtype("int64", 0)
    result = arr.astype(dtype)
    expected = SparseArray._simple_new(np.array([0, 2], dtype=np.int64),
                                       IntIndex(4, [2, 3]), dtype)
    tm.assert_sp_array_equal(result, expected)

    # A stored NaN cannot be converted to an integer subtype.
    arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
    with pytest.raises(ValueError, match="NA"):
        arr.astype("Sparse[i8]")


def test_astype_bool(self):
    """Casting an int sparse array to bool, via plain bool and via SparseDtype."""
    a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
    result = a.astype(bool)
    expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0))
    tm.assert_sp_array_equal(result, expected)

    # update fill value
    result = a.astype(SparseDtype(bool, False))
    expected = SparseArray([True, False, False, True],
                           dtype=SparseDtype(bool, False))
    tm.assert_sp_array_equal(result, expected)


def test_astype_all(self, any_real_dtype):
    """astype to each real dtype keeps the fill_value and converts sp_values.

    ``any_real_dtype`` is a fixture defined outside this fragment.
    """
    vals = np.array([1, 2, 3])
    arr = SparseArray(vals, fill_value=1)
    typ = np.dtype(any_real_dtype)
    res = arr.astype(typ)
    assert res.dtype == SparseDtype(typ, 1)
    assert res.sp_values.dtype == typ

    tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ))


@pytest.mark.parametrize(
    "array, dtype, expected",
    [
        (
            SparseArray([0, 1]),
            "float",
            SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
        ),
        (SparseArray([0, 1]), bool, SparseArray([False, True])),
        (
            SparseArray([0, 1], fill_value=1),
            bool,
            SparseArray([False, True], dtype=SparseDtype(bool, True)),
        ),
        pytest.param(
            SparseArray([0, 1]),
            "datetime64[ns]",
            SparseArray(
                np.array([0, 1], dtype="datetime64[ns]"),
                dtype=SparseDtype("datetime64[ns]", pd.Timestamp("1970")),
            ),
            marks=[pytest.mark.xfail(reason="NumPy-7619")],
        ),
        (
            SparseArray([0, 1, 10]),
            str,
            SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
        ),
        (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
        (
            SparseArray([0, 1, 0]),
            object,
            SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
        ),
    ],
)
def test_astype_more(self, array, dtype, expected):
    """astype with non-sparse target dtypes; see the parametrized cases above."""
    result = array.astype(dtype)
    tm.assert_sp_array_equal(result, expected)


def test_astype_nan_raises(self):
    """Casting an array that stores NaN to int raises ValueError."""
    arr = SparseArray([1.0, np.nan])
    with pytest.raises(ValueError, match="Cannot convert non-finite"):
        arr.astype(int)


def test_set_fill_value(self):
    """Assigning ``fill_value`` stores the value as given, without coercion."""
    arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
    arr.fill_value = 2
    assert arr.fill_value == 2

    arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
    arr.fill_value = 2
    assert arr.fill_value == 2

    # XXX: this seems fine? You can construct an integer
    # sparsearray with NaN fill value, why not update one?
    # coerces to int
    # msg = "unable to set fill_value 3\\.1 to int64 dtype"
    # with pytest.raises(ValueError, match=msg):
    arr.fill_value = 3.1
    assert arr.fill_value == 3.1

    # msg = "unable to set fill_value nan to int64 dtype"
    # with pytest.raises(ValueError, match=msg):
    arr.fill_value = np.nan
    assert np.isnan(arr.fill_value)

    arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
    arr.fill_value = True
    assert arr.fill_value

    # coerces to bool
    # msg = "unable to set fill_value 0 to bool dtype"
    # with pytest.raises(ValueError, match=msg):
    arr.fill_value = 0
    assert arr.fill_value == 0

    # msg = "unable to set fill_value nan to bool dtype"
    # with pytest.raises(ValueError, match=msg):
    arr.fill_value = np.nan
    assert np.isnan(arr.fill_value)


@pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
def test_set_fill_invalid_non_scalar(self, val):
    """A list, ndarray or tuple is rejected as ``fill_value``."""
    arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
    msg = "fill_value must be a scalar"

    with pytest.raises(ValueError, match=msg):
        arr.fill_value = val


def test_copy(self):
    """copy() yields new sp_values but shares the same sp_index object."""
    arr2 = self.arr.copy()
    assert arr2.sp_values is not self.arr.sp_values
    assert arr2.sp_index is self.arr.sp_index


def test_values_asarray(self):
    """to_dense() matches the ``self.arr_data`` fixture (defined out of view)."""
    tm.assert_almost_equal(self.arr.to_dense(), self.arr_data)


@pytest.mark.parametrize(
    "data,shape,dtype",
    [
        ([0, 0, 0, 0, 0], (5, ), None),
        ([], (0, ), None),
        ([0], (1, ), None),
        (["A", "A", np.nan, "B"], (4, ), object),
    ],
)
def test_shape(self, data, shape, dtype):
    """``shape`` is one-dimensional and equals the input length."""
    # GH 21126
    out = SparseArray(data, dtype=dtype)
    assert out.shape == shape


@pytest.mark.parametrize(
    "vals",
    [
        [np.nan, np.nan, np.nan, np.nan, np.nan],
        [1, np.nan, np.nan, 3, np.nan],
        [1, np.nan, 0, 3, 0],
    ],
)
@pytest.mark.parametrize("fill_value", [None, 0])
def test_dense_repr(self, vals, fill_value):
    """to_dense() and _internal_get_values() both reproduce the input values."""
    vals = np.array(vals)
    arr = SparseArray(vals, fill_value=fill_value)

    res = arr.to_dense()
    tm.assert_numpy_array_equal(res, vals)

    res2 = arr._internal_get_values()
    tm.assert_numpy_array_equal(res2, vals)


def test_getitem(self):
    """Scalar indexing agrees with the dense array for i and -i."""
    def _checkit(i):
        tm.assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

    for i in range(len(self.arr)):
        _checkit(i)
        _checkit(-i)


def test_getitem_arraylike_mask(self):
    """A plain list of booleans works as a mask."""
    arr = SparseArray([0, 1, 2])
    result = arr[[True, False, True]]
    expected = SparseArray([0, 2])
    tm.assert_sp_array_equal(result, expected)


def test_getslice(self):
    """Slicing matches slicing the dense array, including oversized bounds."""
    result = self.arr[:-3]
    exp = SparseArray(self.arr.to_dense()[:-3])
    tm.assert_sp_array_equal(result, exp)

    result = self.arr[-4:]
    exp = SparseArray(self.arr.to_dense()[-4:])
    tm.assert_sp_array_equal(result, exp)

    # two corner cases from Series
    # NOTE(review): these presumably rely on len(self.arr) <= 12 -- confirm
    # against the fixture, which is defined outside this fragment.
    result = self.arr[-12:]
    exp = SparseArray(self.arr)
    tm.assert_sp_array_equal(result, exp)

    result = self.arr[:-12]
    exp = SparseArray(self.arr.to_dense()[:0])
    tm.assert_sp_array_equal(result, exp)


def test_getslice_tuple(self):
    """A 1-tuple holding a slice is accepted; a 2-d index raises IndexError."""
    dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

    sparse = SparseArray(dense)
    res = sparse[(slice(4, None), )]
    exp = SparseArray(dense[4:])
    tm.assert_sp_array_equal(res, exp)

    sparse = SparseArray(dense, fill_value=0)
    res = sparse[(slice(4, None), )]
    exp = SparseArray(dense[4:], fill_value=0)
    tm.assert_sp_array_equal(res, exp)

    msg = "too many indices for array"
    with pytest.raises(IndexError, match=msg):
        sparse[4:, :]

    with pytest.raises(IndexError, match=msg):
        # check numpy compat
        dense[4:, :]


def test_boolean_slice_empty(self):
    """An all-False mask yields a result with the original dtype."""
    arr = SparseArray([0, 1, 2])
    res = arr[[False, False, False]]
    assert res.dtype == arr.dtype


@pytest.mark.parametrize(
    "op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
def test_binary_operators(self, op):
    """Binary ops on sparse operands agree with the same op on dense data.

    Exercised for NaN-filled arrays and for arrays with fill_value=3.
    """
    op = getattr(operator, op)
    data1 = np.random.randn(20)
    data2 = np.random.randn(20)

    data1[::2] = np.nan
    data2[::3] = np.nan

    arr1 = SparseArray(data1)
    arr2 = SparseArray(data2)

    # Overwrite the same positions with 3 so they become the fill positions
    # of the fill_value=3 arrays.
    data1[::2] = 3
    data2[::3] = 3
    farr1 = SparseArray(data1, fill_value=3)
    farr2 = SparseArray(data2, fill_value=3)

    def _check_op(op, first, second):
        # sparse (op) sparse
        res = op(first, second)
        exp = SparseArray(op(first.to_dense(), second.to_dense()),
                          fill_value=first.fill_value)
        assert isinstance(res, SparseArray)
        tm.assert_almost_equal(res.to_dense(), exp.to_dense())

        # sparse (op) dense
        res2 = op(first, second.to_dense())
        assert isinstance(res2, SparseArray)
        tm.assert_sp_array_equal(res, res2)

        # dense (op) sparse
        res3 = op(first.to_dense(), second)
        assert isinstance(res3, SparseArray)
        tm.assert_sp_array_equal(res, res3)

        # sparse (op) scalar
        res4 = op(first, 4)
        assert isinstance(res4, SparseArray)

        # Ignore this if the actual op raises (e.g. pow).
        try:
            exp = op(first.to_dense(), 4)
            exp_fv = op(first.fill_value, 4)
        except ValueError:
            pass
        else:
            tm.assert_almost_equal(res4.fill_value, exp_fv)
            tm.assert_almost_equal(res4.to_dense(), exp)

    with np.errstate(all="ignore"):
        for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
            _check_op(op, first_arr, second_arr)


def test_pickle(self):
    """Both fixture arrays survive a pickle round trip unchanged."""
    def _check_roundtrip(obj):
        unpickled = tm.round_trip_pickle(obj)
        tm.assert_sp_array_equal(unpickled, obj)

    _check_roundtrip(self.arr)
    _check_roundtrip(self.zarr)


def test_generator_warnings(self):
    """Iterating a SparseArray emits no recorded warnings."""
    sp_arr = SparseArray([1, 2, 3])
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings(action="always", category=DeprecationWarning)
        warnings.filterwarnings(action="always",
                                category=PendingDeprecationWarning)
        for _ in sp_arr:
            pass
        assert len(w) == 0


def test_fillna(self):
    """fillna(-1) across NaN / 0 fill values and float / int data."""
    s = SparseArray([1, np.nan, np.nan, 3, np.nan])
    res = s.fillna(-1)
    exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
    res = s.fillna(-1)
    exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    s = SparseArray([1, np.nan, 0, 3, 0])
    res = s.fillna(-1)
    exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
    res = s.fillna(-1)
    exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    s = SparseArray([np.nan, np.nan, np.nan, np.nan])
    res = s.fillna(-1)
    exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
    res = s.fillna(-1)
    exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)

    # float dtype's fill_value is np.nan, replaced by -1
    s = SparseArray([0.0, 0.0, 0.0, 0.0])
    res = s.fillna(-1)
    exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
    tm.assert_sp_array_equal(res, exp)

    # int dtype shouldn't have missing. No changes.
    s = SparseArray([0, 0, 0, 0])
    assert s.dtype == SparseDtype(np.int64)
    assert s.fill_value == 0
    res = s.fillna(-1)
    tm.assert_sp_array_equal(res, s)

    s = SparseArray([0, 0, 0, 0], fill_value=0)
    assert s.dtype == SparseDtype(np.int64)
    assert s.fill_value == 0
    res = s.fillna(-1)
    exp = SparseArray([0, 0, 0, 0], fill_value=0)
    tm.assert_sp_array_equal(res, exp)

    # fill_value can be nan if there is no missing hole.
    # only fill_value will be changed
    s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
    assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
    assert np.isnan(s.fill_value)
    res = s.fillna(-1)
    exp = SparseArray([0, 0, 0, 0], fill_value=-1)
    tm.assert_sp_array_equal(res, exp)


def test_fillna_overlap(self):
    """fillna with a value that is already stored in the array."""
    s = SparseArray([1, np.nan, np.nan, 3, np.nan])
    # filling with existing value doesn't replace existing value with
    # fill_value, i.e. existing 3 remains in sp_values
    res = s.fillna(3)
    exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
    tm.assert_numpy_array_equal(res.to_dense(), exp)

    s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
    res = s.fillna(3)
    exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
    tm.assert_sp_array_equal(res, exp)


def test_nonzero(self):
    """nonzero() returns int32 positions of the non-zero, non-NaN entries."""
    # Tests regression #21172.
    sa = SparseArray(
        [float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
    expected = np.array([2, 5, 9], dtype=np.int32)
    (result, ) = sa.nonzero()
    tm.assert_numpy_array_equal(expected, result)

    sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
    (result, ) = sa.nonzero()
    tm.assert_numpy_array_equal(expected, result)
#[ttmax,vvmin]=d_c2.min() #aa=d_c2.loc[d_c2.y==vvmax, ["t"]] #bb=d_c2.loc[d_c2.y==vvmin, ["t"]] plt.axvspan(*mdates.datestr2num( ['2020-01-08 19:29:10.632', '2020-01-09 17:21:56.263']), color='red', alpha=0.5) plt.axvspan(*mdates.datestr2num( ['2020-01-09 17:21:56.263', '2020-01-10 13:04:06.263']), color='green', alpha=0.5) plt.axvspan(*mdates.datestr2num( ['2020-01-10 13:04:06.263', '2020-01-11 09:08:03.572']), color='gray', alpha=0.5) plt.text(pd.Timestamp('2020-01-09 02:00:00'), 2.9, 'Cycle 1', horizontalalignment='left', size='large', color='white') plt.text(pd.Timestamp('2020-01-10 00:00:00'), 2.9, 'Cycle 2', horizontalalignment='left', size='large', color='white') plt.text(pd.Timestamp('2020-01-10 19:00:00'), 2.9, 'Cycle 3', horizontalalignment='left',
def _create_schema(index="single"):
    """Build the DataFrameSchema used as a fixture.

    :param index: ``"multi"`` attaches a three-level integer MultiIndex,
        ``"single"`` attaches one integer index with ``name=None``, and any
        other value produces a schema without an index.
    :returns: a strict, non-coercing ``pa.DataFrameSchema`` with int, float,
        str, datetime, timedelta and optional-property columns.
    """
    if index == "multi":
        schema_index = pa.MultiIndex([
            pa.Index(pa.Int, name="int_index0"),
            pa.Index(pa.Int, name="int_index1"),
            pa.Index(pa.Int, name="int_index2"),
        ])
    elif index == "single":
        # make sure io modules can handle case when index name is None
        schema_index = pa.Index(pa.Int, name=None)
    else:
        schema_index = None

    # Columns are added in the same order the schema declares them.
    schema_columns = {}

    schema_columns["int_column"] = pa.Column(
        pa.Int,
        checks=[
            pa.Check.greater_than(0),
            pa.Check.less_than(10),
            pa.Check.in_range(0, 10),
        ],
    )

    schema_columns["float_column"] = pa.Column(
        pa.Float,
        checks=[
            pa.Check.greater_than(-10),
            pa.Check.less_than(20),
            pa.Check.in_range(-10, 20),
        ],
    )

    schema_columns["str_column"] = pa.Column(
        pa.String,
        checks=[
            pa.Check.isin(["foo", "bar", "x", "xy"]),
            pa.Check.str_length(1, 3),
        ],
    )

    schema_columns["datetime_column"] = pa.Column(
        pa.DateTime,
        checks=[
            pa.Check.greater_than(pd.Timestamp("20100101")),
            pa.Check.less_than(pd.Timestamp("20200101")),
        ],
    )

    schema_columns["timedelta_column"] = pa.Column(
        pa.Timedelta,
        checks=[
            pa.Check.greater_than(pd.Timedelta(1000, unit="ns")),
            pa.Check.less_than(pd.Timedelta(10000, unit="ns")),
        ],
    )

    # Exercises every optional Column keyword at a non-default value.
    schema_columns["optional_props_column"] = pa.Column(
        pa.String,
        nullable=True,
        allow_duplicates=True,
        coerce=True,
        required=False,
        regex=True,
        checks=[pa.Check.str_length(1, 3)],
    )

    return pa.DataFrameSchema(
        columns=schema_columns,
        index=schema_index,
        coerce=False,
        strict=True,
    )
alpaca = tradeapi.REST(alpaca_key, alpaca_secret) # In[179]: # Format current date as ISO format start = date.today().isoformat() end = date.today().isoformat() # Set the tickers tickers = ["SPY", "AGG"] # Set timeframe to '1D' for Alpaca API timeframe = "1D" # Get current closing prices for SPY and AGG close = pd.Timestamp(today, tz="US/Central").isoformat() agg_spy = alpaca.get_barset(tickers, timeframe, start=start, end=end).df # Preview DataFrame agg_spy.tail() # In[96]: # Pick AGG and SPY close prices agg_close_price = agg_spy.iloc[0, 3] spy_close_price = agg_spy.iloc[0, 8] # Print AGG and SPY close prices print(f"Current AGG closing price: ${agg_close_price}") print(f"Current SPY closing price: ${spy_close_price}") # In[104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, timedelta

# Per-country cumulative counts, one row per (Date, Country).
df = pd.read_csv(
    'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv',
    parse_dates=['Date'])

# active cases
df['Active'] = df['Confirmed'] - df['Recovered'] - df['Deaths']

# NOTE(review): despite the name this is two days back, presumably to allow
# for the dataset lagging behind the calendar -- confirm the intended offset.
yesterday = date.today() - timedelta(days=2)
today_df = df[df['Date'] == pd.Timestamp(yesterday)]

# Rank once, then split into the ten largest countries and everything else.
ranked = today_df.sort_values(['Confirmed'], ascending=False)
# Copy so the row added below does not write into a slice of today_df.
top_10 = ranked[:10].copy()
# Sum only the numeric columns: Date cannot be summed and Country would be
# concatenated. The columns left out of the sum are filled with NaN on this
# row, and Country is set explicitly on the next line.
top_10.loc['rest-of-world'] = ranked[10:].sum(numeric_only=True)
top_10.loc['rest-of-world', 'Country'] = 'Rest of World'

# width of the donut
size = 0.3

# values for chart: one row per entry, [active, recovered + deaths].
# Columns are read by label, so the result does not depend on column order.
vals1 = np.array([
    [active, recovered + deaths]
    for active, recovered, deaths in zip(
        top_10['Active'], top_10['Recovered'], top_10['Deaths'])
])

cmap = plt.get_cmap('plasma')
# Three evenly spaced colours from the colormap per chart entry.
num_colors = len(top_10) * 3
theme_colors = [cmap(1. * i / num_colors) for i in range(num_colors)]