def get_date_ranges(self, start, end, scale='daily', include_bounds=True):
    '''
    Returns a list of dates sampled according to the specified parameters.

    Parameters
    ----------
    start : str
        First date that will be included.
    end : str
        Last date that will be included.
    scale : {'daily', 'weekly', 'monthly', 'quarterly', 'yearly'}
        Scale specifies the sampling intervals.
    include_bounds : boolean
        Include start and end in the result if they are not included yet.
    '''
    if scale not in ['daily', 'weekly', 'monthly', 'quarterly', 'yearly']:
        raise ValueError('Incorrect scale: %s' % scale)
    start = Timestamp(start)
    end = Timestamp(end)
    freq = dict(weekly='W', monthly='M', quarterly='3M', yearly='12M')
    offset = dict(weekly=off.Week(), monthly=off.MonthEnd(),
                  quarterly=off.QuarterEnd(), yearly=off.YearEnd())
    if scale == 'daily':
        ret = pd.date_range(start, end, freq='D')
    else:
        ret = pd.date_range(start + offset[scale], end, freq=freq[scale])
    ret = list(ret)
    if include_bounds:
        if start not in ret:
            ret = [start] + ret
        if end not in ret:
            ret = ret + [end]
    return ret
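# A minimal usage sketch for get_date_ranges; `obj` is hypothetical and
# stands in for an instance of whatever class defines the method.
dates = obj.get_date_ranges('2014-01-10', '2014-03-05', scale='monthly')
# -> [Timestamp('2014-01-10'), Timestamp('2014-01-31'),
#     Timestamp('2014-02-28'), Timestamp('2014-03-05')]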
def extract_data(self, path, condition, cols=None, ts_col=None,
                 from_date=None, to_date=None):
    """
    Extract data from a compound datatype.

    path       path to the data
    condition  condition to search for
    cols       columns to return
    ts_col     column containing datetime
    from_date  start of the datetime range to select
    to_date    end of the datetime range to select
    """
    data = self._f.get_node(path)
    cond = self._build_condition(condition=condition)
    if ts_col is not None:
        if (from_date is None) or (to_date is None):
            raise ValueError("Both from_date and to_date must be given "
                             "when ts_col is specified")
        f, t = Timestamp(from_date).value, Timestamp(to_date).value
        cond = cond + "&" + self._build_condition({ts_col: (">=", f)})
        cond = cond + "&" + self._build_condition({ts_col: ("<=", t)})
    if cols is None:
        cols = data.colnames
    df = [[x[col] for col in cols] for x in data.where(cond)]
    return DataFrame(df, columns=cols)
def get_dates_range(self, scale='auto', start=None, end=None,
                    date_max='2010-01-01'):
    '''
    Returns a list of dates sampled according to the specified parameters.

    :param scale: {'auto', 'maximum', 'daily', 'weekly', 'monthly',
        'quarterly', 'yearly'}
        Scale specifies the sampling intervals.
        'auto' will heuristically choose a scale for quick processing
    :param start: First date that will be included.
    :param end: Last date that will be included.
    '''
    if scale not in ['auto', 'maximum', 'daily', 'weekly', 'monthly',
                     'quarterly', 'yearly']:
        raise ValueError('Incorrect scale: %s' % scale)
    start = Timestamp(start or self._start.min() or date_max)
    # NaT, like NaN, is not equal to itself; pd.isnull catches both.
    start = Timestamp(date_max) if pd.isnull(start) else start
    end = Timestamp(end or max(Timestamp(self._end.max()),
                               self._start.max()))
    end = datetime.utcnow() if pd.isnull(end) else end
    start = start if self.check_in_bounds(start) else self._lbound
    end = end if self.check_in_bounds(end) else self._rbound

    if scale == 'auto':
        scale = self._auto_select_scale(start, end)
    if scale == 'maximum':
        start_dts = list(self._start.dropna().values)
        end_dts = list(self._end.dropna().values)
        dts = map(Timestamp, set(start_dts + end_dts))
        return [ts for ts in dts
                if self.check_in_bounds(ts) and start <= ts <= end]

    freq = dict(daily='D', weekly='W', monthly='M', quarterly='3M',
                yearly='12M')
    offset = dict(daily=off.Day(n=0), weekly=off.Week(),
                  monthly=off.MonthEnd(), quarterly=off.QuarterEnd(),
                  yearly=off.YearEnd())
    # for some reason, weekly date range gives one week less:
    end_ = end + off.Week() if scale == 'weekly' else end
    ret = list(pd.date_range(start + offset[scale], end_, freq=freq[scale]))
    ret = [dt for dt in ret if dt <= end]
    ret = [start] + ret if ret and start < ret[0] else ret
    ret = ret + [end] if ret and end > ret[-1] else ret
    return [ts for ts in ret if self.check_in_bounds(ts)]
def orbit_delta_list(start_time, end_time):
    orbits = []
    s_ts = Timestamp(start_time)
    e_ts = Timestamp(end_time)
    delta = e_ts - s_ts
    for i in range(delta.days + 1):
        orbits.append(str(s_ts + timedelta(i)))
    return orbits
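# Example: a span of three calendar days yields one entry per day.
assert orbit_delta_list('2015-03-14', '2015-03-16') == [
    '2015-03-14 00:00:00', '2015-03-15 00:00:00', '2015-03-16 00:00:00']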
def test_round_nearest_hour():
    ts1 = Timestamp("2015-12-04 10:15:00")
    ts2 = Timestamp("2015-12-04 10:31:00")
    ts3 = Timestamp("2015-12-31 23:35:00")
    ts4 = Timestamp("2016-1-1 00:05:00")
    assert round_to_nearest_hour(ts1) == datetime(2015, 12, 4, 10, 0)
    assert round_to_nearest_hour(ts2) == datetime(2015, 12, 4, 11, 0)
    assert round_to_nearest_hour(ts3) == datetime(2016, 1, 1, 0, 0)
    assert round_to_nearest_hour(ts4) == datetime(2016, 1, 1, 0, 0)
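# round_to_nearest_hour itself is not shown in this snippet; a minimal
# sketch consistent with the assertions above (an assumption, not
# necessarily the original implementation):
def round_to_nearest_hour(ts):
    # Timestamp.round('H') snaps to the nearest hour boundary,
    # rolling over to the next day when needed (e.g. 23:35 -> 00:00).
    return Timestamp(ts).round('H')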
def main(job_id, D):
    print("job_id", job_id, " params:", D)
    equities1 = {}
    register(
        'my-db-bundle',  # name this whatever you like
        viadb(equities1),
        calendar='SHSZ')
    parsed = {}
    parsed['initialize'] = None
    parsed['handle_data'] = None
    parsed['before_trading_start'] = None
    parsed['analyze'] = None
    parsed['algotext'] = None
    parsed['defines'] = ()
    parsed['capital_base'] = 1000000
    parsed['data'] = None
    parsed['bundle'] = 'my-db-bundle'
    # parsed['bundle'] = 'YAHOO'
    # parsed['bundle_timestamp'] = None
    parsed['bundle_timestamp'] = pd.Timestamp.utcnow()
    parsed['start'] = Timestamp('2017-03-01 13:30:00+0000', tz='UTC')
    parsed['end'] = Timestamp('2017-06-01 13:30:00+0000', tz='UTC')
    parsed['algofile'] = open(
        '/data/kanghua/workshop/strategy/campaign/hyperparam/example-new/zipline_strategy.py'
    )
    parsed['data_frequency'] = 'daily'
    parsed['print_algo'] = False
    parsed['output'] = 'os.devnull'
    parsed['local_namespace'] = None
    parsed['environ'] = os.environ
    parsed['bm_symbol'] = None
    # Below is what we expect spearmint to pass us:
    # parsed['algo_params'] = [47, 88.7, 7.7]
    # D = {}
    # D['timeperiod'] = 10
    # D['nbdevup'] = 1.00
    # D['nbdevdn'] = 1.00
    parsed['algo_params'] = D
    perf = _run(**parsed)
    StartV = perf['portfolio_value'][0]
    EndV = perf['portfolio_value'][-1]
    # spearmint wants to minimize, so return negative profit
    OPTIM = (StartV - EndV)
    return OPTIM
def adhoc_holidays(self):
    return list(
        chain(
            USNationalDaysofMourning,
            # ICE was only closed on the first day of the Hurricane Sandy
            # closings (it was not closed on 2012-10-30)
            [Timestamp('2012-10-29', tz='UTC')]))
def double_exponential_smoothing(series, alpha, beta, number_of_prediction):
    time_list = series.index.tolist()
    print(len(time_list))
    length = len(time_list)
    to_date = str(series.index.tolist()[length - 1])
    from_date = str(series.index.tolist()[length - 2])
    datetime_object1 = datetime.strptime(from_date, '%Y-%m-%d %H:%M:%S')
    datetime_object2 = datetime.strptime(to_date, '%Y-%m-%d %H:%M:%S')
    diff = (datetime_object2 - datetime_object1).total_seconds()
    s_len = number_of_prediction
    # extend the index by s_len steps, spaced like the last two observations
    for n in range(0, s_len):
        datetime_object3 = datetime.strptime(
            str(time_list[len(time_list) - 1]), '%Y-%m-%d %H:%M:%S')
        time_list.append(
            Timestamp(datetime_object3 + timedelta(seconds=int(diff))))
    print(len(time_list))
    result = [series[0]]
    for n in range(1, len(series) + s_len):
        if n == 1:
            level, trend = series[0], series[1] - series[0]
        if n >= len(series):  # we are forecasting
            value = result[-1]
        else:
            value = series[n]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    ser = pd.Series(result, index=time_list)
    return ser
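# Holt's linear (double exponential) smoothing, as implemented above:
#   level_t = alpha * y_t + (1 - alpha) * (level_{t-1} + trend_{t-1})
#   trend_t = beta * (level_t - level_{t-1}) + (1 - beta) * trend_{t-1}
# Hypothetical usage with a toy hourly series:
s = pd.Series([3.0, 4.0, 5.5, 7.0],
              index=pd.date_range('2017-01-01', periods=4, freq='H'))
forecast = double_exponential_smoothing(s, alpha=0.5, beta=0.5,
                                        number_of_prediction=2)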
def get_zipline_hist(symbol, field, end_dt, bar_count=1, frequency='1d',
                     data_frequency='daily', bundle=None, calendar=None,
                     dp=None):
    '''
    Gets daily historical price data for `symbol` from a `zipline` data
    bundle.

    :returns: `field` data going back `bar_count` from `end_dt` for `symbol`
    :rtype: `pandas.Series` with `name` attribute set to
        `zipline.assets._assets.Equity`
    :param symbol: the ticker symbol of the instrument
    :param field: the desired OHLC field
    :param end_dt: the ending datetime of the series
    :param bar_count: the number of points in the timeseries
    :param frequency: the frequency of the timeseries (e.g. "1d" or "1m")
    :param bundle: optionally specify the `zipline` data bundle to use
    :param calendar: optionally specify the `zipline` calendar to use
    :type symbol: `str`
    :type field: `str`
    :type end_dt: `datetime.datetime` type object
    :type bar_count: `int`
    :type frequency: `str`
    :type bundle: `zipline.data.bundles.core.BundleData`
    :type calendar: `zipline.utils.calendars.exchange_calendar_nyse` type

    Snap a date to the calendar with::

        get_calendar(cal).all_sessions.asof(Timestamp(end_dt))

    Get the last traded datetime for a symbol and calendar dt with::

        dp.get_last_traded_dt(
            dp.asset_finder.lookup_symbol(symbol, None),
            get_calendar(cal).all_sessions.asof(Timestamp(end_dt)),
            'daily',
        )

    Get the session index with::

        get_calendar(cal).all_sessions.searchsorted(Timestamp(end_dt))
    '''
    dp = get_zipline_dp(bundle, calendar) if dp is None else dp
    if type(symbol) is not Equity:
        symbol = dp.asset_finder.lookup_symbol(symbol, None)
    return dp.get_history_window(
        [symbol],
        Timestamp(end_dt),
        bar_count,
        frequency,
        field,
        data_frequency,
    ).iloc[:, 0]
def check_in_bounds(self, date):
    '''Check that the given date falls within the left and right bounds.

    :param date: date to validate left/right bounds for
    '''
    dt = Timestamp(date)
    return ((self._lbound is None or dt >= self._lbound) and
            (self._rbound is None or dt <= self._rbound))
def started_after(self, date):
    '''
    Leaves only those objects whose first version started after the
    specified date.

    :param date: date string to use in calculation
    '''
    dt = Timestamp(date)
    starts = self.groupby(self._oid).apply(lambda df: df._start.min())
    oids = set(starts[starts > dt].index.tolist())
    return self[self._oid.apply(lambda v: v in oids)]
def test_ephemeris_log():
    """Test ephemeris data output by the hdf5 tool. Currently fails
    because values are compared exactly rather than to approximate
    precision.
    """
    log_datafile \
        = "./data/serial_link_log_20150314-190228_dl_sat_fail_test1.log.json.dat"
    filename = log_datafile + ".hdf5"
    processor = StoreToHDF5()
    with JSONLogIterator(log_datafile) as log:
        for delta, timestamp, msg in log.next():
            processor.process_message(delta, timestamp, msg)
        processor.save(filename)
    assert os.path.isfile(filename)
    with pd.HDFStore(filename) as store:
        assert store.ephemerides[:, :, 27].to_dict() \
            == {Timestamp('2015-03-15 03:59:44'):
                {'c_rs': nan, 'toe_wn': nan, 'prn': nan, 'inc_dot': nan,
                 'tgd': nan, 'c_rc': nan, 'toc_wn': nan, 'sqrta': nan,
                 'omegadot': nan, 'inc': nan, 'toe_tow': nan, 'c_uc': nan,
                 'c_us': nan, 'valid': nan, 'm0': nan, 'toc_tow': nan,
                 'dn': nan, 'ecc': nan, 'c_ic': nan, 'c_is': nan,
                 'healthy': nan, 'af1': nan, 'w': nan, 'af0': nan,
                 'omega0': nan, 'af2': nan},
                Timestamp('2015-03-15 04:00:00'):
                {'c_rs': 15.96875, 'toe_wn': 1836.0, 'prn': 27.0,
                 'inc_dot': 2.7322566666000417e-10,
                 'tgd': -1.1175870895385742e-08, 'c_rc': 320.96875,
                 'toc_wn': 1836.0, 'sqrta': 5153.6934394836426,
                 'omegadot': -7.7553230403337661e-09,
                 'inc': 0.98869366123094204, 'toe_tow': 14400.0,
                 'c_uc': 9.2200934886932373e-07,
                 'c_us': 3.468245267868042e-06, 'valid': 1.0,
                 'm0': -2.3437882587715801, 'toc_tow': 14400.0,
                 'dn': 4.0358823964157481e-09, 'ecc': 0.019611002877354622,
                 'c_ic': 2.4586915969848633e-07,
                 'c_is': 1.4528632164001465e-07, 'healthy': 1.0,
                 'af1': 2.6147972675971687e-12, 'w': -1.6667971409741453,
                 'af0': 0.00042601628229022026,
                 'omega0': -2.7040169769321869, 'af2': 0.0}}
def lazy_timestamps():
    start = BAR_RANGE[0]
    end = BAR_RANGE[1]
    exchange_opens = datetime.time(hour=13, minute=30)  # UTC
    exchange_closes = datetime.time(hour=20, minute=0)  # UTC
    step = STEP
    running_timestamp = start
    while running_timestamp <= end:
        yield running_timestamp
        if exchange_opens <= running_timestamp.time() <= exchange_closes:
            running_timestamp += step
        elif running_timestamp.time() < exchange_opens:
            # before the open: jump forward to today's open
            d = running_timestamp.date()
            z = running_timestamp.tz
            # Timestamp(d, exchange_opens, z) would pass the time as the
            # `freq` argument; combine date and time explicitly instead.
            running_timestamp = Timestamp(
                datetime.datetime.combine(d, exchange_opens), tz=z)
        elif running_timestamp.time() > exchange_closes:
            # after the close: jump to the next day's open
            d = running_timestamp.date()
            z = running_timestamp.tz
            running_timestamp = datetime.datetime.combine(
                d + datetime.timedelta(days=1), exchange_opens)
            running_timestamp = running_timestamp.replace(tzinfo=pytz.UTC)
            running_timestamp = Timestamp(running_timestamp)
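# Hypothetical usage; BAR_RANGE and STEP are module-level globals the
# generator expects, e.g.:
#   BAR_RANGE = (Timestamp('2016-01-04 13:30', tz='UTC'),
#                Timestamp('2016-01-05 20:00', tz='UTC'))
#   STEP = datetime.timedelta(minutes=1)
import itertools
first_five = list(itertools.islice(lazy_timestamps(), 5))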
def __init__(self, initial_amount=0, TS=None, **kwargs):
    """
    Initialize the cash register with an amount

    kwargs
    ======
    TS : TimeStamp for the data
    """
    # A default of datetime.datetime.now() would be evaluated only once,
    # at definition time; resolve the timestamp at call time instead.
    TS = datetime.datetime.now() if TS is None else TS
    self._cash = []
    self._columns = ['A', 'TS', 'I']
    self._cash.append({
        "A": initial_amount,
        "TS": Timestamp(TS),
        "I": "Opening Balance"
    })
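# The pitfall the None-default above avoids, demonstrated in isolation:
# a default argument is evaluated once, at function definition time.
import datetime
import time

def stamped(ts=datetime.datetime.now()):
    return ts

a = stamped()
time.sleep(1)
b = stamped()
assert a == b  # both calls reuse the single definition-time value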
def test_interpolate_gps_time():
    filename = "data/serial-link-20150429-163230.log.json.hdf5"
    assert os.path.isfile(filename)
    with pd.HDFStore(filename) as store:
        idx = store.rover_spp.T.host_offset.reset_index()
        model = t.interpolate_gpst_model(idx)
        assert isinstance(model, pd.stats.ols.OLS)
        assert np.allclose([model.beta.x, model.beta.intercept],
                           [1.00000368376, -64.2579561376])
        init_offset = store.rover_spp.T.host_offset[0]
        init_date = store.rover_spp.T.index[0]
        f = lambda t1: t.apply_gps_time(t1 * t.MSEC_TO_SEC, init_date, model)
        dates = store.rover_logs.T.host_offset.apply(f)
        l = dates.tolist()
        start, end = l[0], l[-1]
        assert start == Timestamp("2015-04-29 23:32:55.272075")
        assert end == Timestamp("2015-04-29 23:57:46.457568")
        init_secs_offset \
            = store.rover_spp.T.host_offset[0] - store.rover_logs.T.index[0]
        assert np.allclose([init_secs_offset * t.MSEC_TO_SEC], [55.859])
        assert (init_date - start) == Timedelta('0 days 00:00:55.848925')
        assert (end - init_date) == Timedelta('0 days 00:23:55.336568')
        assert pd.DatetimeIndex(dates).is_monotonic_increasing
        assert dates.shape == (2457, )
def on_date(self, date, only_count=False):
    '''
    Filters out only the rows that match the specified date.
    Works only on a Result that has _start and _end columns.

    :param date: date can be anything Pandas.Timestamp supports parsing
    :param only_count: return back only the match count
    '''
    if not self.check_in_bounds(date):
        raise ValueError('Date %s is not in the queried range.' % date)
    date = Timestamp(date)
    after_start = self._start <= date
    before_end = (self._end > date) | self._end_isnull
    if only_count:
        return np.sum(before_end & after_start)
    else:
        return self.filter(before_end & after_start)
def add(self, A, TS=None, I="Cash added", **kwargs):
    """
    Add cash to this register

    A: Amount of cash
    TS: Date or time in specified format

    **kwargs
    ========
    You could add any number of keyword arguments. Each argument is
    added as a separate column.
    """
    # Resolve the timestamp at call time (see __init__ above).
    TS = datetime.datetime.now() if TS is None else TS
    D = {'A': abs(A), 'TS': Timestamp(TS), 'I': I}
    self._cash.append(D)
    return self.balance
def on_date(self, date, only_count=False):
    '''
    Filters out only the rows that match the specified date.
    Works only on a Result that has _start and _end columns.

    Parameters
    ----------
    date : str
    only_count : boolean
    '''
    if not self.check_in_bounds(date):
        raise ValueError('Date %s is not in the queried range.' % date)
    date = Timestamp(date)
    after_start = self._start <= date
    before_end = (self._end > date) | self._end.isnull()
    if only_count:
        return np.sum(before_end & after_start)
    else:
        return self[before_end & after_start]
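# Hypothetical usage, assuming `res` is a Result with _start/_end columns:
active = res.on_date('2014-01-01')                     # matching rows
n_active = res.on_date('2014-01-01', only_count=True)  # just the count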
def test_astype(self):
    # GH 13149, GH 13209
    idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])

    result = idx.astype(object)
    expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object)
    tm.assert_index_equal(result, expected)

    result = idx.astype(int)
    expected = Int64Index([1463356800000000000] +
                          [-9223372036854775808] * 3, dtype=np.int64)
    tm.assert_index_equal(result, expected)

    rng = date_range('1/1/2000', periods=10)
    result = rng.astype('i8')
    self.assert_index_equal(result, Index(rng.asi8))
    self.assert_numpy_array_equal(result.values, rng.asi8)
def test_ticks():
    assert len(get_ticks("NI1804.XSGE", end_dt="2018-03-16",
                         count=100)) == 100
    assert get_ticks("NI1804.XSGE", end_dt="2018-03-16", count=10, fields=[
        "current", "volume", "position", "a1_v", "a1_p", "b1_v", "b1_p"
    ]).shape == (10, 7)
    assert len(get_ticks("000001.XSHE", end_dt="2018-03-16", count=10)) == 10
    assert get_ticks(
        "SM1809.XZCE", '2018-07-06',
        '2018-07-07').iloc[3][0] == Timestamp('2018-07-06 09:00:01.500000')
    assert get_ticks("000001.XSHE", end_dt="2018-03-16", count=10, fields=[
        "a1_v", "a2_v", "a3_v", "a4_v", "a5_v",
        "b1_v", "b2_v", "b3_v", "b4_v", "b5_v"
    ]).shape == (10, 10)
def triple_exponential_smoothing(series, slen, alpha, beta, gamma, n_preds):
    time_list = series.index.tolist()
    print(len(time_list))
    length = len(time_list)
    to_date = str(series.index.tolist()[length - 1])
    from_date = str(series.index.tolist()[length - 2])
    datetime_object1 = datetime.strptime(from_date, '%Y-%m-%d %H:%M:%S')
    datetime_object2 = datetime.strptime(to_date, '%Y-%m-%d %H:%M:%S')
    diff = (datetime_object2 - datetime_object1).total_seconds()
    # extend the index by n_preds steps, spaced like the last two observations
    for n in range(0, n_preds):
        datetime_object3 = datetime.strptime(
            str(time_list[len(time_list) - 1]), '%Y-%m-%d %H:%M:%S')
        time_list.append(
            Timestamp(datetime_object3 + timedelta(seconds=int(diff))))
    print(len(time_list))
    result = []
    seasonals = initial_seasonal_components(series, slen)
    for i in range(len(series) + n_preds):
        if i == 0:  # initial values
            smooth = series[0]
            trend = initial_trend(series, slen)
            result.append(series[0])
            continue
        if i >= len(series):  # we are forecasting
            m = i - len(series) + 1
            result.append((smooth + m * trend) + seasonals[i % slen])
        else:
            val = series[i]
            last_smooth, smooth = smooth, alpha * (
                val - seasonals[i % slen]) + (1 - alpha) * (smooth + trend)
            trend = beta * (smooth - last_smooth) + (1 - beta) * trend
            seasonals[i % slen] = gamma * (val - smooth) + (
                1 - gamma) * seasonals[i % slen]
            result.append(smooth + trend + seasonals[i % slen])
    ser = pd.Series(result, index=time_list)
    return ser
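# initial_trend and initial_seasonal_components are referenced above but not
# shown; a sketch of the standard Holt-Winters initialization (an
# assumption, not necessarily the original helpers):
def initial_trend(series, slen):
    # average slope between corresponding points of the first two seasons
    s = 0.0
    for i in range(slen):
        s += (series[i + slen] - series[i]) / slen
    return s / slen

def initial_seasonal_components(series, slen):
    # average deviation of each in-season position from its season's mean
    seasonals = {}
    n_seasons = len(series) // slen
    season_averages = [series[slen * j:slen * j + slen].mean()
                       for j in range(n_seasons)]
    for i in range(slen):
        seasonals[i] = sum(series[slen * j + i] - season_averages[j]
                           for j in range(n_seasons)) / n_seasons
    return seasonals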
def date(self, date):
    '''
    Pass in the date used in the original query.

    Parameters
    ----------
    date : str
        Date (date range) that was queried:
            date -> 'd', '~d', 'd~', 'd~d'
            d -> '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d'
    '''
    if date is not None:
        split = date.split('~')
        if len(split) == 1:
            self._lbound = Timestamp(date)
            self._rbound = Timestamp(date)
        elif split[0] == '':
            self._rbound = Timestamp(split[1])
        elif split[1] == '':
            self._lbound = Timestamp(split[0])
        else:
            self._lbound = Timestamp(split[0])
            self._rbound = Timestamp(split[1])
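# The four accepted forms, illustrated (hypothetical usage on a `res`
# object exposing this setter):
res.date('2014-01-01')             # 'd'   -> both bounds set to the date
res.date('~2014-01-01')            # '~d'  -> right bound only
res.date('2014-01-01~')            # 'd~'  -> left bound only
res.date('2014-01-01~2014-06-30')  # 'd~d' -> both bounds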
def test_hdf5():
    log_datafile \
        = "./data/serial_link_log_20150314-190228_dl_sat_fail_test1.log.json.dat"
    filename = log_datafile + ".hdf5"
    processor = StoreToHDF5()
    hdf5_write(log_datafile, filename)
    assert os.path.isfile(filename)
    with pd.HDFStore(filename) as store:
        assert store
        assert isinstance(store.base_obs, pd.Panel)
        assert store.base_obs.shape == (91, 6, 5)
        assert store.base_obs[0, :, :].to_dict() \
            == {10: {'L': 10374.46875, 'P': 22165269.800000001, 'cn0': 22.0,
                     'lock': 3381.0, 'host_offset': 58353.0,
                     'host_time': 1426385006.0},
                12: {'L': 64486.11328125, 'P': 20302353.989999998,
                     'cn0': 46.0, 'lock': 63239.0, 'host_offset': 58353.0,
                     'host_time': 1426385006.0},
                14: {'L': 120079.03125, 'P': 22980000.0, 'cn0': 18.0,
                     'lock': 64615.0, 'host_offset': 58353.0,
                     'host_time': 1426385006.0},
                16: {'L': 99532.98828125, 'P': 20753378.68, 'cn0': 48.0,
                     'lock': 31377.0, 'host_offset': 58353.0,
                     'host_time': 1426385006.0},
                27: {'L': -5162.3359375, 'P': 19956188.879999999,
                     'cn0': 18.0, 'lock': 1482.0, 'host_offset': 58353.0,
                     'host_time': 1426385006.0}}
        assert isinstance(store.ephemerides, pd.Panel)
        assert store.ephemerides.shape == (2, 29, 3)
        assert isinstance(store.rover_obs, pd.Panel)
        assert store.rover_obs.shape == (3, 6, 8)
        assert store.rover_obs[0, :, :].to_dict() \
            == {0: {'L': 49613.0, 'P': 22980000.0, 'cn0': 14.0,
                    'lock': 10583.0, 'host_offset': 0.0,
                    'host_time': 1426384948.0},
                6: {'L': -154459.46484375, 'P': 21443899.43, 'cn0': 29.0,
                    'lock': 24867.0, 'host_offset': 0.0,
                    'host_time': 1426384948.0},
                10: {'L': -4688.3203125, 'P': 22089093.379999999,
                     'cn0': 14.0, 'lock': 24238.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0},
                12: {'L': 82371.0859375, 'P': 20239084.489999998,
                     'cn0': 41.0, 'lock': 55100.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0},
                14: {'L': 161655.17578125, 'P': 22931626.530000001,
                     'cn0': 21.0, 'lock': 6171.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0},
                16: {'L': 165409.55078125, 'P': 20705699.739999998,
                     'cn0': 37.0, 'lock': 60164.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0},
                27: {'L': -33727.6875, 'P': 19874306.629999999, 'cn0': 39.0,
                     'lock': 21822.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0},
                29: {'L': -80021.1796875, 'P': 19864244.440000001,
                     'cn0': 50.0, 'lock': 42862.0, 'host_offset': 0.0,
                     'host_time': 1426384948.0}}
        assert isinstance(store.rover_rtk_ned, pd.DataFrame)
        assert store.rover_rtk_ned.shape == (0, 0)
        assert isinstance(store.rover_rtk_ecef, pd.DataFrame)
        assert store.rover_rtk_ecef.shape == (0, 0)
        assert isinstance(store.rover_spp, pd.DataFrame)
        assert store.rover_spp.shape == (9, 5)
        assert store.rover_spp.ix[['n_sats', 'x']].to_dict() \
            == {Timestamp('2015-03-15 02:02:23.600000'):
                {'n_sats': 8.0, 'x': -2704372.4505344247},
                Timestamp('2015-03-15 02:02:23.700000'):
                {'n_sats': 8.0, 'x': -2704372.2245847895},
                Timestamp('2015-03-15 02:02:23.800000'):
                {'n_sats': 8.0, 'x': -2704372.5723111397},
                Timestamp('2015-03-15 02:02:23.900000'):
                {'n_sats': 8.0, 'x': -2704371.0836650538},
                Timestamp('2015-03-15 02:02:24'):
                {'n_sats': 8.0, 'x': -2704370.9813769697}}
        assert isinstance(store.rover_tracking, pd.Panel)
        assert store.rover_tracking.shape == (19, 5, 318)
        rd = store.rover_tracking[:, :, 0].to_dict()
        assert rd.keys() == [0, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 18,
                             19, 20, 21, 25, 27, 29]
        assert rd[0] \
            == {'cn0': 3.7272727489471436, 'prn': 0.0, 'state': 1.0,
                'host_offset': 0.0, 'host_time': 1426384948.0}
        assert rd[29] \
            == {'cn0': 12.685534477233887, 'prn': 29.0, 'state': 1.0,
                'host_offset': 0.0, 'host_time': 1426384948.0}
        assert isinstance(store.rover_iar_state, pd.DataFrame)
        assert store.rover_iar_state.shape == (0, 0)
def test_construction_dti_with_mixed_timezones(self):
    # GH 11488 (not changed, added explicit tests)

    # no tz results in DatetimeIndex
    result = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    exp = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # same tz results in DatetimeIndex
    result = DatetimeIndex([
        Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
    ], name='idx')
    exp = DatetimeIndex(
        [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')],
        tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # same tz results in DatetimeIndex (DST)
    result = DatetimeIndex([
        Timestamp('2011-01-01 10:00', tz='US/Eastern'),
        Timestamp('2011-08-01 10:00', tz='US/Eastern')
    ], name='idx')
    exp = DatetimeIndex(
        [Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # different tz coerces tz-naive to tz-aware
    result = DatetimeIndex([
        Timestamp('2011-01-01 10:00'),
        Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], name='idx')
    exp = DatetimeIndex(
        [Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')],
        tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))

    # tz mismatch with tz-aware data raises TypeError/ValueError
    with tm.assertRaises(ValueError):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ], name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ], tz='Asia/Tokyo', name='idx')

    with tm.assertRaises(ValueError):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ], tz='US/Eastern', name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        # passing tz should result in a DatetimeIndex, then the mismatch
        # raises TypeError
        Index([
            pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern')
        ], tz='Asia/Tokyo', name='idx')
def check_in_bounds(self, date):
    dt = Timestamp(date)
    return ((self._lbound is None or dt >= self._lbound) and
            (self._rbound is None or dt <= self._rbound))
]
df_newslist = pd.DataFrame(columns=columns)
for _, list_news in list_cluster:
    dict_onepiece = {}
    dict_onepiece['source_id'] = df_sql.iloc[list_news[0]]['id']
    dict_onepiece['title'] = df_sql.iloc[list_news[0]]['title']
    dict_onepiece['summary'] = df_sql.iloc[list_news[0]]['summary']
    dict_onepiece['fetch_at'] = pd.to_datetime(
        df_sql.iloc[list_news[0]]['createDate'], errors='coerce')
    dict_onepiece['publish_at'] = pd.to_datetime(
        df_sql.iloc[list_news[0]]['publishDate'], errors='coerce')
    dict_onepiece['source'] = df_sql.iloc[list_news[0]]['source']
    # filter multiple docs from the same source
    list_sources = [dict_onepiece['source']]
    dict_onepiece['source_url'] = df_sql.iloc[list_news[0]]['sourceUrl']
    dict_onepiece['created_at'] = Timestamp('now')
    json_similar = []
    # if len(list_news) is only 1, this loop simply does not run
    for news_similar in list_news[1:]:
        if df_sql.iloc[news_similar]['source'] not in list_sources:
            dict_similar = {}
            dict_similar['id'] = df_sql.iloc[news_similar]['id']
            dict_similar['title'] = df_sql.iloc[news_similar]['title']
            dict_similar['source'] = df_sql.iloc[news_similar]['source']
            dict_similar['sourceUrl'] = df_sql.iloc[news_similar]['sourceUrl']
            json_similar.append(dict_similar)
            list_sources.append(df_sql.iloc[news_similar]['source'])
    if not json_similar:
        dict_onepiece['info_sources'] = None
    else:
def __init__(self, *args, **kwargs):
    unittest.TestCase.__init__(self, *args, **kwargs)
    self.df = pd.DataFrame({
        'Session ID': pd.Series([
            5, 5, 5, 10, 10, 15, 15, 15, 20, 20, 20, 20,
            25, 25, 25, 25, 25, 25, 30, 30
        ], dtype=np.int32),
        'Timestamp': pd.Series([
            Timestamp('2014-04-07 17:13:46.713'),
            Timestamp('2014-04-07 17:20:56.973'),
            Timestamp('2014-04-07 17:21:19.602'),
            Timestamp('2014-04-04 07:44:14.590'),
            Timestamp('2014-04-04 07:45:20.245'),
            Timestamp('2014-04-05 08:14:28.645'),
            Timestamp('2014-04-05 08:15:49.200'),
            Timestamp('2014-04-05 08:19:55.455'),
            Timestamp('2014-04-03 14:42:25.879'),
            Timestamp('2014-04-03 14:42:43.585'),
            Timestamp('2014-04-03 14:44:40.147'),
            Timestamp('2014-04-03 14:45:09.199'),
            Timestamp('2014-04-07 03:32:19.078'),
            Timestamp('2014-04-07 03:32:44.139'),
            Timestamp('2014-04-07 03:32:47.525'),
            Timestamp('2014-04-07 03:32:48.647'),
            Timestamp('2014-04-07 03:32:57.321'),
            Timestamp('2014-04-07 03:32:59.910'),
            Timestamp('2014-04-06 11:11:19.232'),
            Timestamp('2014-04-06 11:11:19.232')
        ]),
        'Item ID': np.array([
            214530776, 214530776, 214530776, 214820942, 214826810,
            214555903, 214547255, 214547255, 214829282, 214718203,
            214829282, 214819552, 214836761, 214839313, 214839313,
            214839313, 214839313, 214839313, 214820201, 214820201
        ], dtype=np.int32)
    })
    self.gb = self.df.groupby('Session ID')
def adhoc_holidays(self):
    return list(chain([Timestamp('2012-10-29', tz='UTC')]))
    (['QQQ*', 'SPY', 'VIX'], 2 * 252, 'hive', 2, ['symbol', 'year'],
     [('symbol', '==', 'SPY')]),
    (['QQQ!', 'SPY', 'VIX'], 2 * 252, 'hive', 2, ['symbol', 'year'],
     [('symbol', '==', 'SPY')]),
    (['Q%QQ', 'SPY', 'VIX'], 2 * 252, 'hive', 2, ['symbol', 'year'],
     [('symbol', '==', 'SPY')]),
    (['NOW', 'SPY', 'VIX'], 10, 'hive', 2, ['symbol', 'dtTrade'],
     [('symbol', '==', 'SPY')]),
    (['NOW', 'SPY', 'VIX'], 10, 'hive', 2, ['symbol', 'dtTrade'],
     [('dtTrade', '==', '2005-01-02T00:00:00.000000000')]),
    (['NOW', 'SPY', 'VIX'], 10, 'hive', 2, ['symbol', 'dtTrade'],
     [('dtTrade', '==', Timestamp('2005-01-01 00:00:00'))]),
])
def test_frame_write_read_verify(tempdir, input_symbols, input_days,
                                 file_scheme, input_columns, partitions,
                                 filters):
    # Generate temp directory for parquet files
    fdir = str(tempdir)
    fname = os.path.join(fdir, 'test')

    # Generate test input frame
    input_df = frame_symbol_dtTrade_type_strike(days=input_days,
                                                symbols=input_symbols,
                                                numbercolumns=input_columns)
    input_df.reset_index(inplace=True)
    write(fname,
def test_construction_index_with_mixed_timezones_with_NaT(self):
    # GH 11488
    result = Index(
        [pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')],
        name='idx')
    exp = DatetimeIndex(
        [pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')],
        name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNone(result.tz)

    # same tz results in DatetimeIndex
    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
    ], name='idx')
    exp = DatetimeIndex([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00')
    ], tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # same tz results in DatetimeIndex (DST)
    result = Index([
        Timestamp('2011-01-01 10:00', tz='US/Eastern'),
        pd.NaT, Timestamp('2011-08-01 10:00', tz='US/Eastern')
    ], name='idx')
    exp = DatetimeIndex([
        Timestamp('2011-01-01 10:00'), pd.NaT,
        Timestamp('2011-08-01 10:00')
    ], tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # different tz results in Index(dtype=object)
    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], name='idx')
    exp = Index([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertFalse(isinstance(result, DatetimeIndex))

    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], name='idx')
    exp = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertFalse(isinstance(result, DatetimeIndex))

    # all NaT
    result = Index([pd.NaT, pd.NaT], name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNone(result.tz)

    # all NaT with tz
    result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)