def update_holdings(self, end):
    if not self.transactions.all():
        return
    hlds = {}
    for txn in self.transactions.all():
        sym = txn.security.symbol
        if sym not in hlds:
            # Transaction for a new security: find the latest holding record
            # for this symbol. If found, the current transaction must have
            # been processed in the past and will be skipped below; if not
            # found, start an empty holding record here.
            hlds[sym] = self.get_latest_record_or_empty(txn)
        # Convert dates into pandas timestamps with a business offset to
        # automatically skip holidays. Note that a holding record is kept for
        # every business day, so the detailed time of the transaction is
        # stripped off.
        hld_date = to_business_timestamp(hlds[sym].date)
        txn_date = to_business_timestamp(date_(txn.datetime))
        if txn_date < hld_date:
            # already processed in the past
            continue
        elif txn_date == hld_date:
            # there might be multiple transactions in a day
            self._transact_and_save(hlds[sym], txn)
        else:  # txn_date > hld_date
            self.fill_in_gaps(hlds[sym],
                              pd.bdate_range(start=hld_date + 1,
                                             end=txn_date - 1))
            self.insert_holding(hlds[sym], txn_date)
            self._transact_and_save(hlds[sym], txn)
    # fill the gaps between the last transaction and `end`
    for sym, hld in hlds.items():
        self.fill_in_gaps(hld,
                          pd.bdate_range(start=to_business_timestamp(hld.date) + 1,
                                         end=end))
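A minimal sketch of the gap-filling call above, assuming holdings live on a plain business-day calendar (the snippet's `to_business_timestamp` helper is not shown in the source, so `BDay` offset arithmetic stands in for its integer steps): the business days strictly between the last holding record and the next transaction are the ones that need carry-forward records.

import pandas as pd
from pandas.tseries.offsets import BDay

hld_date = pd.Timestamp('2016-01-04')  # Monday: last recorded holding
txn_date = pd.Timestamp('2016-01-08')  # Friday: next transaction
# business days strictly between the two dates, i.e. the gap to fill
gap = pd.bdate_range(start=hld_date + BDay(1), end=txn_date - BDay(1))
print(list(gap.date))  # [2016-01-05, 2016-01-06, 2016-01-07]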
def test_daterange_bug_456(self):
    # GH #456
    rng1 = bdate_range('12/5/2011', '12/5/2011', freq='C')
    rng2 = bdate_range('12/2/2011', '12/5/2011', freq='C')
    rng2.freq = CDay()

    result = rng1.union(rng2)
    assert isinstance(result, DatetimeIndex)
def test_all_custom_freq(self, freq):
    # should not raise
    bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri',
                holidays=['2009-03-14'])

    bad_freq = freq + 'FOO'
    msg = 'invalid custom frequency string: {freq}'
    with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)):
        bdate_range(START, END, freq=bad_freq)
def test_naive_aware_conflicts(self):
    naive = bdate_range(START, END, freq=BDay(), tz=None)
    aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")

    msg = 'tz-naive.*tz-aware'
    with tm.assert_raises_regex(TypeError, msg):
        naive.join(aware)
    with tm.assert_raises_regex(TypeError, msg):
        aware.join(naive)
def test_cdaterange_holidays(self):
    result = bdate_range('2013-05-01', periods=3, freq='C',
                         holidays=['2013-05-01'])
    expected = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06'])
    tm.assert_index_equal(result, expected)

    # raise with non-custom freq
    msg = ('a custom frequency string is required when holidays or '
           'weekmask are passed, got frequency B')
    with tm.assert_raises_regex(ValueError, msg):
        bdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])
def bizday_distance(t1, t2, offset=fi_bd):
    """
    For two given datetimes, calculate their difference in number of
    business days, e.g.:
        bizday_distance(dt.datetime(2017, 1, 4), dt.datetime(2017, 1, 5)) == 1
    """
    warnings.warn('Deprecated. Use cbday_distance instead')
    if t1 < t2:
        dday = max(len(pd.bdate_range(t1, t2, freq=offset)) - 1, 0)
    else:
        dday = min(1 - len(pd.bdate_range(t2, t1, freq=offset)), 0)
    return dday
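The distance counts calendar entries from `bdate_range` minus one, so weekends (and any holidays baked into the custom offset) are skipped for free. A quick sanity check with the default business calendar (illustrative dates; the source's `fi_bd` offset is not shown):

import datetime as dt
import pandas as pd

# Wed -> Thu: one business day apart
assert len(pd.bdate_range(dt.datetime(2017, 1, 4),
                          dt.datetime(2017, 1, 5))) - 1 == 1
# Fri -> Mon: still one business day, the weekend is skipped
assert len(pd.bdate_range(dt.datetime(2017, 1, 6),
                          dt.datetime(2017, 1, 9))) - 1 == 1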
def test_cdaterange_weekmask(self):
    result = bdate_range('2013-05-01', periods=3, freq='C',
                         weekmask='Sun Mon Tue Wed Thu')
    expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05'])
    tm.assert_index_equal(result, expected)

    # raise with non-custom freq
    msg = ('a custom frequency string is required when holidays or '
           'weekmask are passed, got frequency B')
    with pytest.raises(ValueError, match=msg):
        bdate_range('2013-05-01', periods=3, weekmask='Sun Mon Tue Wed Thu')
def test_constructor(self):
    bdate_range(START, END, freq=BDay())
    bdate_range(START, periods=20, freq=BDay())
    bdate_range(end=START, periods=20, freq=BDay())

    msg = 'periods must be a number, got B'
    with tm.assert_raises_regex(TypeError, msg):
        date_range('2011-1-1', '2012-1-1', 'B')
    with tm.assert_raises_regex(TypeError, msg):
        bdate_range('2011-1-1', '2012-1-1', 'B')

    msg = 'freq must be specified for bdate_range; use date_range instead'
    with tm.assert_raises_regex(TypeError, msg):
        bdate_range(START, END, periods=10, freq=None)
def test_update_holdings(self):
    days = pd.bdate_range(start='2016-01-01', periods=5)
    transaction_factory('buy', self.p1, self.s1, days[0],
                        price=10, shares=200, fee=100)
    transaction_factory('sell', self.p1, self.s1, days[1],
                        price=8, shares=100, fee=50)
    transaction_factory('dividend', self.p1, self.s1, days[2], dividend=35)
    transaction_factory('split', self.p1, self.s1, days[3], ratio=1.5)
    transaction_factory('sell', self.p1, self.s1, days[4],
                        price=12, shares=100, fee=90)
    self.p1.update_holdings(days[4] + 1)

    hlds = list(Holding.objects.all())
    self.assertEqual(len(hlds), 6)
    self.assertEqual(hlds[0].shares, 200)
    self.assertAlmostEqual(hlds[0].cost, -2100)
    self.assertEqual(hlds[0].gain, 0)
    self.assertEqual(hlds[0].dividend, 0)
    self.assertEqual(hlds[1].shares, 100)
    self.assertAlmostEqual(hlds[1].cost, -1050)
    self.assertAlmostEqual(hlds[1].gain, -300)
    self.assertEqual(hlds[1].dividend, 0)
    self.assertEqual(hlds[2].shares, 100)
    self.assertEqual(hlds[2].dividend, 35)
    self.assertEqual(hlds[3].shares, 150)
    self.assertAlmostEqual(hlds[3].cost, -1050)
    self.assertEqual(hlds[4].shares, 50)
    self.assertEqual(hlds[4].cost, -350)
    self.assertAlmostEqual(hlds[4].gain, 110)
    self.assertEqual(hlds[4].dividend, 35)
    self.assertEqual(hlds[5].shares, 50)
def setUp(self):
    self.data = {"A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                 "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                 "C": np.arange(10, dtype=np.float64),
                 "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    self.dates = bdate_range("1/1/2011", periods=10)

    self.orig = pd.DataFrame(self.data, index=self.dates)
    self.iorig = pd.DataFrame(self.data, index=self.dates)

    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(self.data, index=self.dates,
                                  default_kind="integer")

    values = self.frame.values.copy()
    values[np.isnan(values)] = 0

    self.zorig = pd.DataFrame(values, columns=["A", "B", "C", "D"],
                              index=self.dates)
    self.zframe = SparseDataFrame(values, columns=["A", "B", "C", "D"],
                                  default_fill_value=0, index=self.dates)

    values = self.frame.values.copy()
    values[np.isnan(values)] = 2

    self.fill_orig = pd.DataFrame(values, columns=["A", "B", "C", "D"],
                                  index=self.dates)
    self.fill_frame = SparseDataFrame(values, columns=["A", "B", "C", "D"],
                                      default_fill_value=2, index=self.dates)

    self.empty = SparseDataFrame()
def setup_method(self, method):
    self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                 'C': np.arange(10, dtype=np.float64),
                 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    self.dates = bdate_range('1/1/2011', periods=10)

    self.orig = pd.DataFrame(self.data, index=self.dates)
    self.iorig = pd.DataFrame(self.data, index=self.dates)

    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(self.data, index=self.dates,
                                  default_kind='integer')
    self.mixed_frame = self.frame.copy(False)
    self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates))

    values = self.frame.values.copy()
    values[np.isnan(values)] = 0

    self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
                              index=self.dates)
    self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
                                  default_fill_value=0, index=self.dates)

    values = self.frame.values.copy()
    values[np.isnan(values)] = 2

    self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
                                  index=self.dates)
    self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
                                      default_fill_value=2, index=self.dates)

    self.empty = SparseDataFrame()
def compute_sentiments(start, end, fname, tickers):
    # dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    df_tweets = pd.read_csv(fname, header=0, parse_dates=['created_at'],
                            index_col=1)
    # remove the time component from the index
    df_tweets.index = df_tweets.index.map(lambda t: t.strftime('%Y-%m-%d'))

    f = open('MultinomialNB.pickle', 'rb')
    classifier = pickle.load(f)
    f.close()

    # tweets_all = get_tweets(df_tweets)
    Bdays = pd.bdate_range(start, end)
    Bdays = pd.DatetimeIndex(Bdays).normalize()  # strip hours from datetimes
    tickers = [s.strip('$') for s in tickers]
    df_sentiments = pd.DataFrame(index=Bdays, columns=tickers)
    for ind in df_sentiments.index:  # for each day
        for ticker in tickers:
            # calculate the sentiment of each tweet and aggregate
            tweets = get_tweets(df_tweets, ticker, ind.date())
            sent = get_aggregate_sentiment(tweets, classifier)
            print(ticker, sent)
            df_sentiments.loc[ind, ticker] = sent
    return df_sentiments
def setUp(self):
    self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                 'C': np.arange(10, dtype=np.float64),
                 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    self.dates = bdate_range('1/1/2011', periods=10)

    self.orig = pd.DataFrame(self.data, index=self.dates)
    self.iorig = pd.DataFrame(self.data, index=self.dates)

    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(self.data, index=self.dates,
                                  default_kind='integer')

    values = self.frame.values.copy()
    values[np.isnan(values)] = 0

    self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
                              index=self.dates)
    self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
                                  default_fill_value=0, index=self.dates)

    values = self.frame.values.copy()
    values[np.isnan(values)] = 2

    self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
                                  index=self.dates)
    self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
                                      default_fill_value=2, index=self.dates)

    self.empty = SparseDataFrame()
def setUp(self):
    arr, index = _test_data1()
    date_index = bdate_range('1/1/2011', periods=len(index))

    self.bseries = SparseSeries(arr, index=index, kind='block',
                                name='bseries')
    self.ts = self.bseries

    self.btseries = SparseSeries(arr, index=date_index, kind='block')

    self.iseries = SparseSeries(arr, index=index, kind='integer',
                                name='iseries')

    arr, index = _test_data2()
    self.bseries2 = SparseSeries(arr, index=index, kind='block')
    self.iseries2 = SparseSeries(arr, index=index, kind='integer')

    arr, index = _test_data1_zero()
    self.zbseries = SparseSeries(arr, index=index, kind='block',
                                 fill_value=0, name='zbseries')
    self.ziseries = SparseSeries(arr, index=index, kind='integer',
                                 fill_value=0)

    arr, index = _test_data2_zero()
    self.zbseries2 = SparseSeries(arr, index=index, kind='block',
                                  fill_value=0)
    self.ziseries2 = SparseSeries(arr, index=index, kind='integer',
                                  fill_value=0)
def initialize_transitions(self, bt):
    self.backtest_start_time = time.time()

    bt.table_name = bt.instr_name + '_LAST_COMPRESSED'

    start_stamp = pd.Timestamp(bt.init_day).tz_localize('US/Central')
    bt.start_stamp_utc = start_stamp.tz_convert('utc')

    final_stamp = pd.Timestamp(bt.final_day).tz_localize('US/Central')
    bt.final_stamp_utc = final_stamp.tz_convert('utc')

    self.num_bdays = len(pd.bdate_range(bt.start_stamp_utc, bt.final_stamp_utc))

    bt.futures_db = FuturesDatabase()
    bt.range_bar = RangeBar(bt.instr_name, bt.RANGE)
    bt.daily_tick = DailyTick()

    set_strategies(bt)
    Transitions.determine_optimization(bt)

    print("Backtest start time: {}".format(pd.Timestamp(datetime.datetime.now())))
    print("------------------------------------------------")
    print("Instrument: {}".format(bt.instr_name))
    print("     Range: {}".format(bt.RANGE))
    print("     Start: {}".format(bt.init_day))
    print("       End: {}".format(bt.final_day))
    print("------------------------------------------------")

    new_state = "load_daily_data"
    return new_state, bt
def test_reindex_fill_value(self):
    rng = bdate_range('20110110', periods=20)

    result = self.zframe.reindex(rng, fill_value=0)
    exp = self.zorig.reindex(rng, fill_value=0)
    exp = exp.to_sparse(self.zframe.default_fill_value)
    tm.assert_sp_frame_equal(result, exp)
def test_resample_bms_2752(self):
    # GH2753
    foo = pd.Series(index=pd.bdate_range("20000101", "20000201"))
    res1 = foo.resample("BMS")
    res2 = foo.resample("BMS").resample("B")
    self.assertEqual(res1.index[0], Timestamp("20000103"))
    self.assertEqual(res1.index[0], res2.index[0])
def setup_method(self, method):
    super(TestIndex, self).setup_method(method)

    self.d = {
        'string': tm.makeStringIndex(100),
        'date': tm.makeDateIndex(100),
        'int': tm.makeIntIndex(100),
        'rng': tm.makeRangeIndex(100),
        'float': tm.makeFloatIndex(100),
        'empty': Index([]),
        'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])),
        'period': Index(period_range('2012-1-1', freq='M', periods=3)),
        'date2': Index(date_range('2013-01-1', periods=10)),
        'bdate': Index(bdate_range('2013-01-02', periods=10)),
        'cat': tm.makeCategoricalIndex(100),
        'interval': tm.makeIntervalIndex(100),
        'timedelta': tm.makeTimedeltaIndex(100, 'H')
    }

    self.mi = {
        'reg': MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'),
                                       ('foo', 'two'), ('qux', 'one'),
                                       ('qux', 'two')],
                                      names=['first', 'second']),
    }
def test_resample_bms_2752(self):
    # GH2753
    foo = Series(index=pd.bdate_range('20000101', '20000201'))
    res1 = foo.resample("BMS").mean()
    res2 = foo.resample("BMS").mean().resample("B").mean()
    assert res1.index[0] == Timestamp('20000103')
    assert res1.index[0] == res2.index[0]
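Why the first label is 2000-01-03: January 1, 2000 fell on a Saturday, so the 'BMS' (business month start) anchor rolls forward to the first business day of the month. A standalone illustration:

import pandas as pd
print(pd.date_range('2000-01-01', periods=2, freq='BMS'))
# DatetimeIndex(['2000-01-03', '2000-02-01'], dtype='datetime64[ns]', freq='BMS')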
def get_days(num4days):
    now = datetime.date.today()
    yesterday = now - datetime.timedelta(days=1)
    end_day = now - datetime.timedelta(days=num4days)
    workdays = pd.bdate_range(start=str(end_day), end=str(yesterday))
    days_list = [now, yesterday, end_day, workdays]
    return days_list
def setUp(self):
    arr, index = _test_data1()
    date_index = bdate_range("1/1/2011", periods=len(index))

    self.bseries = SparseSeries(arr, index=index, kind="block")
    self.bseries.name = "bseries"
    self.ts = self.bseries

    self.btseries = SparseSeries(arr, index=date_index, kind="block")

    self.iseries = SparseSeries(arr, index=index, kind="integer")

    arr, index = _test_data2()
    self.bseries2 = SparseSeries(arr, index=index, kind="block")
    self.iseries2 = SparseSeries(arr, index=index, kind="integer")

    arr, index = _test_data1_zero()
    self.zbseries = SparseSeries(arr, index=index, kind="block",
                                 fill_value=0)
    self.ziseries = SparseSeries(arr, index=index, kind="integer",
                                 fill_value=0)

    arr, index = _test_data2_zero()
    self.zbseries2 = SparseSeries(arr, index=index, kind="block",
                                  fill_value=0)
    self.ziseries2 = SparseSeries(arr, index=index, kind="integer",
                                  fill_value=0)
def fetchDataSeries(self, ticker=None, startDate=date(2009, 1, 1),
                    endDate=date(2009, 10, 1), interval="d"):
    # clear data set
    self.dataSet = []
    # Check whether the data is already stored here; if not, pull from Yahoo.
    # Holidays are not taken into account (we will pull if there is a holiday
    # discrepancy).
    numBusinessDays = len(pandas.bdate_range(startDate, endDate))
    possibleDataSet = Observation.objects.filter(
        observationDate__gte=startDate, observationDate__lte=endDate,
        ticker=ticker)
    # if the number of business days equals the amount of data we have, use our data
    print "POSSIBLE DATES: " + str(possibleDataSet.count())
    print "BUSINESS DAYS: " + str(numBusinessDays)
    if possibleDataSet.count() == numBusinessDays:
        print "Necessary data already exists"
        for retrievedObs in possibleDataSet:
            newSObs = SObservation(
                inDate=retrievedObs.observationDate,
                inTicker=retrievedObs.ticker, inOpen=retrievedObs.open,
                inHigh=retrievedObs.high, inLow=retrievedObs.low,
                inClose=retrievedObs.close, inVol=retrievedObs.vol,
                inAdjClose=retrievedObs.adjClose)
            self.addObservation(newSObs)
        return
    if ticker is None:
        print "ERROR: NO TICKER SPECIFIED"
        return None
    # Construct the Yahoo Finance URL
    base = "http://ichart.yahoo.com/table.csv?s="
    fromDateString = ("&a=" + str(startDate.month - 1) +
                      "&b=" + str(startDate.day) + "&c=" + str(startDate.year))
    toDateString = ("&d=" + str(endDate.month - 1) +
                    "&e=" + str(endDate.day) + "&f=" + str(endDate.year))
    intervalString = "&g=" + interval
    staticString = "&ignore=.csv"
    endUrl = base + ticker + fromDateString + toDateString + intervalString + staticString
    logger.debug(endUrl)
    print endUrl
    newData = urllib.urlopen(endUrl).readlines()
    newData.reverse()  # want earliest data first
    try:
        for lineNum in xrange(0, len(newData) - 1):
            ds, open_, high, low, close, volume, adjclose = \
                newData[lineNum].rstrip().split(',')
            tempDate = datetime.strptime(ds, "%Y-%m-%d").date()
            newSObs = SObservation(
                inDate=tempDate, inTicker=ticker, inOpen=float(open_),
                inHigh=float(high), inLow=float(low), inClose=float(close),
                inVol=float(volume), inAdjClose=float(adjclose))
            self.addObservation(newSObs)
            # Check whether the observation exists; if not, create it
            if not Observation.objects.filter(observationDate=newSObs.date,
                                              ticker=newSObs.ticker).exists():
                print "Does not exist, create it"
                newObs = Observation()
                newObs.created = datetime.today()
                newObs.observationDate = newSObs.getAttributeByName("date")
                newObs.ticker = newSObs.getAttributeByName("ticker")
                newObs.open = newSObs.getAttributeByName("open")
                newObs.high = newSObs.getAttributeByName("high")
                newObs.low = newSObs.getAttributeByName("low")
                newObs.close = newSObs.getAttributeByName("close")
                newObs.vol = newSObs.getAttributeByName("volume")
                newObs.adjClose = newSObs.getAttributeByName("adjClosePrice")
                newObs.save()
            else:
                print "Item already exists, do not create"
    except ValueError:
        print "DJANGO ERROR: No data exists for query."
        return -1
def _check_stat_op(self, name, alternate, string_series_, check_objects=False,
                   check_allna=False):
    with pd.option_context('use_bottleneck', False):
        f = getattr(Series, name)

        # add some NaNs
        string_series_[5:15] = np.NaN

        # mean, idxmax, idxmin, min, and max are valid for dates
        if name not in ['max', 'min', 'mean']:
            ds = Series(pd.date_range('1/1/2001', periods=10))
            with pytest.raises(TypeError):
                f(ds)

        # skipna or no
        assert pd.notna(f(string_series_))
        assert pd.isna(f(string_series_, skipna=False))

        # check the result is correct
        nona = string_series_.dropna()
        tm.assert_almost_equal(f(nona), alternate(nona.values))
        tm.assert_almost_equal(f(string_series_), alternate(nona.values))

        allna = string_series_ * np.nan
        if check_allna:
            assert np.isnan(f(allna))

        # dtype=object with None, it works!
        s = Series([1, 2, 3, None, 5])
        f(s)

        # GH#2888
        items = [0]
        items.extend(lrange(2 ** 40, 2 ** 40 + 1000))
        s = Series(items, dtype='int64')
        tm.assert_almost_equal(float(f(s)), float(alternate(s.values)))

        # check date range
        if check_objects:
            s = Series(pd.bdate_range('1/1/2000', periods=10))
            res = f(s)
            exp = alternate(s)
            assert res == exp

        # check on string data
        if name not in ['sum', 'min', 'max']:
            with pytest.raises(TypeError):
                f(Series(list('abc')))

        # Invalid axis.
        with pytest.raises(ValueError):
            f(string_series_, axis=1)

        # Unimplemented numeric_only parameter.
        if 'numeric_only' in compat.signature(f).args:
            with pytest.raises(NotImplementedError, match=name):
                f(string_series_, numeric_only=True)
def test_reindex_fill_value(self, float_frame_fill0,
                            float_frame_fill0_dense):
    rng = bdate_range('20110110', periods=20)

    result = float_frame_fill0.reindex(rng, fill_value=0)
    exp = float_frame_fill0_dense.reindex(rng, fill_value=0)
    exp = exp.to_sparse(float_frame_fill0.default_fill_value)
    tm.assert_sp_frame_equal(result, exp)
def arbitrary_timeindex(Nperiods, index_start=pd.datetime(2000, 1, 1)):
    """
    For nice plotting, build an arbitrary business-day index of Nperiods
    entries to attach to a list of prices or returns.
    """
    ans = pd.bdate_range(start=index_start, periods=Nperiods)
    return ans
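Usage sketch: since 2000-01-01 was a Saturday, `bdate_range` rolls the default start forward to the first business day.

idx = arbitrary_timeindex(3)
print(idx)  # DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05'], freq='B')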
def test_union_freq_both_none(self, sort):
    # GH11086
    expected = bdate_range('20150101', periods=10)
    expected.freq = None

    result = expected.union(expected, sort=sort)
    tm.assert_index_equal(result, expected)
    assert result.freq is None
def test_misc(self):
    end = datetime(2009, 5, 13)
    dr = bdate_range(end=end, periods=20, freq='C')
    firstDate = end - 19 * CDay()

    assert len(dr) == 20
    assert dr[0] == firstDate
    assert dr[-1] == end
def test_custom_business_day_freq(self):
    # GH7222
    from pandas.tseries.offsets import CustomBusinessDay
    s = Series(range(100, 121),
               index=pd.bdate_range(
                   start='2014-05-01', end='2014-06-01',
                   freq=CustomBusinessDay(holidays=['2014-05-26'])))

    _check_plot_works(s.plot)
def test_cython_fail_agg(self):
    dr = bdate_range('1/1/2000', periods=50)
    ts = Series(['A', 'B', 'C', 'D', 'E'] * 10, index=dr)

    grouped = ts.groupby(lambda x: x.month)
    summed = grouped.sum()
    expected = grouped.agg(np.sum)
    assert_series_equal(summed, expected)
def Simulate(self, categories="", items=""):
    import pyodbc
    import numpy
    import pandas

    if self.mode == 1:
        # Mode 1 simulates our actual historical PO placements and customer orders
        self.bDaysToIterateOver = pandas.bdate_range(start=self.startDate,
                                                     end=self.endDate)
def test_cdaterange(self):
    result = bdate_range('2013-05-01', periods=3, freq='C')
    expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03'])
    tm.assert_index_equal(result, expected)
def makeDateIndex(k):
    dt = datetime(2000, 1, 1)
    dr = bdate_range(dt, periods=k)
    return DatetimeIndex(dr)
def test_dti_custom_business_repr(self):
    # only really care that it works
    repr(pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1),
                        freq="C"))
def setup_method(self, method):
    self.rng = bdate_range(START, END, freq='C')
transactions = []
f = open('2015_transactions.csv', 'r')
lines = f.readlines()
for line in lines:
    transactions.append(transaction.Transaction(line))

# turn transaction objects into portfolio objects
portfolios = []  # list of portfolio objects for each date
init_port = Portfolio()
portfolios.append(init_port)
for t in transactions:
    portfolios.append(Portfolio(t, portfolios[-1]))

# get all trading days as a list
bizdates = pd.bdate_range(portfolios[0].start_date, parser.parse('11/1/2016'))
print(bizdates)

# get portfolio values
portfolios_length = len(portfolios)
idx = 0
values = {}
cpy_bizdates = copy.deepcopy(bizdates)
for date in bizdates:
    # advance to the next portfolio on dates with transactions
    if (idx + 1 < portfolios_length and
            portfolios[idx + 1].start_date <= date.to_datetime()):
        idx += 1
    value = portfolios[idx].calculateValue(date)
    if value is not None:
        values[date.to_datetime().strftime('%Y-%m-%d')] = value
def test_summary_dateutil(self):
    bdate_range('1/1/2005', '1/1/2009', freq='C',
                tz=dateutil.tz.tzutc()).summary()
# NOTE: the enclosing `def` line for this loop was lost in extraction;
# `get_tickers` is a reconstructed, hypothetical signature.
def get_tickers(df):
    for i in df:
        document = i['ticker']
        # print(document)
        for j in range(len(i['ticker'])):
            ticker = document[j].keys()
    return ticker


client = pymongo.MongoClient('localhost', 27017)
db = client.fin

datelist = pandas.bdate_range(pandas.datetime.today(), periods=5,
                              freq='B').tolist()
date = []
for i in range(0, len(datelist)):
    date.append(str(datelist[i]).split(" ", 1)[0])

before_date = []
after_date = []
after_signal_con = []
MOM_signal_con = []
ROC_signal_con = []
MACD_signal_con = []
ADX_signal_con = []
RSI_signal_con = []
MACD_1 = []
def is_trading_day(date_to_check):
    is_business_day = len(bdate_range(date_to_check, date_to_check)) == 1
    is_holiday = date_to_check in holidays.USA()
    return is_business_day and not is_holiday
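Usage sketch, assuming the `holidays` package's USA calendar as used above (dates chosen for illustration):

import datetime

print(is_trading_day(datetime.date(2019, 7, 3)))  # True: an ordinary Wednesday
print(is_trading_day(datetime.date(2019, 7, 4)))  # False: Independence Day
print(is_trading_day(datetime.date(2019, 7, 6)))  # False: a Saturday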
import matplotlib.pyplot as plt
# import matplotlib.ticker as ticker

# Import formatting libraries
from datetime import tzinfo, timedelta, datetime

# Import learning models
# from sklearn.model_selection import cross_val_score
# from sklearn.neighbors import RadiusNeighborsClassifier
# from sklearn.neighbors import KNeighborsClassifier

sns.set_style(style='white')

drive_in = './input-data/'
drive_out = './output-data/'
train = pd.read_csv(drive_in + 'sf_train_fixed.csv')

# get a unique list of categories
cats = list(set(train.Category))

# turn strings into dates
dates = []
datesAll = np.array([datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
                     for date in train.Dates])

# set up the pandas date ranges
startDate = (np.min(datesAll)).date()
endDate = (np.max(datesAll)).date()
alldates_month = pd.bdate_range(startDate, endDate, freq="m")
alldates_daily = pd.bdate_range(startDate, endDate, freq="d")
print(alldates_month)
print(alldates_daily)
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
from numpy import nan

# QUESTION 1
# Bring the csv into the python program
csv = 'HW5_Q1_data.csv'
rawdata = pd.DataFrame(data=pd.read_csv(csv, low_memory=False))
# print(rawdata)

# Make an index range of business days and confirm it matches the Date column
rng = pd.bdate_range(start='1/3/1990', end='11/1/1993', freq='B')
count = 0
for date in rng:
    if str(date)[0:-9] != str(rawdata["Date"][count]):
        # this date is produced by our range but not recorded in the dataset;
        # set it as a holiday to exclude from the new range
        holiday = [str(date)[0:-9]]
        break
    count = count + 1

# Since the index range didn't match the Date column, make a new range that does
newrng = pd.bdate_range(start='1/3/1990', end='11/1/1993', holidays=holiday,
                        freq='C', weekmask='Mon Tue Wed Thu Fri')
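A small self-contained illustration of the 'C' (custom business day) frequency used above: passing `holidays` removes those dates from an otherwise Mon-Fri range.

import pandas as pd

rng_b = pd.bdate_range('2018-12-24', '2018-12-28', freq='B')
rng_c = pd.bdate_range('2018-12-24', '2018-12-28', freq='C',
                       weekmask='Mon Tue Wed Thu Fri',
                       holidays=['2018-12-25'])
print(len(rng_b), len(rng_c))  # 5 4 -- Christmas Day is dropped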
def test_apply_ramp_dateoffset():
    x = pd.Series(range(10),
                  index=pd.bdate_range('2020-02-17', freq='b', periods=10))
    y = apply_ramp(x, Window(pd.DateOffset(weeks=1), pd.DateOffset(days=1)))
    assert len(y) == 9
            # NOTE: the opening `if` branch of this selection block was
            # truncated in the source; the assignment target below is
            # reconstructed from the parallel branch that follows.
            self.selected = value > np.percentile(
                value, (1 - self.N / len(value)) * 100)
        elif (self.strategy['name'] == '3_LowVol' or
                self.strategy['name'] == '6M_LowVol'):
            # calculate the annualised volatility
            std = np.std(a, 0) * np.sqrt(252)
            self.selected = std < np.percentile(std, (self.N / len(std)) * 100)

    def asset_allocation(self, today):
        # equally weight the selected assets
        if self.strategy['allocation'] == 'EW':
            self.weights[today] = 0
            self.weights[today, self.selected] = 1 / self.N


if __name__ == '__main__':
    # this is the date range we want to simulate
    dates = pd.bdate_range(start='1/4/2016', end='')  # YYYYMMDD
    # let's generate some random normal returns for illustrative purposes
    random_returns = pd.DataFrame(
        data=np.random.normal(0, 1, size=(len(dates), 500)) / 100,
        index=dates)
    # we create the execution dates -- where we trade assets
    calendar = dates[dates.is_month_end]
    # what kind of strategy do we want?
    strategy = {'construction': '12M_Momo', 'rebalance': 'EW'}
    # construct the class and run the backtest
    backtest = Backtest(random_returns, calendar, strategy, 50)
    backtest.run()
    # plot the results
    backtest.cumulative_return.plot()
def setUp(self):
    self.rng = bdate_range(START, END)
def test_summary_pytz(self):
    bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary()
def test_dti_business_getitem_matplotlib_hackaround(self):
    rng = pd.bdate_range(START, END)
    values = rng[:, None]
    expected = rng.values[:, None]
    tm.assert_numpy_array_equal(values, expected)
# fill blanks with 0
nky = nky.replace({0: np.nan})  # replace all 0 with NaN
nky = nky.dropna(axis=0)        # drop rows containing an NaN
print(nky)

# Custom time-series date ranges to be added as the index.
# normalize=False is required to obtain custom start and end minutes;
# otherwise the range defaults to midnight (00:00:00).
# Optional parameters: weekmask='Mon Tue Wed Thu Fri',
# holidays=['2018-12-25', '2018-01-15']
finalspxrng = []
spxday = pd.bdate_range(start='7/30/2018 09:30:00', end='12/31/2018 16:00:00',
                        freq='C', normalize=False,
                        weekmask='Mon Tue Wed Thu Fri',
                        tz='America/New_York')  # custom business-day range
# print(len(spxday))
spxrng = pd.bdate_range(start='7/30/2018 09:30:00', end='12/31/2018 16:00:00',
                        freq='15T', normalize=False,
                        tz='America/New_York')  # 15-minute intervals
# print(spxrng.summary())
spxstart = datetime.time(9, 30, 0)
spxend = datetime.time(16, 0, 0)
longdate = spxrng.date
longtime = spxrng.time
    # now df should become [0 0 4 0 0 0 x x x 2 2 2]
    df[i].replace(to_replace=0, method='bfill', inplace=True)
    # now df should become [4 4 4 x x x x x x 2 2 2]
    df[i].replace(to_replace=2, value=0, inplace=True)
    # now df should become [4 4 4 x x x x x x 0 0 0]
    df[i].replace(to_replace=4, value=0, inplace=True)
    # now df should become [0 0 0 x x x x x x 0 0 0]
    return df


if __name__ == '__main__':
    # test functions
    calc = Calculations()
    tsf = Filter()

    # test rolling ewma
    date_range = pandas.bdate_range('2014-01-01', '2014-02-28')
    print(calc.get_bus_day_of_month(date_range))

    foo = pandas.DataFrame(numpy.arange(0.0, 13.0))
    print(calc.rolling_ewma(foo, span=3))
def compute_forward_returns(prices, periods=(1, 5, 10), filter_zscore=None):
    """
    Finds the N period forward returns (as percent change) for each asset
    provided.

    Parameters
    ----------
    prices : pd.DataFrame
        Pricing data to use in forward price calculation. Assets as columns,
        dates as index. Pricing data must span the factor analysis time
        period plus an additional buffer window that is greater than the
        maximum number of expected periods in the forward returns
        calculations.
    periods : sequence[int]
        Periods to compute forward returns on.
    filter_zscore : int or float, optional
        Sets forward returns greater than X standard deviations from the mean
        to nan. Set it to None to avoid filtering.
        Caution: this outlier filtering incorporates lookahead bias.

    Returns
    -------
    forward_returns : pd.DataFrame - MultiIndex
        Forward returns indexed by date and asset. Separate column for each
        forward return window.
    """
    forward_returns = pd.DataFrame(index=pd.MultiIndex.from_product(
        [prices.index, prices.columns], names=['date', 'asset']))

    # if the period length is not consistent across the factor index then
    # it must be a trading-day calendar
    time_diffs = prices.index.to_series().diff()
    trading_calendar = time_diffs.min() != time_diffs.max()

    for period in periods:
        delta = prices.pct_change(period).shift(-period)

        if filter_zscore is not None:
            mask = abs(delta - delta.mean()) > (filter_zscore * delta.std())
            delta[mask] = np.nan

        # find the period length, considering there might be weekends
        # or public holidays in between
        entries_to_test = min(10, len(prices.index) - period)
        period_len = min([prices.index[period + i] - prices.index[i]
                          for i in range(entries_to_test)])

        # we use business days as an approximation to the trading calendar
        if trading_calendar and period_len.components.days > 0:
            days_diffs = [len(pd.bdate_range(prices.index[i],
                                             prices.index[period + i])) - 1
                          for i in range(entries_to_test)]
            delta_days = period_len.components.days - min(days_diffs)
            period_len -= pd.Timedelta(days=delta_days)

        column_name = timedelta_to_string(period_len)
        forward_returns[column_name] = delta.stack()

    forward_returns.index = forward_returns.index.rename(['date', 'asset'])
    return forward_returns
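A sketch of the business-day adjustment above: the raw timedelta between two index entries overstates the horizon when a weekend sits in between, so the code counts business days instead (illustrative dates, not from the source):

import pandas as pd

idx = pd.bdate_range('2017-01-02', periods=6)  # Mon..next Mon, weekend skipped
raw = idx[5] - idx[0]                          # Timedelta('7 days')
bdays = len(pd.bdate_range(idx[0], idx[5])) - 1
print(raw.days, bdays)  # 7 5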
def test_dti_custom_business_summary_pytz(self):
    pd.bdate_range("1/1/2005", "1/1/2009", freq="C", tz=pytz.utc)._summary()
def test_cdaterange(self):
    result = bdate_range("2013-05-01", periods=3, freq="C")
    expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"])
    tm.assert_index_equal(result, expected)
def test_intersection_bug(self):
    # GH #771
    a = bdate_range('11/30/2011', '12/31/2011')
    b = bdate_range('12/10/2011', '12/20/2011')
    result = a.intersection(b)
    self.assert_index_equal(result, b)
from nowtrade.action import Long, Short, LongExit, ShortExit

"""
MSFT Data
            MSFT_Open  MSFT_High  MSFT_Low  MSFT_Close  MSFT_Volume  MSFT_Adj Close
Date
2010-06-01      25.53      26.31     25.52       25.89     76152400           23.20
2010-06-02      26.06      26.48     25.73       26.46     65718800           23.71
2010-06-03      26.55      26.93     26.41       26.86     67837000           24.07
2010-06-04      26.10      26.57     25.62       25.79     89832200           23.11
2010-06-07      25.82      25.83     25.24       25.29     80456200           22.66
2010-06-08      25.25      25.26     24.65       25.11     87355000           22.50
2010-06-09      25.22      25.52     24.75       24.79     87794000           22.21
2010-06-10      25.13      25.15     24.78       25.00     78930900           22.40
"""
index = pd.bdate_range('20100601', periods=8)
msft_open = [25.53, 26.06, 26.55, 26.10, 25.82, 25.25, 25.22, 25.13]
msft_open_name = 'MSFT_Open'
msft_high = [26.31, 26.48, 26.93, 26.57, 25.83, 25.26, 25.52, 25.15]
msft_high_name = 'MSFT_High'
msft_low = [25.52, 25.73, 26.41, 25.62, 25.24, 24.65, 24.75, 24.78]
msft_low_name = 'MSFT_Low'
msft_close = [25.89, 26.46, 26.86, 25.79, 25.29, 25.11, 24.79, 25.00]
msft_close_name = 'MSFT_Close'
msft_volume = [76152400, 65718800, 67837000, 89832200, 80456200, 87355000,
               87794000, 78930900]
msft_volume_name = 'MSFT_Volume'
msft_adj_close = [23.20, 23.71, 24.07, 23.11, 22.66, 22.50, 22.21, 22.40]
msft_adj_close_name = 'MSFT_Adj Close'
def test_dti_custom_business_summary(self):
    rng = pd.bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1),
                         freq="C")
    rng._summary()
    rng[2:2]._summary()
from nsetools import Nse
import pandas as pd
import urllib.request
import datetime
import time

bhavcopy_base_url = ("https://www.nseindia.com/content/historical/EQUITIES/"
                     "%s/%s/cm%s%s%sbhav.csv.zip")
base_url = ('https://www.nseindia.com/content/historical/EQUITIES/2019/JAN/'
            'cm08JAN2019bhav.csv.zip')
file_path = 'D:\\nse_data\\History\\cm%s%s%sbhav.csv.zip'

# Date generator over weekdays only
start = datetime.datetime(2019, 1, 8)
end = datetime.datetime(2019, 1, 9)
weekmask = 'Mon Tue Wed Thu Fri'
date_lst = pd.bdate_range(start, end, freq='C', weekmask=weekmask)
# print(date_lst)

for i in date_lst:
    print("Processing for date:" + str(i))
    day_of_month = i.strftime("%d")
    mon = i.strftime("%b").upper()
    year = i.year
    try:
        url = bhavcopy_base_url % (year, mon, day_of_month, mon, year)
        print(url)
        filename = file_path % (day_of_month, mon, year)
        print(filename)
        opener = urllib.request.build_opener()
def is_business_day(date):
    return (bool(len(pd.bdate_range(date, date))) and
            date not in get_trading_close_holidays(date.year))
def test_dti_tz_localize_bdate_range(self):
    dr = pd.bdate_range('1/1/2009', '1/1/2010')
    dr_utc = pd.bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc)
    localized = dr.tz_localize(pytz.utc)
    tm.assert_index_equal(dr_utc, localized)
def test_series():
    data = np.array(['Tom', 'Lily', 'Jerry', 'Lilei'])

    # simple
    s = pd.Series(data)
    print(s)
    print(s[0])
    s = pd.Series(data, index=[10, 20, 30, 40])
    print(s)
    print(s[10])
    d = {'a': 0., 'b': 1., 'c': 2}
    s = pd.Series(d)
    print(s)
    print(s['b'])
    s = pd.Series(5, index=[1, 2, 3, 4, 5])
    print(s)

    # dates
    date_series_str = pd.Series(['2011', '2011-02', '2011-03-01',
                                 '2014/04/01', '2011/05/01 01:01:01',
                                 '01 Jun 2011'])
    datas = pd.to_datetime(date_series_str)
    print(datas)
    delta = datas - pd.to_datetime('1970-01-01')
    print('-' * 32)
    print(delta)
    print(delta.dt.days)
    dr = pd.date_range('2019-08-20', periods=5, freq='D')
    print(dr)
    start = pd.datetime(2019, 8, 1)
    end = pd.to_datetime('2019/08/30')
    print('=' * 32)
    rd = pd.date_range(start, end)
    print(rd)
    datas = pd.bdate_range('2019/08/01', '2019/09/01')
    print(datas)

    d_map = [{'a': 1, 'b': 2}, {'a': 1, 'b': 2, 'c': 3}]
    datas = pd.DataFrame(d_map)
    print(datas)
    d_map = {'a': [1, 2, 3, 4], 'b': ['c1', 'c2', 'c3', 'c4']}
    datas = pd.DataFrame(d_map)
    print(datas)
    print(datas[1:3])
    print('==loc:\n', datas.loc[1])
    d_copy = datas[:]
    print('==d_copy:\n', d_copy)
    d_copy = d_copy.drop(2)
    print('==d_copy drop 2:\n', d_copy)
    d_copy.pop('a')
    print('==d_copy pop:\n', d_copy)
    d_copy['b'][1] = 'rename'
    print('==d_copy rename:\n', d_copy)
    print('==d_copy', d_copy.loc[1]['b'])

    print('== append')
    a = pd.DataFrame([['aa', 12], ['bb', 3]], columns=['a', 'b'])
    datas_append = datas.append(a)
    print(datas_append)
    r = datas_append.loc[0]
    print(r)
    i = np.arange(0, datas_append['a'].size)
    datas_append.reindex(index=i)
    print(datas_append)
def test_dti_custom_business_summary_dateutil(self):
    pd.bdate_range("1/1/2005", "1/1/2009", freq="C",
                   tz=dateutil.tz.tzutc())._summary()
def _check_stat_op(self, name, alternate, string_series_, check_objects=False,
                   check_allna=False):
    with pd.option_context("use_bottleneck", False):
        f = getattr(Series, name)

        # add some NaNs
        string_series_[5:15] = np.NaN

        # mean, idxmax, idxmin, min, and max are valid for dates
        if name not in ["max", "min", "mean"]:
            ds = Series(pd.date_range("1/1/2001", periods=10))
            with pytest.raises(TypeError):
                f(ds)

        # skipna or no
        assert pd.notna(f(string_series_))
        assert pd.isna(f(string_series_, skipna=False))

        # check the result is correct
        nona = string_series_.dropna()
        tm.assert_almost_equal(f(nona), alternate(nona.values))
        tm.assert_almost_equal(f(string_series_), alternate(nona.values))

        allna = string_series_ * np.nan
        if check_allna:
            assert np.isnan(f(allna))

        # dtype=object with None, it works!
        s = Series([1, 2, 3, None, 5])
        f(s)

        # GH#2888
        items = [0]
        items.extend(range(2 ** 40, 2 ** 40 + 1000))
        s = Series(items, dtype="int64")
        tm.assert_almost_equal(float(f(s)), float(alternate(s.values)))

        # check date range
        if check_objects:
            s = Series(pd.bdate_range("1/1/2000", periods=10))
            res = f(s)
            exp = alternate(s)
            assert res == exp

        # check on string data
        if name not in ["sum", "min", "max"]:
            with pytest.raises(TypeError):
                f(Series(list("abc")))

        # Invalid axis.
        with pytest.raises(ValueError):
            f(string_series_, axis=1)

        # Unimplemented numeric_only parameter.
        if "numeric_only" in inspect.getfullargspec(f).args:
            with pytest.raises(NotImplementedError, match=name):
                f(string_series_, numeric_only=True)
def _create_rng():
    """Internal function to mock a date range."""
    rng = bdate_range(datetime(2009, 1, 1), periods=100)
    return rng