def getTradingDates():
    '''Get futures trading dates.

    Looks up, in the ``refdata`` collection of the ``universe`` database,
    the record whose ``Name`` is ``TradingDates`` and ``Country`` is ``CN``.

    Returns
    -------
    tradingDates : list of datetime.date or None
        return list of trading dates in datetime.date, or None when no
        matching record exists.

    Exceptions
    ----------
    raise Exception when duplicated records found.
    '''
    username, password = config.MONGODB_CRED
    db = dMongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                             config.MONGODB_PORT, username,
                                             password, 'universe')

    # Materialise the matches once rather than calling Cursor.count():
    # count() was removed in PyMongo 4 and needed an extra round trip.
    records = list(db.refdata.find({'Name': 'TradingDates', 'Country': 'CN'}))
    nRecords = len(records)

    if nRecords == 0:
        return None
    if nRecords > 1:
        raise Exception('Duplicated {n:d} records found.'.format(n=nRecords))

    # each entry of 'Data' is converted through its .date() method
    return [d.date() for d in records[0]['Data']]
def updateFuturesUniverse( asOfDate ):
    '''Update futures universe data.

    Collects the contracts of every exchange in futures.FUTURES_EXCHANGES,
    concatenates them and upserts the result as one record keyed by
    ( Date, Country ) into the ``futures`` collection.

    Parameters
    ----------
    asOfDate : datetime.date
        Data date.

    Returns
    -------
    None
    '''
    exchangeFrames = []
    for exch in futures.FUTURES_EXCHANGES:
        logging.info( 'Get futures contracts in {e:s}...'.format( e=exch ) )
        exchangeFrames.append( futures.getFuturesContracts( exch ) )

    futData = pd.concat( exchangeFrames )
    futData.reset_index( drop=True, inplace=True )

    # write to MongoDB
    username, password = MONGODB_CRED
    db = mongodb.getAuthenticatedConnection( MONGODB_URL, MONGODB_PORT,
                                             username, password, DB_NAME )

    # the record is stamped with midnight of asOfDate as a datetime
    mongoDate = dt.datetime.combine( asOfDate, dt.datetime.min.time() )
    record = { 'Date': mongoDate,
               'Data': futData.to_json(),
               'Country': 'CN' }

    # replace_one( upsert=True ) is the supported equivalent of the legacy
    # Collection.update( spec, document, upsert=True ), which PyMongo 4 removed.
    db.futures.replace_one( { 'Date': mongoDate, 'Country': 'CN' },
                            record, upsert=True )
def updateStockUniverse( asOfDate ):
    '''Update stock universe data.

    Fetches the industry classification and the stock list, then upserts
    them as one record keyed by ( Date, Country ) into the ``stocks``
    collection.

    Parameters
    ----------
    asOfDate : datetime.date
        Data date.

    Returns
    -------
    None
    '''
    # get the universe data.
    industryClassification = stocks.getIndustryClassification()
    stockInfo = stocks.getStocks()

    # extract MongoDB credential
    username, password = MONGODB_CRED
    db = mongodb.getAuthenticatedConnection( MONGODB_URL, MONGODB_PORT,
                                             username, password, DB_NAME )

    # write to MongoDB
    # the data is enhanced with an asOfDate date and the market identifier
    mongoDate = dt.datetime.combine( asOfDate, dt.datetime.min.time() )
    record = { 'Date': mongoDate,
               'Stocks': stockInfo.to_json(),
               'Sectors': industryClassification.to_json(),
               'Country': 'CN' }

    # replace_one( upsert=True ) is the supported equivalent of the legacy
    # Collection.update( spec, document, upsert=True ), which PyMongo 4 removed.
    db.stocks.replace_one( { 'Date': mongoDate, 'Country': 'CN' },
                           record, upsert=True )
def main(resumeFrom=None):
    '''Entry point of the job.

    Backfills bin data for every stock in the universe, one stock and one
    DATE_RANGE interval at a time, inserting one record per data date into
    the ``stocks`` collection of the ``binData`` database.

    Parameters
    ----------
    resumeFrom : str, optional
        Security ID to restart the backfill from (e.g. '603085.XSHG').
        Stocks sorting before it are skipped. When None (default) the whole
        universe is processed.

    Exceptions
    ----------
    raise ValueError when resumeFrom is not part of the universe.
    '''
    # runtime date
    asOfDate = dt.date.today()

    # get all stocks in the universe
    universe = stockApi.getExchangeStockNames(asOfDate)
    universe.sort()

    # The original sliced with an `idx` whose assignment was commented out,
    # which raises NameError; the resume point is now an explicit argument.
    if resumeFrom is not None:
        universe = universe[universe.index(resumeFrom):]

    # initialize MongoDB connection
    username, password = config.MONGODB_CRED
    db = mongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                            config.MONGODB_PORT, username,
                                            password, 'binData')

    nStocks = len(universe)
    logging.info(
        'Backfill bin volume for {ns:d} stocks in total...'.format(ns=nStocks))

    # for bin data backfill, stocks are updated one-by-one and by month
    for i, s in enumerate(universe):
        for startDate, endDate in DATE_RANGE:
            logging.info(
                'Backfilling bin volume for {sec:s} ({idx:d}/{n:d}) from {sd:s} to {ed:s}...'
                .format(sec=s, idx=i + 1, n=nStocks, sd=str(startDate),
                        ed=str(endDate)))
            data = stockTrading.getHistoryBinData(s, startDate=startDate,
                                                  endDate=endDate)

            try:
                # Group by the scalar label: a one-element list makes recent
                # pandas yield 1-tuples as keys, which strptime cannot parse.
                records = [{
                    'SecID': s,
                    'Date': dt.datetime.strptime(dataDate, '%Y-%m-%d'),
                    'Data': tData.to_json(),
                    'Country': 'CN'
                } for dataDate, tData in data.groupby('dataDate')]
            except KeyError as e:
                logging.warning(
                    'Error when updating {sec:s} from {sd:s} to {ed:s}: {msg:s}.'
                    .format(sec=s, sd=str(startDate), ed=str(endDate),
                            msg=str(e)))
                continue

            # insert_many rejects an empty list, so skip empty intervals
            if records:
                db.stocks.insert_many(records)
            else:
                logging.warning(
                    'Empty data to be inserted into database for {sec:s}'.format(
                        sec=s))

    logging.info('Bin data backfill done.')
def getDailyData(secId, startDate=dt.date(2012, 1, 1), endDate=None):
    '''Get daily data for the given futures during the date range.

    Parameters
    ----------
    secId : str
        Security ID of the futures;
    startDate : datetime.date
        Start date of the daily data queried inclusively;
    endDate : datetime.date or None
        End date of the daily data queried inclusively. Defaults to today's
        date, resolved when the function is called.

    Returns
    -------
    dailyData : pandas.DataFrame or None
        Requested daily data in pandas.DataFrame, or None when no record
        exists for the given futures.

    Exceptions
    ----------
    raise Exception when duplicated records found on the given futures name.
    '''
    # A `dt.date.today()` default is evaluated once at import time and goes
    # stale in a long-running process, so resolve it per call instead.
    if endDate is None:
        endDate = dt.date.today()

    # Get authenticated MongoDB connection
    username, password = config.MONGODB_CRED
    db = dMongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                             config.MONGODB_PORT, username,
                                             password, 'dailyData')

    # Query data; materialise the matches once instead of Cursor.count(),
    # which was removed in PyMongo 4.
    records = list(db.futures.find({'SecID': secId}))

    # Sanity check
    nRecords = len(records)
    if nRecords == 0:
        return None
    if nRecords > 1:
        raise Exception(
            'Duplicated {n:d} records found for futures {s:s}.'.format(
                n=nRecords, s=secId))

    dailyData = pd.read_json(records[0]['Data'])
    dailyData.sort_index(inplace=True)

    # Filtered by date: tradeDate is compared against 'YYYY-MM-DD' strings
    startDateStr = startDate.strftime('%Y-%m-%d')
    endDateStr = endDate.strftime('%Y-%m-%d')
    inRange = ((dailyData.tradeDate >= startDateStr) &
               (dailyData.tradeDate <= endDateStr))
    dailyData = dailyData[inRange]

    # reset index from 0 onwards
    dailyData.reset_index(drop=True, inplace=True)

    return dailyData
def getFuturesInformation(asOfDate, ticker=None, listed=True, country='CN'):
    '''Get all futures contract information.

    Loads the most recent futures snapshot stored for the country and
    optionally narrows it down by ticker and by listing status.

    Parameters
    ----------
    asOfDate : datetime.date
        Date used to decide whether a contract is listed;
    ticker : str
        Ticker name of the futures, if not given, return all futures;
    listed : bool or None
        Contract listed status, True for listed contracts by the asOfDate,
        otherwise return all available contracts;
    country : str
        Country identifier, currently, only CN supported.

    Returns
    -------
    futuresInfo : pandas.DataFrame or None
        Futures information, or None if no snapshot is found.
    '''
    username, password = config.MONGODB_CRED
    db = dMongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                             config.MONGODB_PORT, username,
                                             password, 'universe')

    # find the latest records
    # NOTE(review): the ceiling is hard-coded, so snapshots dated after
    # 2020-01-01 are never selected - confirm whether asOfDate was intended.
    validDate = dt.datetime(2020, 1, 1)
    data = db.futures.find_one(
        {
            'Date': {'$lte': validDate},
            'Country': country
        },
        sort=[('Date', dMongodb.pymongo.DESCENDING)])
    if data is None:
        return None

    futuresInfo = pd.read_json(data['Data'])
    futuresInfo.sort_index(inplace=True)

    if ticker is not None:
        # filter on the frame's own column (the original referenced an
        # unrelated name `futures` here)
        futuresInfo = futuresInfo[futuresInfo.ticker == ticker]

    if listed:
        # dates are compared as 'YYYY-MM-DD' strings
        sAsOfDate = str(asOfDate)
        isListed = ((futuresInfo.listDate <= sAsOfDate) &
                    (futuresInfo.lastTradeDate >= sAsOfDate))
        futuresInfo = futuresInfo[isListed]

    # reset index from 0 onwards
    futuresInfo.reset_index(drop=True, inplace=True)

    return futuresInfo
def main():
    '''Entry point of the job.

    Fetches today's minute bin data for every futures contract returned by
    futuresApi.getFuturesInformation and upserts one record per contract,
    keyed by (SecID, Date, Country), into the ``futures`` collection of the
    ``binData`` database. Contracts with empty data are skipped with a
    warning.
    '''
    # runtime
    asOfDate = dt.date.today()
    mongoDate = dt.datetime.combine(asOfDate, dt.datetime.min.time())
    logging.info(
        'Updating minute bin data for futures on date {d:s}...'.format(
            d=str(asOfDate)))

    # get all futures in the universe, as a ticker -> secID mapping
    futuresInfo = futuresApi.getFuturesInformation(asOfDate)
    universe = dict(zip(futuresInfo.ticker, futuresInfo.secID))

    # initialize MongoDB connection
    username, password = config.MONGODB_CRED
    db = mongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                            config.MONGODB_PORT, username,
                                            password, 'binData')

    nFutures = len(universe)
    # the original omitted .format() and logged the raw '{ns:d}' placeholder
    logging.info(
        'Minute bin volume for {ns:d} futures in total to be updated...'.format(
            ns=nFutures))

    # for bin data, futures are updated one-by-one
    for i, (ticker, secId) in enumerate(universe.items()):
        # the bin data source is queried with the upper-cased ticker
        ticker = ticker.upper()
        logging.info(
            'Updating minute bin data for {s:s} ({idx:d}/{n:d})...'.format(
                s=secId, idx=i + 1, n=nFutures))

        data = futuresTrading.getBinData(ticker, dataDate=asOfDate)
        if len(data) == 0:
            logging.warning('Empty data for {secId:s}'.format(secId=secId))
            continue

        record = {
            'SecID': secId,
            'Date': mongoDate,
            'Data': data.to_json(),
            'Country': 'CN'
        }
        # replace_one(upsert=True) is the supported equivalent of the legacy
        # Collection.update(spec, document, upsert=True) removed in PyMongo 4.
        db.futures.replace_one(
            {
                'SecID': secId,
                'Date': mongoDate,
                'Country': 'CN'
            },
            record,
            upsert=True)

    logging.info('All futures updated.')
def getBinData(secId, startDate=dt.date(2012, 1, 1), endDate=None):
    '''Get minute-by-minute data for the given futures during the date range.

    Parameters
    ----------
    secId : str
        Security ID of the futures;
    startDate : datetime.date
        Start date of the bin data required inclusively,
    endDate : datetime.date or None
        End date of the bin data required inclusively. Defaults to today's
        date, resolved when the function is called.

    Returns
    -------
    binData : pandas.Panel
        Requested bin data in pandas.Panel, one item per date.

    Exceptions
    ----------
    raise Exception when duplicated records found on the given futures name.
    '''
    # A `dt.date.today()` default is evaluated once at import time and goes
    # stale in a long-running process, so resolve it per call instead.
    if endDate is None:
        endDate = dt.date.today()

    # Get authenticated MongoDB connection
    username, password = config.MONGODB_CRED
    db = dMongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                             config.MONGODB_PORT, username,
                                             password, 'binData')

    # Query data; both bounds are midnight datetimes of the given dates
    midnight = dt.datetime.min.time()
    cursor = db.futures.find({
        'SecID': secId,
        'Date': {
            '$gte': dt.datetime.combine(startDate, midnight),
            '$lte': dt.datetime.combine(endDate, midnight)
        }
    })

    # Build one DataFrame per date to convert to a Panel.
    data = {}
    for item in cursor:
        date = item['Date'].date()
        if date in data:
            raise Exception(
                'Duplicated records on {d:s} found.'.format(d=str(date)))

        dayBinData = pd.read_json(item['Data'])
        dayBinData.sort_index(inplace=True)
        data[date] = dayBinData

    # NOTE(review): pandas.Panel has been removed from recent pandas
    # releases; it is kept because callers depend on the return type.
    return pd.Panel(data)
def main():
    '''Entry point of the job.

    Fetches today's minute bin data for every stock returned by
    stockApi.getExchangeStockNames and upserts one record per stock, keyed
    by (SecID, Date, Country), into the ``stocks`` collection of the
    ``binData`` database.
    '''
    # runtime
    asOfDate = dt.date.today()
    # every record of this run carries the same date, so compute it once
    mongoDate = dt.datetime.combine(asOfDate, dt.datetime.min.time())
    logging.info('Updating minute bin data for stocks on date {d:s}...'.format(
        d=str(asOfDate)))

    # get all stocks in the universe
    universe = stockApi.getExchangeStockNames(asOfDate)

    # initialize MongoDB connection
    username, password = config.MONGODB_CRED
    db = mongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                            config.MONGODB_PORT, username,
                                            password, 'binData')

    nStocks = len(universe)
    logging.info(
        'Minute bin volume for {ns:d} stocks in total to be updated...'.format(
            ns=nStocks))

    # for bin data, stocks are updated one-by-one
    for i, stock in enumerate(universe):
        logging.info(
            'Updating minute bin data for {s:s} ({idx:d}/{n:d})...'.format(
                s=stock, idx=i + 1, n=nStocks))

        data = stockTrading.getBinData(stock, dataDate=asOfDate)
        record = {
            'SecID': stock,
            'Date': mongoDate,
            'Data': data.to_json(),
            'Country': 'CN'
        }
        # replace_one(upsert=True) is the supported equivalent of the legacy
        # Collection.update(spec, document, upsert=True) removed in PyMongo 4.
        db.stocks.replace_one(
            {
                'SecID': stock,
                'Date': mongoDate,
                'Country': 'CN'
            },
            record,
            upsert=True)

    logging.info('All stocks updated.')
def main():
    '''Entry point of the job.

    Fetches the adjusted daily data of every stock returned by
    stockApi.getExchangeStockNames and upserts one record per stock, keyed
    by (SecID, Country), into the ``stocks`` collection of the ``dailyData``
    database. Each record is stamped with today's date as ``LastModified``.
    '''
    # runtime date
    asOfDate = dt.date.today()
    mongoDate = dt.datetime.combine(asOfDate, dt.datetime.min.time())

    # get all stocks in the universe
    universe = stockApi.getExchangeStockNames(asOfDate)

    # initialize MongoDB connection
    username, password = config.MONGODB_CRED
    db = mongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                            config.MONGODB_PORT, username,
                                            password, 'dailyData')

    nStocks = len(universe)
    logging.info(
        'Daily volume for {ns:d} stocks in total to be updated...'.format(
            ns=nStocks))

    # for daily data, stocks are updated one by one
    for i, s in enumerate(universe):
        logging.info(
            'Updating daily data for {sec:s} ({idx:d}/{n:d})...'.format(
                sec=s, idx=i + 1, n=nStocks))

        data = stockTrading.getAdjustedDailyData(s)

        # compose record
        record = {
            'SecID': s,
            'Data': data.to_json(),
            'LastModified': mongoDate,
            'Country': 'CN'
        }

        # save data to mongo; replace_one(upsert=True) is the supported
        # equivalent of the legacy Collection.update(spec, document,
        # upsert=True) removed in PyMongo 4.
        db.stocks.replace_one({'SecID': s, 'Country': 'CN'}, record,
                              upsert=True)

    logging.info('Daily data updated done.')
def main():
    '''Main body of the job.

    Backfills minute bin data for every futures contract known as of
    2016-04-25. Each contract is walked day by day from its list date to
    the earlier of its last trade date and the 2016-04-22 cutoff, and the
    non-empty days are inserted in one batch per contract into the
    ``futures`` collection of the ``binData`` database. Contracts listed on
    or before 2012-01-01 are skipped.
    '''
    asOfDate = dt.date(2016, 4, 25)
    # global upper bound of the backfill window; never modified below
    cutoffDate = dt.datetime.combine(dt.date(2016, 4, 22),
                                     dt.datetime.min.time())

    # get all available futures since the very beginning.
    futuresInfo = futuresApi.getFuturesInformation(asOfDate, listed=False)

    # initialize MongoDB connection
    username, password = config.MONGODB_CRED
    db = mongodb.getAuthenticatedConnection(config.MONGODB_URL,
                                            config.MONGODB_PORT, username,
                                            password, 'binData')

    # contract metadata
    contractMeta = list(
        zip(futuresInfo.ticker, futuresInfo.secID, futuresInfo.listDate,
            futuresInfo.lastTradeDate))
    nContracts = len(contractMeta)

    for i, meta in enumerate(contractMeta):
        instId, secId, listedDate, tradeDate = meta
        logging.info('Processing futures {f:s} ({i:d}/{n:d})...'.format(
            f=secId, i=i + 1, n=nContracts))

        # dates are 'YYYY-MM-DD' strings, so string comparison is ordered
        if listedDate <= '2012-01-01':
            logging.warning(
                'Too senior contract for {sec:s}.'.format(sec=secId))
            continue

        # normalize instrument ID
        instId = instId.upper()

        startDate = dt.datetime.strptime(listedDate, '%Y-%m-%d')
        # Per-contract end date. The original overwrote the shared cutoff
        # here, so one early-expiring contract truncated all later ones.
        contractEnd = min(cutoffDate,
                          dt.datetime.strptime(tradeDate, '%Y-%m-%d'))

        curDate = startDate
        records = []
        while curDate <= contractEnd:
            logging.info(
                'Processing {iid:s} ({i:d}/{n:d}) on {sd:s}...'.format(
                    iid=instId, sd=str(curDate), i=i + 1, n=nContracts))
            try:
                dailyBin = futuresTrading.getBinData(instId, dataDate=curDate)
                if len(dailyBin) > 0:
                    records.append({
                        'SecID': secId,
                        'Date': curDate,
                        'Data': dailyBin.to_json(),
                        'Country': 'CN'
                    })
                else:
                    logging.warning('Empty data for {sec:s} on {sd:s}.'.format(
                        sec=secId, sd=str(curDate)))
            except KeyError:
                # best effort: a failing day is logged and skipped
                logging.warning(
                    'Error when updating {sec:s} on {sd:s}.'.format(
                        sec=secId, sd=str(curDate)))

            curDate += dt.timedelta(1)

        # insert_many rejects an empty list
        if records:
            db.futures.insert_many(records)
        else:
            logging.warning(
                'Empty data to be inserted into database for {sec:s}'.format(
                    sec=secId))

    logging.info('Bin data backfill done.')