def getInfoAboutTables(mydb: database.tradingDB = None): ssn = mydb.Session() resultList = [] for idx in mydb.MarketDataInfoTableDataFrame.index: tableORM = mydb.MarketDataInfoTableDataFrame.at[idx, 'tableORM'] tableName = tableORM.__tablename__ lastDateTimeOnDisk = ssn.query(sqlalchemy.func.max( tableORM.datetime)).scalar() largestDiffDateTimeInMinutes = ssn.query( sqlalchemy.func.max(tableORM.diffToNextRowInMinutes)).scalar() # largestDiffDateTimeRow = ssn.query(tableORM).order_by(tableORM.diffToNextRowInMinutes.desc()).first() dateTimeOflargestDiffDateTimeInMinutesRecord = ssn.query( tableORM.datetime).order_by(tableORM.datetime).filter( tableORM.diffToNextRowInMinutes == largestDiffDateTimeInMinutes).first() if dateTimeOflargestDiffDateTimeInMinutesRecord is not None: dateTimeOflargestDiffDateTimeInMinutes = dateTimeOflargestDiffDateTimeInMinutesRecord[ 0] pass else: dateTimeOflargestDiffDateTimeInMinutes = pd.NaT pass nRows = ssn.query(tableORM).count() ordrdDct = OrderedDict(( ('tableName', tableName), ('nRows', nRows), ('lastDateTimeOnDisk', lastDateTimeOnDisk), ('largestDiffDateTimeInMinutes', largestDiffDateTimeInMinutes), ('dateTimeOflargestDiffDateTimeInMinutes', dateTimeOflargestDiffDateTimeInMinutes), )) resultList.append(ordrdDct) pass # t0 = time.time() # for idx in mydb.MarketDataInfoTableDataFrame.index: # tableORM = mydb.MarketDataInfoTableDataFrame.at[idx, 'tableORM'] # tableName = tableORM.__tablename__ # # lastDateTimeOnDisk = ssn.query(sqlalchemy.func.max(tableORM.datetime)).scalar() # # largestDiffDateTime = ssn.query(sqlalchemy.func.max(tableORM.diffToNextRowInMinutes)).scalar() # # nRows = ssn.query(tableORM).count() # # t1 = time.time() # print(idx,t1-t0,tableName, lastDateTimeOnDisk) # pass ssn.commit() ssn.close() df = pd.DataFrame(resultList) return df
def getCompleteDFFromDateRangeAndTableName(tableName: str = None, mydb: database.tradingDB = None, dateRange: pd.DatetimeIndex = None): """retrieve training data for one one table""" tableORM = mydb.getTableORMByTablename(tableName) df = getDataFromDateRange(tableSchema=tableORM.__table__, mydb=mydb, dateRange=dateRange) dfNew = processRawDataFrameAsResultFromGetDataFromRangeToTrainginData( tableName=tableName, df=df, referenceDateTime=dateRange[-1]) return dfNew
def persistMarketDataBarsOnDisk(bars, mydb: database.tradingDB, tableSchema, doCorrection=True): """write the bars to disk. update the data. also calculate time differences between successive rows""" if bars is not None and len(bars) > 0: tStart = pd.datetime.now() # convert bars to dataframe df = util.df(bars) # rename columns (the 'date' column defined in the bars is the 'datetime' column in the tables) df.rename(columns={'date': 'datetime'}, inplace=True) # calculate the time difference between rows # calculate the difference in time between a row and the next row df.loc[:, 'diffToNextRowInMinutes'] = df.datetime.diff().shift( -1) / pd.Timedelta(1, 'm') # the last difference is zero df.iloc[-1, df.columns.get_loc('diffToNextRowInMinutes')] = 0 # upsert the dataframe if tableSchema is not None: mydb.upsertDataFrame(df, tableSchema) # repair datetime on disk. # it can happen that the data fetched contains less or more information # than the data on disk (That has been fetched before). # therefore, the diffDateTImes need to be corrected for all entries between # the first and the last entries firstDateTime = df.iloc[ 0, df.columns.get_loc('datetime')].tz_localize(None) lastDateTime = df.iloc[ -1, df.columns.get_loc('datetime')].tz_localize(None) # find previous DateTime ssn = mydb.Session() previousDateTime = ssn.query(func.max( tableSchema.c.datetime)).filter( tableSchema.c.datetime < firstDateTime).scalar() ssn.close() # this is None if no such value exists if previousDateTime is None: previousDateTime = firstDateTime if doCorrection: df = mydb.correctDiffDateTimesForMarketDataTable( tableName=tableSchema.name, startDateTime=previousDateTime, endDateTime=lastDateTime, doCorrection=doCorrection) pass pass tEnd = pd.datetime.now() a = ( f'finished treating data. Table: {tableSchema.name}; nRows: {len(bars)}; elapsed time: {tEnd-tStart}' ) _logger.info(a) # print(a) pass pass
async def asyncioJobGetHistoricalData(ib, mydb: database.tradingDB, qcs, additionalArgs={}): """get all available historical data for the contracts specified in qcs""" success = False if not ib.isConnected(): return success # create a session for this task ssn = mydb.Session() # the table with the information about the position in the historical data request task tablePos = mydb.MarketDataHistoricalRequestsPositionTable.__table__ # get the info about where we are in the historical data request task [tableNameOnDisk, startDateTimeOnDiskUTC ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos) # find the index for the for-loop below tableNames = [ mydb.getValueFromMarketDataInfoTable(whereColumn='conId', whereValue=qc.conId, getColumn='tableName') for qc in qcs ] indexToStart = 0 try: indexToStart = tableNames.index(tableNameOnDisk) except: pass # if we are finished with all data retrieval, remove table content such # that we can start over if indexToStart == len(qcs) - 1: if startDateTimeOnDiskUTC is not None: if startDateTimeOnDiskUTC >= pd.to_datetime( pd.datetime.utcnow()).tz_localize('UTC'): delStmt = tablePos.delete() results = ssn.bind.execute(delStmt) ssn.commit() pass pass pass # retrieve historical data, starting at the position where we were last time this program ran for qc in qcs[indexToStart:]: # this can be a long task if it is not interrupted. # we record the starting time tStart1 = pd.datetime.now() # the conId for this contract conId = qc.conId # the table into which we want to insert tableORM = utils.getValueFromDataFrame( mydb.MarketDataInfoTableDataFrame, whereColumn='conId', whereValue=conId, getColumn='tableORM') # get the earliest dateTime for this conId defined by IB earliestDateTimeUTCAccordingToIB = getEarliestDateTimeUTCFromDataFrame( mydb, conId) # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data if pd.isnull(earliestDateTimeUTCAccordingToIB): a = ( f'attempting to get historical data: {qc.symbol}, {qc.currency}; not performed because earliestDateTime is not given by IB; time: {tStart1}' ) else: a = ( f'attempting to get historical data: {qc.symbol}, {qc.currency}; is going to be performed because earliestDateTime is given by IB; time: {tStart1}' ) pass _logger.info(a) # we did not break the for loop in the rows above because we wanted to log some items. # we are now breaking the loop if no earliestDateTime is found if pd.isnull(earliestDateTimeUTCAccordingToIB): # it makes no sense to continue if IB has no earliestDateTime for this conId # we therefore skip all code below and continue the for loop, essentially meaning that the next conId # will be treated continue pass # local timezone tzlocal = dateutil.tz.tzlocal() # override the earliestDateTime if an argument is given earliestDateTimeUTC = additionalArgs.get( 'earliestDateTimeUTC', earliestDateTimeUTCAccordingToIB) startDateTimeUTC = earliestDateTimeUTC # if there is inofrmation in the position table, take this information if startDateTimeOnDiskUTC is not None: # only take the information if it concerncs the table that we are currently treating if tableNameOnDisk == tableORM.__tablename__: startDateTimeUTC = startDateTimeOnDiskUTC pass pass # duration time delta durationPandasTimeDeltaDefault = pd.Timedelta(weeks=4) # bar size delta barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1) # override durations and bar size settings if arguments are given durationPandasTimeDelta = additionalArgs.get( 'durationPandasTimeDelta', durationPandasTimeDeltaDefault) barSizePandasTimeDelta = additionalArgs.get( 'barSizePandasTimeDelta', barSizePandasTimeDeltaDefault) timeOutTimeHistoricalBars = additionalArgs.get( 'timeOutTimeHistoricalBars', 600) jitterSpanDivider = additionalArgs.get('jitterSpanDivider', 50) # create the timeDelta objects that generate the IB strings dTDRegular = utils.CTimeDelta(durationPandasTimeDelta) dTD = dTDRegular bTD = utils.CTimeDelta(barSizePandasTimeDelta) diffTimeInMinutes = int(bTD._timeDelta / pd.Timedelta(minutes=1)) while (startDateTimeUTC is not None) and ((startDateTimeUTC) <= pd.to_datetime( pd.datetime.utcnow()).tz_localize('UTC').ceil('1 Min')): a = (f'startDateTIme Original {startDateTimeUTC}') _logger.info(a) # calculate the regular endDateTIme endDateTimeUTC = startDateTimeUTC + dTD._timeDelta endDateTimeUTCRegular = endDateTimeUTC a = (f'calculated endDateTime {endDateTimeUTC}') _logger.info(a) # initialize the durationTimeDelta Object dTD = dTDRegular if 0: ## skipping data that already exists # Idea: if the startDateTime is on disk, then # advance startDateTime to the beginning of the next gap (if a next gap is found) newStartDateTimeUTC = getNewStartDateTime( ssn=ssn, tableORM=tableORM, oldStartDateTimeUTC=startDateTimeUTC, diffTimeInMinutes=diffTimeInMinutes) if newStartDateTimeUTC is not None: a = ( f'advancing to new StartDateTime {newStartDateTimeUTC}' ) _logger.info(a) startDateTimeUTC = newStartDateTimeUTC # continue the while loop continue pass # the recalculated startDateTime above has placed us in a gap by design # test that this is correct startDateTimeIsOnDisk = bool( ssn.query(tableORM).filter( tableORM.datetime == startDateTimeUTC).count() == 1) assert (not startDateTimeIsOnDisk) # now find the end of the gap rowWhereGapStops = ssn.query(tableORM).filter( tableORM.diffToNextRowInMinutes <= diffTimeInMinutes ).filter(tableORM.datetime >= startDateTimeUTC).order_by( tableORM.datetime).first() if rowWhereGapStops is not None: # we found a datetime where the gap stops a = ( f'found a row where the gap stops. datetime: {rowWhereGapStops.datetime}' ) _logger.info(a) pass else: # we found no datetime where the gap stops a = (f'found no row where the gap stops.') _logger.info(a) pass if rowWhereGapStops is not None: # because we found a row where the gap stops, we will only request data until that timepoint (if the resulting duration is smaller than the current regular duration) endDateTimeWhereGapStopsUTC = pd.to_datetime( rowWhereGapStops.datetime).tz_localize('UTC') a = ( f'endDatetime calculated using the end of the gap: {endDateTimeWhereGapStopsUTC}' ) _logger.info(a) endDateTimeUTC = min(endDateTimeUTCRegular, endDateTimeWhereGapStopsUTC) a = ( f'now setting a new endDateTime that is the minumum of the endDateTime where the gap stops and the regular endDateTime : {endDateTimeUTC}' ) _logger.info(a) # because we have redefined the endDateTime, we have to generate a revised TimeDelta object pandasTimeDelta = pd.Timedelta( seconds=(endDateTimeUTC - startDateTimeUTC).total_seconds()) dTD = utils.CTimeDelta(pandasTimeDelta) a = ( f'generated a new TimeDelta object. IBDurationString :{dTD.IB_Duration_String}' ) _logger.info(a) pass else: # we found no row where the gap stops a = ( f'no new endDatetime calculated using the end of the gap because no end of gap was found' ) _logger.info(a) pass # add some jitter to the startDate to make sure we are not always requesting the same period durationTimeInMinutes = int( (endDateTimeUTC - startDateTimeUTC) / pd.Timedelta(1, 'm')) jitterSpan = durationTimeInMinutes / jitterSpanDivider jitterInMinutes = randint(0, int(jitterSpan)) jitterTimeDelta = pd.Timedelta(jitterInMinutes, 'm') startDateTimeUTC = startDateTimeUTC - jitterTimeDelta # because we now have a startDateTime that is prior to the original one, we are extending the duration for which we want to fetch data # such that the original endDateTime is met pandasTimeDelta = dTD._timeDelta pandasTimeDelta = pandasTimeDelta + jitterTimeDelta dTD = utils.CTimeDelta(pandasTimeDelta) # specify the endDateTime endDateTimeUTC = startDateTimeUTC + dTD._timeDelta # log info about chunk getting tStart2 = pd.datetime.now() a = ( f'attempting to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC}; endDT: {endDateTimeUTC}; durationString: {dTD.IB_Duration_String}; time: {tStart2}' ) _logger.info(a) # get the historical bars endDateTimeLocal = endDateTimeUTC.tz_convert(tzlocal) endDateTimeLocalNaive = endDateTimeLocal.tz_localize(None) bars = None timeOutOccured = None [bars, timeOutOccured ] = getHistoricalDataBars(ib, qc, endDateTime=endDateTimeLocalNaive, durationStr=dTD.IB_Duration_String, barSizeSetting=bTD.IB_Bar_Size_String, timeOutTime=timeOutTimeHistoricalBars) if timeOutOccured is not None and timeOutOccured: a = ( f'Timeout while requesting historical bars for contract {qc}' ) _logger.warn(a) pass # persist the result on disk # upsert the data persistMarketDataBarsOnDisk(bars, mydb, tableORM.__table__, doCorrection=True) # calculate the number of rows that have been retrieved nRows = 0 if bars is not None: nRows = len(bars) pass # calculate the total number of rows on disk nRowsTotal = ssn.query(tableORM).count() # write the information about where we are to disk endDateTimeUTCNaiveAsDateTime = endDateTimeUTC.tz_localize( None).to_pydatetime() # remove the old row delStmt = tablePos.delete() results = ssn.bind.execute(delStmt) ssn.commit() # add this row insStmt = tablePos.insert().values( tableName=tableORM.__tablename__, endDateTime=endDateTimeUTCNaiveAsDateTime, durationStringOnLastUpdate=dTD.IB_Duration_String, numberOfTicksRetrieved=nRows, lastUpdateDateTimeLocal=pd.datetime.now(), ) results = ssn.bind.execute(insStmt) ssn.commit() # log info about the end of bar data retrieval tEnd2 = pd.datetime.now() tDelta2 = tEnd2 - tStart2 a = ( f'finished to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC}; endDT: {endDateTimeUTC}; durationString: {dTD.IB_Duration_String}; time: {tEnd2}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}' ) _logger.info(a) # update the startDateTime for the next iteration of the loop startDateTimeUTC = endDateTimeUTC # as this ia a potentially very long running loop, make a little pause to # allow other asyncio processes (for example, the processes that tet recent market data) # to also get a piece of the processor await asyncio.sleep(0.01) pass tEnd1 = pd.datetime.now() tDelta1 = tEnd1 - tStart1 a = ( f'finished to get historical data: {qc.symbol}, {qc.currency}; time: {tEnd1}; elapsedTime: {tDelta1}' ) _logger.info(a) pass success = True ssn.close() return success
async def asyncioJobGetRecentData(ib: IB, mydb: database.tradingDB, qc, additionalArgs={}): """get recent historical data for the contract specified in qc""" success = False a = ( f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}' ) _logger.info(a) if not (isinstance(ib, IB) and ib.isConnected()): return success conId = qc.conId tableORM = utils.getValueFromDataFrame( mydb.MarketDataInfoTableDataFrame, whereColumn='conId', whereValue=conId, getColumn='tableORM') lastDateTimeOnDiskUTC = getLastDateTimeOnDiskUTC( mydb, tableORM.__table__) earliestDateTimeUTCAccordingToIB = getEarliestDateTimeUTCFromDataFrame( mydb, conId) # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data if pd.isnull(earliestDateTimeUTCAccordingToIB): success = True return (success) # duration time delta durationMaximumPandasTimeDeltaDefault = pd.Timedelta(weeks=4) # bar size delta barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1) # override defaults if values have been passed in additionalArgs durationMaximumPandasTimeDelta = additionalArgs.get( 'durationMaximumPandasTimeDelta', durationMaximumPandasTimeDeltaDefault) barSizePandasTimeDelta = additionalArgs.get( 'barSizePandasTimeDelta', barSizePandasTimeDeltaDefault) timeOutTimeHistoricalBars = additionalArgs.get( 'timeOutTimeHistoricalBars', 600) # we wish to get the most recent historical data # the end time is therefore utcnow nowUTC = pd.to_datetime( pd.datetime.utcnow()).tz_localize('UTC').floor('1 Min') endDateTimeUTC = nowUTC # the earliest possible start date time is endtime minus the maximum allowed time delta earliestStartDateTimeUTC = endDateTimeUTC - durationMaximumPandasTimeDelta # by default, the startDateTime is this earliest possible datetime startDateTimeUTC = earliestStartDateTimeUTC # if we found a lastDateTime on Disk, the startDateTime is modified if not pd.isnull(lastDateTimeOnDiskUTC): startDateTimeUTC = max(lastDateTimeOnDiskUTC, earliestStartDateTimeUTC) pass # for some reason, it is possible that the time on disk is in the future. # we therefore determine the startDateTime as the minimum between the startDateTime as calculated before # and the endDateTime startDateTimeUTC = min(startDateTimeUTC, endDateTimeUTC) # we can now calculate the time delta that we actually want to fetch durationPandasTimeDelta = endDateTimeUTC - startDateTimeUTC # define the objects used to calculate the IB strings for time deltas and bar size settings dTD = utils.CTimeDelta(durationPandasTimeDelta) bTD = utils.CTimeDelta(barSizePandasTimeDelta) # log info about the start of bar data retrieval tStart2 = pd.datetime.now() if not pd.isnull(earliestDateTimeUTCAccordingToIB): a = ( f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC} (durationString: {dTD.IB_Duration_String}); time: {tStart2}' ) else: a = ( f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}; not performed because earliestDateTime is not given by IB; time: {tStart2}' ) pass _logger.info(a) # get the historical data bars # only get bars if the last value on disk is not utcnow bars = None timeOutOccured = None if not (nowUTC == lastDateTimeOnDiskUTC): [bars, timeOutOccured ] = getHistoricalDataBars(ib, qc, endDateTime='', durationStr=dTD.IB_Duration_String, barSizeSetting=bTD.IB_Bar_Size_String, timeOutTime=timeOutTimeHistoricalBars) if timeOutOccured is not None and timeOutOccured: a = (f'Timeout while requesting historical bars for contract {qc}') _logger.warn(a) pass # calculate the number of rows that have been retrieved nRows = 0 if bars is not None: nRows = len(bars) pass # persist result on disk # upsert the data persistMarketDataBarsOnDisk(bars, mydb, tableORM.__table__, doCorrection=True) # calculate the total number of rows on disk ssn = mydb.Session() nRowsTotal = ssn.query(tableORM.__table__).count() ssn.close() # log info about the end of bar data retrieval tEnd2 = pd.datetime.now() tDelta2 = tEnd2 - tStart2 a = ( f'finished to get recent historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC} (durationString: {dTD.IB_Duration_String}); time: {tEnd2}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}' ) _logger.info(a) success = True ssn.close() return success
async def asyncioJobGetHistoricalData(ib, mydb: database.tradingDB, qcs, additionalArgs={}): """get all available historical data for the contracts specified in qcs""" success = False if not ib.isConnected(): return success # a = (f'starting function to get historical data: {[qc.localSymbol for qc in qcs]}') # _logger.info(a) # print(a) # create a session for this task ssn = mydb.Session() # the table with the information about the position in the historical data request task tablePos = mydb.MarketDataHistoricalRequestsPositionTable.__table__ # get the info about where we are in the historical data request task [tableNameOnDisk, startDateTimeOnDiskUTCNaiveFloor ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos) # find the index for the for-loop below tableNames = [ mydb.getValueFromMarketDataInfoTable(whereColumn='conId', whereValue=qc.conId, getColumn='tableName') for qc in qcs ] indexToStart = 0 try: indexToStart = tableNames.index(tableNameOnDisk) except: pass # get the additional args # duration time delta durationPandasTimeDeltaDefault = pd.Timedelta(weeks=4) # bar size delta barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1) # override durations and bar size settings if arguments are given durationPandasTimeDelta = additionalArgs.get( 'durationPandasTimeDelta', durationPandasTimeDeltaDefault) barSizePandasTimeDelta = additionalArgs.get('barSizePandasTimeDelta', barSizePandasTimeDeltaDefault) timeOutTimeHistoricalBars = additionalArgs.get('timeOutTimeHistoricalBars', 600) jitterSpanFraction = additionalArgs.get('jitterSpanFraction', 0.02) # if we are finished with all data retrieval, remove table content such # that we can start over startOver = False if indexToStart == len(qcs) - 1: if pd.isnull(tableNameOnDisk): # no content in the table. Start over startOver = True pass else: if pd.isnull(startDateTimeOnDiskUTCNaiveFloor): # there was a tablename but no time. This probably means we wrote before a tableName that cannot retrieve historical data. Start over. startOver = True pass else: # there is a tableName and a dateTime. We have to check if we are finished with fethcing all data for this table if startDateTimeOnDiskUTCNaiveFloor >= pd.to_datetime( pd.datetime.utcnow()).floor( barSizePandasTimeDelta) - barSizePandasTimeDelta: # we are finished fetching data. Start over startOver = True pass pass pass pass if startOver: delStmt = tablePos.delete() results = ssn.bind.execute(delStmt) ssn.commit() indexToStart = 0 pass # a = (f'index to start: {indexToStart}') # _logger.info(a) # print(a) # retrieve historical data, starting at the position where we were last time this program ran for qc in qcs[indexToStart:]: # refresh the info about where we are in the historical data request task [tableNameOnDisk, startDateTimeOnDiskUTCNaiveFloor ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos) # this can be a long task if it is not interrupted. # we record the starting time tStart1 = pd.datetime.now() # the conId for this contract conId = qc.conId # the table into which we want to insert tableORM = utils.getValueFromDataFrame( mydb.MarketDataInfoTableDataFrame, whereColumn='conId', whereValue=conId, getColumn='tableORM') # get the earliest dateTime for this conId defined by IB # get it directly from IB as this is a check whether we can ask for historical Data for this conId earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly = utils.conformDateTimeToPandasUTCNaive( utils.getEarliestDateTimeFromIBAsDateTime(ib=ib, qualifiedContract=qc, timeOutTime=10)) # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data if pd.isnull(earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly): a = ( f'attempting to get historical data: {qc.symbol}, {qc.currency}: not performed because earliestDateTime as queried directly from IB is NULL.' ) else: a = ( f'attempting to get historical data: {qc.symbol}, {qc.currency}; is going to be performed because earliestDateTime as queried directly from IB is: {earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly}' ) pass _logger.info(a) # we did not break the for loop in the rows above because we wanted to log some items. # we are now breaking the loop if no earliestDateTime is found if pd.isnull(earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly): # it makes no sense to continue if IB has no earliestDateTime for this conId # we therefore skip all code below and continue the for loop, essentially meaning that the next conId # will be treated # we must update the positiontable of course # remove the old row by removing the table content delStmt = tablePos.delete() results = ssn.bind.execute(delStmt) ssn.commit() insStmt = tablePos.insert().values( tableName=tableORM.__tablename__, endDateTime=None, durationStringOnLastUpdate=None, numberOfTicksRetrieved=None, lastUpdateDateTimeLocal=pd.datetime.now(), ) results = ssn.bind.execute(insStmt) ssn.commit() continue pass # local timezone tzlocal = dateutil.tz.tzlocal() # override the earliestDateTime if an argument is given earliestDateTimeUTCNaiveAccordingToAdditionalArgs = additionalArgs.get( 'earliestDateTimeUTCNaive', None) if (not pd.isnull(earliestDateTimeUTCNaiveAccordingToAdditionalArgs)): earliestDateTimeUTCNaive = earliestDateTimeUTCNaiveAccordingToAdditionalArgs else: earliestDateTimeUTCNaive = earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly pass startDateTimeUTCNaive = earliestDateTimeUTCNaive # if there is information in the position table, take this information if not pd.isnull(startDateTimeOnDiskUTCNaiveFloor): # only take the information if it concerns the table that we are currently treating if tableNameOnDisk == tableORM.__tablename__: if startDateTimeOnDiskUTCNaiveFloor > startDateTimeUTCNaive: # only correct the information from disk if the disk value is larger than the current value startDateTimeUTCNaive = startDateTimeOnDiskUTCNaiveFloor pass pass pass # create the timeDelta objects that generate the IB strings dTDRegular = utils.CTimeDelta(durationPandasTimeDelta) dTD = dTDRegular bTD = utils.CTimeDelta(barSizePandasTimeDelta) diffTimeInMinutes = int( bTD._timeDelta / pd.Timedelta(minutes=1) ) # necessary in case we use the algorithm to search for the newest datetime on disk # now make sure that startDateTime is not larger than nowUTCNaive # this makes sure that we actually retrieve data # this is ok as the loop for a given contract starts below such that we do not interfere with the ending conditions for that loop # and the loop over all contracts is initialized above such that the conditions for that loop are not impacted if startDateTimeUTCNaive >= pd.to_datetime(pd.datetime.utcnow()).floor( barSizePandasTimeDelta) - durationPandasTimeDelta: startDateTimeUTCNaive = pd.to_datetime(pd.datetime.utcnow()).floor( barSizePandasTimeDelta) - durationPandasTimeDelta pass conditionOnStartDateTime = startDateTimeUTCNaive <= pd.to_datetime( pd.datetime.utcnow()).ceil(barSizePandasTimeDelta) while (not pd.isnull(startDateTimeUTCNaive) ) and conditionOnStartDateTime: # a = (f'while start: startDateTimeUTCNaive {startDateTimeUTCNaive}, now {pd.to_datetime(pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)}, condition {conditionOnStartDateTime}') # _logger.info(a) a = (f'startDateTime Original {startDateTimeUTCNaive}') _logger.info(a) # calculate the regular endDateTIme endDateTimeUTCNaive = startDateTimeUTCNaive + dTD._timeDelta endDateTimeUTCNaiveRegular = endDateTimeUTCNaive # necessary in case we use the algorithm to search for the newest datetime on disk a = (f'calculated endDateTime {endDateTimeUTCNaive}') _logger.info(a) # initialize the durationTimeDelta Object dTD = dTDRegular if 0: # search for the newest datetime on disk and only request data for periods that start from that point ## skipping data that already exists # Idea: if the startDateTime is on disk, then # advance startDateTime to the beginning of the next gap (if a next gap is found) newStartDateTimeUTCNaive = getNewStartDateTime( ssn=ssn, tableORM=tableORM, oldStartDateTimeUTCNaive=startDateTimeUTCNaive, diffTimeInMinutes=diffTimeInMinutes) if newStartDateTimeUTCNaive is not None: a = ( f'advancing to new StartDateTime {newStartDateTimeUTCNaive}' ) _logger.info(a) startDateTimeUTCNaive = newStartDateTimeUTCNaive # continue the while loop continue pass # the recalculated startDateTime above has placed us in a gap by design # test that this is correct startDateTimeIsOnDisk = bool( ssn.query(tableORM).filter( tableORM.datetime == startDateTimeUTCNaive).count() == 1) assert (not startDateTimeIsOnDisk) # now find the end of the gap rowWhereGapStops = ssn.query(tableORM).filter( tableORM.diffToNextRowInMinutes <= diffTimeInMinutes ).filter(tableORM.datetime >= startDateTimeUTCNaive).order_by( tableORM.datetime).first() if rowWhereGapStops is not None: # we found a datetime where the gap stops a = ( f'found a row where the gap stops. datetime: {rowWhereGapStops.datetime}' ) _logger.info(a) pass else: # we found no datetime where the gap stops a = (f'found no row where the gap stops.') _logger.info(a) pass if rowWhereGapStops is not None: # because we found a row where the gap stops, we will only request data until that timepoint (if the resulting duration is smaller than the current regular duration) endDateTimeWhereGapStopsUTCNaive = pd.to_datetime( rowWhereGapStops.datetime) a = ( f'endDatetime calculated using the end of the gap: {endDateTimeWhereGapStopsUTCNaive}' ) _logger.info(a) endDateTimeUTCNaive = min( endDateTimeUTCNaiveRegular, endDateTimeWhereGapStopsUTCNaive) a = ( f'now setting a new endDateTime that is the minumum of the endDateTime where the gap stops and the regular endDateTime : {endDateTimeUTCNaive}' ) _logger.info(a) # because we have redefined the endDateTime, we have to generate a revised TimeDelta object pandasTimeDelta = pd.Timedelta( seconds=(endDateTimeUTCNaive - startDateTimeUTCNaive).total_seconds()) dTD = utils.CTimeDelta(pandasTimeDelta) a = ( f'generated a new TimeDelta object. IBDurationString :{dTD.IB_Duration_String}' ) _logger.info(a) pass else: # we found no row where the gap stops a = ( f'no new endDatetime calculated using the end of the gap because no end of gap was found' ) _logger.info(a) pass # add some jitter to the startDate to make sure we are not always requesting the same period durationTimeInMinutes = int( (endDateTimeUTCNaive - startDateTimeUTCNaive) / pd.Timedelta(1, 'm')) jitterSpan = durationTimeInMinutes * jitterSpanFraction jitterInMinutes = randint(0, int(jitterSpan)) jitterTimeDelta = pd.Timedelta(jitterInMinutes, 'm') dTD = utils.CTimeDelta(durationPandasTimeDelta - jitterTimeDelta) # specify the endDateTime endDateTimeUTCNaive = startDateTimeUTCNaive + dTD._timeDelta # log info about chunk getting tStart2 = pd.datetime.now() a = ( f'attempting to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTCNaive}; endDT: {endDateTimeUTCNaive}; durationString: {dTD.IB_Duration_String}; timeout: {timeOutTimeHistoricalBars}' ) _logger.info(a) # print(a) # get the historical bars endDateTimeLocalNaive = endDateTimeUTCNaive.tz_localize( 'UTC').tz_convert(tzlocal).tz_localize(None) bars = None timeOutOccured = None [bars, timeOutOccured ] = getHistoricalDataBars(ib, qc, endDateTime=endDateTimeLocalNaive, durationStr=dTD.IB_Duration_String, barSizeSetting=bTD.IB_Bar_Size_String, timeOutTime=timeOutTimeHistoricalBars) if timeOutOccured is not None and timeOutOccured: a = ( f'Timeout while requesting historical bars for contract {qc}' ) _logger.warn(a) pass # it could be that the bars contain the current datetime. # we do not want to use this value in the update as it is still submect to change. # we therefore check if the date of last bars is the current date and remove the corresponding row if that # is the case if bars is not None and len(bars) > 0: lastBarDateTime = utils.conformDateTimeToPandasUTCNaive( pd.to_datetime(bars[-1].date)) _nowUTCNaiveFloor = pd.to_datetime( pd.datetime.utcnow()).floor(barSizePandasTimeDelta) if lastBarDateTime == _nowUTCNaiveFloor: bars = bars[:-1] pass pass # persist the result on disk # upsert the data persistMarketDataBarsOnDisk(bars, mydb, tableORM.__table__, doCorrection=True) # calculate the number of rows that have been retrieved nRows = 0 if bars is not None: nRows = len(bars) pass # calculate the total number of rows on disk nRowsTotal = ssn.query(tableORM).count() # # write the information about where we are to disk # endDateTimeUTCNaiveAsDateTime = endDateTimeUTCNaive.to_pydatetime() # ib.sleep(2) # a = f'before removing: {ssn.query(tablePos).all()}' # print(a) # _logger.info(a) # # ib.sleep(2) # remove the old row by removing the table content delStmt = tablePos.delete() results = ssn.bind.execute(delStmt) ssn.commit() # a = f'after removing: {ssn.query(tablePos).all()}' # print(a) # _logger.info(a) # # ib.sleep(2) # add this row to update the position in the positiongtable insStmt = tablePos.insert().values( tableName=tableORM.__tablename__, endDateTime=endDateTimeUTCNaive, durationStringOnLastUpdate=dTD.IB_Duration_String, numberOfTicksRetrieved=nRows, lastUpdateDateTimeLocal=pd.datetime.now(), ) results = ssn.bind.execute(insStmt) ssn.commit() # a = f'after udpating: {ssn.query(tablePos).all()}' # print(a) # _logger.info(a) # # ib.sleep(2) # log info about the end of bar data retrieval tEnd2 = pd.datetime.now() tDelta2 = tEnd2 - tStart2 a = ( f'finished to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTCNaive}; endDT: {endDateTimeUTCNaive}; durationString: {dTD.IB_Duration_String}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}' ) _logger.info(a) # update the startDateTime for the next iteration of the loop startDateTimeUTCNaive = endDateTimeUTCNaive # as this ia a potentially very long running loop, make a little pause to # allow other asyncio processes (for example, the processes that tet recent market data) # to also get a piece of the processor ib.sleep(0.01) conditionOnStartDateTime = startDateTimeUTCNaive <= pd.to_datetime( pd.datetime.utcnow()).ceil(barSizePandasTimeDelta) # a = (f'while end: startDateTimeUTCNaive {startDateTimeUTCNaive}, now {pd.to_datetime(pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)}, condition {conditionOnStartDateTime}') # _logger.info(a) # print(a) pass tEnd1 = pd.datetime.now() tDelta1 = tEnd1 - tStart1 a = ( f'finished to get historical data: {qc.symbol}, {qc.currency}; elapsedTime: {tDelta1}' ) _logger.info(a) # print(a) pass success = True ssn.close() # a = (f'finished function to get historical data') # _logger.info(a) # print(a) return success
def getDataFromDateRange(tableSchema: sqlalchemy.Table, mydb: database.tradingDB, dateRange: pd.DatetimeIndex) -> pd.DataFrame: tt1 = time.time() eng = mydb.DBEngine eng.dispose() ssn = mydb.Session() results = [] for _DT in dateRange: __DT = _DT.to_pydatetime() _close = None _low = None _high = None _open = None tm = None qsf = (ssn.query(tableSchema.c.close, tableSchema.c.low, tableSchema.c.high, tableSchema.c.high, tableSchema.c.datetime).order_by( tableSchema.c.datetime.desc()).filter( tableSchema.c.datetime <= __DT)).first() if qsf is not None: _close, _low, _high, _open, tm = qsf else: qsf = (ssn.query(tableSchema.c.close, tableSchema.c.low, tableSchema.c.high, tableSchema.c.high, tableSchema.c.datetime).order_by( tableSchema.c.datetime).filter( tableSchema.c.datetime > __DT)).first() if qsf is not None: _close, _low, _high, _open, tm = qsf pass pass tDiff = None try: tDiff = tm - __DT except: pass ordrdDct = OrderedDict( (('target', __DT), ('achieved', tm), ('mismatchInMinutes', tDiff / pd.Timedelta(1, 'm')), ('close', _close), ('low', _low), ('high', _high), ('open', _open))) results.append(ordrdDct) pass df = pd.DataFrame(results) df.sort_index(inplace=True) tt2 = time.time() ttdiff = tt2 - tt1 nRowsTotal = nRowsTotal = ssn.query(tableSchema).count() ssn.close() print( f'Getting data from table: {tableSchema}: rowsTotal: {nRowsTotal}; {ttdiff}' ) return (df)