Example #1
def getInfoAboutTables(mydb: database.tradingDB = None):
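    """collect summary information (row count, last datetime on disk, largest gap) for each market data table"""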

    ssn = mydb.Session()

    resultList = []

    for idx in mydb.MarketDataInfoTableDataFrame.index:
        tableORM = mydb.MarketDataInfoTableDataFrame.at[idx, 'tableORM']
        tableName = tableORM.__tablename__
        lastDateTimeOnDisk = ssn.query(sqlalchemy.func.max(
            tableORM.datetime)).scalar()
        largestDiffDateTimeInMinutes = ssn.query(
            sqlalchemy.func.max(tableORM.diffToNextRowInMinutes)).scalar()
        # largestDiffDateTimeRow = ssn.query(tableORM).order_by(tableORM.diffToNextRowInMinutes.desc()).first()
        dateTimeOflargestDiffDateTimeInMinutesRecord = ssn.query(
            tableORM.datetime).order_by(tableORM.datetime).filter(
                tableORM.diffToNextRowInMinutes ==
                largestDiffDateTimeInMinutes).first()
        if dateTimeOflargestDiffDateTimeInMinutesRecord is not None:
            dateTimeOflargestDiffDateTimeInMinutes = dateTimeOflargestDiffDateTimeInMinutesRecord[
                0]
            pass
        else:
            dateTimeOflargestDiffDateTimeInMinutes = pd.NaT
            pass

        nRows = ssn.query(tableORM).count()
        ordrdDct = OrderedDict((
            ('tableName', tableName),
            ('nRows', nRows),
            ('lastDateTimeOnDisk', lastDateTimeOnDisk),
            ('largestDiffDateTimeInMinutes', largestDiffDateTimeInMinutes),
            ('dateTimeOflargestDiffDateTimeInMinutes',
             dateTimeOflargestDiffDateTimeInMinutes),
        ))
        resultList.append(ordrdDct)
        pass

    # t0 = time.time()
    # for idx in mydb.MarketDataInfoTableDataFrame.index:
    #     tableORM = mydb.MarketDataInfoTableDataFrame.at[idx, 'tableORM']
    #     tableName = tableORM.__tablename__
    #
    #     lastDateTimeOnDisk = ssn.query(sqlalchemy.func.max(tableORM.datetime)).scalar()
    #     # largestDiffDateTime = ssn.query(sqlalchemy.func.max(tableORM.diffToNextRowInMinutes)).scalar()
    #     # nRows = ssn.query(tableORM).count()
    #
    #     t1 = time.time()
    #     print(idx,t1-t0,tableName, lastDateTimeOnDisk)
    #     pass

    ssn.commit()
    ssn.close()

    df = pd.DataFrame(resultList)

    return df
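
A minimal usage sketch for the function above. The construction of the tradingDB instance is an assumption, since the database module itself is not shown in these examples:

# hypothetical usage; the tradingDB constructor arguments are assumed
mydb = database.tradingDB()
infoDF = getInfoAboutTables(mydb=mydb)
print(infoDF[['tableName', 'nRows', 'largestDiffDateTimeInMinutes']])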
Example #2
def getCompleteDFFromDateRangeAndTableName(tableName: str = None,
                                           mydb: database.tradingDB = None,
                                           dateRange: pd.DatetimeIndex = None):
    """retrieve training data for one one table"""

    tableORM = mydb.getTableORMByTablename(tableName)

    df = getDataFromDateRange(tableSchema=tableORM.__table__,
                              mydb=mydb,
                              dateRange=dateRange)

    dfNew = processRawDataFrameAsResultFromGetDataFromRangeToTrainginData(
        tableName=tableName, df=df, referenceDateTime=dateRange[-1])

    return dfNew
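
A usage sketch, assuming mydb is a connected tradingDB instance; the table name is a made-up placeholder:

import pandas as pd

# one target row per minute of a trading session; 'MarketData_ES' is hypothetical
dateRange = pd.date_range(start='2019-01-02 09:30',
                          end='2019-01-02 16:00',
                          freq='1T')
dfTraining = getCompleteDFFromDateRangeAndTableName(tableName='MarketData_ES',
                                                    mydb=mydb,
                                                    dateRange=dateRange)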
Example #3
def persistMarketDataBarsOnDisk(bars,
                                mydb: database.tradingDB,
                                tableSchema,
                                doCorrection=True):
    """write the bars to disk.
    update the data.
    also calculate time differences between successive rows"""
    if bars is not None and len(bars) > 0:
        tStart = pd.datetime.now()
        # convert bars to dataframe
        df = util.df(bars)
        # rename columns (the 'date' column defined in the bars is the 'datetime' column in the tables)
        df.rename(columns={'date': 'datetime'}, inplace=True)
        # calculate the time difference between a row and the next row
        df.loc[:, 'diffToNextRowInMinutes'] = df.datetime.diff().shift(
            -1) / pd.Timedelta(1, 'm')
        # the last row has no successor; its difference is defined as zero
        df.iloc[-1, df.columns.get_loc('diffToNextRowInMinutes')] = 0

        # upsert the dataframe
        if tableSchema is not None:
            mydb.upsertDataFrame(df, tableSchema)

            # repair the datetimes on disk.
            # the data just fetched can contain fewer or more rows than the data
            # already on disk (fetched earlier).
            # therefore, the diffDateTimes need to be corrected for all entries
            # between the first and the last entry

            firstDateTime = df.iloc[
                0, df.columns.get_loc('datetime')].tz_localize(None)
            lastDateTime = df.iloc[
                -1, df.columns.get_loc('datetime')].tz_localize(None)

            # find previous DateTime
            ssn = mydb.Session()
            previousDateTime = ssn.query(func.max(
                tableSchema.c.datetime)).filter(
                    tableSchema.c.datetime < firstDateTime).scalar()
            ssn.close()
            # this is None if no such value exists
            if previousDateTime is None:
                previousDateTime = firstDateTime

            if doCorrection:
                df = mydb.correctDiffDateTimesForMarketDataTable(
                    tableName=tableSchema.name,
                    startDateTime=previousDateTime,
                    endDateTime=lastDateTime,
                    doCorrection=doCorrection)
                pass

            pass

        tEnd = pd.datetime.now()
        a = (
            f'finished treating data. Table: {tableSchema.name}; nRows: {len(bars)}; elapsed time: {tEnd-tStart}'
        )
        _logger.info(a)
        # print(a)
        pass
    pass
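
The gap bookkeeping at the heart of this function can be shown in isolation. A self-contained sketch of the same shift/divide idiom on a toy frame:

import pandas as pd

df = pd.DataFrame({'datetime': pd.to_datetime(['2019-01-02 09:30',
                                               '2019-01-02 09:31',
                                               '2019-01-02 09:35'])})
# difference between a row and the next row, expressed in minutes
df['diffToNextRowInMinutes'] = df.datetime.diff().shift(-1) / pd.Timedelta(1, 'm')
# the last row has no successor; its difference is defined as zero
df.iloc[-1, df.columns.get_loc('diffToNextRowInMinutes')] = 0
print(df)  # 1.0, 4.0, 0.0; any value above the bar size marks a gap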
Example #4
async def asyncioJobGetHistoricalData(ib,
                                      mydb: database.tradingDB,
                                      qcs,
                                      additionalArgs={}):
    """get all available historical data for the contracts specified in qcs"""
    success = False
    if not ib.isConnected():
        return success

    # create a session for this task
    ssn = mydb.Session()

    # the table with the information about the position in the historical data request task
    tablePos = mydb.MarketDataHistoricalRequestsPositionTable.__table__

    # get the info about where we are in the historical data request task
    [tableNameOnDisk, startDateTimeOnDiskUTC
     ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos)

    # find the index for the for-loop below
    tableNames = [
        mydb.getValueFromMarketDataInfoTable(whereColumn='conId',
                                             whereValue=qc.conId,
                                             getColumn='tableName')
        for qc in qcs
    ]
    indexToStart = 0
    try:
        indexToStart = tableNames.index(tableNameOnDisk)
    except ValueError:
        # tableNameOnDisk is not among the tables for these qcs; start at the beginning
        pass

    # if we are finished with all data retrieval, remove table content such
    # that we can start over
    if indexToStart == len(qcs) - 1:
        if startDateTimeOnDiskUTC is not None:
            if startDateTimeOnDiskUTC >= pd.to_datetime(
                    pd.datetime.utcnow()).tz_localize('UTC'):
                delStmt = tablePos.delete()
                results = ssn.bind.execute(delStmt)
                ssn.commit()
                pass
            pass
        pass

    # retrieve historical data, starting at the position where we were last time this program ran
    for qc in qcs[indexToStart:]:
        # this can be a long task if it is not interrupted.

        # we record the starting time
        tStart1 = pd.datetime.now()

        # the conId for this contract
        conId = qc.conId

        # the table into which we want to insert
        tableORM = utils.getValueFromDataFrame(
            mydb.MarketDataInfoTableDataFrame,
            whereColumn='conId',
            whereValue=conId,
            getColumn='tableORM')

        # get the earliest dateTime for this conId defined by IB
        earliestDateTimeUTCAccordingToIB = getEarliestDateTimeUTCFromDataFrame(
            mydb, conId)
        # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data
        if pd.isnull(earliestDateTimeUTCAccordingToIB):
            a = (
                f'attempting to get historical data: {qc.symbol}, {qc.currency}; not performed because earliestDateTime is not given by IB; time: {tStart1}'
            )
        else:
            a = (
                f'attempting to get historical data: {qc.symbol}, {qc.currency}; is going to be performed because earliestDateTime is given by IB; time: {tStart1}'
            )
            pass

        _logger.info(a)

        # we did not break out of the for loop in the rows above because we wanted to log some items.
        # we now skip to the next contract if no earliestDateTime is found
        if pd.isnull(earliestDateTimeUTCAccordingToIB):
            # it makes no sense to continue if IB has no earliestDateTime for this conId
            # we therefore skip all code below and continue the for loop, essentially meaning that the next conId
            # will be treated
            continue
            pass

        # local timezone
        tzlocal = dateutil.tz.tzlocal()

        # override the earliestDateTime if an argument is given
        earliestDateTimeUTC = additionalArgs.get(
            'earliestDateTimeUTC', earliestDateTimeUTCAccordingToIB)

        startDateTimeUTC = earliestDateTimeUTC
        # if there is information in the position table, take this information
        if startDateTimeOnDiskUTC is not None:
            # only take the information if it concerns the table that we are currently treating
            if tableNameOnDisk == tableORM.__tablename__:
                startDateTimeUTC = startDateTimeOnDiskUTC
                pass
            pass

        # duration time delta
        durationPandasTimeDeltaDefault = pd.Timedelta(weeks=4)
        # bar size delta
        barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1)

        # override durations and bar size settings if arguments are given
        durationPandasTimeDelta = additionalArgs.get(
            'durationPandasTimeDelta', durationPandasTimeDeltaDefault)
        barSizePandasTimeDelta = additionalArgs.get(
            'barSizePandasTimeDelta', barSizePandasTimeDeltaDefault)
        timeOutTimeHistoricalBars = additionalArgs.get(
            'timeOutTimeHistoricalBars', 600)
        jitterSpanDivider = additionalArgs.get('jitterSpanDivider', 50)

        # create the timeDelta objects that generate the IB strings
        dTDRegular = utils.CTimeDelta(durationPandasTimeDelta)
        dTD = dTDRegular
        bTD = utils.CTimeDelta(barSizePandasTimeDelta)
        diffTimeInMinutes = int(bTD._timeDelta / pd.Timedelta(minutes=1))

        while (startDateTimeUTC is not None) and (
                startDateTimeUTC <= pd.to_datetime(
                    pd.datetime.utcnow()).tz_localize('UTC').ceil('1 Min')):

            a = (f'startDateTime original {startDateTimeUTC}')
            _logger.info(a)

            # calculate the regular endDateTime
            endDateTimeUTC = startDateTimeUTC + dTD._timeDelta
            endDateTimeUTCRegular = endDateTimeUTC
            a = (f'calculated endDateTime {endDateTimeUTC}')
            _logger.info(a)

            # initialize the durationTimeDelta Object
            dTD = dTDRegular

            if 0:
                ## skipping data that already exists
                # Idea: if the startDateTime is on disk, then
                #          advance startDateTime to the beginning of the next gap (if a next gap is found)
                newStartDateTimeUTC = getNewStartDateTime(
                    ssn=ssn,
                    tableORM=tableORM,
                    oldStartDateTimeUTC=startDateTimeUTC,
                    diffTimeInMinutes=diffTimeInMinutes)
                if newStartDateTimeUTC is not None:
                    a = (
                        f'advancing to new StartDateTime {newStartDateTimeUTC}'
                    )
                    _logger.info(a)
                    startDateTimeUTC = newStartDateTimeUTC
                    # continue the while loop
                    continue
                    pass

                # the recalculated startDateTime above has placed us in a gap by design
                # test that this is correct
                startDateTimeIsOnDisk = bool(
                    ssn.query(tableORM).filter(
                        tableORM.datetime == startDateTimeUTC).count() == 1)
                assert (not startDateTimeIsOnDisk)

                # now find the end of the gap
                rowWhereGapStops = ssn.query(tableORM).filter(
                    tableORM.diffToNextRowInMinutes <= diffTimeInMinutes
                ).filter(tableORM.datetime >= startDateTimeUTC).order_by(
                    tableORM.datetime).first()
                if rowWhereGapStops is not None:
                    # we found a datetime where the gap stops
                    a = (
                        f'found a row where the gap stops. datetime: {rowWhereGapStops.datetime}'
                    )
                    _logger.info(a)
                    pass
                else:
                    # we found no datetime where the gap stops
                    a = (f'found no row where the gap stops.')
                    _logger.info(a)
                    pass

                if rowWhereGapStops is not None:
                    # because we found a row where the gap stops, we will only request data until that timepoint (if the resulting duration is smaller than the current regular duration)
                    endDateTimeWhereGapStopsUTC = pd.to_datetime(
                        rowWhereGapStops.datetime).tz_localize('UTC')
                    a = (
                        f'endDatetime calculated using the end of the gap: {endDateTimeWhereGapStopsUTC}'
                    )
                    _logger.info(a)
                    endDateTimeUTC = min(endDateTimeUTCRegular,
                                         endDateTimeWhereGapStopsUTC)
                    a = (
                        f'now setting a new endDateTime that is the minimum of the endDateTime where the gap stops and the regular endDateTime: {endDateTimeUTC}'
                    )
                    _logger.info(a)
                    # because we have redefined the endDateTime, we have to generate a revised TimeDelta object
                    pandasTimeDelta = pd.Timedelta(
                        seconds=(endDateTimeUTC -
                                 startDateTimeUTC).total_seconds())
                    dTD = utils.CTimeDelta(pandasTimeDelta)
                    a = (
                        f'generated a new TimeDelta object. IBDurationString :{dTD.IB_Duration_String}'
                    )
                    _logger.info(a)

                    pass
                else:
                    # we found no row where the gap stops
                    a = (
                        f'no new endDatetime calculated using the end of the gap because no end of gap was found'
                    )
                    _logger.info(a)
                    pass

            # add some jitter to the startDate to make sure we are not always requesting the same period
            durationTimeInMinutes = int(
                (endDateTimeUTC - startDateTimeUTC) / pd.Timedelta(1, 'm'))
            jitterSpan = durationTimeInMinutes / jitterSpanDivider
            jitterInMinutes = randint(0, int(jitterSpan))
            jitterTimeDelta = pd.Timedelta(jitterInMinutes, 'm')
            startDateTimeUTC = startDateTimeUTC - jitterTimeDelta
            # because we now have a startDateTime that is prior to the original one, we are extending the duration for which we want to fetch data
            # such that the original endDateTime is met
            pandasTimeDelta = dTD._timeDelta
            pandasTimeDelta = pandasTimeDelta + jitterTimeDelta
            dTD = utils.CTimeDelta(pandasTimeDelta)

            # specify the endDateTime
            endDateTimeUTC = startDateTimeUTC + dTD._timeDelta

            # log info about chunk getting
            tStart2 = pd.datetime.now()
            a = (
                f'attempting to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC}; endDT: {endDateTimeUTC}; durationString: {dTD.IB_Duration_String}; time: {tStart2}'
            )
            _logger.info(a)

            # get the historical bars
            endDateTimeLocal = endDateTimeUTC.tz_convert(tzlocal)
            endDateTimeLocalNaive = endDateTimeLocal.tz_localize(None)

            bars = None
            timeOutOccured = None
            [bars, timeOutOccured
             ] = getHistoricalDataBars(ib,
                                       qc,
                                       endDateTime=endDateTimeLocalNaive,
                                       durationStr=dTD.IB_Duration_String,
                                       barSizeSetting=bTD.IB_Bar_Size_String,
                                       timeOutTime=timeOutTimeHistoricalBars)

            if timeOutOccured is not None and timeOutOccured:
                a = (
                    f'Timeout while requesting historical bars for contract {qc}'
                )
                _logger.warning(a)
                pass

            # persist the result on disk
            # upsert the data
            persistMarketDataBarsOnDisk(bars,
                                        mydb,
                                        tableORM.__table__,
                                        doCorrection=True)

            # calculate the number of rows that have been retrieved

            nRows = 0
            if bars is not None:
                nRows = len(bars)
                pass

            # calculate the total number of rows on disk
            nRowsTotal = ssn.query(tableORM).count()

            # write the information about where we are to disk
            endDateTimeUTCNaiveAsDateTime = endDateTimeUTC.tz_localize(
                None).to_pydatetime()
            # remove the old row
            delStmt = tablePos.delete()
            results = ssn.bind.execute(delStmt)
            ssn.commit()
            # add this row
            insStmt = tablePos.insert().values(
                tableName=tableORM.__tablename__,
                endDateTime=endDateTimeUTCNaiveAsDateTime,
                durationStringOnLastUpdate=dTD.IB_Duration_String,
                numberOfTicksRetrieved=nRows,
                lastUpdateDateTimeLocal=pd.datetime.now(),
            )
            results = ssn.bind.execute(insStmt)
            ssn.commit()

            # log info about the end of bar data retrieval
            tEnd2 = pd.datetime.now()
            tDelta2 = tEnd2 - tStart2
            a = (
                f'finished to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC}; endDT: {endDateTimeUTC}; durationString: {dTD.IB_Duration_String}; time: {tEnd2}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}'
            )
            _logger.info(a)

            # update the startDateTime for the next iteration of the loop
            startDateTimeUTC = endDateTimeUTC

            # as this is a potentially very long-running loop, make a little pause to
            # allow other asyncio processes (for example, the processes that get recent market data)
            # to also get a piece of the processor
            await asyncio.sleep(0.01)

            pass

        tEnd1 = pd.datetime.now()
        tDelta1 = tEnd1 - tStart1
        a = (
            f'finished to get historical data: {qc.symbol}, {qc.currency}; time: {tEnd1}; elapsedTime: {tDelta1}'
        )
        _logger.info(a)
        pass
    success = True
    ssn.close()
    return success
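
The jitter step above shifts the start of each chunk backwards by a random amount and lengthens the duration by the same amount, so the end of the chunk is preserved. A self-contained sketch with the default jitterSpanDivider of 50:

import pandas as pd
from random import randint

startDateTimeUTC = pd.Timestamp('2019-01-02 00:00', tz='UTC')
durationTimeDelta = pd.Timedelta(weeks=4)
endDateTimeUTC = startDateTimeUTC + durationTimeDelta

durationTimeInMinutes = int(durationTimeDelta / pd.Timedelta(1, 'm'))
jitterInMinutes = randint(0, int(durationTimeInMinutes / 50))
jitterTimeDelta = pd.Timedelta(jitterInMinutes, 'm')

# move the start back and extend the duration so the original end is met
startDateTimeUTC = startDateTimeUTC - jitterTimeDelta
durationTimeDelta = durationTimeDelta + jitterTimeDelta
assert startDateTimeUTC + durationTimeDelta == endDateTimeUTC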
Example #5
async def asyncioJobGetRecentData(ib: IB,
                                  mydb: database.tradingDB,
                                  qc,
                                  additionalArgs={}):
    """get recent historical data for the contract specified in qc"""
    success = False
    a = (
        f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}'
    )
    _logger.info(a)

    if not (isinstance(ib, IB) and ib.isConnected()):
        return success

    conId = qc.conId

    tableORM = utils.getValueFromDataFrame(
        mydb.MarketDataInfoTableDataFrame,
        whereColumn='conId',
        whereValue=conId,
        getColumn='tableORM')

    lastDateTimeOnDiskUTC = getLastDateTimeOnDiskUTC(mydb, tableORM.__table__)
    earliestDateTimeUTCAccordingToIB = getEarliestDateTimeUTCFromDataFrame(
        mydb, conId)

    # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data
    if pd.isnull(earliestDateTimeUTCAccordingToIB):
        success = True
        return success

    # duration time delta
    durationMaximumPandasTimeDeltaDefault = pd.Timedelta(weeks=4)
    # bar size delta
    barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1)

    # override defaults if values have been passed in additionalArgs
    durationMaximumPandasTimeDelta = additionalArgs.get(
        'durationMaximumPandasTimeDelta',
        durationMaximumPandasTimeDeltaDefault)
    barSizePandasTimeDelta = additionalArgs.get('barSizePandasTimeDelta',
                                                barSizePandasTimeDeltaDefault)
    timeOutTimeHistoricalBars = additionalArgs.get('timeOutTimeHistoricalBars',
                                                   600)

    # we wish to get the most recent historical data
    # the end time is therefore utcnow
    nowUTC = pd.to_datetime(
        pd.datetime.utcnow()).tz_localize('UTC').floor('1 Min')
    endDateTimeUTC = nowUTC

    # the earliest possible start datetime is the end time minus the maximum allowed time delta
    earliestStartDateTimeUTC = endDateTimeUTC - durationMaximumPandasTimeDelta

    # by default, the startDateTime is this earliest possible datetime
    startDateTimeUTC = earliestStartDateTimeUTC

    # if we found a lastDateTime on disk, the startDateTime is modified
    if not pd.isnull(lastDateTimeOnDiskUTC):
        startDateTimeUTC = max(lastDateTimeOnDiskUTC,
                               earliestStartDateTimeUTC)
        pass

    # for some reason, it is possible that the time on disk is in the future.
    # we therefore determine the startDateTime as the minimum between the startDateTime as calculated before
    # and the endDateTime
    startDateTimeUTC = min(startDateTimeUTC, endDateTimeUTC)

    # we can now calculate the time delta that we actually want to fetch
    durationPandasTimeDelta = endDateTimeUTC - startDateTimeUTC

    # define the objects used to calculate the IB strings for time deltas and bar size settings
    dTD = utils.CTimeDelta(durationPandasTimeDelta)
    bTD = utils.CTimeDelta(barSizePandasTimeDelta)

    # log info about the start of bar data retrieval
    tStart2 = pd.datetime.now()
    if not pd.isnull(earliestDateTimeUTCAccordingToIB):
        a = (
            f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC} (durationString: {dTD.IB_Duration_String}); time: {tStart2}'
        )
    else:
        a = (
            f'attempting to get recent historical data chunk: {qc.symbol}, {qc.currency}; not performed because earliestDateTime is not given by IB; time: {tStart2}'
        )
        pass
    _logger.info(a)

    # get the historical data bars
    # only get bars if the last value on disk is not utcnow
    bars = None
    timeOutOccured = None
    if nowUTC != lastDateTimeOnDiskUTC:
        [bars, timeOutOccured
         ] = getHistoricalDataBars(ib,
                                   qc,
                                   endDateTime='',
                                   durationStr=dTD.IB_Duration_String,
                                   barSizeSetting=bTD.IB_Bar_Size_String,
                                   timeOutTime=timeOutTimeHistoricalBars)

    if timeOutOccured is not None and timeOutOccured:
        a = (f'Timeout while requesting historical bars for contract {qc}')
        _logger.warning(a)
        pass

    # calculate the number of rows that have been retrieved
    nRows = 0
    if bars is not None:
        nRows = len(bars)
        pass

    # persist result on disk
    # upsert the data
    persistMarketDataBarsOnDisk(bars,
                                mydb,
                                tableORM.__table__,
                                doCorrection=True)

    # calculate the total number of rows on disk
    ssn = mydb.Session()
    nRowsTotal = ssn.query(tableORM.__table__).count()
    ssn.close()

    # log info about the end of bar data retrieval
    tEnd2 = pd.datetime.now()
    tDelta2 = tEnd2 - tStart2
    a = (
        f'finished to get recent historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTC} (durationString: {dTD.IB_Duration_String}); time: {tEnd2}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}'
    )
    _logger.info(a)
    success = True
    return success
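
A usage sketch for overriding the defaults through additionalArgs, with the keys exactly as read by the function above; ib, mydb and qc (a connected IB client, a tradingDB instance and a qualified contract) are assumed to exist:

import pandas as pd

additionalArgs = {
    'durationMaximumPandasTimeDelta': pd.Timedelta(days=2),
    'barSizePandasTimeDelta': pd.Timedelta(minutes=1),
    'timeOutTimeHistoricalBars': 120,
}

async def fetchRecent():
    # must run on the event loop that drives the IB connection
    return await asyncioJobGetRecentData(ib=ib, mydb=mydb, qc=qc,
                                         additionalArgs=additionalArgs)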
Example #6
async def asyncioJobGetHistoricalData(ib,
                                      mydb: database.tradingDB,
                                      qcs,
                                      additionalArgs={}):
    """get all available historical data for the contracts specified in qcs"""
    success = False
    if not ib.isConnected():
        return success

    # a = (f'starting function to get historical data: {[qc.localSymbol for qc in qcs]}')
    # _logger.info(a)
    # print(a)

    # create a session for this task
    ssn = mydb.Session()

    # the table with the information about the position in the historical data request task
    tablePos = mydb.MarketDataHistoricalRequestsPositionTable.__table__

    # get the info about where we are in the historical data request task
    [tableNameOnDisk, startDateTimeOnDiskUTCNaiveFloor
     ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos)

    # find the index for the for-loop below
    tableNames = [
        mydb.getValueFromMarketDataInfoTable(whereColumn='conId',
                                             whereValue=qc.conId,
                                             getColumn='tableName')
        for qc in qcs
    ]
    indexToStart = 0
    try:
        indexToStart = tableNames.index(tableNameOnDisk)
    except ValueError:
        # tableNameOnDisk is not among the tables for these qcs; start at the beginning
        pass

    # get the additional args
    # duration time delta
    durationPandasTimeDeltaDefault = pd.Timedelta(weeks=4)
    # bar size delta
    barSizePandasTimeDeltaDefault = pd.Timedelta(minutes=1)

    # override durations and bar size settings if arguments are given
    durationPandasTimeDelta = additionalArgs.get(
        'durationPandasTimeDelta', durationPandasTimeDeltaDefault)
    barSizePandasTimeDelta = additionalArgs.get('barSizePandasTimeDelta',
                                                barSizePandasTimeDeltaDefault)
    timeOutTimeHistoricalBars = additionalArgs.get('timeOutTimeHistoricalBars',
                                                   600)
    jitterSpanFraction = additionalArgs.get('jitterSpanFraction', 0.02)

    # if we are finished with all data retrieval, remove table content such
    # that we can start over
    startOver = False
    if indexToStart == len(qcs) - 1:
        if pd.isnull(tableNameOnDisk):
            # no content in the table. Start over
            startOver = True
            pass
        else:
            if pd.isnull(startDateTimeOnDiskUTCNaiveFloor):
                # there was a tableName but no time. This probably means we previously wrote a tableName for which historical data cannot be retrieved. Start over.
                startOver = True
                pass
            else:
                # there is a tableName and a dateTime. We have to check if we are finished with fetching all data for this table
                if startDateTimeOnDiskUTCNaiveFloor >= pd.to_datetime(
                        pd.datetime.utcnow()).floor(
                            barSizePandasTimeDelta) - barSizePandasTimeDelta:
                    # we are finished fetching data. Start over
                    startOver = True
                    pass
                pass
            pass
        pass

    if startOver:
        delStmt = tablePos.delete()
        results = ssn.bind.execute(delStmt)
        ssn.commit()
        indexToStart = 0
        pass

    # a = (f'index to start: {indexToStart}')
    # _logger.info(a)
    # print(a)

    # retrieve historical data, starting at the position where we were last time this program ran
    for qc in qcs[indexToStart:]:
        # refresh the info about where we are in the historical data request task
        [tableNameOnDisk, startDateTimeOnDiskUTCNaiveFloor
         ] = getCurrentInfoForHistoricalDataRequests(mydb, tablePos)

        # this can be a long task if it is not interrupted.

        # we record the starting time
        tStart1 = pd.datetime.now()

        # the conId for this contract
        conId = qc.conId

        # the table into which we want to insert
        tableORM = utils.getValueFromDataFrame(
            mydb.MarketDataInfoTableDataFrame,
            whereColumn='conId',
            whereValue=conId,
            getColumn='tableORM')

        # get the earliest dateTime for this conId defined by IB
        # get it directly from IB as this is a check whether we can ask for historical Data for this conId
        earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly = utils.conformDateTimeToPandasUTCNaive(
            utils.getEarliestDateTimeFromIBAsDateTime(ib=ib,
                                                      qualifiedContract=qc,
                                                      timeOutTime=10))
        # if there is no earliestDateTime according to IB, it makes no sense to try to retrieve data
        if pd.isnull(earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly):
            a = (
                f'attempting to get historical data: {qc.symbol}, {qc.currency}: not performed because earliestDateTime as queried directly from IB is NULL.'
            )
        else:
            a = (
                f'attempting to get historical data: {qc.symbol}, {qc.currency}; is going to be performed because earliestDateTime as queried directly from IB is: {earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly}'
            )
            pass

        _logger.info(a)

        # we did not break out of the for loop in the rows above because we wanted to log some items.
        # we now skip to the next contract if no earliestDateTime is found
        if pd.isnull(earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly):
            # it makes no sense to continue if IB has no earliestDateTime for this conId
            # we therefore skip all code below and continue the for loop, essentially meaning that the next conId
            # will be treated
            # we must update the position table, of course
            # remove the old row by removing the table content

            delStmt = tablePos.delete()
            results = ssn.bind.execute(delStmt)
            ssn.commit()

            insStmt = tablePos.insert().values(
                tableName=tableORM.__tablename__,
                endDateTime=None,
                durationStringOnLastUpdate=None,
                numberOfTicksRetrieved=None,
                lastUpdateDateTimeLocal=pd.datetime.now(),
            )
            results = ssn.bind.execute(insStmt)
            ssn.commit()

            continue
            pass

        # local timezone
        tzlocal = dateutil.tz.tzlocal()

        # override the earliestDateTime if an argument is given
        earliestDateTimeUTCNaiveAccordingToAdditionalArgs = additionalArgs.get(
            'earliestDateTimeUTCNaive', None)
        if (not pd.isnull(earliestDateTimeUTCNaiveAccordingToAdditionalArgs)):
            earliestDateTimeUTCNaive = earliestDateTimeUTCNaiveAccordingToAdditionalArgs
        else:
            earliestDateTimeUTCNaive = earliestDateTimeUTCNaiveAccordingToIBQueriedDirectly
            pass

        startDateTimeUTCNaive = earliestDateTimeUTCNaive
        # if there is information in the position table, take this information

        if not pd.isnull(startDateTimeOnDiskUTCNaiveFloor):
            # only take the information if it concerns the table that we are currently treating
            if tableNameOnDisk == tableORM.__tablename__:
                if startDateTimeOnDiskUTCNaiveFloor > startDateTimeUTCNaive:
                    # only correct the information from disk if the disk value is larger than the current value
                    startDateTimeUTCNaive = startDateTimeOnDiskUTCNaiveFloor
                    pass
                pass
            pass

        # create the timeDelta objects that generate the IB strings
        dTDRegular = utils.CTimeDelta(durationPandasTimeDelta)
        dTD = dTDRegular
        bTD = utils.CTimeDelta(barSizePandasTimeDelta)
        diffTimeInMinutes = int(
            bTD._timeDelta / pd.Timedelta(minutes=1)
        )  # necessary in case we use the algorithm to search for the newest datetime on disk

        # now make sure that startDateTime is not later than utcnow minus the duration
        # this makes sure that we actually retrieve data
        # this is safe: the per-contract while loop only starts below, so we do not interfere with its
        # ending conditions, and the loop over all contracts was initialized above, so its conditions are not impacted
        if startDateTimeUTCNaive >= pd.to_datetime(pd.datetime.utcnow()).floor(
                barSizePandasTimeDelta) - durationPandasTimeDelta:
            startDateTimeUTCNaive = pd.to_datetime(pd.datetime.utcnow()).floor(
                barSizePandasTimeDelta) - durationPandasTimeDelta
            pass

        conditionOnStartDateTime = startDateTimeUTCNaive <= pd.to_datetime(
            pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)
        while (not pd.isnull(startDateTimeUTCNaive)
               ) and conditionOnStartDateTime:

            # a = (f'while start: startDateTimeUTCNaive {startDateTimeUTCNaive}, now {pd.to_datetime(pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)}, condition {conditionOnStartDateTime}')
            # _logger.info(a)

            a = (f'startDateTime Original {startDateTimeUTCNaive}')
            _logger.info(a)

            # calculate the regular endDateTime
            endDateTimeUTCNaive = startDateTimeUTCNaive + dTD._timeDelta
            endDateTimeUTCNaiveRegular = endDateTimeUTCNaive  # necessary in case we use the algorithm to search for the newest datetime on disk
            a = (f'calculated endDateTime {endDateTimeUTCNaive}')
            _logger.info(a)

            # initialize the durationTimeDelta Object
            dTD = dTDRegular

            if 0:  # search for the newest datetime on disk and only request data for periods that start from that point
                ## skipping data that already exists
                # Idea: if the startDateTime is on disk, then
                #          advance startDateTime to the beginning of the next gap (if a next gap is found)
                newStartDateTimeUTCNaive = getNewStartDateTime(
                    ssn=ssn,
                    tableORM=tableORM,
                    oldStartDateTimeUTCNaive=startDateTimeUTCNaive,
                    diffTimeInMinutes=diffTimeInMinutes)
                if newStartDateTimeUTCNaive is not None:
                    a = (
                        f'advancing to new StartDateTime {newStartDateTimeUTCNaive}'
                    )
                    _logger.info(a)
                    startDateTimeUTCNaive = newStartDateTimeUTCNaive
                    # continue the while loop
                    continue
                    pass

                # the recalculated startDateTime above has placed us in a gap by design
                # test that this is correct
                startDateTimeIsOnDisk = bool(
                    ssn.query(tableORM).filter(
                        tableORM.datetime == startDateTimeUTCNaive).count() ==
                    1)
                assert (not startDateTimeIsOnDisk)

                # now find the end of the gap
                rowWhereGapStops = ssn.query(tableORM).filter(
                    tableORM.diffToNextRowInMinutes <= diffTimeInMinutes
                ).filter(tableORM.datetime >= startDateTimeUTCNaive).order_by(
                    tableORM.datetime).first()
                if rowWhereGapStops is not None:
                    # we found a datetime where the gap stops
                    a = (
                        f'found a row where the gap stops. datetime: {rowWhereGapStops.datetime}'
                    )
                    _logger.info(a)
                    pass
                else:
                    # we found no datetime where the gap stops
                    a = (f'found no row where the gap stops.')
                    _logger.info(a)
                    pass

                if rowWhereGapStops is not None:
                    # because we found a row where the gap stops, we will only request data until that timepoint (if the resulting duration is smaller than the current regular duration)
                    endDateTimeWhereGapStopsUTCNaive = pd.to_datetime(
                        rowWhereGapStops.datetime)
                    a = (
                        f'endDatetime calculated using the end of the gap: {endDateTimeWhereGapStopsUTCNaive}'
                    )
                    _logger.info(a)
                    endDateTimeUTCNaive = min(
                        endDateTimeUTCNaiveRegular,
                        endDateTimeWhereGapStopsUTCNaive)
                    a = (
                        f'now setting a new endDateTime that is the minimum of the endDateTime where the gap stops and the regular endDateTime: {endDateTimeUTCNaive}'
                    )
                    _logger.info(a)
                    # because we have redefined the endDateTime, we have to generate a revised TimeDelta object
                    pandasTimeDelta = pd.Timedelta(
                        seconds=(endDateTimeUTCNaive -
                                 startDateTimeUTCNaive).total_seconds())
                    dTD = utils.CTimeDelta(pandasTimeDelta)
                    a = (
                        f'generated a new TimeDelta object. IBDurationString :{dTD.IB_Duration_String}'
                    )
                    _logger.info(a)

                    pass
                else:
                    # we found no row where the gap stops
                    a = (
                        f'no new endDatetime calculated using the end of the gap because no end of gap was found'
                    )
                    _logger.info(a)
                    pass

            # add some jitter to the startDate to make sure we are not always requesting the same period
            durationTimeInMinutes = int(
                (endDateTimeUTCNaive - startDateTimeUTCNaive) /
                pd.Timedelta(1, 'm'))
            jitterSpan = durationTimeInMinutes * jitterSpanFraction
            jitterInMinutes = randint(0, int(jitterSpan))
            jitterTimeDelta = pd.Timedelta(jitterInMinutes, 'm')

            dTD = utils.CTimeDelta(durationPandasTimeDelta - jitterTimeDelta)

            # specify the endDateTime
            endDateTimeUTCNaive = startDateTimeUTCNaive + dTD._timeDelta

            # log info about chunk getting
            tStart2 = pd.datetime.now()
            a = (
                f'attempting to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTCNaive}; endDT: {endDateTimeUTCNaive}; durationString: {dTD.IB_Duration_String}; timeout: {timeOutTimeHistoricalBars}'
            )
            _logger.info(a)
            # print(a)

            # get the historical bars
            endDateTimeLocalNaive = endDateTimeUTCNaive.tz_localize(
                'UTC').tz_convert(tzlocal).tz_localize(None)

            bars = None
            timeOutOccured = None
            [bars, timeOutOccured
             ] = getHistoricalDataBars(ib,
                                       qc,
                                       endDateTime=endDateTimeLocalNaive,
                                       durationStr=dTD.IB_Duration_String,
                                       barSizeSetting=bTD.IB_Bar_Size_String,
                                       timeOutTime=timeOutTimeHistoricalBars)

            if timeOutOccured is not None and timeOutOccured:
                a = (
                    f'Timeout while requesting historical bars for contract {qc}'
                )
                _logger.warning(a)
                pass

            # it could be that the bars contain the current datetime.
            # we do not want to use this value in the update as it is still subject to change.
            # we therefore check whether the datetime of the last bar falls into the current bar period
            # and remove the corresponding row if that is the case

            if bars is not None and len(bars) > 0:
                lastBarDateTime = utils.conformDateTimeToPandasUTCNaive(
                    pd.to_datetime(bars[-1].date))
                _nowUTCNaiveFloor = pd.to_datetime(
                    pd.datetime.utcnow()).floor(barSizePandasTimeDelta)
                if lastBarDateTime == _nowUTCNaiveFloor:
                    bars = bars[:-1]
                    pass
                pass

            # persist the result on disk
            # upsert the data
            persistMarketDataBarsOnDisk(bars,
                                        mydb,
                                        tableORM.__table__,
                                        doCorrection=True)

            # calculate the number of rows that have been retrieved

            nRows = 0
            if bars is not None:
                nRows = len(bars)
                pass

            # calculate the total number of rows on disk
            nRowsTotal = ssn.query(tableORM).count()

            # # write the information about where we are to disk
            # endDateTimeUTCNaiveAsDateTime = endDateTimeUTCNaive.to_pydatetime()

            # ib.sleep(2)
            # a = f'before removing: {ssn.query(tablePos).all()}'
            # print(a)
            # _logger.info(a)
            # # ib.sleep(2)

            # remove the old row by removing the table content
            delStmt = tablePos.delete()
            results = ssn.bind.execute(delStmt)
            ssn.commit()

            # a = f'after removing: {ssn.query(tablePos).all()}'
            # print(a)
            # _logger.info(a)
            # # ib.sleep(2)

            # add this row to update the position in the position table
            insStmt = tablePos.insert().values(
                tableName=tableORM.__tablename__,
                endDateTime=endDateTimeUTCNaive,
                durationStringOnLastUpdate=dTD.IB_Duration_String,
                numberOfTicksRetrieved=nRows,
                lastUpdateDateTimeLocal=pd.datetime.now(),
            )
            results = ssn.bind.execute(insStmt)
            ssn.commit()

            # a = f'after udpating: {ssn.query(tablePos).all()}'
            # print(a)
            # _logger.info(a)
            # # ib.sleep(2)

            # log info about the end of bar data retrieval
            tEnd2 = pd.datetime.now()
            tDelta2 = tEnd2 - tStart2
            a = (
                f'finished to get historical data chunk: {qc.symbol}, {qc.currency}; startDT: {startDateTimeUTCNaive}; endDT: {endDateTimeUTCNaive}; durationString: {dTD.IB_Duration_String}; elapsedTime: {tDelta2}; rows: {nRows}; rowsTotal: {nRowsTotal}'
            )
            _logger.info(a)

            # update the startDateTime for the next iteration of the loop
            startDateTimeUTCNaive = endDateTimeUTCNaive

            # as this is a potentially very long-running loop, make a little pause to
            # allow other asyncio processes (for example, the processes that get recent market data)
            # to also get a piece of the processor
            await asyncio.sleep(0.01)

            conditionOnStartDateTime = startDateTimeUTCNaive <= pd.to_datetime(
                pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)

            # a = (f'while end: startDateTimeUTCNaive {startDateTimeUTCNaive}, now {pd.to_datetime(pd.datetime.utcnow()).ceil(barSizePandasTimeDelta)}, condition {conditionOnStartDateTime}')
            # _logger.info(a)
            # print(a)

            pass

        tEnd1 = pd.datetime.now()
        tDelta1 = tEnd1 - tStart1
        a = (
            f'finished to get historical data: {qc.symbol}, {qc.currency}; elapsedTime: {tDelta1}'
        )
        _logger.info(a)
        # print(a)
        pass
    success = True
    ssn.close()
    # a = (f'finished function to get historical data')
    # _logger.info(a)
    # print(a)

    return success
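
The removal of the still-forming last bar can be shown in isolation. A sketch with plain timestamps standing in for IB bar objects:

import pandas as pd

barSizePandasTimeDelta = pd.Timedelta(minutes=1)
nowFloor = pd.Timestamp.utcnow().tz_localize(None).floor(barSizePandasTimeDelta)

# pretend the last fetched bar falls into the current, still-open bar period
barDateTimes = [nowFloor - pd.Timedelta(minutes=2),
                nowFloor - pd.Timedelta(minutes=1),
                nowFloor]
if barDateTimes and barDateTimes[-1] == nowFloor:
    barDateTimes = barDateTimes[:-1]  # drop the bar that is still subject to change
assert barDateTimes[-1] == nowFloor - pd.Timedelta(minutes=1)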
Example #7
def getDataFromDateRange(tableSchema: sqlalchemy.Table,
                         mydb: database.tradingDB,
                         dateRange: pd.DatetimeIndex) -> pd.DataFrame:

    tt1 = time.time()

    eng = mydb.DBEngine
    eng.dispose()

    ssn = mydb.Session()
    results = []

    for _DT in dateRange:
        __DT = _DT.to_pydatetime()
        _close = None
        _low = None
        _high = None
        _open = None

        tm = None
        qsf = (ssn.query(tableSchema.c.close, tableSchema.c.low,
                         tableSchema.c.high, tableSchema.c.open,
                         tableSchema.c.datetime).order_by(
                             tableSchema.c.datetime.desc()).filter(
                                 tableSchema.c.datetime <= __DT)).first()
        if qsf is not None:
            _close, _low, _high, _open, tm = qsf
        else:
            qsf = (ssn.query(tableSchema.c.close, tableSchema.c.low,
                             tableSchema.c.high, tableSchema.c.open,
                             tableSchema.c.datetime).order_by(
                                 tableSchema.c.datetime).filter(
                                     tableSchema.c.datetime > __DT)).first()
            if qsf is not None:
                _close, _low, _high, _open, tm = qsf
                pass
            pass
        tDiff = None
        try:
            tDiff = tm - __DT
        except TypeError:
            # tm is None if the table holds no row on either side of __DT
            pass
        mismatchInMinutes = (tDiff / pd.Timedelta(1, 'm')
                             if tDiff is not None else None)
        ordrdDct = OrderedDict(
            (('target', __DT), ('achieved', tm),
             ('mismatchInMinutes', mismatchInMinutes),
             ('close', _close), ('low', _low), ('high', _high),
             ('open', _open)))
        results.append(ordrdDct)
        pass

    df = pd.DataFrame(results)
    df.sort_index(inplace=True)

    tt2 = time.time()
    ttdiff = tt2 - tt1

    nRowsTotal = ssn.query(tableSchema).count()

    ssn.close()

    print(
        f'Getting data from table: {tableSchema}: rowsTotal: {nRowsTotal}; elapsedTimeSeconds: {ttdiff:.2f}'
    )

    return df
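
A usage sketch; the table name is a made-up placeholder and mydb is assumed to be a connected tradingDB:

import pandas as pd

dateRange = pd.date_range(start='2019-01-02', periods=24, freq='1H')
tableORM = mydb.getTableORMByTablename('MarketData_ES')  # hypothetical table name
df = getDataFromDateRange(tableSchema=tableORM.__table__,
                          mydb=mydb,
                          dateRange=dateRange)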