Esempio n. 1
0
 def istrading(self):
     """Return True if the market is trading at the clock's current sync time.

     The current date must be a full or partial trading day, and the
     current time must fall strictly between the first and last trading
     times of that day.
     """
     # TODO: dirty dirty
     if not (TradingCalendar.is_trading_day(self.date)
             or TradingCalendar.is_partial_trading_day(self.date)):
         return False
     # Renamed from `range`, which shadowed the builtin of the same name.
     trading_times = TradingCalendar.tradingtimes(self.date)
     return trading_times[0].time() < self.sync_datetime.time() < trading_times[-1].time()
Esempio n. 2
0
    def run(self):
        """Drive the day-by-day simulation loop.

        Walks the trading clock from the first date with enough setup data
        through the last date with available data.  Each day it builds a
        fresh Manager and strategy set, replays the day's times against
        the strategies, closes all positions on the final day, then rolls
        the clock forward.  Runs as the body of a thread (prints
        "THREAD DONE." on exit).
        """
        # Tell the data factory which symbols to pre-load before instancing.
        DataFactory.prejack_symbols = self.symbols
        clock, datafactory = TradingClock.getInstance(), DataFactory.getInstance()

        # Configure
        # TODO: TOML

        # Where do we have available data
        earliest_data, latest_data = datafactory.datesSpread(barsize=self.barsize)
        # Skip forward so strategies have `setup_days` of history to warm up on.
        start_date = TradingCalendar.add_trading_days(earliest_data, self.setup_days)
        end_date = latest_data  # TradingCalendar.add_trading_days(clock.date, 1)
        # clock.set_day(TradingCalendar.add_trading_days(latest_data, -12))

        # Get a groove on
        clock.set_day(start_date)
        while (clock.date <= end_date):
            self.logger.info(clock.date)

            # Daily setup: fresh manager and freshly-constructed strategies
            # (self.strategies holds (type, kwargs) pairs).
            m = Manager(self.account, self.simtracker)
            strategies = [stype(m, **strategy_kwargs) for stype, strategy_kwargs in self.strategies]

            # Guts of the simulation
            if self.rapid:
                # Rapid mode: only visit the configured times (localized to
                # the clock's timezone) instead of every trading minute.
                for simtime in [clock.mytz.localize(datetime.datetime.combine(clock.date, time)) for time in self.times]:
                    clock.sync_datetime = simtime
                    for strategy in strategies:
                        strategy.update()

            else:
                # Full mode: step through every trading time of the day but
                # only update strategies at the configured times.
                for simtime in TradingCalendar.tradingtimes(clock.date):
                    clock.sync_datetime = simtime

                    # On the hour
                    # if clock.sync_datetime.time().hour > 10 and clock.sync_datetime.time().minute == 0:

                    if clock.sync_datetime.time() in self.times:
                        # todo: need to really use the tws api before figuring this out..
                        for strategy in strategies:
                            strategy.update()

            # Flatten everything on the last simulated day.
            if clock.date == end_date:
                for strategy in strategies:
                    strategy.closeall()

            # Daily teardown
            m.stop()

            # NEXT!
            clock.roll_day()

        self.account.stop()

        print("THREAD DONE.")
Esempio n. 3
0
def scrape_new(ticker, period, start_date, end_date):
    """Scrapes new data to the current store.

    Downloads bars for ``ticker`` at ``period`` resolution between
    ``start_date`` and ``end_date`` and writes them to the CSV store.
    If a store already exists, ``start_date`` is advanced past the last
    recorded bar so only genuinely new data is fetched; when no trading
    days remain in the window the download is skipped entirely.

    Returns True on success (including "nothing to update"); False when
    the download/write step fails.
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()

    # File locations
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)
    record_exists = Path(csv_path).exists()

    # Attempt to find the date range
    record = None
    try:
        if record_exists:
            record = datafactory.loadSymbol(ticker)
            record_end_date = record.index[-1]
            daydelta = datetime.timedelta(days=1)
            # Only fetch data newer than the last stored bar.
            start_date = max(start_date, (record_end_date + daydelta).date())

            # Can potentially skip: no trading days left in the window.
            dates_between = pd.date_range(start_date,
                                          end_date - daydelta,
                                          freq='d')
            dates_between_workdays = pd.Series(dates_between).transform(
                lambda x: tcal.is_trading_day(x))
            if (dates_between_workdays.empty) or (
                    not dates_between_workdays.any()):
                logger.LogEvent("INFO",
                                f"No dates to update for {ticker} {period}")
                return True
    except Exception as e:
        # Best-effort: log and fall through to attempt a scrape anyway.
        logger.LogEvent(
            "ERROR",
            f"Error getting date ({ticker}, {period}): {e}, {type(e)}")

    # Attempt to scrape the data
    try:
        logger.LogEvent(
            "INFO",
            f"Collecting {ticker} {period} from {start_date} to {end_date}")
        dataframe_dl = getDataframe(ticker,
                                    start_date,
                                    end_date,
                                    period,
                                    instant=False)
        if not dataframe_dl.empty:
            if record is None:
                # Failed to load: keep a dated backup of the unreadable file
                # before overwriting it with the fresh download.
                if record_exists:
                    today = datetime.datetime.now()
                    copyfile(
                        csv_path,
                        f"{csv_path[:-4]} - Copy {today.month}-{today.day}{csv_path[-4:]}"
                    )

                dataframe_dl.to_csv(csv_path)
            else:
                # Existing store loaded fine: append the new rows only.
                dataframe_dl.to_csv(csv_path, mode='a', header=False)
        return True
    except Exception as e:
        logger.LogEvent(
            "ERROR", f"Error downloading ({ticker}, {period}): {e}, {type(e)}")
        # BUG FIX: previously fell through and implicitly returned None;
        # now reports failure explicitly.
        return False
Esempio n. 4
0
def scrape_repair(ticker, period, start_date):
    """Aim to fill missing gaps. First aims for days. Then times.

    Two repair passes over the stored data for ``ticker`` at ``period``:

    PT1 -- find whole trading days missing from the store, download them
    in merged ranges, splice them in, and re-save the CSV if any were
    recovered.
    PT2 -- (intraday periods only; currently just "5m") find individual
    missing bar times within each stored day, download and splice those
    rows in, then re-save.

    Returns ``(ok, outstanding_dates, outstanding_times)``; ``ok`` is
    False (with -1 counts) when either pass raised.
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()

    daydelta = datetime.timedelta(days=1)
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)

    # Load the dataframe, get list of dates
    dataframe_full = datafactory.loadSymbol(ticker, period)

    # Drop duplicated index entries (keeps the first occurrence).
    dataframe_full = dataframe_full[~dataframe_full.index.duplicated()]
    dataframe_dates = pd.Series(
        dataframe_full.index).transform(lambda x: x.date())
    dataframe_full_dates = sorted(
        [x for x in set(dataframe_dates) if x >= start_date])

    mytz = TradingClock.mytz

    # PT1: Are any dates missing
    try:
        prefix_dates = len(dataframe_full_dates)
        missing_dates = []
        for datei in range(1, len(dataframe_full_dates)):
            day1 = dataframe_full_dates[datei - 1]
            day2 = dataframe_full_dates[datei]
            # Trading days strictly between consecutive stored days.
            missing_dates = missing_dates + [
                x.date() for x in pd.date_range(
                    day1 + daydelta, day2 - daydelta, freq='d')
                if tcal.is_trading_day(x.date())
            ]

        if len(missing_dates) > 0:
            # Combine missing dates to ranges
            missing_ranges = list(
                zip(missing_dates, [
                    tcal.add_trading_days(missing_date, 1)
                    for missing_date in missing_dates
                ]))
            # zip(missing_dates, [next_working_day(missing_date, cal) for missing_date in missing_dates]))
            # Merge near-adjacent ranges, scanning backwards so pops don't
            # disturb indices still to visit.  Non-5m data merges ranges up
            # to 50 days apart to batch the downloads.
            for datei in range(len(missing_ranges) - 2, -1, -1):
                c1, c2 = missing_ranges[datei]
                n1, n2 = missing_ranges[datei + 1]
                if c2 + datetime.timedelta(
                        days=(0 if period == "5m" else 50)) >= n1:
                    missing_ranges.pop(datei + 1)
                    missing_ranges[datei] = (c1, n2)

            # Patch it up
            logger.LogEvent(
                "INFO", f"Collecting missing dates for {ticker}  {period}")
            for missing_start, missing_end in missing_ranges:
                dataframe_patch = getDataframe(ticker,
                                               missing_start,
                                               missing_end,
                                               period,
                                               instant=False)
                if not dataframe_patch.empty:
                    # Splice: everything before the gap + patch + everything after.
                    before_dl = dataframe_full[:datetime.datetime(
                        year=missing_start.year,
                        month=missing_start.month,
                        day=missing_start.day,
                        tzinfo=mytz)]
                    after_dl = dataframe_full[datetime.
                                              datetime(year=missing_end.year,
                                                       month=missing_end.month,
                                                       day=missing_end.day,
                                                       tzinfo=mytz):]
                    dataframe_full = pd.concat(
                        [before_dl, dataframe_patch, after_dl])
                else:
                    logger.LogEvent(
                        "WARN",
                        f"Cannot find data for ({ticker}, {period}) between {missing_start}=>{missing_end} to patch data"
                    )

        else:
            logger.LogEvent("INFO", f"No missing dates for {ticker}  {period}")

        # Recount dates after patching to see whether anything was recovered.
        dataframe_dates = pd.Series(
            dataframe_full.index).transform(lambda x: x.date())
        dataframe_full_dates = sorted(
            [x for x in set(dataframe_dates) if x >= start_date])
        postfix_dates = len(dataframe_full_dates)

        if prefix_dates < postfix_dates:
            # Over-write what we have saved
            dataframe_full.index.name = "Datetime"
            dataframe_full.to_csv(csv_path,
                                  index_label=dataframe_full.index.name)
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing dates for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1

    # PT2: What is the content like ??
    try:
        missing_days_times = {}
        prefix_rows = len(dataframe_full)

        if period != "1d":
            # setup: partial days only trade until the cutoff time.
            missing_cutoff = datetime.time(14, 00)
            if period == "5m":
                t_range = pd.Series(
                    pd.date_range("10:00", "15:55",
                                  freq="5min")).transform(lambda x: x.time())
            else:
                raise ValueError(f"Period {period} not supported")

            # fill the missing_days_times dict
            for df_date in dataframe_full_dates:
                # List of times for the day
                df_dt = datetime.datetime(year=df_date.year,
                                          month=df_date.month,
                                          day=df_date.day,
                                          tzinfo=mytz)
                t_dataframe = pd.Series(
                    dataframe_full[df_dt:df_dt + daydelta].index).transform(
                        lambda x: x.time())

                # Are all of these times in the expected time range?
                missing_times = t_range[~t_range.isin(t_dataframe)]
                if tcal.is_partial_trading_day(df_date):
                    missing_times = [
                        x for x in missing_times if x < missing_cutoff
                    ]
                if len(missing_times) > 0:
                    missing_days_times[df_date] = missing_times

            # If there is any data missing, try and fix
            missing_times_dates = list(missing_days_times.keys())
            if len(missing_times_dates) > 0:
                # Combine missing dates to ranges
                missing_ranges = list(
                    zip(missing_times_dates, [
                        tcal.add_trading_days(missing_date, 1)
                        for missing_date in missing_times_dates
                    ]))
                for datei in range(len(missing_ranges) - 2, -1, -1):
                    c1, c2 = missing_ranges[datei]
                    n1, n2 = missing_ranges[datei + 1]
                    # Give this one a bit of room, there are more missing
                    if c2 + datetime.timedelta(days=2) >= n1:
                        missing_ranges.pop(datei + 1)
                        missing_ranges[datei] = (c1, n2)

                logger.LogEvent(
                    "INFO", f"Collecting missing times for {ticker} {period}")
                for missing_start, missing_end in missing_ranges:
                    dataframe_patch = getDataframe(ticker,
                                                   missing_start,
                                                   missing_end,
                                                   period,
                                                   instant=False)
                    patch_dates = set(
                        pd.Series(dataframe_patch.index).transform(
                            lambda x: x.date()))
                    for patch_date in sorted(list(patch_dates)):
                        # Check if the data wasn't added when grouping ranges
                        if patch_date in missing_days_times.keys():
                            missing_dtimes = pd.Series([
                                datetime.datetime.combine(patch_date, mdt)
                                for mdt in missing_days_times[patch_date]
                            ]).transform(lambda x: x.tz_localize(mytz))
                            times_found = missing_dtimes[missing_dtimes.isin(
                                dataframe_patch.index)]
                            for found_time in times_found:
                                # patcher = dataframe_patch.loc[found_time]
                                before_dl = dataframe_full[
                                    dataframe_full.index < found_time]
                                # Re-wrap the single patched row so its columns
                                # line up with dataframe_full's.
                                patcher = pd.DataFrame(
                                    [dataframe_patch.loc[found_time].values],
                                    columns=[
                                        xx for xx in dataframe_full.columns
                                        if not xx == "Datetime"
                                    ],
                                    index=pd.DatetimeIndex([found_time]))
                                after_dl = dataframe_full[
                                    dataframe_full.index > found_time]
                                dataframe_full = pd.concat(
                                    [before_dl, patcher, after_dl])

        # Check that some changes were actually made...
        fixed_rows = len(dataframe_full) - prefix_rows
        if fixed_rows > 0:
            logger.LogEvent(
                "INFO",
                f"Patched {fixed_rows} rows successfully for {ticker} {period}"
            )
            # BUG FIX: `.is_monotonic` was removed in pandas 2.0 (use
            # `.is_monotonic_increasing`), and `index_sort()` is not a
            # DataFrame method -- it raised AttributeError into the broad
            # except below.  `sort_index()` performs the intended repair.
            if not dataframe_full.index.is_monotonic_increasing:
                dataframe_full = dataframe_full.sort_index()
                logger.LogEvent("ERROR", f"Index not sorted properly {ticker}")
            dataframe_full.index.name = "Datetime"
            dataframe_full.to_csv(csv_path)
        else:
            logger.LogEvent("WARN",
                            f"No missing time patched for {ticker} {period}")
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing times for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1

    # Leftovers: report how much is still missing after both passes.
    fixed_dates = postfix_dates - prefix_dates
    outstanding_dates = len(missing_dates) - fixed_dates
    fixed_times = fixed_rows
    outstanding_times = sum([len(x) for x in missing_days_times.values()
                             ]) - fixed_times

    return True, outstanding_dates, outstanding_times
Esempio n. 5
0
def replace_empties(data, goal=None):
    """Fill missing bars in ``data`` with flat, zero-volume rows.

    Walks each trading day from the first stored date up to the last (or
    up to ``goal``), detects missing index entries, and inserts synthetic
    OHLC rows that repeat the previous close with Volume 0.

    data : DataFrame indexed by bar timestamps (OHLC + Adj Close + Volume).
    goal : optional datetime extending the fill horizon past the data's
           end; if it precedes the data's end it is reset to the data's end.
    Returns the patched DataFrame.
    """
    logger = MyLogger.getLogger("RepUtil")

    if goal is not None and goal < data.index[-1]:
        # BUG FIX: logger.warn is the deprecated alias of logger.warning
        # (assumes MyLogger wraps stdlib logging, as getLogger suggests --
        # TODO confirm).
        logger.warning(f"Goal date {goal} is pre-data end {data.index[-1]}, resetting")
        goal = data.index[-1]

    dates = sorted(list(set(pd.Series(data.index).transform(lambda x: x.date()))))
    date = dates[0]
    end_date = dates[-1] if goal is None else goal.date()
    # Infer the bar period as the smallest gap between consecutive bars.
    period = min([abs(d1 - d2) for d1, d2 in zip(data.index[1:], data.index[:-1])])

    while date <= end_date:
        if tcal.is_partial_trading_day(date) or tcal.is_trading_day(date):


            #TODO: Bug - min period changed if get a partial bar


            if period > datetime.timedelta(minutes=5):
                # TODO: Bit ugly.... shoed in
                # Daily-or-coarser bars: a whole missing day becomes one
                # single-entry "range" at the usual bar time-of-day.
                missing_ranges = []
                if date not in dates:
                    missing_ranges = [[data.index[0].replace(year=date.year, month=date.month, day=date.day)]]
            else:
                # Intraday bars: compare against the calendar's full list of
                # trading times for the day.
                ttimes = tcal.tradingtimes(date)
                if goal is None:
                    missing_times = ttimes[~ttimes.isin(data.index)]
                else:
                    missing_times = ttimes[~ttimes.isin(data.index) & (ttimes <= goal)]
                missing_ranges = []

                if len(missing_times):
                    # Blocks of missing data: group consecutive missing times
                    # (exactly one period apart) into runs.
                    missing_ranges.append([missing_times[0]])
                    for missing_time in missing_times[1:]:
                        previous = missing_ranges[-1][-1]

                        next_span = missing_time - previous
                        if next_span != period:
                            missing_ranges.append([missing_time])
                        else:
                            missing_ranges[-1].append(missing_time)

            # chuck repeated in there
            for missing_range in missing_ranges:
                try:
                    before = data[data.index < missing_range[0]]
                    after = data[data.index > missing_range[-1]]

                    # Repeat the last close before the gap, flat with no volume.
                    previous = before.iloc[-1]

                    patch = pd.DataFrame({"Open": previous.Close, "High": previous.Close, "Low": previous.Close,
                                          "Close": previous.Close, "Adj Close": previous.Close, "Volume": 0},
                                         index=missing_range)


                    data = pd.concat([before, patch, after])
                    data = data[~data.index.duplicated(keep='last')]
                except Exception as e:
                    # Best-effort per-range: a gap at the very start has no
                    # `previous` row and lands here.
                    print(f"Repair failed for {e}")
        date += datetime.timedelta(days=1)
    return data
Esempio n. 6
0
    def parseHistoricalDataArgs(self, tickerId, contract, endDateTime,
                                durationStr, barSizeSetting, whatToShow,
                                useRTH, formatDate, keepUpToDate,
                                chartOptions):
        """Resolve an IB-style historical-data request into a time window.

        Parses ``endDateTime`` (empty string means "now" per the simulated
        clock) and ``durationStr`` (e.g. "5 D", "30 S") into concrete
        start/end datetimes.

        Returns (starttime, endtime, dateformat).
        Raises NotImplementedError for formatDate 0, ValueError for an
        unrecognised formatDate or duration unit, or an end time in the
        future.
        """
        if formatDate == 0:
            raise NotImplementedError(
                f"formatDate value {formatDate} not supported; use 1:yyyyMMdd HH:mm:ss"
            )
        elif formatDate == 1:
            dateformat = "%Y%m%d %H:%M:%S"
        else:
            raise ValueError(
                f"Format date value {formatDate} not recognised must be 0:sys or 1:yyyyMMdd HH:mm:ss"
            )

        # End time
        currtime = self.clock.sync_datetime
        if endDateTime == "":
            endtime = currtime
        else:
            # BUG FIX: `datetime` is the module here (see datetime.timedelta
            # below), so `datetime.strptime` raised AttributeError; strptime
            # lives on the datetime.datetime class.
            endtime = datetime.datetime.strptime(endDateTime, dateformat)
            if currtime < endtime:
                raise ValueError(
                    f"End time {endtime} cannot be greater than current time {currtime}"
                )

        # TODO: Public holidays in lookback

        # Start time: "<value> <unit>" where unit is S/D/W/M/Y.
        lookback_val = int(durationStr.split(" ")[0])
        lookback_unit = durationStr.split(" ")[-1]
        if lookback_unit == "S":
            starttime = endtime - datetime.timedelta(seconds=lookback_val)
        elif lookback_unit == "D":
            # Days are trading days, not calendar days.
            starttime = TradingCalendar.add_trading_days(
                endtime, -lookback_val)
        elif lookback_unit == "W":
            starttime = endtime - datetime.timedelta(days=lookback_val * 7)
        elif lookback_unit == "M":
            # Split months into year and month offsets; wrap month below 1.
            # NOTE(review): an invalid day for the target month (e.g. 31st
            # back to February) would raise ValueError -- TODO confirm intended.
            lb_month = endtime.month - lookback_val % 12
            lb_year = endtime.year - lookback_val // 12
            starttime = datetime.datetime(
                year=lb_year if lb_month >= 1 else lb_year - 1,
                month=lb_month if lb_month >= 1 else lb_month + 12,
                day=endtime.day,
                hour=endtime.hour,
                minute=endtime.minute,
                second=endtime.second)
        elif lookback_unit == "Y":
            starttime = datetime.datetime(year=endtime.year - lookback_val,
                                          month=endtime.month,
                                          day=endtime.day,
                                          hour=endtime.hour,
                                          minute=endtime.minute,
                                          second=endtime.second)
        else:
            raise ValueError(
                f"Invalid durationStr argument {durationStr}, unit {lookback_unit} not recognised"
            )

        return starttime, endtime, dateformat
Esempio n. 7
0
 def roll_day(self):
     """Advance the clock one trading day forward via set_day."""
     next_trading_day = TradingCalendar.add_trading_days(self.date, 1)
     return self.set_day(next_trading_day)