def istrading(self):
    """Return True if the market is open at this clock's current sync time.

    Checks that `self.date` is a (full or partial) trading day, then that
    `self.sync_datetime`'s time-of-day falls strictly inside the session's
    first and last bar times.
    """
    if not (TradingCalendar.is_trading_day(self.date)
            or TradingCalendar.is_partial_trading_day(self.date)):
        return False
    # Renamed from `range`, which shadowed the builtin.
    session_times = TradingCalendar.tradingtimes(self.date)
    return session_times[0].time() < self.sync_datetime.time() < session_times[-1].time()
def run(self):
    """Drive the backtest: replay each trading day between the earliest usable
    data date and the latest, ticking the simulated clock and updating every
    configured strategy at each simulated time step.

    Side effects: mutates DataFactory.prejack_symbols, advances the shared
    TradingClock singleton, and stops self.account when the loop finishes.
    """
    DataFactory.prejack_symbols = self.symbols
    clock, datafactory = TradingClock.getInstance(), DataFactory.getInstance()
    # Configure  # TODO: TOML
    # Where do we have available data
    earliest_data, latest_data = datafactory.datesSpread(barsize=self.barsize)
    # Skip `setup_days` trading days so strategies have warm-up history.
    start_date = TradingCalendar.add_trading_days(earliest_data, self.setup_days)
    end_date = latest_data  # TradingCalendar.add_trading_days(clock.date, 1)
    # clock.set_day(TradingCalendar.add_trading_days(latest_data, -12))
    # Get a groove on
    clock.set_day(start_date)
    while (clock.date <= end_date):
        self.logger.info(clock.date)
        # Daily setup: fresh Manager and fresh strategy instances every day.
        m = Manager(self.account, self.simtracker)
        strategies = [stype(m, **strategy_kwargs)
                      for stype, strategy_kwargs in self.strategies]
        # Guts of the simulation
        if self.rapid:
            # Rapid mode: jump straight to the configured times only.
            for simtime in [clock.mytz.localize(datetime.datetime.combine(clock.date, time))
                            for time in self.times]:
                clock.sync_datetime = simtime
                for strategy in strategies:
                    strategy.update()
        else:
            # Full mode: walk every bar of the session, updating only when the
            # bar's time-of-day is one of the configured trigger times.
            for simtime in TradingCalendar.tradingtimes(clock.date):
                clock.sync_datetime = simtime
                # On the hour
                # if clock.sync_datetime.time().hour > 10 and clock.sync_datetime.time().minute == 0:
                if clock.sync_datetime.time() in self.times:
                    # todo: need to really use the tws api before figuring this out..
                    for strategy in strategies:
                        strategy.update()
        # Flatten all positions on the final simulated day.
        if clock.date == end_date:
            for strategy in strategies:
                strategy.closeall()
        # Daily teardown
        m.stop()
        # NEXT!
        clock.roll_day()
    self.account.stop()
    print("THREAD DONE.")
def scrape_new(ticker, period, start_date, end_date):
    """Scrapes new data to the current store.

    Downloads `ticker` bars of size `period` between start_date and end_date
    and appends them to the period-specific CSV store, skipping any span
    already on disk. Returns True on success (or nothing to do); returns
    None after logging on failure (best-effort contract preserved).
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()
    # File locations
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)
    record_exists = Path(csv_path).exists()
    # Attempt to find the date range
    record = None
    try:
        if record_exists:
            # BUGFIX: pass `period` so the record matches csv_path; previously
            # loadSymbol(ticker) read the default period's file, so the
            # resume-point below could come from the wrong store.
            record = datafactory.loadSymbol(ticker, period)
            record_end_date = record.index[-1]
            daydelta = datetime.timedelta(days=1)
            # Resume from the day after the last stored bar.
            start_date = max(start_date, (record_end_date + daydelta).date())
            # Can potentially skip
            dates_between = pd.date_range(start_date, end_date - daydelta, freq='d')
            dates_between_workdays = pd.Series(dates_between).transform(
                lambda x: tcal.is_trading_day(x))
            if (dates_between_workdays.empty) or (
                    not dates_between_workdays.any()):
                logger.LogEvent("INFO",
                                f"No dates to update for {ticker} {period}")
                return True
    except Exception as e:
        # Best-effort: fall through and attempt a fresh scrape anyway.
        logger.LogEvent(
            "ERROR",
            f"Error getting date ({ticker}, {period}): {e}, {type(e)}")
    # Attempt to scrape the data
    try:
        logger.LogEvent(
            "INFO",
            f"Collecting {ticker} {period} from {start_date} to {end_date}")
        dataframe_dl = getDataframe(ticker, start_date, end_date, period,
                                    instant=False)
        if not dataframe_dl.empty:
            if record is None:  # Failed to load
                if record_exists:
                    # Keep a dated backup before overwriting an unreadable file.
                    today = datetime.datetime.now()
                    copyfile(
                        csv_path,
                        f"{csv_path[:-4]} - Copy {today.month}-{today.day}{csv_path[-4:]}"
                    )
                dataframe_dl.to_csv(csv_path)
            else:
                # Append to the existing store without re-writing the header.
                dataframe_dl.to_csv(csv_path, mode='a', header=False)
        return True
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error downloading ({ticker}, {period}): {e}, {type(e)}")
def scrape_repair(ticker, period, start_date):
    """Aim to fill missing gaps. First aims for days. Then times.

    PT1 finds whole trading days absent from the stored data and re-downloads
    them; PT2 (intraday periods only) finds individual missing bar times within
    each stored day and patches them in.

    Returns (ok, outstanding_dates, outstanding_times); (False, -1, -1) when
    either phase raised.
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()
    daydelta = datetime.timedelta(days=1)
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)
    # Load the dataframe, get list of dates
    dataframe_full = datafactory.loadSymbol(ticker, period)
    dataframe_full = dataframe_full[~dataframe_full.index.duplicated()]
    dataframe_dates = pd.Series(
        dataframe_full.index).transform(lambda x: x.date())
    dataframe_full_dates = sorted(
        [x for x in set(dataframe_dates) if x >= start_date])
    mytz = TradingClock.mytz
    # PT1: Are any dates missing
    try:
        prefix_dates = len(dataframe_full_dates)
        missing_dates = []
        # Collect trading days that fall in gaps between consecutive stored days.
        for datei in range(1, len(dataframe_full_dates)):
            day1 = dataframe_full_dates[datei - 1]
            day2 = dataframe_full_dates[datei]
            missing_dates = missing_dates + [
                x.date() for x in pd.date_range(
                    day1 + daydelta, day2 - daydelta, freq='d')
                if tcal.is_trading_day(x.date())
            ]
        if len(missing_dates) > 0:
            # Combine missing dates to ranges
            missing_ranges = list(
                zip(missing_dates, [
                    tcal.add_trading_days(missing_date, 1)
                    for missing_date in missing_dates
                ]))
            # Merge near-adjacent ranges (wide slack for coarse periods) so we
            # issue fewer download requests. Iterate backwards so pops are safe.
            for datei in range(len(missing_ranges) - 2, -1, -1):
                c1, c2 = missing_ranges[datei]
                n1, n2 = missing_ranges[datei + 1]
                if c2 + datetime.timedelta(
                        days=(0 if period == "5m" else 50)) >= n1:
                    missing_ranges.pop(datei + 1)
                    missing_ranges[datei] = (c1, n2)
            # Patch it up
            logger.LogEvent(
                "INFO", f"Collecting missing dates for {ticker} {period}")
            for missing_start, missing_end in missing_ranges:
                dataframe_patch = getDataframe(ticker, missing_start,
                                               missing_end, period,
                                               instant=False)
                if not dataframe_patch.empty:
                    # Splice the patch between the data before/after the gap.
                    before_dl = dataframe_full[:datetime.datetime(
                        year=missing_start.year,
                        month=missing_start.month,
                        day=missing_start.day,
                        tzinfo=mytz)]
                    after_dl = dataframe_full[datetime.datetime(
                        year=missing_end.year,
                        month=missing_end.month,
                        day=missing_end.day,
                        tzinfo=mytz):]
                    dataframe_full = pd.concat(
                        [before_dl, dataframe_patch, after_dl])
                else:
                    logger.LogEvent(
                        "WARN",
                        f"Cannot find data for ({ticker}, {period}) between {missing_start}=>{missing_end} to patch data"
                    )
        else:
            logger.LogEvent("INFO",
                            f"No missing dates for {ticker} {period}")
        # Recompute the stored-day list to measure what PT1 achieved.
        dataframe_dates = pd.Series(
            dataframe_full.index).transform(lambda x: x.date())
        dataframe_full_dates = sorted(
            [x for x in set(dataframe_dates) if x >= start_date])
        postfix_dates = len(dataframe_full_dates)
        if prefix_dates < postfix_dates:
            # Over-write what we have saved
            dataframe_full.index.name = "Datetime"
            dataframe_full.to_csv(csv_path,
                                  index_label=dataframe_full.index.name)
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing dates for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1
    # PT2: What is the content like ??
    try:
        missing_days_times = {}
        prefix_rows = len(dataframe_full)
        if period != "1d":
            # setup
            missing_cutoff = datetime.time(14, 00)
            if period == "5m":
                t_range = pd.Series(
                    pd.date_range("10:00", "15:55",
                                  freq="5min")).transform(lambda x: x.time())
            else:
                raise ValueError(f"Period {period} not supported")
            # fill the missing_days_times dict
            for df_date in dataframe_full_dates:
                # List of times for the day
                df_dt = datetime.datetime(year=df_date.year,
                                          month=df_date.month,
                                          day=df_date.day,
                                          tzinfo=mytz)
                t_dataframe = pd.Series(
                    dataframe_full[df_dt:df_dt + daydelta].index).transform(
                        lambda x: x.time())
                # Are all of these times in the expected time range?
                missing_times = t_range[~t_range.isin(t_dataframe)]
                if tcal.is_partial_trading_day(df_date):
                    # Partial days close early; ignore times past the cutoff.
                    missing_times = [
                        x for x in missing_times if x < missing_cutoff
                    ]
                if len(missing_times) > 0:
                    missing_days_times[df_date] = missing_times
            # If there is any data missing, try and fix
            missing_times_dates = list(missing_days_times.keys())
            if len(missing_times_dates) > 0:
                # Combine missing dates to ranges
                missing_ranges = list(
                    zip(missing_times_dates, [
                        tcal.add_trading_days(missing_date, 1)
                        for missing_date in missing_times_dates
                    ]))
                for datei in range(len(missing_ranges) - 2, -1, -1):
                    c1, c2 = missing_ranges[datei]
                    n1, n2 = missing_ranges[datei + 1]
                    # Give this one a bit of room, there are more missing
                    if c2 + datetime.timedelta(days=2) >= n1:
                        missing_ranges.pop(datei + 1)
                        missing_ranges[datei] = (c1, n2)
                logger.LogEvent(
                    "INFO", f"Collecting missing times for {ticker} {period}")
                for missing_start, missing_end in missing_ranges:
                    dataframe_patch = getDataframe(ticker, missing_start,
                                                   missing_end, period,
                                                   instant=False)
                    patch_dates = set(
                        pd.Series(dataframe_patch.index).transform(
                            lambda x: x.date()))
                    for patch_date in sorted(list(patch_dates)):
                        # Check if the data wasn't added when grouping ranges
                        if patch_date in missing_days_times.keys():
                            missing_dtimes = pd.Series([
                                datetime.datetime.combine(patch_date, mdt)
                                for mdt in missing_days_times[patch_date]
                            ]).transform(lambda x: x.tz_localize(mytz))
                            times_found = missing_dtimes[missing_dtimes.isin(
                                dataframe_patch.index)]
                            for found_time in times_found:
                                # Splice the single patched row into place.
                                before_dl = dataframe_full[
                                    dataframe_full.index < found_time]
                                patcher = pd.DataFrame(
                                    [dataframe_patch.loc[found_time].values],
                                    columns=[
                                        xx for xx in dataframe_full.columns
                                        if not xx == "Datetime"
                                    ],
                                    index=pd.DatetimeIndex([found_time]))
                                after_dl = dataframe_full[
                                    dataframe_full.index > found_time]
                                dataframe_full = pd.concat(
                                    [before_dl, patcher, after_dl])
        # Check that some changes were actually made...
        fixed_rows = len(dataframe_full) - prefix_rows
        if fixed_rows > 0:
            logger.LogEvent(
                "INFO",
                f"Patched {fixed_rows} rows successfully for {ticker} {period}"
            )
            # BUGFIX: was `index.is_monotonic` (removed in pandas 2.0) and
            # `dataframe_full.index_sort()` (no such method — the resulting
            # AttributeError was swallowed by the except below, so the patched
            # data was never saved). Use is_monotonic_increasing / sort_index.
            if not dataframe_full.index.is_monotonic_increasing:
                dataframe_full = dataframe_full.sort_index()
                logger.LogEvent("ERROR",
                                f"Index not sorted properly {ticker}")
            dataframe_full.index.name = "Datetime"
            dataframe_full.to_csv(csv_path)
        else:
            logger.LogEvent("WARN",
                            f"No missing time patched for {ticker} {period}")
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing times for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1
    # Leftovers
    fixed_dates = postfix_dates - prefix_dates
    outstanding_dates = len(missing_dates) - fixed_dates
    fixed_times = fixed_rows
    outstanding_times = sum([len(x) for x in missing_days_times.values()
                             ]) - fixed_times
    return True, outstanding_dates, outstanding_times
def replace_empties(data, goal=None):
    """Forward-fill missing bars in an OHLCV frame up to `goal` (or data end).

    For intraday data (bar period <= 5 min) each missing session time becomes a
    flat bar priced at the previous Close with zero Volume; for coarser data a
    whole missing trading day gets a single such bar. Returns the repaired
    DataFrame (the input is not mutated; a new frame is built per patch).
    """
    logger = MyLogger.getLogger("RepUtil")
    if goal is not None and goal < data.index[-1]:
        # BUGFIX: logger.warn is the deprecated alias of logger.warning.
        logger.warning(f"Goal date {goal} is pre-data end {data.index[-1]}, resetting")
        goal = data.index[-1]
    dates = sorted(list(set(pd.Series(data.index).transform(lambda x: x.date()))))
    date = dates[0]
    end_date = dates[-1] if goal is None else goal.date()
    # Smallest gap between consecutive bars is taken as the bar period.
    period = min([abs(d1 - d2) for d1, d2 in zip(data.index[1:], data.index[:-1])])
    while date <= end_date:
        if tcal.is_partial_trading_day(date) or tcal.is_trading_day(date):
            #TODO: Bug - min period changed if get a partial bar
            if period > datetime.timedelta(minutes=5):
                # TODO: Bit ugly.... shoed in
                # Daily (or coarser) data: one placeholder bar per missing day.
                missing_ranges = []
                if date not in dates:
                    missing_ranges = [[data.index[0].replace(year=date.year,
                                                             month=date.month,
                                                             day=date.day)]]
            else:
                # Intraday data: find every session time missing from the index.
                ttimes = tcal.tradingtimes(date)
                if goal is None:
                    missing_times = ttimes[~ttimes.isin(data.index)]
                else:
                    missing_times = ttimes[~ttimes.isin(data.index) & (ttimes <= goal)]
                missing_ranges = []
                if len(missing_times):
                    # Blocks of missing data: start a new block whenever the
                    # gap to the previous missing time isn't exactly one period.
                    missing_ranges.append([missing_times[0]])
                    for missing_time in missing_times[1:]:
                        previous = missing_ranges[-1][-1]
                        next_span = missing_time - previous
                        if next_span != period:
                            missing_ranges.append([missing_time])
                        else:
                            missing_ranges[-1].append(missing_time)
            # chuck repeated in there
            for missing_range in missing_ranges:
                try:
                    before = data[data.index < missing_range[0]]
                    after = data[data.index > missing_range[-1]]
                    # Flat bars at the last known Close, zero volume.
                    previous = before.iloc[-1]
                    patch = pd.DataFrame({"Open": previous.Close,
                                          "High": previous.Close,
                                          "Low": previous.Close,
                                          "Close": previous.Close,
                                          "Adj Close": previous.Close,
                                          "Volume": 0},
                                         index=missing_range)
                    data = pd.concat([before, patch, after])
                    data = data[~data.index.duplicated(keep='last')]
                except Exception as e:
                    # Best-effort: a failed patch (e.g. gap before first row)
                    # leaves that range unfilled.
                    print(f"Repair failed for {e}")
        date += datetime.timedelta(days=1)
    return data
def parseHistoricalDataArgs(self, tickerId, contract, endDateTime,
                            durationStr, barSizeSetting, whatToShow, useRTH,
                            formatDate, keepUpToDate, chartOptions):
    """Validate IB-style historical-data request args and resolve the window.

    Returns (starttime, endtime, dateformat): the lookback window implied by
    `endDateTime`/`durationStr` and the strftime pattern for `formatDate`.

    Raises:
        NotImplementedError: formatDate == 0 (system/epoch format) unsupported.
        ValueError: unknown formatDate or duration unit, or an endDateTime
            later than the simulated current time.
    """
    import calendar  # for month-length clamping in the M/Y branches

    if formatDate == 0:
        raise NotImplementedError(
            f"formatDate value {formatDate} not supported; use 1:yyyyMMdd HH:mm:ss"
        )
    elif formatDate == 1:
        dateformat = "%Y%m%d %H:%M:%S"
    else:
        raise ValueError(
            f"Format date value {formatDate} not recognised must be 0:sys or 1:yyyyMMdd HH:mm:ss"
        )
    # End time: empty string means "now" (the simulated clock's time).
    currtime = self.clock.sync_datetime
    if endDateTime == "":
        endtime = currtime
    else:
        # BUGFIX: was `datetime.strptime(...)` — an AttributeError on the
        # datetime *module* (this method elsewhere uses datetime.datetime
        # and datetime.timedelta, so `datetime` is the module here).
        endtime = datetime.datetime.strptime(endDateTime, dateformat)
    if currtime < endtime:
        raise ValueError(
            f"End time {endtime} cannot be greater than current time {currtime}"
        )
    # TODO: Public holidays in lookback
    # Start time: subtract the requested duration from the end time.
    lookback_val = int(durationStr.split(" ")[0])
    lookback_unit = durationStr.split(" ")[-1]
    if lookback_unit == "S":
        starttime = endtime - datetime.timedelta(seconds=lookback_val)
    elif lookback_unit == "D":
        starttime = TradingCalendar.add_trading_days(
            endtime, -lookback_val)
    elif lookback_unit == "W":
        starttime = endtime - datetime.timedelta(days=lookback_val * 7)
    elif lookback_unit == "M":
        lb_month = endtime.month - lookback_val % 12
        lb_year = endtime.year - lookback_val // 12
        if lb_month < 1:
            lb_month += 12
            lb_year -= 1
        # BUGFIX: clamp the day-of-month so e.g. May 31 minus 3 months
        # yields Feb 28/29 instead of raising ValueError.
        lb_day = min(endtime.day, calendar.monthrange(lb_year, lb_month)[1])
        starttime = datetime.datetime(year=lb_year,
                                      month=lb_month,
                                      day=lb_day,
                                      hour=endtime.hour,
                                      minute=endtime.minute,
                                      second=endtime.second)
    elif lookback_unit == "Y":
        lb_year = endtime.year - lookback_val
        # Clamp for Feb 29 endpoints landing on a non-leap target year.
        lb_day = min(endtime.day,
                     calendar.monthrange(lb_year, endtime.month)[1])
        starttime = datetime.datetime(year=lb_year,
                                      month=endtime.month,
                                      day=lb_day,
                                      hour=endtime.hour,
                                      minute=endtime.minute,
                                      second=endtime.second)
    else:
        raise ValueError(
            f"Invalid durationStr argument {durationStr}, unit {lookback_unit} not recognised"
        )
    return starttime, endtime, dateformat
def roll_day(self):
    """Advance this clock to the next trading day, returning set_day's result."""
    next_trading_date = TradingCalendar.add_trading_days(self.date, 1)
    return self.set_day(next_trading_date)