def run(self) -> None:
    # Set the symbols and dates we need data for.
    symbols = ['SPY', 'SPXL', 'SPXS']
    start_date = date(year=2020, month=4, day=1)
    days_to_dump = 5

    # Clone the live environment so it can run on this thread.
    live_env = ExecEnv(self.program.logfeed_program, self.program.logfeed_program, self.program.live_env)
    live_env.fork_new_thread()
    data_collector = PolygonDataCollector(self.program.logfeed_program,
                                          self.program.logfeed_program,
                                          live_env.time())

    # Go through each symbol.
    for symbol in symbols:
        # Go through the first 5 market days starting with start_date.
        day_date = start_date - timedelta(days=1)
        for i in range(days_to_dump):
            # Get the next market day.
            day_date = live_env.time().get_next_mkt_day(day_date)

            # Load price data.
            print(f'Fetching {symbol} data for {day_date:%m-%d-%Y}')
            day_data = live_env.mongo().load_symbol_day(symbol, day_date)

            # Get fresh data from polygon.io, if necessary.
            if not SymbolDay.validate_candles(day_data.candles):
                try:
                    day_data = data_collector.collect_candles_for_day(day_date, symbol)
                except Exception:
                    live_env.error_process('Error collecting polygon-rest data:')
                    live_env.warn_process(traceback.format_exc())

            # Validate the data.
            if day_data is None or not SymbolDay.validate_candles(day_data.candles):
                print(f'COULD NOT COMPILE DEBUG PRICE DATA FOR {symbol} ON {day_date:%m-%d-%Y}')
                continue

            # Convert the data into json.
            data_dict = day_data.to_json()

            # Dump the data into a text file.
            if not os.path.exists('debug_data'):
                os.mkdir('debug_data')
            with open(f'debug_data/{symbol}_{day_date:%m-%d-%Y}.txt', 'w+') as f:
                f.write(json.dumps(data_dict))
            print(f'Dumped data to TC2_data/debug_data/{symbol}_{day_date:%m-%d-%Y}')
def _train_on_rest_data(self, symbol: str, day_date: date) -> bool:
    """
    Collects polygon-rest data and trains the symbol on it.
    Returns False if the collected data is invalid; True otherwise.
    """
    # Collect polygon-rest data.
    rest_data = None
    try:
        rest_data = self.data_collector().collect_candles_for_day(day_date, symbol)
    except Exception:
        self.error_process('Error collecting polygon-rest data:')
        self.warn_process(traceback.format_exc())

    # Validate polygon-rest data.
    if rest_data is None or not SymbolDay.validate_candles(rest_data.candles):
        self.redis().reset_day_difficulty(symbol, day_date)
        self.redis().incr_day_difficulty(symbol, day_date)
        return False

    # Save polygon-rest data.
    self.redis().reset_day_difficulty(symbol, rest_data.day_date)
    self.mongo().save_symbol_day(rest_data)

    # Train models on polygon-rest data.
    self.model_feeder.train_models(symbol=symbol,
                                   day_date=day_date,
                                   day_data=rest_data,
                                   stable=True)
    return True
def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
    """
    Returns a number between -1 and 1, indicating how strongly the S&P-500 is trending
    and in which direction (or NOT_APPLICABLE for symbols other than SPXL/SPXS).
    """
    # Only allow SPXL and SPXS to execute LongShortStrategy.
    if symbol not in ['SPXL', 'SPXS']:
        return LongShortFavor.NOT_APPLICABLE

    # Fetch data during the period.
    spxl_candles = self.get_latest_candles('SPXL', 30)
    spxs_candles = self.get_latest_candles('SPXS', 30)

    # Validate data.
    debugger = []
    for candles in [spxl_candles, spxs_candles]:
        if not SymbolDay.validate_candles(candles, min_minutes=29, debug_output=debugger):
            # Include the validator's debug output in the log message.
            debug_str = '\n\t'.join(debugger)
            self.debug_process(f'LSFavor invalid data:\n\t{debug_str}')
            raise ValueError('LSFavorModel could not fetch valid data')

    # self.debug_process(f'LSFavor using candles to get favor val')
    return self.get_favor_val(self.time().now(), spxl_candles, spxs_candles)
def _train_on_stream_data(self, symbol: str, day_date: date) -> bool:
    """
    Loads cached polygon-stream data and trains the symbol on it.
    Returns False if the data is invalid; True otherwise.
    """
    # Validate polygon-stream data.
    stream_data = SymbolDay(symbol=symbol,
                            day_date=day_date,
                            candles=self.redis().get_cached_candles(symbol=symbol,
                                                                    day_date=day_date))
    if not SymbolDay.validate_candles(stream_data.candles):
        return False

    # Train models on polygon-stream data.
    self.model_feeder.train_models(symbol=symbol,
                                   day_date=day_date,
                                   day_data=stream_data,
                                   stable=True)
    return True
def _load_period_data(self, output: Breakout1ModelOutput, symbol: str) -> bool:
    output.symbol = symbol
    output.period_data = self.get_latest_candles(
        symbol=symbol, minutes=breakout1_constants.BREAKOUT_SETUP_MINS)
    if not SymbolDay.validate_candles(
            output.period_data, min_minutes=breakout1_constants.BREAKOUT_SETUP_MINS):
        output.add_step(passed=False,
                        value='insufficient data for the period',
                        step_id=Breakout1ModelSteps.INITIALIZATION)
        return False
    return True
def feed_model(self, day_data: SymbolDay) -> None:
    """
    Calculates the strongest dip during the first 45 minutes of CycleStrategy's typical
    run window, i.e. it predicts the worst dip that should be expected within 45 minutes
    of buying the symbol.
    """
    # Find the price at minute 60 (10:30, an hour after the open).
    start_time = datetime.combine(day_data.day_date, time(hour=10, minute=30))
    start_candle: Candle = day_data.get_candle_at_sec(start_time)
    if start_candle is None:
        self.warn_process("Couldn't update dip_45 analysis_model for CycleStrategy. "
                          "Bad data at minute 60.")
        return

    # Find the lowest price within 45 minutes after minute 60.
    end_time = start_time + timedelta(minutes=45)
    lowest_candle: Candle = Candle(start_candle.moment, start_candle.open, start_candle.high,
                                   start_candle.low, start_candle.close, start_candle.volume)
    for candle in day_data.candles:
        if candle.low < lowest_candle.low and start_time < candle.moment < end_time:
            lowest_candle = candle

    # Calculate the greatest downward price change as a percentage.
    strongest_dip_pct = 100.0 * max(0.0, start_candle.low - lowest_candle.low) / start_candle.low

    # Load the current running sum.
    current_sum = self.redis().get_analysis_rolling_sum(day_data.symbol, self.model_type)

    # Skip days that don't dip, since this model is only interested in forecasting dips.
    if strongest_dip_pct == 0:
        output = current_sum
    # Merge this day into the running sum.
    else:
        output = RollingSumFormulas.combine(current_sum, strongest_dip_pct,
                                            RollingSumFormulas.get_30_day_weight())

    # Save the model's output.
    self.save_output(symbol=day_data.symbol, raw_output=output, day_date=day_data.day_date)
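# --- Hedged worked example (added for illustration; not part of the original source) ---
# The dip-percentage formula above, with made-up prices: if the symbol trades at
# 100.00 at minute 60 and bottoms at 97.50 within the next 45 minutes, then
#   strongest_dip_pct = 100.0 * max(0.0, 100.00 - 97.50) / 100.00  # = 2.5 (%)
# A day that never drops below the minute-60 low yields 0 and leaves the running sum unchanged.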
def collect_candles_for_day(self, day: date, symbol: str) -> Optional[SymbolDay]:
    """
    Uses Polygon to collect candles for the given day.
    Does NOT save the newly-collected candles.
    Returns None if collection fails.
    """
    candles = None
    try:
        candles = self._parse_ticks_in_intervals(
            symbol,
            [[datetime.combine(day, OPEN_TIME), datetime.combine(day, CLOSE_TIME)]])
    except Exception:
        self.debug_msgs.append(f'Error collecting {symbol} candles from polygon for {day:%m-%d-%Y}:')
        self.debug_msgs.append(traceback.format_exc())

    # Honor the Optional return type instead of wrapping a None candle list.
    if candles is None:
        return None
    return SymbolDay(symbol, day, candles)
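# --- Hedged usage sketch (added; assumes a constructed PolygonDataCollector from this
# codebase, so it is not runnable standalone) ---
# day_data = data_collector.collect_candles_for_day(date(2020, 4, 1), 'SPY')
# if day_data is None or not SymbolDay.validate_candles(day_data.candles):
#     ...  # fall back to cached data or mark the day as difficult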
def create_dummy_day(self, symbol: str, day_date: date, num_candles: int) -> SymbolDay:
    """Creates a SymbolDay with mock price data."""
    dummy_candles = []
    dummy_moment = datetime.combine(day_date, OPEN_TIME)
    for i in range(num_candles):
        dummy_candles.append(
            Candle(
                moment=dummy_moment,
                open=0.001,
                high=0.001,
                low=0.001,
                close=0.001,
                volume=999,
            ))
        dummy_moment += timedelta(seconds=1)
    return SymbolDay(symbol, day_date, dummy_candles)
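# --- Hedged usage sketch (added; fixture name assumed) ---
# One second-resolution candle is generated per second starting at the open,
# so an hour of mock data is 3600 candles:
# day = fixtures.create_dummy_day('SPY', date(2020, 4, 1), num_candles=3600)
# assert len(day.candles) == 3600 and day.candles[0].moment.time() == OPEN_TIME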
def init_simulation_data(live_env: 'ExecEnv',
                         sim_env: 'ExecEnv',
                         symbols: List[str],
                         days: int,
                         end_date: date,
                         model_feeder: 'ModelFeeder',
                         skip_last_day_training: bool = False) -> Optional[str]:
    """
    Copies live data into the simulation environment and trains analysis models.
    Returns None if successful; otherwise returns the error message.

    WARNING: this will change the time of the simulated environment, sim_env.

    :param days: the number of market days to fill with data before (and including) end_date
    :param skip_last_day_training: whether or not to skip training analysis models on end_date
    """
    # Go back n days from end_date.
    day_date = end_date
    for i in range(days):
        day_date = live_env.time().get_prev_mkt_day(day_date)

    # Copy data for each day into the simulation environment and train models.
    for i in range(days + 1):
        for symbol in symbols:
            # Load data from the live environment.
            day_data = live_env.mongo().load_symbol_day(symbol=symbol, day=day_date)

            # Validate the data.
            if not SymbolDay.validate_candles(day_data.candles):
                return f'Couldn\'t set up {days}-day simulation environment for {symbol} ending at ' \
                       f'{end_date:%Y-%m-%d}. Data missing on {day_date:%Y-%m-%d}'

            # Copy data into the simulated environment.
            sim_env.mongo().save_symbol_day(day_data)

            # Train models.
            if day_date != end_date or not skip_last_day_training:
                model_feeder.train_models(symbol=symbol,
                                          day_date=day_date,
                                          day_data=day_data,
                                          stable=True)

        # Move to the next day.
        day_date = live_env.time().get_next_mkt_day(day_date)

    return None
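# --- Hedged usage sketch (added; environment and feeder names assumed, so not
# runnable standalone) ---
# err = init_simulation_data(live_env, sim_env, symbols=['SPY'], days=5,
#                            end_date=date(2020, 4, 8), model_feeder=feeder)
# if err is not None:
#     live_env.warn_process(err)  # e.g. data missing on one of the 6 copied days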
def _check_ema_volume_excitement(self, output: Breakout1ModelOutput) -> bool:
    # Collect the moving-average volume of the same period on each of the past 7 days.
    # (Today's value is compared against this baseline below, so it is not included here.)
    ema_volumes = []
    prev_date = self.time().now().date()
    for i in range(7):
        # Load the previous day's data.
        prev_date = self.time().get_prev_mkt_day(prev_date)
        day_data = self.mongo().load_symbol_day(output.symbol, prev_date)
        if not SymbolDay.validate_candles(day_data.candles):
            output.steps.append(
                ModelStep(passed=False,
                          value=f'missing needed data on {prev_date.strftime(DATE_FORMAT)}',
                          step_id=Breakout1ModelSteps.EMA_MINUTE_VOLUME))
            return False

        # Aggregate the day's second-resolution candles into minute resolution.
        prev_period_start = datetime.combine(prev_date, output.period.start_time)
        prev_period_end = datetime.combine(prev_date, output.period.end_time)
        prev_second_candles = [
            candle for candle in day_data.candles
            if prev_period_start <= candle.moment <= prev_period_end
        ]
        prev_minute_candles = aggregate_minute_candles(prev_second_candles)

        # Calculate the previous day's moving-average volume.
        ema_volumes.append(ema([candle.volume for candle in prev_minute_candles]))

    # Calculate the median ema volume of the same period on each of the past 7 days.
    med_ema_volume_prev_7_periods = median(ema_volumes)

    # Calculate the ema volume of this period today.
    ema_minute_volume = ema(output.minute_volumes)

    # Perform the next step: ema minute volume check.
    output.steps.append(
        output.check_ema_minute_volume(ema_minute_volume, med_ema_volume_prev_7_periods))
    return output.steps[-1].passed
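# --- Hedged worked example (added; illustrative values, with ema() standing in for
# this codebase's moving-average helper) ---
# With prior-day EMA volumes of [120, 95, 110, 90, 130, 105, 115], the 7-day median
# baseline is 110. A same-period EMA volume of 160 today reads as unusually "excited";
# 80 does not. The actual pass/fail threshold lives in check_ema_minute_volume.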
def create_data_points(self) -> List[NeuralExample]:
    examples = []

    # Mix in examples from all symbols.
    for symbol in Settings.get_symbols(self):
        dates = self.mongo().get_dates_on_file(symbol, START_DATE, self.time().now())
        for day_date in dates:
            # Load the symbol's candles on day_date.
            day_data = self.mongo().load_symbol_day(symbol, day_date)

            # Ensure the first 45 minutes of data are present.
            first_45_candles = SymbolDay.get_ordered_candles(day_data.candles, FIRST_45_MINS)
            if len(first_45_candles) < 45 * MIN_CANDLES_PER_MIN \
                    or len(day_data.candles) < MIN_CANDLES_PER_DAY:
                continue

            # TODO Calculate minute-to-minute price and volume changes
            # TODO Classify rally strength on the day
            # TODO Create a data point

    return examples
def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
    """
    Returns True if the symbol's 12-hour high falls in the upper 4% of its 75-day range,
    False otherwise.
    """
    # Fetch the latest 12 hours of data.
    recent_candles = self.get_latest_candles(symbol, 60 * 12)

    # Validate the 12-hour data.
    if not SymbolDay.validate_candles(recent_candles, min_minutes=60 * 12):
        self.error_process('High96PctModel candles ({}): {}'.format(
            len(recent_candles), [candle.open for candle in recent_candles][0:3]))
        raise ValueError('High96PctModel loaded invalid recent data')

    # Fetch 75-day data.
    daily_candles_75 = []
    day_date = self.time().now()
    for i in range(75):
        day_date = self.time().get_prev_mkt_day(day_date)
        daily_candle = self.mongo().load_aggregate_candle(symbol, day_date)
        if daily_candle is None:
            raise ValueError('High96PctModel couldn\'t perform its check because we '
                             'don\'t have an aggregate candle for {}'.format(
                                 day_date.strftime(DATE_FORMAT)))
        daily_candles_75.append(daily_candle)

    # Compute the 75-day price range.
    low_75 = min([candle.low for candle in daily_candles_75])
    high_75 = max([candle.high for candle in daily_candles_75])

    # Check whether the 12-hour high falls in the upper 4% of the 75-day range.
    min_price_required = low_75 + (0.96 * (high_75 - low_75))
    return max([candle.high for candle in recent_candles]) >= min_price_required
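# --- Hedged worked example (added; made-up prices, runnable on its own) ---
# The upper-4% cutoff arithmetic used by High96PctModel: with a 75-day range of
# [10.00, 20.00], the cutoff is 10.00 + 0.96 * (20.00 - 10.00) = 19.60,
# so a 12-hour high of 19.70 passes and 19.50 does not.
low_75_ex, high_75_ex = 10.00, 20.00
cutoff_ex = low_75_ex + 0.96 * (high_75_ex - low_75_ex)  # 19.60
assert 19.70 >= cutoff_ex and not 19.50 >= cutoff_ex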
def catch_up():
    self.info_main('Trading and simulation disabled while checking for missing recent data...')
    catch_up_start_moment = pytime.monotonic()

    # Fork data_env for the new thread.
    catch_up_env = ExecEnv(self.logfeed_program, self.logfeed_data, creator_env=self.live_env)
    catch_up_env.fork_new_thread()
    catch_up_env.info_process('Performing catch-up task: checking for missing recent data')

    # Fork the model feeder for the new thread.
    catch_up_model_feeder = ModelFeeder(catch_up_env)

    # Reset models and go back 31 days if missing [t-31, t-4],
    # OR go back 4 days if only missing at most [t-4, t-1].

    # Start at t-31 days.
    day_date = catch_up_env.time().now().date()
    while not catch_up_env.time().is_mkt_day(day_date):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)
    for _ in range(warm_up_days + catch_up_days + 1):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)

    # Check that each day in [t-31, t-4] has valid data.
    symbols_reset = []
    for _ in range(warm_up_days):
        # Check the next day.
        day_date = catch_up_env.time().get_next_mkt_day(day_date)
        for symbol in Settings.get_symbols(catch_up_env):
            # Only check the symbol if it hasn't been reset.
            if symbol in symbols_reset:
                continue

            # Load the day's data and validate it.
            day_data = catch_up_env.mongo().load_symbol_day(symbol, day_date)
            if not SymbolDay.validate_candles(day_data.candles):
                catch_up_env.info_process(
                    '{} missing price data on {}. Resetting its model data'.format(symbol, day_date))
                catch_up_model_feeder.reset_models([symbol])
                symbols_reset.append(symbol)

    # Go back to the latest potentially-missing day.
    day_date = catch_up_env.time().now().date()
    while not catch_up_env.time().is_mkt_day(day_date):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)
    for _ in range(warm_up_days + catch_up_days + 1 if len(symbols_reset) != 0
                   else catch_up_days + 1):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)

    # Use price data to train models.
    for _ in range(warm_up_days + catch_up_days if len(symbols_reset) != 0 else catch_up_days):
        # Go through each reset symbol.
        for symbol in symbols_reset:
            # Load mongo price data if present.
            start_instant = pytime.monotonic()
            day_data = catch_up_env.mongo().load_symbol_day(symbol, day_date)

            # Collect polygon-rest price data if necessary.
            if not SymbolDay.validate_candles(day_data.candles):
                try:
                    day_data = catch_up_env.data_collector().collect_candles_for_day(day_date, symbol)
                except Exception:
                    catch_up_env.error_process('Error collecting polygon-rest data:')
                    catch_up_env.warn_process(traceback.format_exc())
            collection_time = pytime.monotonic() - start_instant

            # Validate the data.
            validation_debugger = []
            if day_data is not None and SymbolDay.validate_candles(day_data.candles,
                                                                   debug_output=validation_debugger):
                # Save the data.
                catch_up_env.redis().reset_day_difficulty(symbol, day_date)
                catch_up_env.mongo().save_symbol_day(day_data)

                # Use the data to train models for the symbol on this day.
                start_instant = pytime.monotonic()
                catch_up_model_feeder.train_models(symbol=symbol,
                                                   day_date=day_date,
                                                   day_data=day_data,
                                                   stable=True)
                train_time = pytime.monotonic() - start_instant
                catch_up_env.info_process(
                    f'Catch-up for {symbol} on {day_date:%m-%d-%Y}: collection took '
                    f'{collection_time:.2f}s; training took {train_time:.2f}s')
            else:
                catch_up_env.redis().incr_day_difficulty(symbol, day_date)
                catch_up_env.warn_process(
                    f'Couldn\'t collect catch-up data for {symbol} on {day_date}: '
                    f'{"null" if day_data is None else len(day_data.candles)} candles')
                catch_up_env.warn_process('\n'.join(validation_debugger))

        # Move to the next day.
        day_date = catch_up_env.time().get_next_mkt_day(day_date)

    # Determine whether or not we have yesterday's cached data for at least one symbol.
    unstable_data_present = False
    while not catch_up_env.time().is_mkt_day(day_date):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)
    for symbol in Settings.get_symbols(catch_up_env):
        unstable_data = catch_up_env.redis().get_cached_candles(symbol, day_date)
        if unstable_data is not None and SymbolDay.validate_candles(unstable_data):
            unstable_data_present = True
            break

    if unstable_data_present:
        msg = f'Valid cached redis data on {day_date:%B %d} found. ' \
              f'Models and strategies should function normally'
        catch_up_env.info_main(msg)
        catch_up_env.info_process(msg)
    else:
        msg = f'No valid redis data cached on {day_date:%b %d}. Models that need yesterday\'s ' \
              f'data will fail, causing some strategies to fail.'
        catch_up_env.warn_main(msg)
        catch_up_env.warn_process(msg)

    # Allow processes to resume now that data_collector is not busy.
    catch_up_env.mark_data_as_loaded()
    msg = f'Trading and strategy optimization enabled (catch up task took ' \
          f'{(pytime.monotonic() - catch_up_start_moment) / 3600:.2f} hrs)'
    catch_up_env.info_main(msg)
    catch_up_env.info_process(msg)
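# --- Hedged worked example (added; window arithmetic only — warm_up_days and
# catch_up_days are closure variables whose real values aren't shown here) ---
# With warm_up_days = 27 and catch_up_days = 4, a symbol that fails validation
# anywhere in [t-31, t-4] is reset and re-trained over 27 + 4 = 31 market days;
# otherwise only the most recent 4 days are re-trained.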
def find_mins_maxs(trendline_candles: List[Candle]) -> Tuple[List[Candle], List[Candle]]:
    """
    Returns two lists: the first containing local minima, and the second local maxima.
    """
    # Sanitize input.
    assert len(trendline_candles) > 9, 'Cannot find mins/maxs without at least 10 candles'

    # Get the sliding window length.
    trend_length = ContinuousTimeInterval(start_time=trendline_candles[0].moment.time(),
                                          end_time=trendline_candles[-1].moment.time()).length()
    window_length = max(5, int(trend_length * 0.12))

    # Ensure the sliding window length is an even number of seconds,
    # so the window's midpoint falls on a whole second.
    window_length = window_length if window_length % 2 == 0 else window_length + 1

    # Get the slide interval.
    slide_interval = max(1, window_length * 0.02)

    # Slide the window along the trendline period.
    mins, maxs = [], []
    window = ContinuousTimeInterval(trendline_candles[0].moment.time(),
                                    (trendline_candles[0].moment + timedelta(seconds=window_length)).time())
    while datetime.combine(trendline_candles[0].moment.date(), window.end_time) \
            <= trendline_candles[-1].moment:
        # Get the candles in the window.
        window_candles = SymbolDay.get_ordered_candles(candles=trendline_candles,
                                                       interval=TimeInterval(None, window.start_time,
                                                                             window.end_time))

        # Get the midpoint candle.
        midpoint_candle = midpoint_candle_in_period(period=window,
                                                    candles=trendline_candles,
                                                    day_date=trendline_candles[0].moment.date())

        # Slide the window forward if there is no midpoint candle or no window candles.
        # (Check before splitting the window, since splitting dereferences the midpoint.)
        if midpoint_candle is None or len(window_candles) == 0:
            window_start = datetime.combine(datetime.today(), window.start_time) \
                + timedelta(seconds=slide_interval)
            window_end = datetime.combine(datetime.today(), window.end_time) \
                + timedelta(seconds=slide_interval)
            window = ContinuousTimeInterval(window_start.time(), window_end.time())
            continue

        # Get the candles before and after the midpoint.
        first_half_candles = [candle for candle in window_candles
                              if candle.moment < midpoint_candle.moment]
        second_half_candles = [candle for candle in window_candles
                               if candle.moment > midpoint_candle.moment]

        # Slide the window forward if there are no candles on either side of the midpoint.
        if len(first_half_candles) == 0 or len(second_half_candles) == 0:
            window_start = datetime.combine(datetime.today(), window.start_time) \
                + timedelta(seconds=slide_interval)
            window_end = datetime.combine(datetime.today(), window.end_time) \
                + timedelta(seconds=slide_interval)
            window = ContinuousTimeInterval(window_start.time(), window_end.time())
            continue

        # Find what percentage of prices before/after the midpoint are less than the midpoint price.
        pct_prices_below = (len([candle for candle in first_half_candles
                                 if candle.low < midpoint_candle.low])
                            + len([candle for candle in second_half_candles
                                   if candle.low < midpoint_candle.low])) \
            / len(window_candles)

        # Find what percentage of prices before/after the midpoint are greater than the midpoint price.
        pct_prices_above = (len([candle for candle in first_half_candles
                                 if candle.high > midpoint_candle.high])
                            + len([candle for candle in second_half_candles
                                   if candle.high > midpoint_candle.high])) \
            / len(window_candles)

        # Record a local minimum if 97% of the window's prices are higher than the midpoint price.
        if pct_prices_above >= 0.97:
            mins.append(midpoint_candle)

        # Record a local maximum if 97% of the window's prices are lower than the midpoint price.
        if pct_prices_below >= 0.97:
            maxs.append(midpoint_candle)

        # Slide the window forward.
        window_start = datetime.combine(datetime.today(), window.start_time) \
            + timedelta(seconds=slide_interval)
        window_end = datetime.combine(datetime.today(), window.end_time) \
            + timedelta(seconds=slide_interval)
        window = ContinuousTimeInterval(window_start.time(), window_end.time())

    # Get the candles at the beginning and end of the trendline period.
    start_candles = SymbolDay.get_ordered_candles(
        candles=trendline_candles,
        interval=TimeInterval(None, trendline_candles[0].moment.time(),
                              (trendline_candles[0].moment + timedelta(seconds=window_length)).time()))
    end_candles = SymbolDay.get_ordered_candles(
        candles=trendline_candles,
        interval=TimeInterval(None,
                              (trendline_candles[-1].moment - timedelta(seconds=window_length)).time(),
                              trendline_candles[-1].moment.time()))

    # Check for a global minimum in prices at the start and end of the trendline period.
    start_min = sorted(start_candles, key=lambda candle: candle.low)[0]
    end_min = sorted(end_candles, key=lambda candle: candle.low)[0]
    if len(mins) < 2 or start_min.low < min([local_min_candle.low for local_min_candle in mins]):
        mins.insert(0, start_min)
    if len(mins) < 2 or end_min.low < min([local_min_candle.low for local_min_candle in mins]):
        mins.append(end_min)

    # Check for a global maximum in prices at the start and end of the trendline period.
    start_max = sorted(start_candles, key=lambda candle: candle.high)[-1]
    end_max = sorted(end_candles, key=lambda candle: candle.high)[-1]
    if len(maxs) < 2 or start_max.high > max([local_max_candle.high for local_max_candle in maxs]):
        maxs.insert(0, start_max)
    if len(maxs) < 2 or end_max.high > max([local_max_candle.high for local_max_candle in maxs]):
        maxs.append(end_max)

    # Ensure minima are spread apart by at least 3% of the trendline's period.
    reqd_dist = max(3, trend_length * 0.03)
    i = 0
    while i < len(mins) - 1 and len(mins) >= 3:
        if (mins[i + 1].moment - mins[i].moment).total_seconds() < reqd_dist:
            # Remove the higher of the two local minima.
            mins.pop(i if mins[i].low > mins[i + 1].low else i + 1)
        else:
            i += 1

    # Ensure maxima are spread apart by at least 3% of the trendline's period.
    i = 0
    while i < len(maxs) - 1 and len(maxs) >= 3:
        if (maxs[i + 1].moment - maxs[i].moment).total_seconds() < reqd_dist:
            # Remove the lower of the two local maxima.
            maxs.pop(i if maxs[i].high < maxs[i + 1].high else i + 1)
        else:
            i += 1

    return mins, maxs
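# --- Hedged worked example (added; arithmetic only) ---
# For a 1-hour trendline, trend_length = 3600 s, so:
#   window_length  = max(5, int(3600 * 0.12)) = 432 s (already even)
#   slide_interval = max(1, 432 * 0.02)       = 8.64 s per slide
#   reqd_dist      = max(3, 3600 * 0.03)      = 108 s,
# meaning accepted minima/maxima must sit at least ~2 minutes apart.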
def train_models(self,
                 symbol: str,
                 day_date: date,
                 day_data: Optional[SymbolDay],
                 stable: bool,
                 possibly_already_trained: bool = False) -> None:
    """
    Trains analysis models using the data provided, or loads the data from mongo/redis.

    :param possibly_already_trained: if True, don't log when we skip over a model
    """
    data_provided = day_data is not None

    # Train each model.
    for model in self.models:
        # Only train models that can be trained.
        if isinstance(model, AbstractSpotModel) or isinstance(model, AbstractNeuralModel):
            continue

        # Get the first time the model was trained.
        first_training_date = self.redis().get_analysis_start_date(
            symbol, model.model_type, self.time().now().date())

        # Get the last time the model was trained.
        last_training_date = self.redis().get_analysis_date(symbol, model.model_type)

        # Restart training from this day if the model is missing a snapshot.
        if self.redis().get_analysis_snapshot_raw_output(symbol, model.model_type) is None:
            model.restart_training(symbol)
            self.redis().save_analysis_start_date(symbol, model.model_type, day_date)
            self.info_process(
                f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                f'{day_date.strftime(DATE_FORMAT)} (model lacks a snapshot)')

        # Restart training from this day if the model is no longer continuous.
        elif last_training_date < self.time().get_prev_mkt_day(day_date):
            model.restart_training(symbol)
            self.redis().save_analysis_start_date(symbol, model.model_type, day_date)
            self.info_process(
                f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                f'{day_date.strftime(DATE_FORMAT)} (model training missed a day before this date)')

        # Restart training from this day if the model began training after this day.
        elif day_date < first_training_date:
            model.restart_training(symbol)
            self.redis().save_analysis_start_date(symbol, model.model_type, day_date)
            self.info_process(
                f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                f'{day_date.strftime(DATE_FORMAT)} (date precedes model\'s current start date)')

        # Don't train the model if it has already been trained on this day's data.
        elif day_date <= last_training_date:
            if not possibly_already_trained:
                self.warn_process(
                    f'{self.env_type.value} tried to train {symbol}\'s {model.model_type} more than '
                    f'once on {day_date.strftime(DATE_FORMAT)}')
            continue

        # Revert to the last stable snapshot if about to be fed new stable data.
        elif stable:
            snapshot_date = model.revert_to_snapshot(symbol)

            # Restart training from this day if the snapshot is too old.
            if day_date != self.time().get_next_mkt_day(snapshot_date):
                model.restart_training(symbol)
                self.redis().save_analysis_start_date(symbol, model.model_type, day_date)
                self.info_process(
                    f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                    f'{day_date.strftime(DATE_FORMAT)} (snapshot was too old)')

        # Get the data.
        if not data_provided:
            day_data = self.mongo().load_symbol_day(symbol, day_date)
        if not SymbolDay.validate_candles(day_data.candles):
            self.warn_process(
                f'{self.env_type.value} tried to train {symbol}\'s {model.model_type.value} '
                f'model on bad data!')
            continue

        # Train the model.
        try:
            model.feed_model(day_data)
        except Exception:
            self.error_process(f'Error training {self.env_type.value} {model.model_type.value}:')
            self.warn_process(traceback.format_exc())

        # Get the model's new output.
        model_output = model.get_stored_output(symbol)

        # Take a snapshot after being fed stable data.
        if stable:
            model.take_snapshot(symbol, model_output, day_date)
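# --- Hedged worked example of the continuity check above (added; dates illustrative) ---
# If last_training_date is Monday and day_date is Wednesday, and Tuesday was a
# market day, then last_training_date < get_prev_mkt_day(Wednesday) = Tuesday,
# so training restarts from Wednesday rather than silently skipping Tuesday.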
def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
    """
    Tells us whether the price has been trending downward (accounting for volume)
    today, not long-term.
    """
    # Fetch the latest 45 mins of data.
    candles = self.get_latest_candles(symbol, 45)

    # TODO REMOVE LATER: Debug candle validation.
    if len(candles) > MIN_CANDLES_PER_MIN * 45:
        debug_lines = ['\n', 'candle validation debug output:']
        SymbolDay.validate_candles(candles, min_minutes=45, debug_output=debug_lines)
        debug_lines.append('\n')

    # Validate the data.
    if not SymbolDay.validate_candles(candles, min_minutes=45):
        self.error_process('MomentumModel candles ({}): {}'.format(
            len(candles), [candle.open for candle in candles][0:3]))
        raise ValueError('Momentum calculation given invalid data')

    # Find the "typical" volume change for the period, using median and std dev.
    volume_changes = []
    for i in range(1, len(candles)):
        volume_changes.append(candles[i].volume - candles[i - 1].volume)
    med_vol_chg = median(volume_changes)
    vol_chg_stdev = stdev(volume_changes)

    # Collect normalized volume and price changes to see what direction the stock is going.
    # This is a slightly fancier way of multiplying trade price by number of trades.
    weighted_price_changes = []
    last_candle = candles[0]
    for candle in candles[1:-1]:
        vol_change = candle.volume - last_candle.volume
        price_change = (candle.low - last_candle.low) / last_candle.low

        # std_devs interpretation:
        #   < -2: least volume within the 45-min period
        #   <  0: less volume than usual for the 45-min period
        #   >  0: more volume than usual for the 45-min period
        #   >  2: most volume within the 45-min period
        std_devs = (vol_change - med_vol_chg) / vol_chg_stdev

        # Always positive; a high value means more volume than usual.
        weight = max(0, std_devs + 1.8)

        # Sign indicates the direction of the price movement; value indicates significance.
        weighted_change = 100 * price_change * 100 * weight
        weighted_price_changes.append(weighted_change)
        last_candle = candle

    # Calculate a sum that favors recent data.
    sum_pct_price_changes = 0
    x = 0
    incr = 3.0 / len(weighted_price_changes)
    for change in weighted_price_changes[-1:0:-1]:
        x += incr
        weight = exp(-x)
        sum_pct_price_changes += change * weight

    # The longer movement stays negative, the more we should expect it to change, and vice versa.
    if abs(sum_pct_price_changes) > 3:
        sum_pct_price_changes += -2 if sum_pct_price_changes > 0 else 0.2
        sum_pct_price_changes = sum_pct_price_changes / 2

    # The symbol is risky if the momentum is in a downward direction.
    return sum_pct_price_changes
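# --- Hedged worked example (added; illustrative numbers) ---
# A -0.1% price move on unusually heavy volume is weighted far more than the
# same move on quiet volume:
med_vol_chg_ex, vol_chg_stdev_ex = 200.0, 150.0
std_devs_ex = (500.0 - med_vol_chg_ex) / vol_chg_stdev_ex  # 2.0 -> very heavy volume
weight_ex = max(0, std_devs_ex + 1.8)                      # 3.8
weighted_change_ex = 100 * -0.001 * 100 * weight_ex        # -38.0 (strong bearish signal)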
def run(self) -> None:
    # Clone the live environment, connecting this thread to real data.
    live_env = ExecEnv(self.program.logfeed_optimization, self.program.logfeed_optimization,
                       self.program.live_env)
    live_env.fork_new_thread()

    # Experiment settings.
    MAX_TRIALS_PER_DAY = 250  # max number of periods to evaluate per historical day
    EVAL_PERIOD_LEN = 3 * 60  # number of seconds over which to track profits
    EVAL_FLOOR_PERIOD_LEN = 7 * 60  # number of seconds over which to track killswitch floor

    # Load dates on which we have all the needed data.
    experiment_start_date = date(2018, 6, 1)
    spy_dates = live_env.mongo().get_dates_on_file(
        symbol='SPY', start_date=experiment_start_date, end_date=live_env.time().now().date())
    spxl_dates = live_env.mongo().get_dates_on_file(
        symbol='SPXL', start_date=experiment_start_date, end_date=live_env.time().now().date())
    spxl_dates = [day_date for day_date in spxl_dates if day_date in spy_dates]  # narrow SPXL to SPY dates
    spy_dates = [day_date for day_date in spy_dates if day_date in spxl_dates]  # narrow SPY to SPXL dates
    spxs_dates = live_env.mongo().get_dates_on_file(
        symbol='SPXS', start_date=experiment_start_date, end_date=live_env.time().now().date())
    spxs_dates = [day_date for day_date in spxs_dates if day_date in spy_dates]  # narrow SPXS to SPY/SPXL dates
    spy_dates = [day_date for day_date in spy_dates if day_date in spxs_dates]  # narrow SPY to SPXS dates
    spxl_dates = [day_date for day_date in spxl_dates if day_date in spy_dates]  # narrow SPXL to the common dates
    assert len(spy_dates) == len(spxl_dates) == len(spxs_dates)

    # Init statistics on the experiment.
    spxl_blr_setup_vals = []
    spxs_blr_setup_vals = []
    spxl_blr_10_vals = []
    spxs_blr_10_vals = []
    spxl_blr_25_vals = []
    spxs_blr_25_vals = []
    spxl_profits = []
    spxl_floors = []
    spxs_profits = []
    spxs_floors = []
    oscillation_model = OscillationModel(live_env, AnalysisModelType.OSCILLATION)
    trend_model = LSFavorModel(live_env, AnalysisModelType.LS_FAVOR)

    # Simulate the days on which SPY, SPXL, and SPXS jointly have data.
    live_env.info_process(f'Beginning BLR simulations over {len(spxs_dates)} dates')
    for day_date in spxs_dates:
        # Load data for the experiment.
        live_env.info_process(
            f'Running trials on {day_date:%m-%d-%Y} (successful trials: {len(spxl_profits)})')
        spy_data = live_env.mongo().load_symbol_day(symbol='SPY', day=day_date)
        spxl_data = live_env.mongo().load_symbol_day(symbol='SPXL', day=day_date)
        spxs_data = live_env.mongo().load_symbol_day(symbol='SPXS', day=day_date)

        # Validate the data.
        data_is_valid = True
        for day_data in [spy_data, spxl_data, spxs_data]:
            if not SymbolDay.validate_candles(day_data.candles):
                data_is_valid = False
                break
        if not data_is_valid:
            live_env.info_process(f'Invalid data on {day_date:%m-%d-%Y}')
            continue

        # Init time-window variables.
        start_moment = datetime.combine(day_date, OPEN_TIME) + timedelta(seconds=int(30 * 60))
        end_moment = datetime.combine(day_date, CLOSE_TIME) - timedelta(
            seconds=int(EVAL_PERIOD_LEN + 15 * 60))

        # Go through time windows on each day.
        day_trials = 0
        while start_moment < end_moment and day_trials < MAX_TRIALS_PER_DAY:
            try:
                # Move to the next time window.
                start_moment += timedelta(seconds=random.randint(30, 120))
                blr_setup_period = ContinuousTimeInterval(
                    (start_moment - timedelta(seconds=3 * 60)).time(), start_moment.time())
                blr_10_period = ContinuousTimeInterval(
                    (start_moment - timedelta(seconds=10 * 60)).time(), start_moment.time())
                blr_25_period = ContinuousTimeInterval(
                    (start_moment - timedelta(seconds=25 * 60)).time(), start_moment.time())
                eval_period = ContinuousTimeInterval(
                    start_moment.time(),
                    (start_moment + timedelta(seconds=EVAL_PERIOD_LEN)).time())
                eval_floor_period = ContinuousTimeInterval(
                    start_moment.time(),
                    (start_moment + timedelta(seconds=EVAL_FLOOR_PERIOD_LEN)).time())

                # Ignore non-oscillatory periods.
                oscillation_val = oscillation_model.get_oscillation_val(
                    candles_in_period(blr_setup_period, spy_data.candles, spy_data.day_date))
                if oscillation_val < 0.6:
                    continue

                # Calculate BLR trendline indicators.
                spxl_blr_setup_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_setup_period, spxl_data.candles, spxl_data.day_date)))
                spxs_blr_setup_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_setup_period, spxs_data.candles, spxs_data.day_date)))
                spxl_blr_10_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_10_period, spxl_data.candles, spxl_data.day_date)))
                spxs_blr_10_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_10_period, spxs_data.candles, spxs_data.day_date)))
                spxl_blr_25_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_25_period, spxl_data.candles, spxl_data.day_date)))
                spxs_blr_25_val = trend_model.get_blr_strength(BoundedLinearRegressions(
                    candles_in_period(blr_25_period, spxs_data.candles, spxs_data.day_date)))

                # Calculate maximum profits during the evaluation period.
                spxl_buy_price = candles_in_period(
                    blr_setup_period, spxl_data.candles, spxl_data.day_date)[-1].close
                spxs_buy_price = candles_in_period(
                    blr_setup_period, spxs_data.candles, spxs_data.day_date)[-1].close
                spxl_eval_candles = candles_in_period(eval_period, spxl_data.candles,
                                                      spxl_data.day_date)
                spxs_eval_candles = candles_in_period(eval_period, spxs_data.candles,
                                                      spxs_data.day_date)
                spxl_eval_floor_candles = candles_in_period(eval_floor_period, spxl_data.candles,
                                                            spxl_data.day_date)
                spxs_eval_floor_candles = candles_in_period(eval_floor_period, spxs_data.candles,
                                                            spxs_data.day_date)
                spxl_profit_pct = (max([candle.high * 0.3 + candle.open * 0.7
                                        for candle in spxl_eval_candles])
                                   - spxl_buy_price) / spxl_buy_price
                spxs_profit_pct = (max([candle.high * 0.3 + candle.open * 0.7
                                        for candle in spxs_eval_candles])
                                   - spxs_buy_price) / spxs_buy_price
                spxl_floor_pct = (spxl_buy_price - min([candle.low * 0.3 + candle.open * 0.7
                                                        for candle in spxl_eval_floor_candles])) \
                    / spxl_buy_price
                spxs_floor_pct = (spxs_buy_price - min([candle.low * 0.3 + candle.open * 0.7
                                                        for candle in spxs_eval_floor_candles])) \
                    / spxs_buy_price

                # Record trial stats.
                spxl_blr_setup_vals.append(spxl_blr_setup_val)
                spxs_blr_setup_vals.append(spxs_blr_setup_val)
                spxl_blr_10_vals.append(spxl_blr_10_val)
                spxs_blr_10_vals.append(spxs_blr_10_val)
                spxl_blr_25_vals.append(spxl_blr_25_val)
                spxs_blr_25_vals.append(spxs_blr_25_val)
                spxl_profits.append(spxl_profit_pct)
                spxl_floors.append(spxl_floor_pct)
                spxs_profits.append(spxs_profit_pct)
                spxs_floors.append(spxs_floor_pct)
                day_trials += 1

                # Print experiment stats every 100 trials.
                if len(spxl_blr_setup_vals) > 0 and len(spxl_blr_setup_vals) % 100 != 0:
                    continue
                live_env.info_process('\n\n')

                def print_immediate_profit(val_lists, profits_list, threshold, symbol, trend_name):
                    # Get indices corresponding to vals that are above all thresholds.
                    indices = [i for i in range(len(val_lists[0]))]
                    for j in range(len(val_lists)):
                        indices = [i for i in indices if val_lists[j][i] >= threshold]
                    if len(indices) > 3:
                        profits = [profits_list[i] for i in indices]
                        profit_mean, profit_med, profit_stdev = (
                            mean(profits), median(profits), stdev(profits))
                        immediate_profit = profit_med
                        live_env.info_process(
                            f'Immediate {symbol} profit (< 3 mins) when {trend_name} strength >= '
                            f'{100 * threshold}%: '
                            f'{100 * immediate_profit:.2f}% (n={len(profits)})')

                def print_profit_ratio(val_lists, spxl_profits_list, spxs_profits_list,
                                       threshold, trend_name):
                    # Get indices corresponding to vals that are above all thresholds.
                    indices = [i for i in range(len(val_lists[0]))]
                    for j in range(len(val_lists)):
                        indices = [i for i in indices if val_lists[j][i] >= threshold]
                    if len(indices) > 3:
                        profit_ratios = [
                            spxl_profits_list[i] / max(0.0002, spxs_profits_list[i])
                            for i in indices
                        ]
                        ratios_mean, ratios_med, ratios_stdev = (
                            mean(profit_ratios), median(profit_ratios), stdev(profit_ratios))
                        live_env.info_process(
                            f'Immediate profit ratio (SPXL:SPXS) when {trend_name} strength >= '
                            f'{100 * threshold}%: '
                            f'{ratios_med:.2f}:1 (n={len(profit_ratios)})')

                # TODO NEXT: Implement a -1.65% killswitch in the strategy.
                # TODO NEXT: What pct of oscillation range is expected profit?

                def print_killswitch_floor(val_lists, floors_list, threshold, symbol, trend_name):
                    # Get indices corresponding to vals that are above all thresholds.
                    indices = [i for i in range(len(val_lists[0]))]
                    for j in range(len(val_lists)):
                        indices = [i for i in indices if val_lists[j][i] >= threshold]
                    if len(indices) > 3:
                        floors = [-floors_list[i] for i in indices]
                        floor_mean, floor_med, floor_stdev = (
                            mean(floors), median(floors), stdev(floors))
                        killswitch_floor = floor_med - 1.5 * floor_stdev
                        live_env.info_process(
                            f'{symbol} killswitch activation (-1.5 stdev floor) when {trend_name} '
                            f'strength >= {100 * threshold}%: '
                            f'{100 * killswitch_floor:.2f}% (n={len(floors)})')

                """
                # Print immediate profits when BLR strength >= 70%.
                print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.7, 'SPXL', 'BLR-6')
                print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.7, 'SPXS', 'BLR-6')
                print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.7, 'SPXL', 'BLR-10')
                print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.7, 'SPXS', 'BLR-10')
                print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.7, 'SPXL', 'BLR-25')
                print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.7, 'SPXS', 'BLR-25')

                # Print immediate profits when BLR strength >= 85%.
                print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.85, 'SPXL', 'BLR-6')
                print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.85, 'SPXS', 'BLR-6')
                print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.85, 'SPXL', 'BLR-10')
                print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.85, 'SPXS', 'BLR-10')
                print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.85, 'SPXL', 'BLR-25')
                print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.85, 'SPXS', 'BLR-25')

                # Print immediate profits when BLR strength >= 95%.
                print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.95, 'SPXL', 'BLR-6')
                print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.95, 'SPXS', 'BLR-6')
                print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.95, 'SPXL', 'BLR-10')
                print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.95, 'SPXS', 'BLR-10')
                print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.95, 'SPXL', 'BLR-25')
                print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.95, 'SPXS', 'BLR-25')

                # Print SPXL immediate profit when the last two BLR strengths >= 90%.
                print_immediate_profit([spxl_blr_10_vals, spxl_blr_25_vals], spxl_profits, 0.9,
                                       'SPXL', 'BLR-10-25')

                # Print SPXL immediate profit when all BLR strengths >= 30%.
                print_immediate_profit([spxl_blr_6_vals, spxl_blr_10_vals, spxl_blr_25_vals],
                                       spxl_profits, 0.3, 'SPXL', 'BLR-6-10-25')
                """

                # Print SPXL:SPXS profit ratio when BLR strength >= 60%.
                print_profit_ratio([spxl_blr_setup_vals], spxl_profits, spxs_profits, 0.6, 'BLR-3')
                print_profit_ratio([spxl_blr_10_vals], spxl_profits, spxs_profits, 0.6, 'BLR-10')
                print_profit_ratio([spxl_blr_25_vals], spxl_profits, spxs_profits, 0.6, 'BLR-25')

                # Print SPXL:SPXS profit ratio when BLR strength >= 85%.
                print_profit_ratio([spxl_blr_setup_vals], spxl_profits, spxs_profits, 0.85, 'BLR-3')
                print_profit_ratio([spxl_blr_10_vals], spxl_profits, spxs_profits, 0.85, 'BLR-10')
                print_profit_ratio([spxl_blr_25_vals], spxl_profits, spxs_profits, 0.85, 'BLR-25')

                # Print SPXL:SPXS profit ratio when BLR strength >= 95%.
                print_profit_ratio([spxl_blr_setup_vals], spxl_profits, spxs_profits, 0.95, 'BLR-3')
                print_profit_ratio([spxl_blr_10_vals], spxl_profits, spxs_profits, 0.95, 'BLR-10')
                print_profit_ratio([spxl_blr_25_vals], spxl_profits, spxs_profits, 0.95, 'BLR-25')

                # Print SPXL:SPXS profit ratio when long BLR strengths >= 60%.
                print_profit_ratio([spxl_blr_10_vals, spxl_blr_25_vals], spxl_profits,
                                   spxs_profits, 0.6, 'BLR-10-25')

                # Print expected min profit when osc_val >= 60%.
                print_immediate_profit([spxl_blr_setup_vals],
                                       [min(spxl_profits[i], spxs_profits[i])
                                        for i in range(len(spxl_profits))],
                                       0, '', 'oscillating... N/A')

                # Print killswitch floor when osc_val >= 60%.
                print_killswitch_floor([spxl_blr_setup_vals],
                                       [max(spxl_floors[i], spxs_floors[i])
                                        for i in range(len(spxl_floors))],
                                       0, '', 'oscillating... N/A')
            except Exception:
                # live_env.warn_process(f'BLR Experiment error: {traceback.format_exc()}')
                continue
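# --- Hedged worked example (added; made-up prices) ---
# The blended price used for profits above weights each candle's extreme at 30%
# and its open at 70%:
buy_price_ex = 50.00
blended_high_ex = 0.3 * 51.00 + 0.7 * 50.40              # 50.58
profit_pct_ex = (blended_high_ex - buy_price_ex) / buy_price_ex  # 0.0116 -> ~1.16%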
def run(self) -> None:
    # Set data parameters.
    start_date = date(year=2002, month=1, day=1)
    end_date = self.program.live_env.time().now().date() - timedelta(days=1)

    # Clone the live environment so it can run on this thread.
    live_env = ExecEnv(self.program.logfeed_program, self.program.logfeed_program,
                       self.program.live_env)
    live_env.fork_new_thread()
    data_collector = PolygonDataCollector(self.program.logfeed_program,
                                          self.program.logfeed_program,
                                          live_env.time())

    # Clear the data file.
    filename = 'debug_data/spy_ai_data.txt'
    try:
        if not os.path.exists('debug_data'):
            os.mkdir('debug_data')
        if os.path.exists(filename):
            os.remove(filename)
    except Exception:
        print(f'Error deleting file: "{filename}"')

    # Go through the data we have on file.
    day_date = start_date - timedelta(days=1)
    while day_date < end_date:
        # Get the next market day.
        day_date = self.program.live_env.time().get_next_mkt_day(day_date)

        # Load price data.
        print(f'Fetching SPY data for {day_date:%m-%d-%Y}')
        day_data = live_env.mongo().load_symbol_day('SPY', day_date)

        # Get fresh data from polygon.io, if necessary.
        if not SymbolDay.validate_candles(day_data.candles):
            try:
                day_data = data_collector.collect_candles_for_day(day_date, 'SPY')
            except Exception:
                live_env.error_process('Error collecting polygon-rest data:')
                live_env.warn_process(traceback.format_exc())

        # Validate the data.
        if day_data is None or not SymbolDay.validate_candles(day_data.candles):
            print(f'COULD NOT COMPILE PRICE DATA FOR SPY ON {day_date:%m-%d-%Y}')
            continue

        # TODO Convert candles into sentences.

        # Convert the data into json.
        data_dict = day_data.to_json()

        # Append the data to the txt file.
        with open(filename, 'a+') as f:
            f.write(json.dumps(data_dict))
        print(f'Dumped data to TC2_data/{filename}')
def run(self) -> HealthCheckResult:
    try:
        # Fetch candles from the previous market day.
        symbol = 'SPY'
        day_date = self.time().get_prev_mkt_day()
        # noinspection PyTypeChecker
        polygon_collector = DebuggedPolygonDataCollector(None, None, self.time())
        self.debug(f'Collecting {symbol} data from polygon for {day_date:%m-%d-%Y}')
        day_data = polygon_collector.collect_candles_for_day(day=day_date, symbol=symbol)
        self.debug_messages.extend(polygon_collector.debug_msgs)

        # Print timings.
        self.set_passing(True)
        time_length = '--' if len(polygon_collector.timings_total) == 0 \
            else f'{mean(polygon_collector.timings_total) / 60.0:.2}m'
        # Guard against an empty timings list before taking its mean.
        if len(polygon_collector.timings_total) > 0 \
                and mean(polygon_collector.timings_total) > 60 * 7:
            self.debug('POLYGON CHECK FAILED: It takes more than 7 minutes to fetch a day of data')
        self.debug(f'Avg total task time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_basket) == 0 \
            else f'{mean(polygon_collector.timings_basket):.2f}s'
        self.debug(f'Avg total basket handling time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_fetch) == 0 \
            else f'{mean(polygon_collector.timings_fetch):.2f}s'
        self.debug(f'Avg batch fetch time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_parse) == 0 \
            else f'{mean(polygon_collector.timings_parse):.2f}s'
        self.debug(f'Avg basket parse time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_avg_block1) == 0 \
            else f'{mean(polygon_collector.timings_avg_block1) * 1000:.2}ms'
        self.debug(f'Avg block1 logic time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_avg_block2) == 0 \
            else f'{mean(polygon_collector.timings_avg_block2) * 1000:.2}ms'
        self.debug(f'Avg block2 logic time: {time_length}')

        time_length = '--' if len(polygon_collector.timings_avg_block3) == 0 \
            else f'{mean(polygon_collector.timings_avg_block3) * 1000:.2}ms'
        self.debug(f'Avg block3 logic time: {time_length}')

        # Check with the production version of PolygonDataCollector.
        self.debug('Now collecting the same data using the production data collector...')
        polygon_collector = PolygonDataCollector(logfeed_program=self.logfeed_process,
                                                 logfeed_process=self.logfeed_process,
                                                 time_env=self.time())
        day_data = polygon_collector.collect_candles_for_day(day=day_date, symbol=symbol)

        # Fail the check if the collected data is invalid.
        if day_data is None or not SymbolDay.validate_candles(day_data.candles,
                                                              debug_output=self.debug_messages):
            self.debug('Polygon query returned invalid candles')
            self.set_passing(False)
        else:
            self.debug('Production data collector returned valid data')
    except Exception as e:
        self.debug('Error fetching polygon data: {}'.format(e.args))
        self.set_passing(False)

    return self.make_result()