def _collect_and_train(self) -> None:
    """
    Collects polygon-rest data and trains analysis models on it.

    For every symbol returned by the settings this method:
      1. rewinds to a "stable" day, two market days before the most recent market day,
         and asks mongo to remove price data after it;
      2. trains on the market day following the stable day using polygon-rest data,
         falling back to cached stream data when the rest data is rejected;
      3. trains on the market day after that using cached stream data.

    Iteration stops early as soon as ``self._running`` becomes false.
    """
    self.info_process('\n\n')
    self.info_process(
        'Performing daily data collection and model training...')

    # Threshold (9h30m, in seconds) used to detect that the clock has rolled past midnight.
    pre_open_window_secs = timedelta(hours=9, minutes=30).total_seconds()

    for symbol in Settings.get_symbols(self):

        # Interrupt collection if the collection loop was stopped.
        if not self._running:
            break

        # Revert data to last stable day.
        date_last_collected_for = self.time().now().date()
        # If it's past midnight, move back a day.
        if self.time().get_secs_to_open() < pre_open_window_secs:
            date_last_collected_for -= timedelta(days=1)
        # Move back two market days from the most recent market day.
        for _ in range(2):
            date_last_collected_for = self.time().get_prev_mkt_day(
                date_last_collected_for)

        # Remove mongo price data after the stable day.
        # NOTE(review): if now() returns a stdlib datetime, .today() is a classmethod that
        # reads the system clock rather than the time environment - confirm this is intended.
        self.mongo().remove_price_data_after(
            symbol,
            date_last_collected_for,
            today=self.time().now().today())
        date_rest_available_for = self.time().get_next_mkt_day(
            date_last_collected_for)

        # Collect yesterday's polygon-rest data and train on it.
        if self._train_on_rest_data(symbol, date_rest_available_for):
            self.info_process(
                f'Trained {symbol} on yesterday\'s polygon rest data')
        else:
            self.warn_process(
                f'Invalid {symbol} rest data collected for {date_rest_available_for}. '
                f'Discarding them and attempting to use cached stream data instead'
            )
            if self._train_on_stream_data(symbol, date_rest_available_for):
                self.info_process(
                    f'Trained {symbol} on yesterday\'s polygon stream data'
                )
            else:
                self.warn_process(
                    f'Invalid {symbol} candles cached for {date_rest_available_for}. '
                    f'Could not find valid data to train on yesterday!')

        # Load today's polygon-stream data and train on it.
        date_cache_available_for = self.time().get_next_mkt_day(
            date_rest_available_for)
        if self._train_on_stream_data(symbol, date_cache_available_for):
            self.info_process(
                f'Trained {symbol} on today\'s polygon stream data')
        else:
            # Report the day that was actually attempted (the cache day, not the rest day).
            self.warn_process(
                f'Invalid {symbol} candles cached for {date_cache_available_for}. '
                f'Could not find valid data to train on today!')
def score_symbols(self, symbols: List[str] = None) -> Dict[str, float]:
    """
    Grades every symbol with every weighted model, then converts the grades to scores.

    A model that raises while producing its output or grade is logged via
    ``error_process`` and contributes no grade for that symbol.

    :param symbols: symbols to grade; defaults to all symbols from the settings
    :return: the result of the scoring system's ``score_symbols`` for the collected grades
    """
    # Fall back to every symbol listed in the settings.
    if symbols is None:
        symbols = Settings.get_symbols(self)

    grades_by_symbol = {}
    for symbol in symbols:
        collected = []
        for model_type, _ in self.scoring_system.model_weights.items():
            try:
                output = self.models[model_type].calculate_output(symbol)
                self._set_model_output(model_type, output, symbol)
                grade = self.models[model_type].grade_symbol(symbol, output)
                collected.append(grade)
            except Exception:
                error_text = 'Error calculating model output for {} on {}: {}'.format(
                    model_type, symbol, traceback.format_exc())
                self.error_process(error_text)
        grades_by_symbol[symbol] = collected

    # Let the scoring system turn categorical grades into numerical scores.
    return self.scoring_system.score_symbols(grades_by_symbol)
def load_pre_reqs(self) -> None:
    """
    Builds the objects the rest of the program depends on.

    Creates one log feed per category (writing a restart marker to each), builds the live
    execution environment from a time environment, database managers and a polygon data
    collector, and exports the Alpaca/Polygon credentials as environment variables.
    """
    # Restart marker shared by most feeds (the optimization feed uses its own literal).
    restart_marker = '. ...PROGRAM RESTARTED...'

    # Initialize log feeds.
    self.logfeed_data = LogFeed(LogCategory.DATA)
    self.logfeed_data.log(LogLevel.ERROR, restart_marker)
    self.logfeed_trading = LogFeed(LogCategory.LIVE_TRADING)
    self.logfeed_trading.log(LogLevel.ERROR, restart_marker)
    self.logfeed_optimization = LogFeed(LogCategory.OPTIMIZATION)
    self.logfeed_optimization.log(LogLevel.ERROR, ' ...PROGRAM RESTARTED...')
    self.logfeed_visuals = LogFeed(LogCategory.VISUALS)
    self.logfeed_visuals.log(LogLevel.ERROR, restart_marker)
    self.logfeed_api = LogFeed(LogCategory.API)
    self.logfeed_api.log(LogLevel.ERROR, restart_marker)

    # Time environment for live data collection and trading.
    clock = TimeEnv(datetime.now())

    # Database managers are created here; connections are not initialized yet.
    redis_manager = RedisManager(self.logfeed_program, EnvType.LIVE)
    mongo_manager = MongoManager(self.logfeed_program, EnvType.LIVE)

    # Collector used to access polygon.io.
    collector = PolygonDataCollector(
        logfeed_program=self.logfeed_program,
        logfeed_process=self.logfeed_data,
        time_env=clock)

    # The live execution environment logs to the program feed for both channels.
    self.live_env = ExecEnv(logfeed_program=self.logfeed_program,
                            logfeed_process=self.logfeed_program)

    # Attach the live time & data objects to the environment.
    self.live_env.setup_first_time(env_type=EnvType.LIVE,
                                   time=clock,
                                   data_collector=collector,
                                   mongo=mongo_manager,
                                   redis=redis_manager)

    # Export Alpaca credentials as environment variables so they need not be passed around.
    if Settings.get_endpoint(self.live_env) == BrokerEndpoint.LIVE:
        base_url = 'https://api.alpaca.markets'
        key_setting = 'alpaca.live_key_id'
        secret_setting = 'alpaca.live_secret_key'
    else:
        base_url = 'https://paper-api.alpaca.markets'
        key_setting = 'alpaca.paper_key_id'
        secret_setting = 'alpaca.paper_secret_key'

    os.environ['APCA_API_BASE_URL'] = base_url
    os.environ['APCA_API_KEY_ID'] = self.live_env.get_setting(key_setting)
    os.environ['APCA_API_SECRET_KEY'] = self.live_env.get_setting(secret_setting)
    # The polygon key is always the live Alpaca key id, regardless of the endpoint.
    os.environ['POLYGON_KEY_ID'] = self.live_env.get_setting(
        'alpaca.live_key_id')
def init_health_checks(self) -> None:
    """
    Creates and starts the health checks refresher.

    As originally documented, this schedules health checks (e.g. data checks, analysis
    model checks) to run at night; the user can also run checks manually using the webpanel.
    """
    # The refresher logs to the program feed for both its program and process channels.
    refresher = HealthChecksRefresher(
        logfeed_program=self.logfeed_program,
        logfeed_process=self.logfeed_program,
        symbols=Settings.get_symbols(self.live_env),
        live_time_env=self.live_env.time())
    self.health_checks_refresher = refresher
    refresher.start()
def init_visualization(self) -> None:
    """
    Creates and starts the visuals refresher.

    As originally documented, this schedules visuals to update continuously in the
    background; the user can also update visuals manually using the webpanel.
    """
    # Visuals get their own process log feed.
    refresher = VisualsRefresher(
        logfeed_program=self.logfeed_program,
        logfeed_process=self.logfeed_visuals,
        symbols=Settings.get_symbols(self.live_env),
        live_time_env=self.live_env.time())
    self.visuals_refresher = refresher
    refresher.start()
def get_optimization_times(
        self) -> Dict[str, Dict[AbstractStrategy, datetime]]:
    """
    Looks up the stored optimization time of every (symbol, strategy) combination.

    :return: a dictionary like {'TXN': {CycleStrategy: datetime}, ...}; the outer keys are
        symbol names and each inner dictionary is keyed by the entries of ``self.strategies``
    """
    return {
        symbol: {
            # Redis is queried by symbol and by the strategy's class name.
            strategy: self.redis().get_optimization_time(
                symbol, strategy.__class__.__name__)
            for strategy in self.strategies
        }
        for symbol in Settings.get_symbols(self)
    }
def create_data_points(self) -> List[NeuralExample]:
    """
    Walks every symbol's days on file to build neural examples.

    Unfinished: days with enough candles are found, but no example is created yet, so the
    returned list is always empty.
    """
    data_points = []

    # Mix in examples from all symbols.
    for symbol in Settings.get_symbols(self):
        days_on_file = self.mongo().get_dates_on_file(symbol, START_DATE, self.time().now())
        for day_date in days_on_file:
            # Load the symbol's candles on this day.
            day_data = self.mongo().load_symbol_day(symbol, day_date)

            # Skip days lacking the first 45 minutes of data or enough candles overall.
            opening_candles = SymbolDay.get_ordered_candles(day_data.candles, FIRST_45_MINS)
            if not (len(opening_candles) >= 45 * MIN_CANDLES_PER_MIN
                    and len(day_data.candles) >= MIN_CANDLES_PER_DAY):
                continue

            # TODO Calculate minute-to-minute price and volume changes

            # TODO Classify rally strength on the day

            # TODO Create a data point

    return data_points
def catch_up():
    """
    Startup task that checks for missing recent price data and re-trains models on it.

    Uses ``self``, ``warm_up_days`` and ``catch_up_days`` from the enclosing scope. Steps:
      1. Validate stored data for each of the ``warm_up_days`` market days that precede the
         catch-up window, resetting the models of any symbol with an invalid day.
      2. Starting at the beginning of the warm-up window when any symbol was reset (otherwise
         at the beginning of the catch-up window), load or collect each day's data for the
         reset symbols and train their models on it.
      3. Report whether valid cached redis candles exist for the day after the last trained day.
      4. Mark data as loaded so other processes can resume.
    """
    self.info_main(
        'Trading and simulation disabled while checking for missing recent data...'
    )
    catch_up_start_moment = pytime.monotonic()

    # Fork data_env for the new thread.
    catch_up_env = ExecEnv(self.logfeed_program,
                           self.logfeed_data,
                           creator_env=self.live_env)
    catch_up_env.fork_new_thread()
    catch_up_env.info_process(
        'Performing catch-up task: checking for missing recent data')

    # Fork model feeder for the new thread.
    catch_up_model_feeder = ModelFeeder(catch_up_env)

    def mkt_days_back(days_back):
        """Returns the date ``days_back`` market days before the latest market day (today included)."""
        day = catch_up_env.time().now().date()
        while not catch_up_env.time().is_mkt_day(day):
            day = catch_up_env.time().get_prev_mkt_day(day)
        for _ in range(days_back):
            day = catch_up_env.time().get_prev_mkt_day(day)
        return day

    # Start one day before the warm-up window; the loop below advances before checking.
    day_date = mkt_days_back(warm_up_days + catch_up_days + 1)

    # Check that each day of the warm-up window has valid data.
    symbols_reset = []
    for _ in range(warm_up_days):
        # Check the next day.
        day_date = catch_up_env.time().get_next_mkt_day(day_date)

        for symbol in Settings.get_symbols(catch_up_env):
            # Only check the symbol if it hasn't been reset.
            if symbol in symbols_reset:
                continue

            # Load the day's data and validate it.
            day_data = catch_up_env.mongo().load_symbol_day(symbol, day_date)
            if not SymbolDay.validate_candles(day_data.candles):
                catch_up_env.info_process(
                    '{} missing price data on {}. Resetting its model data'
                    .format(symbol, day_date))
                catch_up_model_feeder.reset_models([symbol])
                symbols_reset.append(symbol)

    # Go back to the latest potential missing day: the whole warm-up + catch-up window when
    # any symbol was reset, otherwise only the catch-up window.
    days_to_train = warm_up_days + catch_up_days if len(symbols_reset) != 0 else catch_up_days
    day_date = mkt_days_back(days_to_train + 1)

    # Use price data to train models.
    # NOTE(review): only reset symbols are trained here, so when nothing was reset the
    # catch-up days are iterated without training any symbol - confirm this is intended.
    for _ in range(days_to_train):
        # Go through each reset symbol.
        for symbol in symbols_reset:

            # Load mongo price data if present.
            start_instant = pytime.monotonic()
            day_data = catch_up_env.mongo().load_symbol_day(symbol, day_date)

            # Collect polygon-rest price data if necessary.
            if not SymbolDay.validate_candles(day_data.candles):
                try:
                    day_data = catch_up_env.data_collector(
                    ).collect_candles_for_day(day_date, symbol)
                except Exception:
                    # Best effort: keep the (invalid) stored data and report below.
                    catch_up_env.error_process(
                        'Error collecting polygon-rest data:')
                    catch_up_env.warn_process(traceback.format_exc())
            collection_time = pytime.monotonic() - start_instant

            # Validate data.
            validation_debugger = []
            if day_data is not None and SymbolDay.validate_candles(
                    day_data.candles, debug_output=validation_debugger):
                # Save data.
                catch_up_env.redis().reset_day_difficulty(symbol, day_date)
                catch_up_env.mongo().save_symbol_day(day_data)

                # Use data to train models for symbol on day.
                start_instant = pytime.monotonic()
                catch_up_model_feeder.train_models(symbol=symbol,
                                                   day_date=day_date,
                                                   day_data=day_data,
                                                   stable=True)
                train_time = pytime.monotonic() - start_instant
                catch_up_env.info_process(
                    f'Catch-up for {symbol} on {day_date:%m-%d-%Y}: collection took '
                    f'{collection_time:.2f}s; training took {train_time:.2f}s'
                )
            else:
                catch_up_env.redis().incr_day_difficulty(symbol, day_date)
                # Guard on the data object (not the date) so a failed collection that
                # yielded None is reported instead of raising inside the log call.
                candles = None if day_data is None else day_data.candles
                candle_count = 'null' if candles is None else len(candles)
                catch_up_env.warn_process(
                    f'Couldn\'t collect catch-up data for {symbol} on {day_date}: '
                    f'{candle_count} candles'
                )
                catch_up_env.warn_process('\n'.join(validation_debugger))

        # Move to the next day.
        day_date = catch_up_env.time().get_next_mkt_day(day_date)

    # Determine whether or not we have yesterday's cached data for at least one symbol.
    unstable_data_present = False
    while not catch_up_env.time().is_mkt_day(day_date):
        day_date = catch_up_env.time().get_prev_mkt_day(day_date)
    for symbol in Settings.get_symbols(catch_up_env):
        unstable_data = catch_up_env.redis().get_cached_candles(symbol, day_date)
        if unstable_data is not None and SymbolDay.validate_candles(unstable_data):
            unstable_data_present = True
            break

    if unstable_data_present:
        msg = f'Valid cached redis data on {day_date:%B %d} found. ' \
              f'Models and strategies should function normally'
        catch_up_env.info_main(msg)
        catch_up_env.info_process(msg)
    else:
        msg = f'No valid redis data cached on {day_date:%b %d}. Models that need yesterday\'s data will ' \
              f'fail, causing some strategies to fail.'
        catch_up_env.warn_main(msg)
        catch_up_env.warn_process(msg)

    # Allow processes to resume now that data_collector is not busy.
    catch_up_env.mark_data_as_loaded()
    msg = f'Trading and strategy optimization enabled (catch up task took ' \
          f'{(pytime.monotonic() - catch_up_start_moment) / 3600:.2f} hrs)'
    catch_up_env.info_main(msg)
    catch_up_env.info_process(msg)
def init_account_data_streams(self) -> None:
    """Connects the account data streams for every symbol in the live environment's settings."""
    live_symbols = Settings.get_symbols(self.live_env)
    AccountDataStream.connect_to_streams(symbols=live_symbols,
                                         logfeed_data=self.logfeed_data)
def max_purchase_usd(self) -> float:
    """
    Returns the max-purchase setting stored for this strategy, looked up by its id.

    NOTE(review): the earlier documentation described the value as the maximum percentage
    of the account's balance to use on the strategy, while the setting name suggests a USD
    amount - confirm the unit against the settings definition.
    """
    strategy_id = self.get_id()
    return Settings.get_strategy_max_purchase_usd(self, strategy_id)
def start_collection_loop(
        self, livestream_updates: 'multiprocessing list') -> None:
    """
    Runs the data collection loop until ``self._running`` is cleared.

    Every three seconds the loop drains pending stream updates, caching candle updates in
    redis, and once the scheduled collection time has passed it collects data and trains
    the analysis models, then schedules the next collection.

    :param livestream_updates: shared container of stream updates handed to the account wrapper
    """
    assert not self._running, 'Tried to start collection loop twice!'
    self._running = True

    # Fork the execution environment so it can run in this thread.
    self.fork_new_thread()

    # The feeder is what trains the models.
    self.model_feeder = ModelFeeder(self)

    # Hook into the live data stream through the account wrapper.
    account = AlpacaAccount(env=self,
                            logfeed_trading=self.logfeed_process,
                            livestream_updates=livestream_updates)

    # Block until the startup task has finished loading historical data.
    while not self.is_data_loaded():
        pytime.sleep(1)

    # Work out when the first collection should happen.
    next_collection = self.next_collection_time(is_first_collection=True)

    while self._running:
        # Pause 3 seconds between iterations.
        pytime.sleep(3)

        # Drain the stream, caching live candles in redis.
        try:
            drained = 0
            while True:
                update = account.get_next_trading_update(
                    Settings.get_symbols(self))
                if update is None:
                    break
                if update.update_type is StreamUpdateType.CANDLE:
                    self._cache_candle(symbol=update.raw_data['symbol'],
                                       candle=update.get_candle())
                drained += 1
            # An unusually large batch is worth flagging.
            if drained > 500:
                self.warn_process(
                    f'DailyCollector processed {drained} updates at once'
                )
        except Exception:
            self.error_process('Error processing polygon live data:')
            self.warn_process(traceback.format_exc())

        # Nothing more to do until the collection time has passed.
        if not self.time().now() > next_collection:
            continue

        # Collect data on all symbols and train analysis models.
        self._collect_and_train()

        # Don't collect again until tomorrow.
        next_collection = self.next_collection_time(
            is_first_collection=False)

    # Announce that the loop has been stopped.
    self.info_process('DailyCollector collection loop stopped')
def heartbeat(self) -> None:
    """
    One iteration of the live trading logic, meant to be called on a continuous loop.

    Returns early (after sleeping) while trading is disabled, data is still loading, markets
    are closed with nothing to monitor, day trading is not permitted, or no strategy/symbol
    is currently viable. Otherwise it picks a (symbol, strategy) pair, runs the strategy on
    the account and records the run in redis.
    """
    mode = 'day' if self.day_trader else 'swing'

    if not self.day_trader and self.strategy is None:
        # TODO Continue executing swing strategy from yesterday
        pass

    if self.day_trader and self.strategy is None:
        # Dump order and positions associated with day trading.
        # TODO self._cancel_orders_and_positions()
        pass

    # Wait for user to enable live trading.
    trading_enabled = DAY_TRADING_ENABLED if self.day_trader else SWING_TRADING_ENABLED
    if not trading_enabled:
        pytime.sleep(5)
        # Only log occasionally to avoid flooding the feed.
        if random.randint(0, 100) < 10:
            self.info_process(f'{self._pref()} waiting for user to enable {mode} trading')
        return

    # Wait while historical data is still being fetched.
    if not self.is_data_loaded():
        pytime.sleep(2)
        if random.randint(0, 100) < 15:
            self.info_process(f'{self._pref()} waiting for historical data to load')
        return

    # Wait if there's nothing to monitor or start.
    if self.strategy is None and not self.time().is_open():
        pytime.sleep(5)
        if random.randint(0, 100) < 5:
            self.debug_process(f'{self._pref()} waiting for markets to open')
        return

    # Ensure day trading is allowed by the brokerage account.
    if self.day_trader and not self.account.can_day_trade():
        self.warn_process('Cannot execute strategies live. Made too many day trades')
        pytime.sleep(60 * 10)
        return

    # Find non-experimental strategies that are active now and will stay active for 30+ seconds.
    candidates = create_day_strategies(self) if self.day_trader else create_swing_strategies(self)
    candidates = [candidate for candidate in candidates if not candidate.is_experimental()]
    time_now = self.time().now().time()
    available_strategies = [
        candidate for candidate in candidates
        if candidate.times_active().contains_time(time_now)
        and candidate.times_active().will_contain_for(time_now).total_seconds() > 30
    ]
    if len(available_strategies) == 0:
        self.debug_process(f'No {mode} strategies allowed to run now')
        pytime.sleep(30)
        return
    self.info_process(f'{mode.capitalize()} strategies '
                      f'allowed to run now (ordered by priority): '
                      f'{", ".join([strat.get_id() for strat in available_strategies])}')

    # Find all viable (symbol, strategy) pairs.
    viable_pairs = []
    try:
        for candidate in available_strategies:
            # Filter out symbols that fail the strategy's viability tests.
            viable_symbols = candidate.score_symbols()
            viable_pairs.extend(SymbolStrategyPair(symbol, candidate) for symbol in viable_symbols)
            # Log viable symbols for this strategy.
            self.info_process('Viable symbols for {}: {}'.format(
                candidate.get_id(), viable_symbols if len(viable_symbols) > 0 else 'none'
            ))
    except Exception:
        self.error_process(f'Could not check strategy viability: {traceback.format_exc()}')

    # Stop trying to trade if there are no viable symbols for any available strategy.
    if len(viable_pairs) == 0:
        self.debug_process(f'No {mode} strategies viable now')
        pytime.sleep(30)
        return

    # Select the (symbol, strategy) pair that has performed best in the past.
    chosen = self.choose_best_viable_pair(viable_pairs)
    strategy = chosen.strategy
    strategy.set_symbols([chosen.symbol])

    # The long-short strategy trades a fixed symbol set and needs a clean account first.
    if 'longshortstrategy' in strategy.get_id().lower():
        self.warn_process('Resetting account: clearing orders, positions, and cache')
        strategy.set_symbols(['SPY', 'SPXL', 'SPXS'])
        pytime.sleep(3)
        # Each account call is followed by a pause before the next step.
        reset_steps = (
            (self.account.cancel_open_orders, 3),
            (self.account.liquidate_positions, 30),
            (self.account.refresh_open_orders, 5),
            (self.account.refresh_positions, 5),
        )
        for account_call, pause_secs in reset_steps:
            account_call(['SPXL', 'SPXS'])
            pytime.sleep(pause_secs)
        symbols_held = [position.symbol for position in self.account.get_positions()]
        self.warn_process(f'Symbols held: {", ".join(symbols_held)}')
        if any(symbol in symbols_held for symbol in ['SPXS', 'SPXL']):
            self.warn_process('SPXL or SPXS positions still held. Can\'t start LongShortStrategy!')
            return

    # Execute the strategy with a fresh stream queue.
    self.account.stream_queue = StreamUpdateQueue()
    self.info_process(f'{self._pref()} executing {strategy.get_id()} on '
                      f'{self.time().now():%Y-%m-%d} at {self.time().now():%H:%M:%S}')
    live_run = StrategyRunner(strategy).run(self.account)
    self.info_process(f'{self._pref()} finished executing {strategy.get_id()} on '
                      f'{self.time().now():%Y-%m-%d} at {self.time().now():%H:%M:%S}')

    # Save strategy run data in redis.
    self.redis().record_live_run(strategy_id=strategy.get_id(),
                                 run=live_run,
                                 endpoint=Settings.get_endpoint(self))

    # Before executing the next strategy, wait for account balance to update.
    pytime.sleep(2)
def choose_best_viable_pair(self, viable_pairs: List[SymbolStrategyPair]) -> SymbolStrategyPair:
    """
    Selects the most promising pair, balancing past profits, entry ratio, and volume of run data.

    Each pair is scored from its live run history, the scores are rescaled, and the winner is
    drawn at random from a lottery in which a pair appears ``1 + int(score)`` times, so
    higher-scoring pairs are more likely (but not guaranteed) to be chosen.

    :param viable_pairs: candidate (symbol, strategy) pairs; must not be empty
    :return: the randomly drawn pair
    :raises ValueError: from ``min`` when ``viable_pairs`` is empty (previously IndexError)
    """
    # Run history is stored separately for paper and live endpoints.
    use_paper_history = Settings.get_endpoint(self) == BrokerEndpoint.PAPER

    # Use a points system to rank pairs.
    scores: Dict[SymbolStrategyPair, float] = {}
    for pair in viable_pairs:
        # Load strategy history for this pair.
        strategy_runs = self.redis().get_live_run_history(
            strategies=[pair.strategy.get_id()],
            paper=use_paper_history,
            symbols=[pair.symbol]
        )
        # A run counts as finished when it recorded at least one non-zero sell price.
        finished_runs = [
            run for run in strategy_runs
            if sum(sum(symbol_run.sell_prices) for symbol_run in run.symbol_runs) > 0
        ]

        # Collect the relative profit / loss of every completed historical trade in one pass.
        gains = []
        losses = []
        for run in finished_runs:
            for symbol_run in run.symbol_runs:
                # Only trades with a buy time, quantity and sell time on record are complete;
                # zip() additionally stops at the shorter of the two price lists.
                completed = min(len(symbol_run.times_bought),
                                len(symbol_run.qties_traded),
                                len(symbol_run.times_sold))
                trades = zip(symbol_run.qties_traded[:completed],
                             symbol_run.buy_prices,
                             symbol_run.sell_prices)
                for qty, buy_price, sell_price in trades:
                    if sell_price == buy_price:
                        # Break-even trades count as neither profit nor loss.
                        continue
                    # A positive quantity marks a long trade (entered at the buy price);
                    # otherwise it is a short trade (entered at the sell price). Either way
                    # the trade is profitable when it sold higher than it bought.
                    entry_price = buy_price if qty > 0 else sell_price
                    change = (sell_price - buy_price) / entry_price
                    if change > 0:
                        gains.append(change)
                    else:
                        losses.append(-change)

        score = 0.0

        # Reward symbol score for previous profitable runs.
        # FORMULA: 2(log(x) + 2) (i.e. avg_profit in [0.01, 2] -> points in [0, 4.5]).
        avg_gain = sum(gains) / len(gains) if gains else 0
        x = min(2.0, max(0.01, avg_gain))
        score += max(0.0, 2 * (math.log10(x) + 2))

        # Penalize symbol score for history of losses.
        # FORMULA: 4(log(x) + 2) (i.e. avg_loss in [0.01, 2] -> points in [0, 9]).
        avg_loss = sum(losses) / len(losses) if losses else 0
        x = min(2.0, max(0.01, avg_loss))
        score -= max(0.0, 4 * (math.log10(x) + 2))

        # Penalize symbol score for low entry ratio.
        # NOTE(review): the formula was documented as "-45log(x) + 0.5 (entry_ratio in
        # [0.05, 1] -> points in [55, 0])", but the expression below yields roughly [36, 0];
        # left unchanged until the intended curve is confirmed.
        entry_ratio = len(finished_runs) / len(strategy_runs) if finished_runs else 0
        x = max(0.05, entry_ratio)
        score -= max(0.0, -45 * (math.log10(x) + 0.5))

        # Randomly penalize symbol score for lack of run history.
        # FORMULA: -7log(6x) + 16 (i.e. runs_on_file in [1, 25] -> points in [10, 1]).
        if random.randint(1, 100) < 75:
            x = max(1, len(strategy_runs))
            score -= max(0.0, -7 * math.log10(6 * x) + 16)

        scores[pair] = score

    self.info_process(f'{self._pref()} <symbol, strategy> scores before normalizing: '
                      f'{[str(pair) + ": " + str(score) for pair, score in scores.items()]}')

    # Shift scores up by the magnitude of the lowest one and rescale towards 100.
    # NOTE(review): the shift uses abs(lowest), so when every score is positive the lowest
    # normalized score is above 0 rather than exactly 0.
    lowest_score = min(scores.values())
    shift = abs(lowest_score)
    highest_score = max(1.0, max(scores.values())) + shift
    scores = {pair: 100 * (score + shift) / highest_score for pair, score in scores.items()}

    self.info_process(f'{self._pref()} <symbol, strategy> scores after normalizing: '
                      f'{[str(pair) + ": " + str(score) for pair, score in scores.items()]}')

    # Add more copies of symbols with higher scores (increase their chance of being chosen from the list).
    symbol_lottery = []
    for pair, score in scores.items():
        # Every pair gets one ticket plus one per whole point of score.
        symbol_lottery.extend([pair] * (1 + max(0, int(score))))

    self.info_process(f'{self._pref()} <symbol, strategy> lottery after normalizing: '
                      f'{[str(pair) for pair in symbol_lottery]}')

    return random.choice(symbol_lottery)