Example #1
    def run(self) -> None:
        # Set symbol and date we need data for.
        symbols = ['SPY', 'SPXL', 'SPXS']
        start_date = date(year=2020, month=4, day=1)
        days_to_dump = 5

        # Clone live environment so it can run on this thread.
        live_env = ExecEnv(self.program.logfeed_program,
                           self.program.logfeed_program, self.program.live_env)
        live_env.fork_new_thread()
        data_collector = PolygonDataCollector(self.program.logfeed_program,
                                              self.program.logfeed_program,
                                              live_env.time())

        # Go through each symbol.
        for symbol in symbols:

            # Go through the first 5 market days starting with start_date.
            day_date = start_date - timedelta(days=1)
            for i in range(days_to_dump):

                # Get the next market day.
                day_date = live_env.time().get_next_mkt_day(day_date)

                # Load price data.
                print(f'Fetching {symbol} data for {day_date:%m-%d-%Y}')
                day_data = live_env.mongo().load_symbol_day(symbol, day_date)

                # Get fresh data from polygon.io, if necessary.
                if not SymbolDay.validate_candles(day_data.candles):
                    try:
                        day_data = data_collector.collect_candles_for_day(
                            day_date, symbol)
                    except Exception as e:
                        live_env.error_process(
                            'Error collecting polygon-rest data:')
                        live_env.warn_process(traceback.format_exc())

                # Validate the data.
                if day_data is None or not SymbolDay.validate_candles(
                        day_data.candles):
                    print(f'COULD NOT COMPILE DEBUG PRICE DATA FOR '
                          f'{symbol} ON {day_date:%m-%d-%Y}')
                    continue

                # Convert the data into json.
                data_dict = day_data.to_json()

                # Dump the data into a text file.
                if not os.path.exists('debug_data'):
                    os.mkdir('debug_data')
                with open(f'debug_data/{symbol}_{day_date:%m-%d-%Y}.txt',
                          'w+') as f:
                    f.write(json.dumps(data_dict))
                print(f'Dumped data to TC2_data/debug_data/{symbol}_{day_date:%m-%d-%Y}.txt')
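For reference, a dump written by this task can be read back with the standard library alone. A minimal sketch, assuming only the debug_data/{symbol}_{date}.txt naming used above; the helper name load_debug_dump is hypothetical.

import json

def load_debug_dump(symbol: str, day_str: str) -> dict:
    # Read back a JSON dump written by the run() task above.
    with open(f'debug_data/{symbol}_{day_str}.txt', 'r') as f:
        return json.loads(f.read())

# e.g. data_dict = load_debug_dump('SPY', '04-01-2020')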
Example #2
    def _train_on_rest_data(self, symbol: str, day_date: date) -> bool:
        """
        Collects polygon-rest data and trains the symbol on it.
        Returns False if the collected data is invalid; True otherwise.
        """
        # Collect polygon-rest data
        rest_data = None
        try:
            rest_data = self.data_collector().collect_candles_for_day(
                day_date, symbol)
        except Exception as e:
            self.error_process('Error collecting polygon-rest data:')
            self.warn_process(traceback.format_exc())

        # Validate polygon-rest data
        if rest_data is None or not SymbolDay.validate_candles(
                rest_data.candles):
            self.redis().reset_day_difficulty(symbol, day_date)
            self.redis().incr_day_difficulty(symbol, day_date)
            return False

        # Save polygon-rest data
        self.redis().reset_day_difficulty(symbol, rest_data.day_date)
        self.mongo().save_symbol_day(rest_data)

        # Train models on polygon-rest data
        self.model_feeder.train_models(symbol=symbol,
                                       day_date=day_date,
                                       day_data=rest_data,
                                       stable=True)
        return True
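The reset-then-increment above leaves a failed (symbol, day) pair marked with difficulty 1, so callers can avoid days that keep failing. A hedged sketch of that policy; get_day_difficulty is an assumed accessor mirroring the reset/incr calls above, not a confirmed API.

from datetime import date

def should_attempt_day(redis_client, symbol: str, day_date: date, max_difficulty: int = 3) -> bool:
    # Skip (symbol, day) pairs whose collection has proven too difficult.
    return redis_client.get_day_difficulty(symbol, day_date) < max_difficulty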
Example #3
    def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
        """
        Returns a number between -1 and 1, indicating how strongly the S&P-500 is trending
            and in which direction.
        """

        # Do not allow symbols to execute LongShortStrategy except SPXS and SPXL.
        if symbol not in ['SPXL', 'SPXS']:
            return LongShortFavor.NOT_APPLICABLE

        # Fetch data during the period.
        spxl_candles = self.get_latest_candles('SPXL', 30)
        spxs_candles = self.get_latest_candles('SPXS', 30)

        # Validate data.
        debugger = []
        for candles in [spxl_candles, spxs_candles]:
            if not SymbolDay.validate_candles(
                    candles, min_minutes=29, debug_output=debugger):
                debug_str = '\n\t'.join(debugger)
                self.debug_process(f'LSFavor invalid data:\n\t{debug_str}')
                raise ValueError('LSFavorModel could not fetch valid data')

        # self.debug_process(f'LSFavor using candles to get favor val')
        return self.get_favor_val(self.time().now(), spxl_candles,
                                  spxs_candles)
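A sketch of consuming the favor value, assuming only the [-1, 1] range documented above; choose_side and the 0.5 threshold are illustrative, not part of the source.

def choose_side(favor_val: float, threshold: float = 0.5) -> str:
    # Positive favor = S&P-500 trending up, so favor the 3x-long ETF;
    # negative favor = trending down, so favor the 3x-short ETF.
    if favor_val >= threshold:
        return 'SPXL'
    if favor_val <= -threshold:
        return 'SPXS'
    return 'NONE'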
Example #4
    def _train_on_stream_data(self, symbol: str, day_date: date) -> bool:
        """
        Loads cached polygon-stream data and trains the symbol on it.
        Returns False if the data is invalid; True otherwise.
        """
        # Validate polygon-stream data
        stream_data = SymbolDay(symbol=symbol,
                                day_date=day_date,
                                candles=self.redis().get_cached_candles(
                                    symbol=symbol, day_date=day_date))
        if not SymbolDay.validate_candles(stream_data.candles):
            return False

        # Train models on polygon-stream data
        self.model_feeder.train_models(symbol=symbol,
                                       day_date=day_date,
                                       day_data=stream_data,
                                       stable=True)
        return True
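Taken together with Example #2, a caller might try the cheap cached stream data first and fall back to a REST fetch. A minimal sketch under that assumption; the driver name _train_on_day is hypothetical.

    def _train_on_day(self, symbol: str, day_date: date) -> bool:
        # Cached polygon-stream data is cheaper, so try it first;
        # fall back to polygon-rest collection when the cache is invalid.
        if self._train_on_stream_data(symbol, day_date):
            return True
        return self._train_on_rest_data(symbol, day_date)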
Example #5
    def _load_period_data(self, output: Breakout1ModelOutput,
                          symbol: str) -> bool:
        output.symbol = symbol
        output.period_data = self.get_latest_candles(
            symbol=symbol, minutes=breakout1_constants.BREAKOUT_SETUP_MINS)
        if not SymbolDay.validate_candles(
                output.period_data,
                min_minutes=breakout1_constants.BREAKOUT_SETUP_MINS):
            output.add_step(passed=False,
                            value='insufficient data for the period',
                            step_id=Breakout1ModelSteps.INITIALIZATION)
            return False
        return True
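A hedged usage sketch; constructing Breakout1ModelOutput with no arguments and the model variable are assumptions, and steps[-1] relies on add_step() appending to output.steps the way Example #10 does.

output = Breakout1ModelOutput()
if not model._load_period_data(output, 'SPY'):
    # The failed step records why initialization was aborted.
    print(output.steps[-1].value)  # 'insufficient data for the period'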
Example #6
    def feed_model(self, day_data: SymbolDay) -> None:
        """
        Calculates the strongest dip during the first 45 minutes of CycleStrategy's typical run window.
        i.e. it predicts the worst dip that should be expected within 45 minutes of buying the symbol.
        """
        # Find price at minute 60
        start_candle: Candle = day_data.get_candle_at_sec(
            datetime.combine(day_data.day_date, time(hour=10, minute=30)))
        if start_candle is None:
            self.warn_process(
                "Couldn't update dip_45 analysis_model for CycleStrategy. Bad data at minute 60."
            )
            return

        # Find the lowest price within 45 minutes after minute 60
        start_time = datetime.combine(day_data.day_date,
                                      time(hour=10, minute=30))
        end_time = start_time + timedelta(minutes=45)
        lowest_candle: Candle = Candle(start_candle.moment, start_candle.open,
                                       start_candle.high, start_candle.low,
                                       start_candle.close, start_candle.volume)
        for candle in day_data.candles:
            if candle.low < lowest_candle.low and start_time < candle.moment < end_time:
                lowest_candle = candle

        # Calculate the greatest downward price change as a percentage
        strongest_dip_pct = 100.0 * max(
            0.0, start_candle.low - lowest_candle.low) / start_candle.low

        # Load the current running sum
        current_sum = self.redis().get_analysis_rolling_sum(
            day_data.symbol, self.model_type)

        # Skip days that don't dip since this model is only interested in forecasting dips
        if strongest_dip_pct == 0:
            output = current_sum

        # Merge this day into the running sum
        else:
            output = RollingSumFormulas.combine(
                current_sum, strongest_dip_pct,
                RollingSumFormulas.get_30_day_weight())

        # Save model's output
        self.save_output(symbol=day_data.symbol,
                         raw_output=output,
                         day_date=day_data.day_date)
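A minimal sketch of the rolling-sum blend assumed above; RollingSumFormulas is the real class, but this body and the example weight are assumptions, not its confirmed implementation.

def combine(current_sum: float, new_value: float, new_weight: float) -> float:
    # Exponentially-weighted blend: the new day contributes new_weight of the
    # total and the running sum keeps the remainder.
    return current_sum * (1.0 - new_weight) + new_value * new_weight

# e.g. combine(2.5, 4.0, 1 / 30) nudges a 2.5% rolling dip toward 4.0%, giving 2.55.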
Example #7
    def collect_candles_for_day(self, day: date, symbol: str) -> Optional[SymbolDay]:
        """
        Uses Polygon to collect candles for the given day.
        Does NOT save the newly-collected candles.
        """
        candles = None
        try:
            candles = self._parse_ticks_in_intervals(symbol, [[
                datetime.combine(day, OPEN_TIME),
                datetime.combine(day, CLOSE_TIME)
            ]])
        except Exception as e:
            self.debug_msgs.append(f'Error collecting {symbol} candles from polygon for {day:%m-%d-%Y}:')
            self.debug_msgs.append(traceback.format_exc())

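        # Note: if collection failed above, candles is still None here, so the
        # returned SymbolDay will fail validation downstream.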
        return SymbolDay(symbol, day, candles)
Example #8
    def create_dummy_day(self, symbol: str, day_date: date,
                         num_candles: int) -> SymbolDay:
        """Creates a SymbolDay with mock price data."""
        dummy_candles = []
        dummy_moment = datetime.combine(day_date, OPEN_TIME)
        for i in range(num_candles):
            dummy_candles.append(
                Candle(
                    moment=dummy_moment,
                    open=0.001,
                    high=0.001,
                    low=0.001,
                    close=0.001,
                    volume=999,
                ))
            dummy_moment += timedelta(seconds=1)
        return SymbolDay(symbol, day_date, dummy_candles)
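A hedged usage sketch in a test, assuming validation accepts the one-candle-per-second data generated above; the fixtures variable is hypothetical.

# 6.5 market hours at one candle per second.
dummy_day = fixtures.create_dummy_day('SPY', date(2020, 4, 1), num_candles=int(6.5 * 60 * 60))
assert SymbolDay.validate_candles(dummy_day.candles)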
Example #9
def init_simulation_data(live_env: 'ExecEnv',
                         sim_env: 'ExecEnv',
                         symbols: List[str],
                         days: int,
                         end_date: date,
                         model_feeder: 'ModelFeeder',
                         skip_last_day_training: bool = False) -> Optional[str]:
    """
    WARNING: this will change the time of the simulated environment, sim_env.

    :param days: the number of market days to fill with data before (including) end_date
    :param skip_last_day_training: whether or not to skip training analysis models on end_date
    Copies live data into the simulation environment and trains analysis models.
    Returns None if successful. Otherwise the error message is returned.
    """

    # Go back n days from end_date.
    day_date = end_date
    for i in range(days):
        day_date = live_env.time().get_prev_mkt_day(day_date)

    # Copy data for each day into simulation environment and train models.
    for i in range(days + 1):
        for symbol in symbols:
            # Load data from live environment.
            day_data = live_env.mongo().load_symbol_day(symbol=symbol, day=day_date)

            # Validate data.
            if not SymbolDay.validate_candles(day_data.candles):
                return f'Couldn\'t set up {days}-day simulation environment for {symbol} ending at ' \
                       f'{end_date:%Y-%m-%d}. Data missing on {day_date:%Y-%m-%d}'

            # Copy data into the simulated environment.
            sim_env.mongo().save_symbol_day(day_data)

            # Train models.
            if day_date != end_date or not skip_last_day_training:
                model_feeder.train_models(symbol=symbol,
                                          day_date=day_date,
                                          day_data=day_data,
                                          stable=True)

        # Move to the next day.
        day_date = live_env.time().get_next_mkt_day(day_date)
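A hedged usage sketch; live_env, sim_env, and model_feeder are assumed to be constructed as in the surrounding examples.

error = init_simulation_data(live_env, sim_env, symbols=['SPY'], days=5,
                             end_date=date(2020, 4, 1), model_feeder=model_feeder,
                             skip_last_day_training=True)
if error is not None:
    live_env.warn_process(error)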
Example #10
    def _check_ema_volume_excitement(self,
                                     output: Breakout1ModelOutput) -> bool:
        ema_volumes = [ema(output.minute_volumes)]
        prev_date = self.time().now().date()
        for i in range(7):
            # Load the previous day's data
            prev_date = self.time().get_prev_mkt_day(prev_date)
            day_data = self.mongo().load_symbol_day(output.symbol, prev_date)
            if not SymbolDay.validate_candles(day_data.candles):
                output.steps.append(
                    ModelStep(passed=False,
                              value=f'missing needed data on {prev_date.strftime(DATE_FORMAT)}',
                              step_id=Breakout1ModelSteps.EMA_MINUTE_VOLUME))
                return False

            # Aggregate the day's second-resolution candles into minute resolution
            prev_period_start = datetime.combine(prev_date,
                                                 output.period.start_time)
            prev_period_end = datetime.combine(prev_date,
                                               output.period.end_time)
            prev_second_candles = [
                candle for candle in day_data.candles
                if prev_period_start <= candle.moment <= prev_period_end
            ]
            prev_minute_candles = aggregate_minute_candles(prev_second_candles)

            # Calculate the previous day's moving-average volume
            ema_volumes.append(
                ema([candle.volume for candle in prev_minute_candles]))

        # Calculate the median EMA volume over this period today and the same period on each of the past 7 days
        med_ema_volume_prev_7_periods = median(ema_volumes)
        # Calculate the ema volume of this period today
        ema_minute_volume = ema(output.minute_volumes)
        # Perform next step: ema minute volume check
        output.steps.append(
            output.check_ema_minute_volume(ema_minute_volume,
                                           med_ema_volume_prev_7_periods))
        return output.steps[-1].passed
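For context, a minimal EMA consistent with how ema() is called above; the smoothing constant and the empty-input behavior are assumptions, and the real implementation may differ.

from typing import List

def ema(values: List[float], alpha: float = 0.2) -> float:
    # Standard exponential moving average; the most recent value carries weight alpha.
    if not values:
        return 0.0
    result = values[0]
    for value in values[1:]:
        result = alpha * value + (1 - alpha) * result
    return result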
Example #11
    def create_data_points(self) -> List[NeuralExample]:
        examples = []

        # Mix in examples from all symbols
        for symbol in Settings.get_symbols(self):
            dates = self.mongo().get_dates_on_file(symbol, START_DATE, self.time().now())
            for day_date in dates:
                # Load symbol's candles on day_date
                day_data = self.mongo().load_symbol_day(symbol, day_date)

                # Ensure first 45 minutes of data is present
                first_45_candles = SymbolDay.get_ordered_candles(day_data.candles, FIRST_45_MINS)
                if len(first_45_candles) < 45 * MIN_CANDLES_PER_MIN or len(day_data.candles) < MIN_CANDLES_PER_DAY:
                    continue

                # TODO Calculate minute-to-minute price and volume changes

                # TODO Classify rally strength on the day

                # TODO Create a data point

        return examples
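A sketch of the first TODO above, meant to sit inside the loop; it assumes Candle exposes .close and .volume, and the percentage normalization is an illustrative choice.

closes = [candle.close for candle in first_45_candles]
volumes = [candle.volume for candle in first_45_candles]
price_changes = [(closes[i] - closes[i - 1]) / closes[i - 1]
                 for i in range(1, len(closes))]
volume_changes = [volumes[i] - volumes[i - 1]
                  for i in range(1, len(volumes))]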
Example #12
    def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
        """
        Returns True if the symbol's 12-hour high falls in the upper 4% of its 75-day range, False otherwise.
        """

        # Fetch latest 12 hours of data
        recent_candles = self.get_latest_candles(symbol, 60 * 12)

        # Validate 12-hour data
        if not SymbolDay.validate_candles(recent_candles, min_minutes=60 * 12):
            self.error_process(
                'High96PctModel candles ({}): {}'.format(len(recent_candles),
                                                         [candle.open for candle in recent_candles][0:3]))
            raise ValueError('High96PctModel loaded invalid recent data')

        # Fetch 75-day data
        daily_candles_75 = []
        day_date = self.time().now().date()
        for i in range(75):
            day_date = self.time().get_prev_mkt_day(day_date)
            daily_candle = self.mongo().load_aggregate_candle(symbol, day_date)
            if daily_candle is None:
                raise ValueError('High96PctModel couldn\'t perform its check because we '
                                 'don\'t have an aggregate candle for {}'.format(day_date.strftime(DATE_FORMAT)))
            daily_candles_75.append(daily_candle)

        # Compute 75-day price range
        low_75 = min([candle.low for candle in daily_candles_75])
        high_75 = max([candle.high for candle in daily_candles_75])

        # Check whether 12-hour high falls in upper 4% of 75-day range
        min_price_required = low_75 + (0.96 * (high_75 - low_75))
        return max([candle.high for candle in recent_candles]) >= min_price_required
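A worked example of the 4%-band check: with low_75 = 100.0 and high_75 = 150.0, min_price_required = 100.0 + 0.96 * (150.0 - 100.0) = 148.0, so the model returns True exactly when the 12-hour high reaches 148.0 or above.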
Example #13
        def catch_up():
            self.info_main(
                'Trading and simulation disabled while checking for missing recent data...'
            )
            catch_up_start_moment = pytime.monotonic()

            # Fork data_env for the new thread.
            catch_up_env = ExecEnv(self.logfeed_program,
                                   self.logfeed_data,
                                   creator_env=self.live_env)
            catch_up_env.fork_new_thread()
            catch_up_env.info_process(
                'Performing catch-up task: checking for missing recent data')

            # Fork model feeder for the new thread.
            catch_up_model_feeder = ModelFeeder(catch_up_env)

            # Reset models and go back 31 days if missing [t-31, t-4].
            # OR go back 4 days if only missing at most [t-4, t-1].

            # Start at t-31 days.
            day_date = catch_up_env.time().now().date()
            while not catch_up_env.time().is_mkt_day(day_date):
                day_date = catch_up_env.time().get_prev_mkt_day(day_date)
            for _ in range(warm_up_days + catch_up_days + 1):
                day_date = catch_up_env.time().get_prev_mkt_day(day_date)

            # Check that each day [t-31, t-4] has valid data.
            symbols_reset = []
            for _ in range(warm_up_days):
                # Check the next day.
                day_date = catch_up_env.time().get_next_mkt_day(day_date)

                for symbol in Settings.get_symbols(catch_up_env):
                    # Only check the symbol if it hasn't been reset.
                    if symbol in symbols_reset:
                        continue

                    # Load the day's data and validate it.
                    day_data = catch_up_env.mongo().load_symbol_day(
                        symbol, day_date)
                    if not SymbolDay.validate_candles(day_data.candles):
                        catch_up_env.info_process(
                            '{} missing price data on {}. Resetting its model data'
                            .format(symbol, day_date))
                        catch_up_model_feeder.reset_models([symbol])
                        symbols_reset.append(symbol)

            # Go back to the latest potential missing day.
            day_date = catch_up_env.time().now().date()
            while not catch_up_env.time().is_mkt_day(day_date):
                day_date = catch_up_env.time().get_prev_mkt_day(day_date)
            for _ in range((warm_up_days + catch_up_days + 1)
                           if len(symbols_reset) != 0 else (catch_up_days + 1)):
                day_date = catch_up_env.time().get_prev_mkt_day(day_date)

            # Use price data to train models.
            for _ in range((warm_up_days + catch_up_days)
                           if len(symbols_reset) != 0 else catch_up_days):

                # Go through each reset symbol.
                for symbol in symbols_reset:

                    # Load mongo price data if present.
                    start_instant = pytime.monotonic()
                    day_data = catch_up_env.mongo().load_symbol_day(
                        symbol, day_date)

                    # Collect polygon-rest price data if necessary.
                    if not SymbolDay.validate_candles(day_data.candles):
                        try:
                            day_data = catch_up_env.data_collector(
                            ).collect_candles_for_day(day_date, symbol)
                        except Exception as e:
                            catch_up_env.error_process(
                                'Error collecting polygon-rest data:')
                            catch_up_env.warn_process(traceback.format_exc())
                    collection_time = pytime.monotonic() - start_instant

                    # Validate data.
                    validation_debugger = []
                    if day_data is not None and SymbolDay.validate_candles(
                            day_data.candles,
                            debug_output=validation_debugger):
                        # Save data
                        catch_up_env.redis().reset_day_difficulty(
                            symbol, day_date)
                        catch_up_env.mongo().save_symbol_day(day_data)

                        # Use data to train models for symbol on day.
                        start_instant = pytime.monotonic()
                        catch_up_model_feeder.train_models(symbol=symbol,
                                                           day_date=day_date,
                                                           day_data=day_data,
                                                           stable=True)
                        train_time = pytime.monotonic() - start_instant
                        catch_up_env.info_process(
                            f'Catch-up for {symbol} on {day_date:%m-%d-%Y}: collection took '
                            f'{collection_time:.2f}s;  training took {train_time:.2f}s'
                        )
                    else:
                        catch_up_env.redis().incr_day_difficulty(
                            symbol, day_date)
                        catch_up_env.warn_process(
                            f'Couldn\'t collect catch-up data for {symbol} on {day_date}: '
                            f'{"null" if day_data is None else len(day_data.candles)} candles'
                        )
                        catch_up_env.warn_process(
                            '\n'.join(validation_debugger))

                # Move to the next day.
                day_date = catch_up_env.time().get_next_mkt_day(day_date)

            # Determine whether we have yesterday's cached data for at least one symbol.
            unstable_data_present = False
            while not catch_up_env.time().is_mkt_day(day_date):
                day_date = catch_up_env.time().get_prev_mkt_day(day_date)
            for symbol in Settings.get_symbols(catch_up_env):
                unstable_data = catch_up_env.redis().get_cached_candles(
                    symbol, day_date)
                if unstable_data is not None and SymbolDay.validate_candles(
                        unstable_data):
                    unstable_data_present = True
                    break

            if unstable_data_present:
                msg = f'Valid cached redis data on {day_date:%B %d} found. ' \
                      f'Models and strategies should function normally'
                catch_up_env.info_main(msg)
                catch_up_env.info_process(msg)
            else:
                msg = f'No valid redis data cached on {day_date:%b %d}. Models that need yesterday\'s data will ' \
                      f'fail, causing some strategies to fail.'
                catch_up_env.warn_main(msg)
                catch_up_env.warn_process(msg)

            # Allow processes to resume now that data_collector is not busy.
            catch_up_env.mark_data_as_loaded()
            msg = f'Trading and strategy optimization enabled (catch up task took ' \
                  f'{(pytime.monotonic() - catch_up_start_moment) / 3600:.2f} hrs)'
            catch_up_env.info_main(msg)
            catch_up_env.info_process(msg)
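The "rewind to the latest market day" loops above appear three times; a minimal helper capturing that pattern (the name latest_mkt_day is hypothetical):

from datetime import date

def latest_mkt_day(env_time, from_date: date) -> date:
    # Walk backwards until we land on a market day.
    day = from_date
    while not env_time.is_mkt_day(day):
        day = env_time.get_prev_mkt_day(day)
    return day

# e.g. day_date = latest_mkt_day(catch_up_env.time(), catch_up_env.time().now().date())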
Example #14
def find_mins_maxs(trendline_candles: List[Candle]) -> Tuple[List[Candle], List[Candle]]:
    """
    Returns two lists: the first containing local minima, and the second local maxima.
    """
    # Sanitize input.
    assert len(trendline_candles) > 9, 'Cannot find mins/maxs without at least 10 candles'
    # Get sliding window length.
    trend_length = ContinuousTimeInterval(start_time=trendline_candles[0].moment.time(),
                                          end_time=trendline_candles[-1].moment.time()).length()
    window_length = max(5, int(trend_length * 0.12))
    # Ensure sliding window length is an even number of seconds (so the midpoint falls on a whole second).
    window_length = window_length if window_length % 2 == 0 else window_length + 1

    # Get slide interval.
    slide_interval = max(1, window_length * 0.02)

    # Slide the window along the trendline period.
    mins, maxs = [], []
    window = ContinuousTimeInterval(trendline_candles[0].moment.time(),
                                    (trendline_candles[0].moment + timedelta(seconds=window_length)).time())
    while datetime.combine(trendline_candles[0].moment.date(), window.end_time) <= trendline_candles[-1].moment:
        # Get candles in the window.
        window_candles = SymbolDay.get_ordered_candles(candles=trendline_candles,
                                                       interval=TimeInterval(None, window.start_time, window.end_time))
        # Get the midpoint candle.
        midpoint_candle = midpoint_candle_in_period(period=window,
                                                    candles=trendline_candles,
                                                    day_date=trendline_candles[0].moment.date())

        # Slide the window forward if there is no midpoint candle or no window candles.
        if midpoint_candle is None or len(window_candles) == 0:
            window_start = datetime.combine(datetime.today(), window.start_time) + timedelta(seconds=slide_interval)
            window_end = datetime.combine(datetime.today(), window.end_time) + timedelta(seconds=slide_interval)
            window = ContinuousTimeInterval(window_start.time(), window_end.time())
            continue

        # Get candles before and after the midpoint.
        first_half_candles = [candle for candle in window_candles if candle.moment < midpoint_candle.moment]
        second_half_candles = [candle for candle in window_candles if candle.moment > midpoint_candle.moment]

        # Slide the window forward if either side of the midpoint has no candles.
        if len(first_half_candles) == 0 or len(second_half_candles) == 0:
            window_start = datetime.combine(datetime.today(), window.start_time) + timedelta(seconds=slide_interval)
            window_end = datetime.combine(datetime.today(), window.end_time) + timedelta(seconds=slide_interval)
            window = ContinuousTimeInterval(window_start.time(), window_end.time())
            continue

        # Find out what percentage of prices before/after midpoint are less than the midpoint price.
        pct_prices_below = (len([candle for candle in first_half_candles if candle.low < midpoint_candle.low])
                            + len([candle for candle in second_half_candles if candle.low < midpoint_candle.low])) \
                           / len(window_candles)
        # Find out what percentage of prices before/after midpoint are greater than the midpoint price.
        pct_prices_above = (len([candle for candle in first_half_candles if candle.high > midpoint_candle.high])
                            + len([candle for candle in second_half_candles if candle.high > midpoint_candle.high])) \
                           / len(window_candles)

        # Record a local minimum if 97% of the window's prices are higher than the midpoint price.
        if pct_prices_above >= 0.97:
            mins.append(midpoint_candle)

        # Record a local maximum if 97% of the window's prices are lower than the midpoint price.
        if pct_prices_below >= 0.97:
            maxs.append(midpoint_candle)

        # Slide the window forward.
        window_start = datetime.combine(datetime.today(), window.start_time) + timedelta(seconds=slide_interval)
        window_end = datetime.combine(datetime.today(), window.end_time) + timedelta(seconds=slide_interval)
        window = ContinuousTimeInterval(window_start.time(), window_end.time())

    # Get candles at the beginning and end of the trendline period.
    start_candles = SymbolDay.get_ordered_candles(
        candles=trendline_candles,
        interval=TimeInterval(None, trendline_candles[0].moment.time(),
                              (trendline_candles[0].moment + timedelta(seconds=window_length)).time()))
    end_candles = SymbolDay.get_ordered_candles(
        candles=trendline_candles,
        interval=TimeInterval(None, (trendline_candles[-1].moment - timedelta(seconds=window_length)).time(),
                              trendline_candles[-1].moment.time()))

    # Check for a global minimum in prices at the start and end of the trendline period.
    start_min = sorted(start_candles, key=lambda candle: candle.low)[0]
    end_min = sorted(end_candles, key=lambda candle: candle.low)[0]
    if len(mins) < 2 or start_min.low < min([local_min_candle.low for local_min_candle in mins]):
        mins.insert(0, start_min)
    if len(mins) < 2 or end_min.low < min([local_min_candle.low for local_min_candle in mins]):
        mins.append(end_min)

    # Check for a global maximum in prices at the start and end of the trendline period.
    start_max = sorted(start_candles, key=lambda candle: candle.high)[-1]
    end_max = sorted(end_candles, key=lambda candle: candle.high)[-1]
    if len(maxs) < 2 or start_max.high > max([local_max_candle.high for local_max_candle in maxs]):
        maxs.insert(0, start_max)
    if len(maxs) < 2 or end_max.high > max([local_max_candle.high for local_max_candle in maxs]):
        maxs.append(end_max)

    # Ensure minima are spread apart by at least 3% of the trendline's period.
    reqd_dist = max(3, trend_length * 0.03)
    i = 0
    while i < len(mins) - 1 and len(mins) >= 3:
        if (mins[i + 1].moment - mins[i].moment).total_seconds() < reqd_dist:
            # Remove the higher of the two local minima
            mins.pop(i if mins[i].low > mins[i + 1].low else i + 1)
        else:
            i += 1

    # Ensure maxima are spread apart by at least 3% of the trendline's period.
    i = 0
    while i < len(maxs) - 1 and len(maxs) >= 3:
        if (maxs[i + 1].moment - maxs[i].moment).total_seconds() < reqd_dist:
            # Remove the lower of the two local maxima.
            maxs.pop(i if maxs[i].high < maxs[i + 1].high else i + 1)
        else:
            i += 1

    return mins, maxs
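A hedged usage sketch; the candle source is an assumption, and the comprehensions simply read support/resistance levels off the returned extrema.

mins, maxs = find_mins_maxs(day_data.candles)
support_levels = [candle.low for candle in mins]
resistance_levels = [candle.high for candle in maxs]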
Example #15
    def train_models(self,
                     symbol: str,
                     day_date: date,
                     day_data: Optional[SymbolDay],
                     stable: bool,
                     possibly_already_trained: bool = False) -> None:
        """
        Trains analysis models using the data provided, or loads the data from mongo/redis.

        :param possibly_already_trained: if True, don't log when we skip over a model
        """

        data_provided = day_data is not None

        # Train each model
        for model in self.models:

            # Skip models that are not trained incrementally
            if isinstance(model, (AbstractSpotModel, AbstractNeuralModel)):
                continue

            # Get first time model was trained
            first_training_date = self.redis().get_analysis_start_date(
                symbol, model.model_type,
                self.time().now().date())

            # Get last time model was trained
            last_training_date = self.redis().get_analysis_date(
                symbol, model.model_type)

            # Restart training from this day if the model is missing a snapshot
            if self.redis().get_analysis_snapshot_raw_output(
                    symbol, model.model_type) is None:
                model.restart_training(symbol)
                self.redis().save_analysis_start_date(symbol, model.model_type,
                                                      day_date)
                self.info_process(
                    f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                    f'{day_date.strftime(DATE_FORMAT)} (model lacks a snapshot)'
                )

            # Restart training from this day if the model is no longer continuous
            elif last_training_date < self.time().get_prev_mkt_day(day_date):
                model.restart_training(symbol)
                self.redis().save_analysis_start_date(symbol, model.model_type,
                                                      day_date)
                self.info_process(
                    f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                    f'{day_date.strftime(DATE_FORMAT)} (model training missed a day before this date)'
                )

            # Restart training from this day if the model began training after this day
            elif day_date < first_training_date:
                model.restart_training(symbol)
                self.redis().save_analysis_start_date(symbol, model.model_type,
                                                      day_date)
                self.info_process(
                    f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                    f'{day_date.strftime(DATE_FORMAT)} (date precedes model\'s current start date)'
                )

            # Don't train the model if it has already been trained on this day's data
            elif day_date <= last_training_date:
                if not possibly_already_trained:
                    self.warn_process(
                        f'{self.env_type.value} tried to train {symbol}\'s {model.model_type} more than '
                        f'once on {day_date.strftime(DATE_FORMAT)}')
                continue

            # Revert to last stable snapshot if about to be fed new stable data
            elif stable:
                snapshot_date = model.revert_to_snapshot(symbol)
                # Restart training from this day if the snapshot is too old
                if day_date != self.time().get_next_mkt_day(snapshot_date):
                    model.restart_training(symbol)
                    self.redis().save_analysis_start_date(
                        symbol, model.model_type, day_date)
                    self.info_process(
                        f'{self.env_type.value} restarted {model.model_type} training for {symbol} from '
                        f'{day_date.strftime(DATE_FORMAT)} (snapshot was too old)'
                    )

            # Get the data
            if not data_provided:
                day_data = self.mongo().load_symbol_day(symbol, day_date)
                if not SymbolDay.validate_candles(day_data.candles):
                    self.warn_process(
                        f'{self.env_type.value} tried to train {symbol}\'s {model.model_type.value} '
                        f'model on bad data!')
                    continue

            # Train the model
            try:
                model.feed_model(day_data)
            except Exception as e:
                self.error_process(
                    f'Error training {self.env_type.value} {model.model_type.value}:'
                )
                self.warn_process(traceback.format_exc())

            # Get model's new output
            model_output = model.get_stored_output(symbol)

            # Take a snapshot after being fed stable data
            if stable:
                model.take_snapshot(symbol, model_output, day_date)
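A hedged usage sketch: passing day_data=None makes train_models load and validate the day's candles from mongo itself, per the "Get the data" branch above; the feeder construction mirrors the other examples here.

feeder = ModelFeeder(live_env)
feeder.train_models(symbol='SPY',
                    day_date=day_date,
                    day_data=None,
                    stable=True,
                    possibly_already_trained=True)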
Example #16
    def calculate_output(self, symbol: str) -> OUTPUT_TYPE:
        """
        This tells us is whether the price has been trending downward (accounting for volume) today, not long-term.
        """

        # Fetch latest 45 mins of data
        candles = self.get_latest_candles(symbol, 45)

        # TODO REMOVE LATER: Debug candle validation
        if len(candles) > MIN_CANDLES_PER_MIN * 45:
            debug_lines = ['\n']
            debug_lines.append('candle validation debug output:')
            SymbolDay.validate_candles(candles,
                                       min_minutes=45,
                                       debug_output=debug_lines)
            debug_lines.append('\n')

        # Validate data
        if not SymbolDay.validate_candles(candles, min_minutes=45):
            self.error_process('MomentumModel candles ({}): {}'.format(
                len(candles), [candle.open for candle in candles][0:3]))
            raise ValueError('Momentum calculation given invalid data')

        # Find "typical" volume change for the period, using median and std dev
        volume_changes = []
        for i in range(1, len(candles)):
            volume_changes.append(candles[i].volume - candles[i - 1].volume)
        med_vol_chg = median(volume_changes)
        vol_chg_stdev = stdev(volume_changes)

        # Collect normalized volume and price changes to see what direction the stock is going
        # This is a slightly fancier way of multiplying trade price by number of trades
        weighted_price_changes = []
        last_candle = candles[0]
        for candle in candles[1:-1]:
            vol_change = candle.volume - last_candle.volume
            price_change = (candle.low - last_candle.low) / last_candle.low
            """
            < 2: least volume within the 45-min period
            < 0: less volume than usual for the 45-min period
            > 0: more volume than usual for the 45-min period
            > 2: most volume within the 45-min period
            """
            std_devs = (vol_change - med_vol_chg) / vol_chg_stdev
            # Always positive, high value means more volume than usual
            weight = max(0, std_devs + 1.8)
            # Sign indicates direction of the price movement, value indicates significance
            weighted_change = 100 * price_change * 100 * weight
            weighted_price_changes.append(weighted_change)

            last_candle = candle

        # Calculate a sum that favors recent data
        sum_pct_price_changes = 0
        x = 0
        incr = 3.0 / len(weighted_price_changes)
        for change in weighted_price_changes[-1:0:-1]:
            x += incr
            weight = exp(-x)
            sum_pct_price_changes += change * weight
            # The longer movement stays negative, the more we should expect it to change and vice versa
            if abs(sum_pct_price_changes) > 3:
                sum_pct_price_changes += -2 if sum_pct_price_changes > 0 else 0.2
        sum_pct_price_changes = sum_pct_price_changes / 2

        # The symbol is risky if the momentum is in a downward direction
        return sum_pct_price_changes
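A worked example of the recency weighting above: with 45 weighted changes, incr = 3.0 / 45 ≈ 0.067, so the most recent change is scaled by exp(-0.067) ≈ 0.94 while the oldest change included is scaled by roughly exp(-3.0) ≈ 0.05, letting recent movement dominate the momentum sum.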
Example #17
    def run(self) -> None:
        # Clone live environment, connecting this thread to real data.
        live_env = ExecEnv(self.program.logfeed_optimization,
                           self.program.logfeed_optimization,
                           self.program.live_env)
        live_env.fork_new_thread()

        # Experiment settings.
        MAX_TRIALS_PER_DAY = 250  # max number of periods to evaluate per historical day
        EVAL_PERIOD_LEN = 3 * 60  # number of seconds over which to track profits
        EVAL_FLOOR_PERIOD_LEN = 7 * 60  # number of seconds over which to track killswitch floor

        # Load dates on which we have all the needed data.
        experiment_start_date = date(2018, 6, 1)
        spy_dates = live_env.mongo().get_dates_on_file(
            symbol='SPY',
            start_date=experiment_start_date,
            end_date=live_env.time().now().date())
        spxl_dates = live_env.mongo().get_dates_on_file(
            symbol='SPXL',
            start_date=experiment_start_date,
            end_date=live_env.time().now().date())
        spxl_dates = [
            day_date for day_date in spxl_dates if day_date in spy_dates
        ]  # narrow spxl to spy dates
        spy_dates = [
            day_date for day_date in spy_dates if day_date in spxl_dates
        ]  # narrow spy to spxl dates
        spxs_dates = live_env.mongo().get_dates_on_file(
            symbol='SPXS',
            start_date=experiment_start_date,
            end_date=live_env.time().now().date())
        spxs_dates = [
            day_date for day_date in spxs_dates if day_date in spy_dates
        ]  # narrow spxs to the common spy/spxl dates
        spy_dates = [
            day_date for day_date in spy_dates if day_date in spxs_dates
        ]  # narrow spy to the spxs dates
        spxl_dates = [
            day_date for day_date in spxl_dates if day_date in spy_dates
        ]  # narrow spxl to the final spy dates
        assert len(spy_dates) == len(spxl_dates) == len(spxs_dates)

        # Init statistics on the experiment.
        spxl_blr_setup_vals = []
        spxs_blr_setup_vals = []
        spxl_blr_10_vals = []
        spxs_blr_10_vals = []
        spxl_blr_25_vals = []
        spxs_blr_25_vals = []
        spxl_profits = []
        spxl_floors = []
        spxs_profits = []
        spxs_floors = []
        oscillation_model = OscillationModel(live_env,
                                             AnalysisModelType.OSCILLATION)
        trend_model = LSFavorModel(live_env, AnalysisModelType.LS_FAVOR)

        # Simulate the days on which SPY, SPXL, and SPXS jointly have data.
        live_env.info_process(
            f'Beginning BLR simulations over {len(spxs_dates)} dates')
        for day_date in spxs_dates:
            # Load data for experiment.
            live_env.info_process(
                f'Running trials on {day_date:%m-%d-%Y} (successful trials: {len(spxl_profits)})'
            )
            spy_data = live_env.mongo().load_symbol_day(symbol='SPY',
                                                        day=day_date)
            spxl_data = live_env.mongo().load_symbol_day(symbol='SPXL',
                                                         day=day_date)
            spxs_data = live_env.mongo().load_symbol_day(symbol='SPXS',
                                                         day=day_date)

            # Validate data.
            data_is_valid = True
            for day_data in [spy_data, spxl_data, spxs_data]:
                if not SymbolDay.validate_candles(day_data.candles):
                    data_is_valid = False
                    break
            if not data_is_valid:
                live_env.info_process(f'Invalid data on {day_date:%m-%d-%Y}')
                continue

            # Init time windows variables.
            start_moment = datetime.combine(
                day_date, OPEN_TIME) + timedelta(seconds=int(30 * 60))
            end_moment = datetime.combine(day_date, CLOSE_TIME) - timedelta(
                seconds=int(EVAL_PERIOD_LEN + 15 * 60))

            # Go thru time windows on each day.
            day_trials = 0
            while start_moment < end_moment and day_trials < MAX_TRIALS_PER_DAY:

                try:
                    # Move to the next time window.
                    start_moment += timedelta(seconds=random.randint(30, 120))
                    blr_setup_period = ContinuousTimeInterval(
                        (start_moment - timedelta(seconds=3 * 60)).time(),
                        start_moment.time())
                    blr_10_period = ContinuousTimeInterval(
                        (start_moment - timedelta(seconds=10 * 60)).time(),
                        start_moment.time())
                    blr_25_period = ContinuousTimeInterval(
                        (start_moment - timedelta(seconds=25 * 60)).time(),
                        start_moment.time())
                    eval_period = ContinuousTimeInterval(
                        start_moment.time(),
                        (start_moment +
                         timedelta(seconds=EVAL_PERIOD_LEN)).time())
                    eval_floor_period = ContinuousTimeInterval(
                        start_moment.time(),
                        (start_moment +
                         timedelta(seconds=EVAL_FLOOR_PERIOD_LEN)).time())

                    # Ignore non-oscillatory periods.
                    oscillation_val = oscillation_model.get_oscillation_val(
                        candles_in_period(blr_setup_period, spy_data.candles,
                                          spy_data.day_date))
                    if oscillation_val < 0.6:
                        continue

                    # Calculate BLR trendline indicators.
                    spxl_blr_setup_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_setup_period,
                                              spxl_data.candles,
                                              spxl_data.day_date)))
                    spxs_blr_setup_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_setup_period,
                                              spxs_data.candles,
                                              spxs_data.day_date)))
                    spxl_blr_10_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_10_period, spxl_data.candles,
                                              spxl_data.day_date)))
                    spxs_blr_10_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_10_period, spxs_data.candles,
                                              spxs_data.day_date)))
                    spxl_blr_25_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_25_period, spxl_data.candles,
                                              spxl_data.day_date)))
                    spxs_blr_25_val = trend_model.get_blr_strength(
                        BoundedLinearRegressions(
                            candles_in_period(blr_25_period, spxs_data.candles,
                                              spxs_data.day_date)))

                    # Calculate maximum profits during evaluation period.
                    spxl_buy_price = candles_in_period(
                        blr_setup_period, spxl_data.candles,
                        spxl_data.day_date)[-1].close
                    spxs_buy_price = candles_in_period(
                        blr_setup_period, spxs_data.candles,
                        spxs_data.day_date)[-1].close
                    spxl_eval_candles = candles_in_period(
                        eval_period, spxl_data.candles, spxl_data.day_date)
                    spxs_eval_candles = candles_in_period(
                        eval_period, spxs_data.candles, spxs_data.day_date)
                    spxl_eval_floor_candles = candles_in_period(
                        eval_floor_period, spxl_data.candles,
                        spxl_data.day_date)
                    spxs_eval_floor_candles = candles_in_period(
                        eval_floor_period, spxs_data.candles,
                        spxs_data.day_date)
                    spxl_profit_pct = (max([
                        candle.high * 0.3 + candle.open * 0.7
                        for candle in spxl_eval_candles
                    ]) - spxl_buy_price) / spxl_buy_price
                    spxs_profit_pct = (max([
                        candle.high * 0.3 + candle.open * 0.7
                        for candle in spxs_eval_candles
                    ]) - spxs_buy_price) / spxs_buy_price
                    spxl_floor_pct = (spxl_buy_price - min([
                        candle.low * 0.3 + candle.open * 0.7
                        for candle in spxl_eval_floor_candles
                    ])) / spxl_buy_price
                    spxs_floor_pct = (spxs_buy_price - min([
                        candle.low * 0.3 + candle.open * 0.7
                        for candle in spxs_eval_floor_candles
                    ])) / spxs_buy_price

                    # Record trial stats.
                    spxl_blr_setup_vals.append(spxl_blr_setup_val)
                    spxs_blr_setup_vals.append(spxs_blr_setup_val)
                    spxl_blr_10_vals.append(spxl_blr_10_val)
                    spxs_blr_10_vals.append(spxs_blr_10_val)
                    spxl_blr_25_vals.append(spxl_blr_25_val)
                    spxs_blr_25_vals.append(spxs_blr_25_val)
                    spxl_profits.append(spxl_profit_pct)
                    spxl_floors.append(spxl_floor_pct)
                    spxs_profits.append(spxs_profit_pct)
                    spxs_floors.append(spxs_floor_pct)
                    day_trials += 1

                    # Print experiment stats every 100 trials.
                    if len(spxl_blr_setup_vals) > 0 and len(spxl_blr_setup_vals) % 100 != 0:
                        continue

                    live_env.info_process('\n\n')

                    def print_immediate_profit(val_lists, profits_list,
                                               threshold, symbol, trend_name):
                        # Get indices corresponding to vals that are above all thresholds.
                        indices = [i for i in range(len(val_lists[0]))]
                        for j in range(len(val_lists)):
                            indices = [
                                i for i in indices
                                if val_lists[j][i] >= threshold
                            ]

                        if len(indices) > 3:
                            profits = [profits_list[i] for i in indices]
                            profit_mean, profit_med, profit_stdev = (
                                mean(profits), median(profits), stdev(profits))
                            immediate_profit = profit_med
                            live_env.info_process(
                                f'Immediate {symbol} profit (< 3 mins) when {trend_name} strength >= '
                                f'{100 * threshold}%: '
                                f'{100 * immediate_profit:.2f}% (n={len(profits)})'
                            )

                    def print_profit_ratio(val_lists, spxl_profits_list,
                                           spxs_profits_list, threshold,
                                           trend_name):
                        # Get indices corresponding to vals that are above all thresholds.
                        indices = [i for i in range(len(val_lists[0]))]
                        for j in range(len(val_lists)):
                            indices = [
                                i for i in indices
                                if val_lists[j][i] >= threshold
                            ]

                        if len(indices) > 3:
                            profit_ratios = [
                                spxl_profits_list[i] /
                                max(0.0002, spxs_profits_list[i])
                                for i in indices
                            ]
                            ratios_mean, ratios_med, ratios_stdev = (
                                mean(profit_ratios), median(profit_ratios),
                                stdev(profit_ratios))
                            live_env.info_process(
                                f'Immediate profit ratio (SPXL:SPXS) when {trend_name} strength >= '
                                f'{100 * threshold}%: '
                                f'{ratios_med:.2f}:1 (n={len(profit_ratios)})')

                    # TODO NEXT: Implement a -1.65% killswitch in the strategy.

                    # TODO NEXT: What pct of oscillation range is expected profit?

                    def print_killswitch_floor(val_lists, floors_list,
                                               threshold, symbol, trend_name):
                        # Get indices corresponding to vals that are above all thresholds.
                        indices = [i for i in range(len(val_lists[0]))]
                        for j in range(len(val_lists)):
                            indices = [
                                i for i in indices
                                if val_lists[j][i] >= threshold
                            ]

                        if len(indices) > 3:
                            floors = [-floors_list[i] for i in indices]
                            floor_mean, floor_med, floor_stdev = (
                                mean(floors), median(floors), stdev(floors))
                            killswitch_floor = floor_med - 1.5 * floor_stdev
                            live_env.info_process(
                                f'{symbol} killswitch activation (-1.5 stdev floor) when {trend_name} strength >= '
                                f'{100 * threshold}%: '
                                f'{100 * killswitch_floor:.2f}% (n={len(floors)})'
                            )

                    """
                    # Print immediate profits when BLR strength >= 70%.
                    print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.7, 'SPXL', 'BLR-6')
                    print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.7, 'SPXS', 'BLR-6')
                    print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.7, 'SPXL', 'BLR-10')
                    print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.7, 'SPXS', 'BLR-10')
                    print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.7, 'SPXL', 'BLR-25')
                    print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.7, 'SPXS', 'BLR-25')

                    # Print immediate profits when BLR strength >= 85%.
                    print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.85, 'SPXL', 'BLR-6')
                    print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.85, 'SPXS', 'BLR-6')
                    print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.85, 'SPXL', 'BLR-10')
                    print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.85, 'SPXS', 'BLR-10')
                    print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.85, 'SPXL', 'BLR-25')
                    print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.85, 'SPXS', 'BLR-25')

                    # Print immediate profits when BLR strength >= 95%.
                    print_immediate_profit([spxl_blr_6_vals], spxl_profits, 0.95, 'SPXL', 'BLR-6')
                    print_immediate_profit([spxs_blr_6_vals], spxs_profits, 0.95, 'SPXS', 'BLR-6')
                    print_immediate_profit([spxl_blr_10_vals], spxl_profits, 0.95, 'SPXL', 'BLR-10')
                    print_immediate_profit([spxs_blr_10_vals], spxs_profits, 0.95, 'SPXS', 'BLR-10')
                    print_immediate_profit([spxl_blr_25_vals], spxl_profits, 0.95, 'SPXL', 'BLR-25')
                    print_immediate_profit([spxs_blr_25_vals], spxs_profits, 0.95, 'SPXS', 'BLR-25')

                    # Print SPXL immediate profit when second 2 BLR strengths >= 90%.
                    print_immediate_profit([spxl_blr_10_vals, spxl_blr_25_vals], spxl_profits,
                                           0.9, 'SPXL', 'BLR-10-25')

                    # Print SPXL immediate profit when all BLR strengths >= 30%.
                    print_immediate_profit([spxl_blr_6_vals, spxl_blr_10_vals, spxl_blr_25_vals], spxl_profits,
                                           0.3, 'SPXL', 'BLR-6-10-25')
                    """

                    # Print SPXL:SPXS profit ratio when BLR strength >= 60%.
                    print_profit_ratio([spxl_blr_setup_vals], spxl_profits,
                                       spxs_profits, 0.6, 'BLR-3')
                    print_profit_ratio([spxl_blr_10_vals], spxl_profits,
                                       spxs_profits, 0.6, 'BLR-10')
                    print_profit_ratio([spxl_blr_25_vals], spxl_profits,
                                       spxs_profits, 0.6, 'BLR-25')

                    # Print SPXL:SPXS profit ratio when BLR strength >= 85%.
                    print_profit_ratio([spxl_blr_setup_vals], spxl_profits,
                                       spxs_profits, 0.85, 'BLR-3')
                    print_profit_ratio([spxl_blr_10_vals], spxl_profits,
                                       spxs_profits, 0.85, 'BLR-10')
                    print_profit_ratio([spxl_blr_25_vals], spxl_profits,
                                       spxs_profits, 0.85, 'BLR-25')

                    # Print SPXL:SPXS profit ratio when BLR strength >= 95%.
                    print_profit_ratio([spxl_blr_setup_vals], spxl_profits,
                                       spxs_profits, 0.95, 'BLR-3')
                    print_profit_ratio([spxl_blr_10_vals], spxl_profits,
                                       spxs_profits, 0.95, 'BLR-10')
                    print_profit_ratio([spxl_blr_25_vals], spxl_profits,
                                       spxs_profits, 0.95, 'BLR-25')

                    # Print SPXL:SPXS profit ratio when long BLR strengths >= 60%.
                    print_profit_ratio([spxl_blr_10_vals, spxl_blr_25_vals],
                                       spxl_profits, spxs_profits, 0.6,
                                       'BLR-10-25')

                    # Print expected min profit (the worse of SPXL/SPXS) with
                    # no strength threshold applied.
                    print_immediate_profit([spxl_blr_setup_vals], [
                        min(spxl_profits[i], spxs_profits[i])
                        for i in range(len(spxl_profits))
                    ], 0, '', 'oscillating... N/A')

                    # Print killswitch floor (the higher of the SPXL/SPXS
                    # floors) with no strength threshold applied.
                    print_killswitch_floor([spxl_blr_setup_vals], [
                        max(spxl_floors[i], spxs_floors[i])
                        for i in range(len(spxl_floors))
                    ], 0, '', 'oscillating... N/A')

                except Exception:
                    # Skip this data point if anything goes wrong.
                    # live_env.warn_process(f'BLR Experiment error: {traceback.format_exc()}')
                    continue
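
The helpers called above (print_immediate_profit, print_profit_ratio, print_killswitch_floor) are not shown in this example. As a rough sketch only — the signature is inferred from the call sites, and the body is an assumption rather than the project's actual implementation — print_immediate_profit might look like this:

    from statistics import mean

    def print_immediate_profit(strength_lists, profits, threshold, symbol, label):
        """Hypothetical sketch: average profit over the moments at which every
        given BLR strength series is at or above the threshold (assumes all
        lists have the same length)."""
        matched = [profits[i] for i in range(len(profits))
                   if all(vals[i] >= threshold for vals in strength_lists)]
        if len(matched) == 0:
            print(f'{symbol} {label}: no data points at {100 * threshold:.0f}%+')
            return
        print(f'{symbol} {label} >= {100 * threshold:.0f}%: '
              f'avg immediate profit {100 * mean(matched):.2f}% (n={len(matched)})')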
Example #18
    def run(self) -> None:
        # Set data parameters.
        start_date = date(year=2002, month=1, day=1)
        # Use .date() so end_date is a date (not a datetime), comparable with
        # day_date in the loop below.
        end_date = self.program.live_env.time().now().date() - timedelta(
            days=1)

        # Clone live environment so it can run on this thread.
        live_env = ExecEnv(self.program.logfeed_program,
                           self.program.logfeed_program, self.program.live_env)
        live_env.fork_new_thread()
        data_collector = PolygonDataCollector(self.program.logfeed_program,
                                              self.program.logfeed_program,
                                              live_env.time())

        # Delete any previous data file so fresh output can be appended below.
        filename = 'debug_data/spy_ai_data.txt'
        try:
            os.makedirs('debug_data', exist_ok=True)
            if os.path.exists(filename):
                os.remove(filename)
        except Exception:
            print(f'Error deleting file: "{filename}"')

        # Go through the data we have on file.
        day_date = start_date - timedelta(days=1)
        while day_date < end_date:

            # Get the next market day.
            day_date = self.program.live_env.time().get_next_mkt_day(day_date)

            # Load price data.
            print(f'Fetching SPY data for {day_date:%m-%d-%Y}')
            day_data = live_env.mongo().load_symbol_day('SPY', day_date)

            # Get fresh data from polygon.io, if necessary.
            if not SymbolDay.validate_candles(day_data.candles):
                try:
                    day_data = data_collector.collect_candles_for_day(
                        day_date, 'SPY')
                except Exception as e:
                    live_env.error_process(
                        'Error collecting polygon-rest data:')
                    live_env.warn_process(traceback.format_exc())

            # Validate the data.
            if day_data is None or not SymbolDay.validate_candles(
                    day_data.candles):
                print(
                    F'COULD NOT COMPILE PRICE DATA FOR SPY ON {day_date:%m-%d-%Y}'
                )
                continue

            # TODO: Convert candles into sentences (left unimplemented in this
            # example).

            # Convert the data into json.
            data_dict = day_data.to_json()

            # Append the data to the txt file, one JSON object per line.
            with open(filename, 'a') as f:
                f.write(json.dumps(data_dict) + '\n')

        print(f'Dumped data to TC2_data/{filename}')
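
Because each day is appended as one JSON object per line, the dump can be read back as newline-delimited JSON. A minimal sketch (assuming the file was produced by the run above):

    import json

    # Parse each non-empty line of the dump as a separate day of data.
    with open('debug_data/spy_ai_data.txt', 'r') as f:
        days = [json.loads(line) for line in f if line.strip()]
    print(f'Loaded {len(days)} days of SPY data')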
Example #19
    def run(self) -> HealthCheckResult:
        try:
            # Fetch candles from the previous market day
            symbol = 'SPY'
            day_date = self.time().get_prev_mkt_day()
            # noinspection PyTypeChecker
            polygon_collector = DebuggedPolygonDataCollector(
                None, None, self.time())
            self.debug(
                f'Collecting {symbol} data from polygon for {day_date:%m-%d-%Y}'
            )
            day_data = polygon_collector.collect_candles_for_day(day=day_date,
                                                                 symbol=symbol)
            self.debug_messages.extend(polygon_collector.debug_msgs)

            # Print timings
            self.set_passing(True)
            time_length = '--' if len(polygon_collector.timings_total) == 0 \
                else f'{mean(polygon_collector.timings_total) / 60.0:.2f}m'
            # Guard against an empty list before calling mean().
            if len(polygon_collector.timings_total) > 0 \
                    and mean(polygon_collector.timings_total) > 60 * 7:
                self.debug(
                    'POLYGON CHECK FAILED: It takes more than 7 minutes to fetch a day of data'
                )
                # Mark the check as failed to match the message above.
                self.set_passing(False)
            self.debug(f'Avg total task time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_basket) == 0 \
                else f'{mean(polygon_collector.timings_basket):.2f}s'
            self.debug(f'Avg total basket handling time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_fetch) == 0 \
                else f'{mean(polygon_collector.timings_fetch):.2f}s'
            self.debug(f'Avg batch fetch time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_parse) == 0 \
                else f'{mean(polygon_collector.timings_parse):.2f}s'
            self.debug(f'Avg basket parse time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_avg_block1) == 0 \
                else f'{mean(polygon_collector.timings_avg_block1) * 1000:.2f}ms'
            self.debug(f'Avg block1 logic time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_avg_block2) == 0 \
                else f'{mean(polygon_collector.timings_avg_block2) * 1000:.2f}ms'
            self.debug(f'Avg block2 logic time: {time_length}')
            time_length = '--' if len(polygon_collector.timings_avg_block3) == 0 \
                else f'{mean(polygon_collector.timings_avg_block3) * 1000:.2f}ms'
            self.debug(f'Avg block3 logic time: {time_length}')

            # Check with production version of PolygonDataCollector
            self.debug(
                'Now collecting the same data using the production data collector...'
            )
            polygon_collector = PolygonDataCollector(
                logfeed_program=self.logfeed_process,
                logfeed_process=self.logfeed_process,
                time_env=self.time())
            day_data = polygon_collector.collect_candles_for_day(day=day_date,
                                                                 symbol=symbol)

            # Pass the health check only if the collected data is valid
            if day_data is None or not SymbolDay.validate_candles(
                    day_data.candles, debug_output=self.debug_messages):
                self.debug('Polygon query returned invalid candles')
                self.set_passing(False)
            else:
                self.debug('Production data collector returned valid data')
        except Exception as e:
            self.debug(f'Error fetching polygon data: {e}')
            self.set_passing(False)

        return self.make_result()
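
The timing report above repeats the same guard-then-format pattern for six different timing lists. A small helper could collapse that repetition; this is a hypothetical refactoring sketch (fmt_timing is an invented name, not part of the original health check):

    from statistics import mean

    def fmt_timing(samples, scale=1.0, unit='s'):
        """Format the mean of a timing list, or '--' when there are no samples."""
        return '--' if len(samples) == 0 else f'{mean(samples) * scale:.2f}{unit}'

    # Usage, e.g.:
    #   self.debug(f"Avg batch fetch time: {fmt_timing(polygon_collector.timings_fetch)}")
    #   self.debug(f"Avg block1 logic time: {fmt_timing(polygon_collector.timings_avg_block1, 1000, 'ms')}")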