Beispiel #1
0
def MACD(symbol: str, start: datetime.datetime, end: datetime.datetime, local=False, dir="") -> Optional[DataFrame]:
    """
    Compute the MACD line and its signal line for <symbol> for every trading day between <start> and <end>.

    The returned DataFrame is sliced to <start>..<end> and has three columns:
      * "Price"  - the "Adj Close" values the indicators are derived from
      * "MACD"   - the MACD_SHORT_AVERAGE EMA of "Price" minus the MACD_LONG_AVERAGE EMA of "Price"
      * "Signal" - the MACD_SIGNAL_PERIOD EMA of "MACD"

    If <local>=True, assumes that <dir> is a string containing a path to a directory containing stock data.
    Files in this directory should be .csv's and have filename equal to the stock symbol whose data they are holding.
    This method assumes that the file <symbol>.csv exists in <dir>, if <local>=True.

    RAISES NotEnoughDataError if fewer than MACD_LONG_AVERAGE + MACD_SIGNAL_PERIOD data points precede <start>, or if
    no MACD value at all could be computed from the fetched data.

    Returns None in the event of an error fetching data (i.e. when get_data returns None).
    """
    # We need history before <start>: enough points for the long EMA, plus enough MACD values to then take the
    # MACD_SIGNAL_PERIOD EMA for the signal line. The 3/7 terms pad for weekends and holidays.
    # NOTE(review): this padding adds 3/7 of the number of *trading* days, not of the calendar window, so it may
    # fall slightly short when holidays land in the window. It is left unchanged here because fetching more history
    # could change how EMA seeds its averages - confirm against EMA before widening.
    lookback_days = (MACD_LONG_AVERAGE + 3 * MACD_LONG_AVERAGE / 7
                     + (MACD_SIGNAL_PERIOD + 1) + 3 * MACD_SIGNAL_PERIOD / 7)
    price_data = get_data(symbol, start - datetime.timedelta(lookback_days), end, local=local, dir=dir)

    if price_data is None:
        return None

    # Count how many data points we have before the first trading day in our range. We need to know if we have enough
    # to calculate what we need to calculate
    num_preceding = 0
    while num_preceding < len(price_data.index) and price_data.index[num_preceding] < start:
        num_preceding += 1

    if num_preceding < MACD_LONG_AVERAGE + MACD_SIGNAL_PERIOD:
        raise NotEnoughDataError(
            f"Not enough data points to calculate MACD and Signal for symbol {symbol} and range {start}-{end}")

    df = DataFrame()
    df["Price"] = price_data["Adj Close"]
    EMA(df, "Price", "Short", MACD_SHORT_AVERAGE)
    EMA(df, "Price", "Long", MACD_LONG_AVERAGE)
    df["MACD"] = df["Short"] - df["Long"]

    # "Short" and "Long" are moving averages, so they (and therefore "MACD") start with a run of NaN values. Those
    # would corrupt the EMA of "MACD" taken below, so drop every row before the first real MACD value.
    first_valid = df["MACD"].first_valid_index()
    if first_valid is None:
        # Every MACD value is NaN: nothing to build a signal line from.
        raise NotEnoughDataError(
            f"Not enough data points to calculate MACD and Signal for symbol {symbol} and range {start}-{end}")

    # Take an explicit copy so that the column added by EMA below lands on an independent frame rather than on a
    # slice of the original.
    df = df.loc[first_valid:].copy()

    # Add the column "Signal" to the DataFrame as a MACD_SIGNAL_PERIOD EMA of the "MACD" column
    EMA(df, "MACD", "Signal", MACD_SIGNAL_PERIOD)

    df = df.drop(columns=["Short", "Long"])
    return df.loc[start:end]
Beispiel #2
0
def moving_average(symbol, start, end, n, local=False, dir="") -> Optional[pd.DataFrame]:
    """
    Return a DataFrame containing an <n>-day moving average for each trading day between <start> and <end>, inclusive.
    Note: the range given by start and end doesn't have to include enough data points for a full <n>-day moving average.
    The method fetches extra history before <start>, computes the average, and then truncates the result to the
    requested range.

    RAISES NotEnoughDataError if fewer than <n> data points are available before <start>, i.e. there isn't enough
    historical data to calculate a full <n>-day moving average for the first trading day in the range.

    If <local>=True, assumes that <dir> is a string containing a path to a directory containing stock data.
    Files in this directory should be .csv's and have filename equal to the stock symbol whose data they are holding.
    This method assumes that the file <symbol>.csv exists in <dir>, if <local>=True.

    The returned DataFrame is date-indexed and has one column of data, "Average", computed from "Adj Close".

    Returns None if get_data returns None or if a KeyError is raised while fetching or processing the data.
    """
    try:
        # We need at least <n> trading days of history before <start>. Only about 5 in 7 calendar days are weekdays,
        # and some of those are holidays, so <n> trading days span roughly 1.45 * n calendar days. The previous
        # window of n + 3n/7 (about 1.43 * n) calendar days came up a few rows short for larger n (e.g. n=200),
        # which raised NotEnoughDataError even though the history existed. Use 1.5 * n plus a week of slack instead.
        # Fetching extra rows does not change the result: each average only looks at the <n> most recent rows and
        # the frame is sliced to <start>..<end> before returning. If less history exists, get_data is expected to
        # return whatever it can.
        lookback_days = math.ceil(1.5 * n) + 7
        df = get_data(symbol, start - datetime.timedelta(lookback_days), end, local=local, dir=dir)
        if df is None:
            return None

        # Count how many data points we have up to but not including <start> (or whatever the first trading day after
        # <start> is if <start> happens to be a weekend or holiday)
        num_preceding = 0
        while num_preceding < len(df.index) and df.index[num_preceding] < start:
            num_preceding += 1

        # Raise an error if there are fewer than <n> data points, because then we won't have enough data points to
        # calculate an n-day moving average on the first day of our range
        if num_preceding < n:
            raise NotEnoughDataError(
                f"Not enough data to calculate {n}-day moving average for {symbol} with range {start}-{end}")

        average = DataFrame()
        # rolling(window=n) leaves NaN in the first n - 1 rows. Since at least <n> rows precede <start>, all of those
        # NaN rows fall outside the range we slice to below.
        average["Average"] = df["Adj Close"].rolling(window=n).mean()

        return average[start:end]
    except KeyError:
        return None
Beispiel #3
0
def MACD_signal(symbol: str, listener: Listener, start: datetime.datetime, end: datetime.datetime, local=False,
                dir="") -> \
        List[datetime.datetime]:
    """
    Look for the MACD of the given stock to cross above its signal line WHILE price is also above the 200-day EMA.
    Return a list of all days that are the endpoints of such crosses. That is, the list of all days between <start> and
    <end> where MACD was above its signal line having been at or below it the day before.

    If <local>=True, assumes that <dir> is a string containing a path to a directory containing stock data.
    Files in this directory should be .csv's and have filename equal to the stock symbol whose data they are holding.
    This method assumes that the file <symbol>.csv exists in <dir>, if <local>=True.

    Sends a message through <listener> and returns an empty list if not enough data could be found to compute the
    necessary values. Also returns an empty list (without a message) if the price data could not be fetched at all.
    """

    def _report(error: Exception) -> None:
        # Forward the error text to the listener as a one-line message.
        msg = Message()
        msg.add_line(str(error))
        listener.send(msg)

    try:
        macd = MACD(symbol, start, end, local=local, dir=dir)
    except NotEnoughDataError as e:
        _report(e)
        return []

    # MACD returns None when the underlying data fetch failed; there is nothing to scan in that case.
    if macd is None:
        return []

    # Get a 200-day EMA of price
    # NOTE(review): only data from <start> onwards is fetched here, so how the first rows of "EMA" are filled depends
    # on EMA's implementation - confirm that this is the intended look-back.
    data = get_data(symbol, start, end, local=local, dir=dir)
    if data is None:
        return []

    try:
        EMA(data, "Adj Close", "EMA", 200)
    except NotEnoughDataError as e:
        _report(e)
        # Without the EMA column the trend filter below cannot be evaluated, so give up as documented.
        return []

    macd_line = macd["MACD"]
    signal_line = macd["Signal"]
    close = data["Adj Close"]
    trend = data["EMA"]

    signals = []
    # Walk consecutive pairs of trading days and record the second day of each upward cross.
    for day_before, day in zip(macd.index[:-1], macd.index[1:]):
        crossed_up = macd_line[day] > signal_line[day] and macd_line[day_before] <= signal_line[day_before]
        if crossed_up and close[day] > trend[day]:
            signals.append(day)

    return signals
Beispiel #4
0
def save(symbol: str, start: dt.datetime, end: dt.datetime, listener: Listener,
         dir: str) -> None:
    """
    Fetch price data for <symbol> between <start> and <end> and save it as a .csv file in the directory given by <dir>.
    The .csv file shares the same name as the symbol, and if a file already exists with that name it will be
    overwritten.

    If the data could not be fetched (get_data returns None), a message is sent through <listener> and nothing is
    written.
    """
    df = get_data(symbol, start, end)

    if df is None:
        msg = Message()
        msg.add_line("*****************************")
        msg.add_line(f"KeyError fetching data for {symbol}")
        msg.add_line("*****************************")
        listener.send(msg)
        return

    # Use a context manager so the file is flushed and closed even if to_csv raises. newline="" follows the pandas
    # recommendation for file objects passed to to_csv, and avoids doubled line endings on platforms that translate
    # newlines.
    with open(dir + "/" + symbol + ".csv", "w", newline="") as f:
        df.to_csv(f)
Beispiel #5
0
def EMA_from_symbol(symbol, start, end, n, local=False, dir="") -> Optional[DataFrame]:
    """
    Build a date-indexed DataFrame holding a single column, "EMA": the <n>-day exponential moving average of the
    "Adj Close" price of <symbol>, restricted to the dates between <start> and <end>.

    When <local>=True, <dir> is taken to be the path of a directory of stock data in which each file is a .csv named
    after the symbol it holds; the file <symbol>.csv is assumed to exist there.

    Returns None when the data could not be fetched (get_data returned None).

    RAISES NotEnoughDataError when fewer than <n> data points are available before <start>, since an <n>-day EMA for
    the first day of the range cannot be computed in that case.
    """
    # Reach back far enough before <start> to collect the history an <n>-day EMA needs: (n + 1) days plus 3/7 of
    # that again as padding for weekends and holidays.
    # NOTE(review): the padding is 3/7 of the trading-day count rather than of the calendar window, so for large <n>
    # it may come up a few rows short - confirm whether the window should be wider.
    span = n + 1
    lookback = datetime.timedelta(span + math.ceil(3 * span / 7))
    df = get_data(symbol, start - lookback, end, local=local, dir=dir)

    if df is None:
        return None

    # Tally the leading rows dated strictly before <start>. At least <n> of them are required, because the EMA has
    # to be seeded from <n> earlier data points before it yields values for the requested range.
    rows_before_start = 0
    for stamp in df.index:
        if not stamp < start:
            break
        rows_before_start += 1

    if rows_before_start < n:
        raise NotEnoughDataError(f"Not enough data to calculate {n}-day EMA for {symbol}")

    average = DataFrame()
    average["Price"] = df["Adj Close"]
    # EMA writes its result into the "EMA" column of the frame it is given.
    EMA(average, "Price", "EMA", n)
    # Only the "EMA" column is part of the result; discard the helper price column.
    average = average.drop(columns=["Price"])

    return average[start:end]
Beispiel #6
0
def rsi(symbol: str,
        start: datetime.datetime,
        end: datetime.datetime,
        period: int,
        local=False,
        dir="") -> Optional[DataFrame]:
    """
    Computes the RSI of period <period> for the given stock symbol for all trading days between <start> and <end>,
    inclusive. If <local>=True, assumes that <dir> is a string containing a path to a directory containing stock data.
    Files in this directory should be .csv's and have filename equal to the stock symbol whose data they are holding.
    This method assumes that the file <symbol>.csv exists in <dir>, if <local>=True.

    The returned DataFrame contains the columns returned by get_data plus "Gain", "Loss", "Average Gain",
    "Average Loss" and "RSI". Only data fetched for <start>..<end> is used, so the rows before the first full
    <period>-row window keep NaN in "Average Gain", "Average Loss" and "RSI".

    Returns None if the data could not be fetched (get_data returns None).

    See tools.pull_data.py for an easy way of storing stock data locally like this
    """
    df = get_data(symbol, start, end, local=local, dir=dir)
    if df is None:
        return None

    df["Gain"] = np.nan
    df["Loss"] = np.nan
    # Fill in "Gain" and "Loss" columns from the day-over-day change in "Adj Close"
    for i in range(1, len(df.index)):
        diff = df["Adj Close"][df.index[i]] - df["Adj Close"][df.index[i - 1]]
        if diff >= 0:
            df.loc[df.index[i], "Gain"] = diff
            df.loc[df.index[i], "Loss"] = 0
        else:
            df.loc[df.index[i], "Gain"] = 0
            # When calculating RSI, we take losses as positive numbers
            df.loc[df.index[i], "Loss"] = -diff

    # Chop off the first row of the DataFrame, which has no Gain/Loss number. Copy so that the columns added below
    # are written to an independent frame rather than to a slice of the original.
    df = df[df.index[1]:].copy()

    # Create three empty columns: RSI, Average Gain, Average Loss
    df["RSI"] = np.nan
    df["Average Gain"] = np.nan
    df["Average Loss"] = np.nan

    # Seed the averages with the simple mean of the first <period> gains and losses
    sum_gain = 0
    sum_loss = 0
    for i in range(period):
        sum_gain += df["Gain"][df.index[i]]
        sum_loss += df["Loss"][df.index[i]]

    seed_day = df.index[period - 1]
    # Divide by <period>, not a fixed 14, so that the seed is a true mean for any period length
    df.loc[seed_day, "Average Gain"] = sum_gain / period
    df.loc[seed_day, "Average Loss"] = sum_loss / period

    # The first RSI value is 100 - (100 / (1 + RS)), where RS = (Average Gain / Average Loss) over the first <period>
    # days of the window
    df.loc[seed_day, "RSI"] = 100 - 100 / (1 + (df["Average Gain"][seed_day] / df["Average Loss"][seed_day]))

    # Every later day updates the averages with _exp_average from the previous day's average and today's gain/loss
    for i in range(period, len(df.index)):
        day = df.index[i]
        day_before = df.index[i - 1]
        df.loc[day, "Average Gain"] = _exp_average(df["Average Gain"][day_before], df["Gain"][day], period)
        df.loc[day, "Average Loss"] = _exp_average(df["Average Loss"][day_before], df["Loss"][day], period)
        df.loc[day, "RSI"] = 100 - 100 / (1 + df["Average Gain"][day] / df["Average Loss"][day])

    return df.loc[start:end]