Python extract_data 예제들, SimpleStockDataPlot.extract_data Python 예제들

예제 #1

0

파일 보기

def get_price_data(ticker_ls, end_date, look_back_mths):
    """
    Return a dataframe of daily price data (adjusted close price), sourced from Yahoo Finance

    Args:
        ticker_ls: <list> of tickers
        end_date: <dt.datetime> of end date to apply look back months to
        look_back_mths: <int> number of months from end date to define the starting date to pull data from

    Returns:
        <pd.DataFrame> indexed by business day from the start date to the end date, one column per ticker,
        with missing values forward-filled from the previous row
    """
    start_date = end_date - relativedelta(months=look_back_mths)

    # format both bounds once; the same strings drive the index and every data pull
    start_str = start_date.strftime(const.DATE_STR_FORMAT)
    end_str = end_date.strftime(const.DATE_STR_FORMAT)

    # freq='B' -> business-day index, so weekends never appear as rows
    df_price_data = pd.DataFrame(
        index=pd.date_range(start=start_str, end=end_str, freq='B'),
        columns=ticker_ls)

    for asset in ticker_ls:
        # presumably extract_data returns a date-indexed frame, so the assignment aligns on the
        # business-day index and leaves NaN where no price exists -- confirm against extract_data
        df_price_data[asset] = extract_data(asset, start_str, end_str)[const.ADJ_CLOSE]

    # DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in pandas >= 2.1
    return df_price_data.ffill()

예제 #2

0

파일 보기

    def __init__(self,
                 ticker1,
                 start_date,
                 end_date=None):
        """
        Pull the price data for ticker1 and download the VSTOXX (V2TX) history to compare it against

        Args:
            ticker1: <str> ticker of stock to run regression against VSTOXX on
            start_date: <str> YYYY-MM-DD format
            end_date: <str> YYYY-MM-DD format, defaults to today (resolved when the object is created)
        """
        # A default written as dt.datetime.today() in the signature is evaluated only once, when the
        # function is defined, so "today" would go stale in a long-running process.  Resolve it here.
        if end_date is None:
            end_date = dt.datetime.today().strftime('%Y-%m-%d')

        self.ticker1_data = extract_data(ticker1, start_date, end_date)

        vstoxx_url = r'http://www.stoxx.com/download/historical_values/h_vstoxx.txt'

        cur_dir = os.path.dirname(os.path.realpath(__file__))
        vstoxx_file = os.path.join(cur_dir, 'data/vstoxx.txt')

        # urlretrieve cannot create missing folders; make sure the target directory exists first
        os.makedirs(os.path.dirname(vstoxx_file), exist_ok=True)
        urlretrieve(vstoxx_url, vstoxx_file)

        # header=2 takes the column names from the third line of the file; the first column is parsed
        # as day-first dates and used as the index
        self.ticker2_data = pd.read_csv(vstoxx_file,
                                        index_col=0,
                                        header=2,
                                        parse_dates=True,
                                        sep=',',
                                        dayfirst=True)

        # keep only the V2TX column, as a single-column frame
        self.ticker2_data = self.ticker2_data['V2TX'].to_frame()

        self.ticker1_nme = ticker1
        self.ticker2_nme = 'V2TX'

예제 #3

0

파일 보기

    def test_simple_stock_data_plot(self):
        """ Smoke test: the entry points of SimpleStockDataPlot.py run without raising """

        from SimpleStockDataPlot import plot_bar_volume, PlotStockData, extract_data

        # (ticker, start_date, end_date) shared by every call below
        query = ('AAPL', '2020-03-01', '2021-03-01')

        plot_bar_volume(*query)

        # PlotStockData instances are callable; calling one runs the plot
        plotter = PlotStockData(*query)
        plotter()

        # only checks that the data pull completes; the result is not inspected
        extract_data(*query)

예제 #4

0

파일 보기

파일: candlestick_chart.py 프로젝트: MichelleChung-code/FinanceSandbox

    def __init__(self,
                 ticker,
                 start_date,
                 end_date=None,
                 res_path=False):
        """
        Create candlestick plot of given stock

        Args:
            ticker: <str> stock ticker
            start_date: <str> YYYY-MM-DD start date for the data pull and display
            end_date: <str> YYYY-MM-DD end date for the data pull and display.  Defaults to today if not provided
                (resolved when the object is created).
            res_path: <str> path to results folder to save image of the resulting plot.  If False, will not save image.
        """
        # A default written as dt.datetime.today() in the signature is evaluated only once, when the
        # function is defined, so "today" would go stale in a long-running process.  Resolve it here.
        if end_date is None:
            end_date = dt.datetime.today().strftime('%Y-%m-%d')

        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.res_path = res_path

        # data is pulled eagerly at construction time
        self.data = extract_data(ticker, start_date, end_date)

예제 #5

0

파일 보기

파일: factor_exposures.py 프로젝트: MichelleChung-code/FinanceSandbox

    def get_stock_return_data(self):
        """
        Get the return data of the self.ticker stock from Yahoo Finance

        Side effect: self.factor_data is trimmed so that it starts at the first date for which a
        ticker return is available.

        Returns:
            <pd.DataFrame> with a single column named after self.ticker holding the month-over-month
            percentage change of the last adjusted close of each month
        """
        str_date_ls = self.factor_data.index.strftime(const.DATE_STR_FORMAT)
        end_date = str_date_ls[-1]  # pull ticker data up to the last factor date

        df = extract_data(self.ticker, start_date=False,
                          end_date=end_date)[const.ADJ_CLOSE].to_frame()

        # DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in pandas >= 2.1
        df = df.ffill()
        df = df.rename(columns={const.ADJ_CLOSE: self.ticker})

        # monthly data only and last calendar date per month & calculate returns;
        # the first row is dropped because pct_change has no previous month to compare against
        # NOTE(review): newer pandas prefers the 'ME' alias over 'M'; kept as 'M' so older versions still work
        df = df.resample('M').last().pct_change().iloc[1:]

        # only keep factor_data for dates following the earliest ticker return data date available
        # (get_loc raises KeyError if that date is missing from the factor_data index)
        first_return_pos = self.factor_data.index.get_loc(df.index[0])
        self.factor_data = self.factor_data[first_return_pos:]

        return df

예제 #6

0

파일 보기

def plot_corr_mat(ls_tickers, start_date, end_date, res_path=False):
    """
    Plot heatmap showing pearson's correlation matrix between inputted stocks

    Args:
        ls_tickers: <list> of tickers to produce correlation matrix
        start_date: <str> YYYY-MM-DD start date for data
        end_date: <str> YYYY-MM-DD end date for data
        res_path: <str> output path to save plot to, defaults to False i.e. not saved

    """
    df_res_ls = []

    # pull data, tagging every row with its ticker so the frames can be stacked and pivoted later
    for ticker in ls_tickers:
        df = extract_data(ticker, start_date, end_date)
        df[consts.TICKER] = ticker
        df_res_ls.append(df)

    df = pd.concat(df_res_ls)
    df.reset_index(inplace=True)

    # pivot to reformat data to have date as index, tickers as the columns, and adjusted close as the values
    # (keyword arguments are required: DataFrame.pivot no longer accepts positional arguments in pandas >= 2.0)
    ls_pivot = [consts.DATE, consts.TICKER, consts.ADJ_CLOSE]
    df = df[ls_pivot].pivot(index=consts.DATE,
                            columns=consts.TICKER,
                            values=consts.ADJ_CLOSE)

    # calculate pearson correlation
    df = df.corr(method='pearson')

    # plotting
    plt.figure()
    seaborn.heatmap(df, cmap='RdYlGn', annot=True)
    if res_path:
        plt.savefig(os.path.join(res_path, 'corr_mat_{}_{}'.format(start_date, end_date)))

    # .show() must go after .savefig() or else saved figure will be blank
    plt.show()

예제 #7

0

파일 보기

파일: backtesting.py 프로젝트: MichelleChung-code/FinanceSandbox

def backtesting(benchmark_index,
                signal_tolerance,
                start_date,
                end_date=None):
    """
    Backtest performance of a stock if following a signal based strategy to buy if the monthly rolling average exceeds
    a signal tolerance over the annual rolling average.  Rules for sell and hold follow similarly.

    Prints the count of each signal and shows two line plots: the daily signals, and the cumulative
    returns of the market versus the signal-driven strategy.

    Args:
        benchmark_index: <str> ticker for the stock of interest
        signal_tolerance: <int> tolerance controlling the buy, sell, hold actions
        start_date: <str> YYYY-MM-DD start date for data
        end_date: <str> YYYY-MM-DD end date for data.  Defaults to today if not provided (resolved at call time).
    """
    # A default written as datetime.today() in the signature is evaluated only once, when the
    # function is defined, so "today" would go stale in a long-running process.  Resolve it here.
    if end_date is None:
        end_date = datetime.today().strftime('%Y-%m-%d')

    benchmark_historic_data = extract_data(ticker=benchmark_index,
                                           start_date=start_date,
                                           end_date=end_date)

    # Get the rolling average annual and per month
    close_prices = benchmark_historic_data[CLOSE]
    benchmark_historic_data[COL_NUM_TRADE_DAYS_YR_TREND] = \
        close_prices.rolling(NUM_TRADE_DAYS_PER_YR).mean()
    benchmark_historic_data[COL_NUM_TRADE_DAYS_MONTH_TREND] = \
        close_prices.rolling(NUM_TRADE_DAYS_PER_MONTH).mean()

    # work around the annual trend for standard deviation todo think about this, instead of using signal tolerance
    # standard_dev = np.floor(benchmark_historic_data[COL_NUM_TRADE_DAYS_YR_TREND].std())

    # trend difference (monthly trend minus annual trend)
    col_name_diff = '{}-{}_difference'.format(NUM_TRADE_DAYS_PER_MONTH,
                                              NUM_TRADE_DAYS_PER_YR)
    benchmark_historic_data[col_name_diff] = \
        benchmark_historic_data[COL_NUM_TRADE_DAYS_MONTH_TREND] - benchmark_historic_data[COL_NUM_TRADE_DAYS_YR_TREND]
    trend_diff = benchmark_historic_data[col_name_diff]

    # assuming that we neglect transaction costs and market liquidity
    # rows where the difference is NaN (rolling windows not yet filled) match none of the three
    # conditions and therefore keep a NaN signal
    benchmark_historic_data.loc[abs(trend_diff) <= signal_tolerance, SIGNAL_VAL] = 0
    benchmark_historic_data.loc[trend_diff > signal_tolerance, SIGNAL_VAL] = 1
    benchmark_historic_data.loc[trend_diff < -signal_tolerance, SIGNAL_VAL] = -1

    # exhaustive mapping for signal name
    benchmark_historic_data[SIGNAL_NAME] = benchmark_historic_data[SIGNAL_VAL].map(SIGNALS_DICT)
    print(benchmark_historic_data[SIGNAL_NAME].value_counts())

    show_line_plot(benchmark_historic_data[SIGNAL_VAL],
                   title='Trading Day Signals',
                   x_label='Date',
                   y_label='Signal',
                   plot_arr=True)

    # Compare the returns
    benchmark_historic_data, original_returns_col_name = log_return(
        benchmark_historic_data, CLOSE)

    # apply proposed trading signals, shift up one day, returns based on t-1
    # (today's return is multiplied by the previous row's signal)
    benchmark_historic_data['Custom'] = \
        benchmark_historic_data[original_returns_col_name] * benchmark_historic_data[SIGNAL_VAL].shift(1)

    # running totals of both return series
    returns_cols = [original_returns_col_name, 'Custom']
    benchmark_historic_data[returns_cols] = benchmark_historic_data[returns_cols].cumsum()

    benchmark_historic_data.rename(
        columns={original_returns_col_name: 'Market'}, inplace=True)

    show_line_plot(benchmark_historic_data[['Market', 'Custom']],
                   title='Backtesting of {}'.format(benchmark_index),
                   x_label='Date',
                   y_label='Cumulative Returns',
                   plot_arr=True)