Beispiel #1
0
def filter_symbols(prices_start_date, prices_end_date):
    """Return the symbols whose date coverage spans the analysis window.

    Reads the comma-separated summary file named by the module-level
    ``SUMMARY_INPUT_FILE`` and keeps the rows where
    ``stock_from_date <= prices_start_date`` and
    ``stock_to_date >= prices_end_date``.

    Args:
        prices_start_date: Start of the analysis window; compared against
            the datetime-parsed ``stock_from_date`` column.
        prices_end_date: End of the analysis window; compared against the
            datetime-parsed ``stock_to_date`` column.

    Returns:
        list: Values of the ``symbol`` column for the rows that pass the
        filter, in file order.

    Raises:
        SystemExit: With status 1 if the summary file does not exist or
            cannot be read/parsed.
    """
    summary_input_file = SUMMARY_INPUT_FILE

    if not os.path.exists(summary_input_file):
        logging.critical("Location file not found: " +
                         summary_input_file)
        # Non-zero status so shells/schedulers see the failure.
        sys.exit(1)

    logging.info("Reading: " + summary_input_file)
    try:
        stox_df = pd.read_table(summary_input_file, sep=',')
        stox_df['stock_from_date'] = pd.to_datetime(stox_df['stock_from_date'])
        stox_df['stock_to_date'] = pd.to_datetime(stox_df['stock_to_date'])
    except Exception as e:
        # Fatal for the run, so log at the same level as the other
        # terminating errors.
        logging.critical("Not parsed: " + summary_input_file + "\n" + str(e))
        sys.exit(1)

    # drop any symbols that don't cover at least the analysis window
    covers_window = ((stox_df['stock_from_date'] <= prices_start_date) &
                     (stox_df['stock_to_date'] >= prices_end_date))

    return stox_df.loc[covers_window, 'symbol'].tolist()
Beispiel #2
0
def load_df(df_file):
    """Load a comma-separated file into a DataFrame.

    Args:
        df_file: Path of the file to read.

    Returns:
        pandas.DataFrame: The parsed contents of ``df_file``.

    Raises:
        SystemExit: With status 1 if the file cannot be read or parsed.
    """
    logging.info("Reading %s", df_file)
    try:
        df = pd.read_table(df_file, sep=',')
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", df_file, e)
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    logging.info("df shape %s", df.shape)
    return df
Beispiel #3
0
def load_earnings(fname):
    """Load the comma-separated earnings file into a DataFrame.

    Args:
        fname: Path of the earnings file to read.

    Returns:
        pandas.DataFrame: The parsed contents of ``fname``.

    Raises:
        SystemExit: With status 1 if the file cannot be read or parsed.
    """
    logging.info("Reading %s", fname)
    try:
        earn_df = pd.read_table(fname, sep=',')
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", fname, e)
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    logging.info("Earnings df shape %s", earn_df.shape)
    return earn_df
Beispiel #4
0
def load_prices(prices_file):
    """Load the comma-separated prices file with ``date`` parsed as datetime.

    Args:
        prices_file: Path of the prices file; it must contain a ``date``
            column.

    Returns:
        pandas.DataFrame: The parsed contents of ``prices_file`` with the
        ``date`` column converted by ``pd.to_datetime``.

    Raises:
        SystemExit: With status 1 if the file cannot be read, has no
            ``date`` column, or its dates cannot be converted.
    """
    logging.info("Reading %s", prices_file)
    try:
        prices_df = pd.read_table(prices_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", prices_file, e)
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    logging.info("Prices df shape %s", prices_df.shape)
    return prices_df
Beispiel #5
0
def load_config():
    """Read ``stox.ini`` from the current working directory.

    Returns:
        configparser.ConfigParser: The parsed configuration. If the file
        is missing or unreadable the parser is returned empty and a
        warning is logged.

    Raises:
        SystemExit: With status 1 if reading or parsing the file raises.
    """
    # Config parser
    ini_filename = "stox.ini"
    logging.info("Reading config from: " + ini_filename)
    config = configparser.ConfigParser()
    try:
        # read() skips files it cannot open and returns the list of files
        # it actually parsed, so a missing file does not raise.
        files_read = config.read(ini_filename)
    except Exception as e:
        logging.critical("Error reading .ini file: " + ini_filename)
        logging.critical("Exception: " + str(type(e)) + " " + str(e))
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    if not files_read:
        logging.warning("Config file not found or unreadable: %s",
                        ini_filename)

    return config
Beispiel #6
0
def test_cleaner(cfg):
    """Run ``cleaner`` on one symbol's prices and plot raw vs. cleaned closes.

    ``cfg['cleaner_test_params']`` is a whitespace-separated string
    ``"SYMBOL START END"`` with dates written as ``YYYY-MM-DD``,
    e.g. ``IBM 2009-01-01 2019-01-01``.

    Prices are read from the per-symbol raw file
    ``STOX_DATA_DIR + symbol + "_raw.csv"`` when it exists, otherwise from
    ``RAW_PRICES_INPUT_FILE`` (in which case the per-symbol raw file is
    written for next time). The date-filtered raw rows and the output of
    ``cleaner`` are written as tab-separated files, and a two-panel scatter
    plot of ``close`` is saved as a PNG and shown.

    Args:
        cfg: Mapping that provides the ``cleaner_test_params`` string.

    Returns:
        None. Returns early, after logging a warning, when the parameters
        are malformed or the symbol has no rows in the price data.

    Raises:
        SystemExit: With status 1 if the price file cannot be read/parsed.
    """
    register_matplotlib_converters()

    # param string [symbol start-date end-date]
    #   e.g. IBM 2009-01-01 2019-01-01
    # split() without an argument tolerates repeated/leading whitespace.
    params = cfg['cleaner_test_params'].split()
    if len(params) < 3:
        logging.warning("Plot params malformed. Skipping plot.")
        return
    symbol, start_text, end_text = params[:3]

    try:
        window = []
        for text in (start_text, end_text):
            # Unpacking into exactly three names rejects short dates.
            year, month, day = (int(part) for part in text.split('-')[:3])
            window.append(pd.Timestamp(year, month, day))
    except ValueError as e:
        logging.warning(f"Plot dates malformed ({e}). Skipping plot.")
        return
    date_start, date_end = window

    logging.info(f"Using symbol {symbol}")
    logging.info(f"  from {start_text}")
    logging.info(f"  to {end_text}")

    # use group file if exists else use raw file
    raw_symbol_file = STOX_DATA_DIR + symbol + "_raw.csv"
    have_symbol_file = os.path.exists(raw_symbol_file)
    prices_input_file = (raw_symbol_file if have_symbol_file
                         else RAW_PRICES_INPUT_FILE)

    logging.info("Reading " + prices_input_file)
    try:
        prices_df = pd.read_table(prices_input_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
    except Exception as e:
        logging.critical("Not parsed: " + prices_input_file + "\n" + str(e))
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)
    logging.info("Prices df shape " + str(prices_df.shape))

    # get rows for this symbol
    logging.info("Filtering on symbol")
    df = prices_df[prices_df['symbol'] == symbol]
    if df.empty:
        logging.warning(f"No price rows for symbol {symbol}. Skipping plot.")
        return

    # write the raw file for this symbol (all time frames)
    if not have_symbol_file:
        logging.info(f"Writing raw file for {symbol}")
        df.to_csv(raw_symbol_file, index=False, sep=",", float_format='%.3f')

    # filter on date range
    logging.info("Filtering on date range")
    df = df[(df['date'] >= date_start) & (df['date'] <= date_end)]
    df = df.sort_values(['date'])

    # shared prefix for every output file of this run
    span_str = (date_start.strftime("%Y-%m-%d") + "_" +
                date_end.strftime("%Y-%m-%d"))
    out_prefix = STOX_DATA_DIR + symbol + "_" + span_str

    # write raw df to file
    df.to_csv(out_prefix + "_raw.csv", index=False, sep="\t",
              float_format='%.3f')

    # test cleaner
    cdf = cleaner(df)

    # write cdf to file
    cdf.to_csv(out_prefix + "_cleantest.csv", index=False, sep="\t",
               float_format='%.3f')

    # PLOT
    _, axs = plt.subplots(nrows=2, sharex=True)
    plt.suptitle(symbol, fontsize=10)

    axs[0].set_title('Raw', {'fontsize': 10})
    axs[0].scatter(df['date'].tolist(), df['close'], color='blue', s=2)

    axs[1].set_title('Cleaned', {'fontsize': 10})
    axs[1].scatter(cdf['date'].tolist(), cdf['close'], color='green', s=2)

    plt.savefig(out_prefix + ".png")
    plt.show()
Beispiel #7
0
def plot_price(cfg):
    """Plot open/close prices of one symbol over a date window.

    ``cfg['plot_params']`` is a whitespace-separated string
    ``"SYMBOL START END"`` with dates written as ``YYYY-MM-DD``,
    e.g. ``IBM 2009-01-01 2019-01-01``.

    Prices are read from ``CLEANED_PRICES_FILE``. The rows for the symbol
    inside the window are written as a tab-separated file, and a scatter
    plot of ``open`` (green) and ``close`` (blue) is saved as a PNG under
    ``STOX_DATA_DIR`` and shown.

    Args:
        cfg: Mapping that provides the ``plot_params`` string.

    Returns:
        None. Returns early, after logging a warning, when the parameters
        are malformed or the symbol has no rows in the window.

    Raises:
        SystemExit: With status 1 if the price file cannot be read/parsed.
    """
    # param string [symbol start-date end-date]
    #   e.g. IBM 2009-01-01 2019-01-01
    # split() without an argument tolerates repeated/leading whitespace.
    params = cfg['plot_params'].split()
    if len(params) < 3:
        logging.warning("Plot params malformed. Skipping plot.")
        return
    symbol, start_text, end_text = params[:3]

    # Validate the dates before the price file is read.
    try:
        window = []
        for text in (start_text, end_text):
            # Unpacking into exactly three names rejects short dates.
            year, month, day = (int(part) for part in text.split('-')[:3])
            window.append(pd.Timestamp(year, month, day))
    except ValueError as e:
        logging.warning(f"Plot dates malformed ({e}). Skipping plot.")
        return
    date_start, date_end = window

    logging.info(f"Plotting symbol {symbol}")
    logging.info(f"  from {start_text}")
    logging.info(f"  to {end_text}")

    register_matplotlib_converters()

    prices_input_file = CLEANED_PRICES_FILE
    logging.info("Reading " + prices_input_file)
    try:
        prices_df = pd.read_table(prices_input_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
    except Exception as e:
        logging.critical("Not parsed: " + prices_input_file + "\n" + str(e))
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)
    logging.info("Prices df shape " + str(prices_df.shape))

    # filter on date range
    logging.info("Filtering on date range")
    in_window = ((prices_df['date'] >= date_start) &
                 (prices_df['date'] <= date_end))
    df = prices_df[in_window].sort_values(['date'])

    # get rows for this symbol
    logging.info("Filtering on symbol")
    df = df[df['symbol'] == symbol]
    if df.empty:
        logging.warning(f"No price rows for symbol {symbol}. Skipping plot.")
        return

    # write df to file
    span_str = (date_start.strftime("%Y-%m-%d") + "_" +
                date_end.strftime("%Y-%m-%d"))
    out_prefix = STOX_DATA_DIR + symbol + "_" + span_str
    df.to_csv(out_prefix + ".csv", index=False, sep="\t",
              float_format='%.3f')

    # plot open/close price
    plt.figure()
    plt.suptitle(symbol, fontsize=10)
    dates = df['date'].tolist()
    plt.scatter(dates, df['open'], color='green', s=2)
    plt.scatter(dates, df['close'], color='blue', s=2)

    plt.savefig(out_prefix + ".png")
    plt.show()