def filter_symbols(prices_start_date, prices_end_date):
    """Return the symbols whose recorded history covers the analysis window.

    Reads the comma-separated summary file named by the module-level
    ``SUMMARY_INPUT_FILE``, parses its ``stock_from_date`` and
    ``stock_to_date`` columns as datetimes, and keeps the rows where
    ``stock_from_date <= prices_start_date`` and
    ``stock_to_date >= prices_end_date``.

    Args:
        prices_start_date: Start of the analysis window; compared against the
            parsed ``stock_from_date`` column.
        prices_end_date: End of the analysis window; compared against the
            parsed ``stock_to_date`` column.

    Returns:
        list: The ``symbol`` column values of the rows that pass the filter.

    Raises:
        SystemExit: With status 1 when the summary file does not exist or
            cannot be read/parsed.
    """
    summary_input_file = SUMMARY_INPUT_FILE

    if not os.path.exists(summary_input_file):
        logging.critical("Location file not found: %s", summary_input_file)
        # Non-zero status so a calling shell/script can detect the failure.
        sys.exit(1)

    try:
        logging.info("Reading: %s", summary_input_file)
        stox_df = pd.read_table(summary_input_file, sep=',')
        for date_col in ('stock_from_date', 'stock_to_date'):
            stox_df[date_col] = pd.to_datetime(stox_df[date_col])
    except Exception as e:
        # This failure is fatal, so log it at the same level as the other
        # loaders in this file do.
        logging.critical("Not parsed: %s\n%s", summary_input_file, e)
        sys.exit(1)

    # Drop any symbols that don't cover at least the analysis window.
    covers_window = (
        (stox_df['stock_from_date'] <= prices_start_date)
        & (stox_df['stock_to_date'] >= prices_end_date)
    )
    return stox_df.loc[covers_window, 'symbol'].tolist()
def load_df(df_file):
    """Load a comma-separated file into a DataFrame.

    Args:
        df_file: Path of the file to read. Both ``str`` and path-like
            objects are accepted.

    Returns:
        pandas.DataFrame: The parsed contents of *df_file*.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed.
    """
    try:
        # Lazy %-formatting keeps logging working for path-like arguments,
        # which cannot be concatenated to a str with "+".
        logging.info("Reading %s", df_file)
        df = pd.read_table(df_file, sep=',')
        logging.info("df shape %s", df.shape)
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", df_file, e)
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)
    return df
def load_earnings(fname):
    """Load the comma-separated earnings file into a DataFrame.

    Args:
        fname: Path of the earnings file. Both ``str`` and path-like
            objects are accepted.

    Returns:
        pandas.DataFrame: The parsed contents of *fname*.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed.
    """
    try:
        # Lazy %-formatting keeps logging working for path-like arguments,
        # which cannot be concatenated to a str with "+".
        logging.info("Reading %s", fname)
        earn_df = pd.read_table(fname, sep=',')
        logging.info("Earnings df shape %s", earn_df.shape)
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", fname, e)
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)
    return earn_df
def load_prices(prices_file):
    """Load the comma-separated prices file and parse its ``date`` column.

    Args:
        prices_file: Path of the prices file. Both ``str`` and path-like
            objects are accepted. The file must contain a ``date`` column.

    Returns:
        pandas.DataFrame: The parsed file with ``date`` converted by
        ``pandas.to_datetime``.

    Raises:
        SystemExit: With status 1 when the file cannot be read or parsed,
            or when it has no ``date`` column.
    """
    try:
        # Lazy %-formatting keeps logging working for path-like arguments,
        # which cannot be concatenated to a str with "+".
        logging.info("Reading %s", prices_file)
        prices_df = pd.read_table(prices_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
        logging.info("Prices df shape %s", prices_df.shape)
    except Exception as e:
        logging.critical("Not parsed: %s\n%s", prices_file, e)
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)
    return prices_df
def load_config():
    """Read ``stox.ini`` from the current working directory.

    Returns:
        configparser.ConfigParser: The parser after reading the file. When
        the file is missing or cannot be opened the parser is returned
        empty and a warning is logged (``ConfigParser.read`` skips such
        files without raising).

    Raises:
        SystemExit: With status 1 when the file exists but cannot be parsed.
    """
    ini_filename = "stox.ini"
    logging.info("Reading config from: " + ini_filename)
    config = configparser.ConfigParser()
    try:
        # read() returns the list of files it actually managed to parse.
        files_read = config.read(ini_filename)
    except Exception as e:
        logging.critical("Error reading .ini file: " + ini_filename)
        logging.critical("Exception: %s %s", type(e), e)
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)

    if not files_read:
        # A missing file is not an exception for configparser; surface it so
        # that a later KeyError on a config lookup is easier to diagnose.
        logging.warning("Config file not found or unreadable: " + ini_filename)
    return config
def test_cleaner(cfg):
    """Run ``cleaner`` on one symbol over a date range; save and plot the result.

    ``cfg['cleaner_test_params']`` is a whitespace-separated string
    ``SYMBOL START END`` with ``YYYY-MM-DD`` dates, e.g.
    ``IBM 2009-01-01 2019-01-01``. If fewer than three tokens are present a
    warning is logged and the function returns without doing anything.

    Steps:
        1. Read prices from ``STOX_DATA_DIR + symbol + "_raw.csv"`` when that
           file exists, otherwise from ``RAW_PRICES_INPUT_FILE``.
        2. Select the rows for the symbol; write the per-symbol raw file
           (comma-separated) if it did not already exist.
        3. Restrict to the inclusive date range, sort by date and write
           ``<symbol>_<start>_<end>_raw.csv`` (tab-separated).
        4. Pass the frame to ``cleaner`` and write the result to
           ``<symbol>_<start>_<end>_cleantest.csv`` (tab-separated).
        5. Scatter-plot raw vs. cleaned ``close`` values, save the figure as
           ``<symbol>_<start>_<end>.png`` and show it.

    Args:
        cfg: Mapping providing the ``cleaner_test_params`` string.

    Raises:
        SystemExit: With status 1 when the prices file cannot be read/parsed.
        KeyError: From ``get_group`` when the symbol has no rows in the
            prices data.
    """
    # split() without an argument tolerates repeated, leading and trailing
    # whitespace; split(" ") produced empty tokens that passed the length
    # check and then crashed in int('').
    param_list = cfg['cleaner_test_params'].split()
    if len(param_list) < 3:
        logging.warning("Plot params malformed. Skipping plot.")
        return

    # param string [symbol start-date end-date]
    # e.g. IBM 2009-01-01 2019-01-01
    symbol, start_str, end_str = param_list[:3]
    logging.info(f"Using symbol {symbol}")
    logging.info(f" from {start_str}")
    logging.info(f" to {end_str}")

    register_matplotlib_converters()

    start_yr, start_mo, start_d = (int(p) for p in start_str.split('-')[:3])
    end_yr, end_mo, end_d = (int(p) for p in end_str.split('-')[:3])
    date_start = pd.Timestamp(start_yr, start_mo, start_d)
    date_end = pd.Timestamp(end_yr, end_mo, end_d)

    # Use the per-symbol file if it exists, else the full raw prices file.
    raw_symbol_file = STOX_DATA_DIR + symbol + "_raw.csv"
    have_symbol_file = os.path.exists(raw_symbol_file)
    prices_input_file = raw_symbol_file if have_symbol_file else RAW_PRICES_INPUT_FILE

    try:
        logging.info("Reading " + prices_input_file)
        prices_df = pd.read_table(prices_input_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
        logging.info("Prices df shape " + str(prices_df.shape))
    except Exception as e:
        logging.critical("Not parsed: " + prices_input_file + "\n" + str(e))
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)

    # Get the group for this symbol.
    logging.info("Filtering on symbol")
    df = prices_df.groupby('symbol').get_group(symbol)

    # Write the raw file for this symbol (all time frames) so later runs can
    # skip the full prices file.
    if not have_symbol_file:
        logging.info(f"Writing raw file for {symbol}")
        df.to_csv(raw_symbol_file, index=False, sep=",", float_format='%.3f')

    # Filter on the inclusive date range.
    logging.info("Filtering on date range")
    df = df[(df['date'] >= date_start) & (df['date'] <= date_end)]
    df = df.sort_values(['date'])

    # The same span string names every output of this run.
    span_str = (date_start.strftime("%Y-%m-%d") + "_" +
                date_end.strftime("%Y-%m-%d"))
    out_prefix = STOX_DATA_DIR + symbol + "_" + span_str

    # Write the raw frame for the range.
    df.to_csv(out_prefix + "_raw.csv", index=False, sep="\t",
              float_format='%.3f')

    # Run the cleaner and write its output.
    cdf = cleaner(df)
    cdf.to_csv(out_prefix + "_cleantest.csv", index=False, sep="\t",
               float_format='%.3f')

    # Plot raw (top) against cleaned (bottom) close prices.
    _, axs = plt.subplots(nrows=2, sharex=True)
    plt.suptitle(symbol, fontsize=10)
    axs[0].set_title('Raw', {'fontsize': 10})
    axs[0].scatter(df['date'].tolist(), df['close'], color='blue', s=2)
    axs[1].set_title('Cleaned', {'fontsize': 10})
    axs[1].scatter(cdf['date'].tolist(), cdf['close'], color='green', s=2)
    plt.savefig(out_prefix + ".png")
    plt.show()
def plot_price(cfg):
    """Plot open/close prices for one symbol over a date range.

    ``cfg['plot_params']`` is a whitespace-separated string
    ``SYMBOL START END`` with ``YYYY-MM-DD`` dates, e.g.
    ``IBM 2009-01-01 2019-01-01``. If fewer than three tokens are present a
    warning is logged and the function returns without doing anything.

    The prices are read from ``CLEANED_PRICES_FILE``, restricted to the
    inclusive date range and to the symbol, written tab-separated to
    ``STOX_DATA_DIR + "<symbol>_<start>_<end>.csv"``, then scatter-plotted
    (open in green, close in blue). The figure is saved next to the CSV as
    a ``.png`` and shown.

    Args:
        cfg: Mapping providing the ``plot_params`` string.

    Raises:
        SystemExit: With status 1 when the prices file cannot be read/parsed.
        KeyError: From ``get_group`` when the symbol has no rows in the
            selected date range.
    """
    # split() without an argument tolerates repeated, leading and trailing
    # whitespace; split(" ") produced empty tokens that passed the length
    # check and then crashed in int('').
    param_list = cfg['plot_params'].split()
    if len(param_list) < 3:
        logging.warning("Plot params malformed. Skipping plot.")
        return

    # param string [symbol start-date end-date]
    # e.g. IBM 2009-01-01 2019-01-01
    symbol, start_str, end_str = param_list[:3]
    logging.info(f"Plotting symbol {symbol}")
    logging.info(f" from {start_str}")
    logging.info(f" to {end_str}")

    # Parse the dates before touching the prices file so that bad
    # parameters fail before the read.
    start_yr, start_mo, start_d = (int(p) for p in start_str.split('-')[:3])
    end_yr, end_mo, end_d = (int(p) for p in end_str.split('-')[:3])
    date_start = pd.Timestamp(start_yr, start_mo, start_d)
    date_end = pd.Timestamp(end_yr, end_mo, end_d)

    register_matplotlib_converters()

    prices_input_file = CLEANED_PRICES_FILE
    try:
        logging.info("Reading " + prices_input_file)
        prices_df = pd.read_table(prices_input_file, sep=',')
        prices_df['date'] = pd.to_datetime(prices_df['date'])
        logging.info("Prices df shape " + str(prices_df.shape))
    except Exception as e:
        logging.critical("Not parsed: " + prices_input_file + "\n" + str(e))
        # Non-zero status so the failure is visible to the calling process.
        sys.exit(1)

    # Filter on the inclusive date range.
    logging.info("Filtering on date range")
    in_range = (prices_df['date'] >= date_start) & (prices_df['date'] <= date_end)
    df = prices_df[in_range].sort_values(['date'])

    # Get the group for this symbol.
    logging.info("Filtering on symbol")
    df = df.groupby('symbol').get_group(symbol)

    # The same span string names both the CSV and the PNG.
    span_str = (date_start.strftime("%Y-%m-%d") + "_" +
                date_end.strftime("%Y-%m-%d"))
    out_prefix = STOX_DATA_DIR + symbol + "_" + span_str

    # Write the selected rows to file.
    df.to_csv(out_prefix + ".csv", index=False, sep="\t", float_format='%.3f')

    # Plot open/close price.
    plt.figure()
    plt.suptitle(symbol, fontsize=10)
    dates = df['date'].tolist()
    plt.scatter(dates, df['open'], color='green', s=2)
    plt.scatter(dates, df['close'], color='blue', s=2)
    plt.savefig(out_prefix + ".png")
    plt.show()