def match_changes_w_current():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")
    changes_df = pd.read_csv(final_path + ACTION_TYPE[0] + ".csv")

    # map old ticker symbol -> [new ticker symbol, new company name]
    hash_map = {}
    for index_label, changes_row in changes_df.iterrows():
        old_ticker = changes_row["Old Symbol"]
        hash_map[old_ticker] = [
            changes_row["New Symbol"], changes_row["New Company Name"]
        ]

    # replace any renamed tickers in the current listing
    for index_label, current_row in current_df.iterrows():
        current_ticker = current_row["Ticker"]
        if current_ticker in hash_map:
            print("Changing " + current_ticker + " for " +
                  hash_map[current_ticker][0])
            current_df.at[index_label, "Ticker"] = hash_map[current_ticker][0]
            current_df.at[index_label, "Name"] = hash_map[current_ticker][1]

    current_df[["Ticker", "Name"]].to_csv(final_path + "current.csv", index=False)
    return
def get_missing_float():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")

    # rows whose float was never filled in (effectively zero)
    list_missing = current_df.index[current_df["Float"] < 0.01].tolist()
    ticker_missing = []
    for index in list_missing:
        ticker_missing.append(current_df["Ticker"][index])

    # query TD Ameritrade fundamentals for the missing tickers
    td_fund = "https://api.tdameritrade.com/v1/instruments"
    params = {
        "apikey": tda_key,
        "symbol": ticker_missing,
        "projection": "fundamental",
    }
    content = requests.get(td_fund, params=params)
    data = content.json()

    # create a ticker -> float dict from the response
    ticker_float_dict = {}
    for ticker in data:
        ticker_float_dict[ticker] = data[ticker]['fundamental']['marketCapFloat']
    print(ticker_float_dict)
def Harvest_selected_stocks(stock_type):
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    name_of_file = final_path_to_save + stock_type + ".csv"
    if os.path.exists(name_of_file):
        print(name_of_file + " already exists.")
        return

    page_html = requests.get(SELECTED_STOCKS_PARTIAL_URL + stock_type).text
    parse = BeautifulSoup(page_html, 'lxml')
    # find the table body with the list of stocks
    table = parse.tbody

    # determine the columns for the file: the first tr in the table holds the names
    column_names = []
    header_row = table.find("tr")
    for col_name in header_row.find_all(["th", "td"]):
        column_names.append(col_name.get_text())

    # iterate over every row and attribute each cell to its column name
    selected_list = []
    for table_row in table.find_all("tr"):
        stock_dict = {}
        for col_index, col_tag in enumerate(table_row.find_all(["th", "td"])):
            name_of_column = column_names[col_index]
            stock_dict[name_of_column] = col_tag.get_text()
        selected_list.append(stock_dict)

    # initialize a dataframe with a list of dictionaries as rows, skipping the header row
    ticker_and_name_df = pd.DataFrame(selected_list[1:], columns=column_names)
    ticker_and_name_df.to_csv(name_of_file, index=False)
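# A possible simplification (a sketch, not what this script currently does): pandas can
# parse HTML tables directly, which avoids the manual header/cell bookkeeping above.
# Assumes the stock list is the first <table> on the page; column names come from the
# page itself, so they may need renaming afterwards.
def harvest_selected_stocks_readhtml_sketch(stock_type):
    page_html = requests.get(SELECTED_STOCKS_PARTIAL_URL + stock_type).text
    tables = pd.read_html(page_html)  # returns a list of DataFrames, one per table
    stocks_df = tables[0]
    stocks_df.to_csv(find_file(DIRECTORY_TO_SAVE_IN) + stock_type + ".csv", index=False)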
def Harvest_all_stocks():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    # check if the file already exists
    name_of_file = final_path_to_save + "current.csv"
    if os.path.exists(name_of_file):
        print(name_of_file + " already exists.")
        return

    # get the page html content and filter it through bs4
    page_html = requests.get(ALL_STOCKS_URL).text
    parse = BeautifulSoup(page_html, 'lxml')
    # find the table with the list of stocks
    table = parse.find(class_="no-spacing")

    # retrieve ticker and name as text from every table entry tag "li"
    ticker_and_name_list = []
    for ticker_name_tag in table.find_all("li"):
        ticker_and_name = ticker_name_tag.a.get_text()
        name_split = ticker_and_name.split(" - ")
        dict_row = {"Ticker": name_split[0], "Name": name_split[1]}
        ticker_and_name_list.append(dict_row)

    # column names based on the individual dicts' keys
    column_names = list(ticker_and_name_list[0].keys())
    # initialize a dataframe with a list of dictionaries as rows
    ticker_and_name_df = pd.DataFrame(ticker_and_name_list, columns=column_names)
    ticker_and_name_df.to_csv(name_of_file)
def check_all_current_for_growth():
    full_path_to_income_dir = find_file(local_path_to_income_dir)
    directory_to_save_in = find_file(directory)
    file_winners = directory_to_save_in + "Growing revenue " + str(
        TOTAL_QUARTERS) + " quarters.txt"

    files = os.listdir(full_path_to_income_dir)
    for index, income_file in enumerate(files):
        path_to_file = full_path_to_income_dir + income_file
        # income files are named "<TICKER>_...", so the ticker is the first piece
        split_list = income_file.split("_")
        ticker_name = split_list[0]
        if is_ticker_growing_rev(path_to_file):
            with open(file_winners, "a+") as writer:
                writer.write(ticker_name + "\n")
        if index % 100 == 0:
            print(index)
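# is_ticker_growing_rev() is defined elsewhere in the repo; below is only a minimal
# sketch of what it could look like. The "revenue" column name and the oldest-to-newest
# row ordering are assumptions, not necessarily what the real income csv files contain.
def is_ticker_growing_rev_sketch(path_to_file):
    income_df = pd.read_csv(path_to_file)
    if len(income_df) < TOTAL_QUARTERS:
        return False
    # take the last TOTAL_QUARTERS rows (assumed oldest to newest) and
    # require revenue to increase quarter over quarter
    revenues = income_df["revenue"].tail(TOTAL_QUARTERS).tolist()
    return all(earlier < later for earlier, later in zip(revenues, revenues[1:]))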
def get_csv_based_on_float(start, end=datetime.date(datetime.now())):
    final_dir_with_tickers = find_file(DIRECTORY_WITH_TICKERS)
    final_historical = find_file(historical_path)
    current_df = pd.read_csv(final_dir_with_tickers + types[0] + ".csv")

    # tickers whose float is below the cutoff
    indices_small_floats = current_df.index[
        current_df["Float"] < MAX_FLOAT].tolist()
    ticker_missing = []
    for index in indices_small_floats:
        ticker_missing.append(current_df["Ticker"][index])

    list_of_empty = []
    for index, ticker in enumerate(ticker_missing):
        file_name = final_historical + "/" + ticker + "_" + start + "_" + str(
            end) + ".csv"
        if os.path.exists(file_name):
            print("skip " + ticker)
            continue

        url = "https://fmpcloud.io/api/v3/historical-price-full/" + ticker + "?"
        params = {
            "from": start,
            "to": end,
            "datatype": "csv",
            "apikey": fmp_key
        }
        content_url = url + urllib.parse.urlencode(params)
        downloaded_csv = pd.read_csv(content_url, encoding="iso-8859-1")

        # keep track of tickers that returned no price history
        if len(downloaded_csv) < 2:
            print(ticker)
            list_of_empty.append(ticker)
        downloaded_csv.to_csv(file_name)
        if index % 100 == 0:
            print(index)

    with open("Empty historical " + str(start) + "_" + str(end), "a+") as writer:
        writer.write("\n".join(list_of_empty))
def Harvest_ALL_ticker_names():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    if not os.path.isdir(final_path_to_save):
        print("Directory " + DIRECTORY_TO_SAVE_IN + " does not exist")
        print("Please create one manually")
        return
    Harvest_all_stocks()
    for stock_type in ACTION_TYPE:
        Harvest_selected_stocks(stock_type)
def change_date_format():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    for action_path in ACTION_TYPE:
        path = final_path + action_path + ".csv"
        df = pd.read_csv(path)
        # convert dates such as "Jan 02, 2020" to "2020-01-02"
        for index in range(len(df["Date"])):
            old_time_version = df["Date"][index]
            new_time_version = datetime.datetime.strptime(
                old_time_version, "%b %d, %Y").strftime("%Y-%m-%d")
            df.at[index, "Date"] = new_time_version
        df.to_csv(path, index=False)
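# The row-by-row loop above can also be done in one vectorized call; this is a sketch,
# assuming the same "%b %d, %Y" input format in the "Date" column:
def change_date_format_vectorized_sketch():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    for action_path in ACTION_TYPE:
        path = final_path + action_path + ".csv"
        df = pd.read_csv(path)
        # parse the whole column at once, then format back to ISO dates
        df["Date"] = pd.to_datetime(df["Date"], format="%b %d, %Y").dt.strftime("%Y-%m-%d")
        df.to_csv(path, index=False)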
def get_leaders_for_one_day(date, symbols, metric):
    final_path_to_write = find_file(PATH_TO_WRITE_TO_LEADERS)
    url = "https://fmpcloud.io/api/v3/historical-price-full/"
    params = {
        "from": date,
        "to": date,
        "apikey": fmp_key,
    }

    sorted_list = []
    track_empty_results = []
    for index, ticker in enumerate(symbols):
        content = requests.get(url + ticker + "?", params=params)
        data = content.json()
        # skip tickers with empty results
        if len(data) == 0:
            track_empty_results.append(ticker)
            continue
        try:
            attribute_compared = data["historical"][0][metric]
        except (KeyError, IndexError):
            print(data)
            continue

        # keep a sorted list of the top TOP_ELEMENTS entries, smallest first,
        # so the element at index 0 is always the candidate to evict
        if len(sorted_list) < TOP_ELEMENTS:
            sorted_list.append([attribute_compared, ticker])
            sorted_list = sorted(sorted_list, key=lambda x: x[0])
        # if the new element is larger than the current smallest, replace it
        elif sorted_list[0][0] < attribute_compared:
            sorted_list[0] = [attribute_compared, ticker]
            sorted_list = sorted(sorted_list, key=lambda x: x[0])
        print("Ticker in a day: " + str(index) + " " + str(attribute_compared))

    sorted_list = [str(i[1]) for i in sorted_list]
    # insert the date as the first field of the csv row
    sorted_list.insert(0, date)
    one_line = ",".join(sorted_list)

    FILE_NAME = "Top " + str(TOP_ELEMENTS) + " volume leaders.csv"
    with open(final_path_to_write + FILE_NAME, "a") as writer:
        writer.write(one_line + "\n")
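# The top-N bookkeeping above re-sorts the list on every insert. A min-heap does the
# same job without re-sorting; this is a sketch of that idea with the standard heapq
# module, not the version the script currently uses:
import heapq

def top_n_by_metric_sketch(value_ticker_pairs, n):
    """value_ticker_pairs: iterable of (value, ticker); returns the n largest by value."""
    heap = []
    for value, ticker in value_ticker_pairs:
        if len(heap) < n:
            heapq.heappush(heap, (value, ticker))
        elif heap[0][0] < value:
            # heappushpop pushes the new pair and pops the current smallest in one step
            heapq.heappushpop(heap, (value, ticker))
    return sorted(heap, reverse=True)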
def Get_ticker_dir_map():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    # prepend the working directory so the map holds absolute paths
    current_path = os.getcwd() + "/" + final_path_to_save
    files = os.listdir(current_path)
    csv_files = [
        single_csv for single_csv in files if single_csv.endswith(".csv")
    ]

    hash_map = {}
    for csv_file in csv_files:
        formatted_name = csv_file.replace(".csv", "")
        formatted_name = formatted_name.capitalize()
        hash_map[formatted_name] = current_path + csv_file
    return hash_map
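# Example usage (a sketch): the keys are the capitalized csv file names, so with the
# files this project creates ("current.csv" plus one file per ACTION_TYPE) a lookup
# would look like this:
#
#     ticker_files = Get_ticker_dir_map()
#     current_csv_path = ticker_files["Current"]   # absolute path to current.csv
#     current_df = pd.read_csv(current_csv_path)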
def get_leaders_for_period(metric, start, end=datetime.date(datetime.now())):
    final_dir_with_tickers = find_file(DIRECTORY_WITH_TICKERS)
    # currently listed tickers
    current_df = pd.read_csv(final_dir_with_tickers + types[0] + ".csv")
    # delistings, used to keep only tickers that were not yet delisted on a given date
    delisted_df = pd.read_csv(final_dir_with_tickers + types[1] + ".csv")
    # symbol changes, used to map tickers back to the symbol valid on a given date
    changes_df = pd.read_csv(final_dir_with_tickers + types[2] + ".csv")

    business_days = get_working_days(start, end)
    # from earliest to latest
    for index, date in enumerate(business_days):
        todays_ticker_series = get_tickers_for_the_day(current_df, delisted_df,
                                                       changes_df, date)
        get_leaders_for_one_day(date, todays_ticker_series, metric)
        print("day " + str(index))
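# get_tickers_for_the_day() lives in another module; below is only a sketch of the
# filtering it is expected to perform. The column names "Ticker", "Date", "Old Symbol"
# and "New Symbol" are assumptions based on the csv files built elsewhere in this repo.
def get_tickers_for_the_day_sketch(current_df, delisted_df, changes_df, date):
    tickers = set(current_df["Ticker"])
    # include tickers that were still listed on this date (delisted only later)
    still_listed = delisted_df[delisted_df["Date"] > date]
    tickers.update(still_listed["Ticker"])
    # undo symbol changes that had not yet happened on this date
    future_changes = changes_df[changes_df["Date"] > date]
    for _, row in future_changes.iterrows():
        tickers.discard(row["New Symbol"])
        tickers.add(row["Old Symbol"])
    return pd.Series(sorted(tickers))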
def find_peak_time(intra_file, stock_criteria, time_criteria, chart_title):
    if not os.path.exists(intra_file):
        print(intra_file)
        print("File does not exist.")
        return

    all_intra_df = pd.read_csv(intra_file)
    median_eod = all_intra_df["EOD"].median()
    print(median_eod)

    # keep only rows matching the stock criteria with an above-median EOD value
    crit_name = stock_criteria[0]
    crit_val = stock_criteria[1]
    run_w_vol_df = all_intra_df[(all_intra_df[crit_name] == crit_val)
                                & (all_intra_df["EOD"] >= median_eod)].copy()
    run_w_vol_df[time_criteria] = [
        get_time_of_day(tmstp) for tmstp in run_w_vol_df[time_criteria]
    ]

    # count occurrences per time of day, graphed from 9:30 to 4pm on the x axis
    x_y_dict = {}
    for time in run_w_vol_df[time_criteria]:
        x_y_dict[str(time)] = x_y_dict.get(str(time), 0) + 1
    sorted_x_y_dict = collections.OrderedDict(sorted(x_y_dict.items()))
    y_vals = list(sorted_x_y_dict.values())
    x_vals = list(sorted_x_y_dict.keys())

    fig, ax = plt.subplots()
    ax.bar(x_vals, y_vals)
    fig.autofmt_xdate()
    ax.fmt_xdata = mdates.DateFormatter("%H:%M")
    plt.xlabel("Time between open and close")
    plt.ylabel("Number of occurrences")
    plt.title(chart_title)
    plt.show()

    graph_path = find_file("Graphs")
    fig.savefig(graph_path + chart_title + ".png")
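# The manual dictionary counting above can also be expressed with pandas directly;
# a sketch of the equivalent step, reusing the run_w_vol_df / time_criteria names:
#
#     counts = run_w_vol_df[time_criteria].astype(str).value_counts().sort_index()
#     x_vals, y_vals = list(counts.index), list(counts.values)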
def get_float_for_current():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")

    # create the column if necessary
    if "Float" not in current_df:
        current_df["Float"] = 0

    # fetch fundamentals in windows of PER_REQUEST tickers per request
    td_fund = "https://api.tdameritrade.com/v1/instruments"
    for ticker_index in range(0, len(current_df), PER_REQUEST):
        ticker_window = current_df["Ticker"][ticker_index:ticker_index + PER_REQUEST]
        ticker_window = list(ticker_window)
        params = {
            "apikey": tda_key,
            "symbol": ticker_window,
            "projection": "fundamental",
        }
        content = requests.get(td_fund, params=params)
        data = content.json()

        # create a ticker -> float dict from the response
        ticker_float_dict = {}
        for ticker in data:
            ticker_float_dict[ticker] = data[ticker]['fundamental']['marketCapFloat']

        # map each float to its ticker name for this window, incrementally
        window = current_df.index[ticker_index:ticker_index + PER_REQUEST]
        current_df.loc[window, "Float"] = current_df.loc[window, "Ticker"].map(
            ticker_float_dict)

    # manual verification
    print(current_df.tail(30))
    current_df.to_csv(final_path + "current.csv", index=False)
from Data_Access.Find_file_path import find_file
from Intraday_analysis_scripts import vf_linear_reg, runners_peak_time
from Intraday_fetch_scripts import get_intraday_data

# write the programs to run here

# csv files
intra_all_csv = find_file("Intra_All.csv")
intra_vol_csv = find_file("Intra_Volume.csv")
intra_hl_csv = find_file("Intra_Highs_Lows.csv")
new_vol_intra_csv = find_file("Intra_Vol_Float.csv")
vol_ratio_csv = find_file("Intra_vol_ratio_direc.csv")

# directories
intraday_vol_path_csv = find_file("Intraday_formated")
vf_w_float_path = find_file("VF_with_float")
vf_pure_path = find_file("VF_with_volume_only")
daily_period_path = find_file("Daily_period")

# stock_criteria = ["Direction", 1]
# time_criteria = "Time of high"
# chart_title = time_criteria + " of day for low float runners"
# runners_peak_time.find_peak_time(intra_all_csv, stock_criteria, time_criteria, chart_title)

# vf_linear_reg.get_vf_vs_real_every_period(intra_vol_csv, intra_hl_csv, vol_ratio_csv, vf_pure_path)

categories = ["1 min", "5 min", "15 min", "30 min", "60 min", "EOD"]
vf_linear_reg.vf_real_ratio_describe(vol_ratio_csv, categories)

# get_intraday_data.add_cols_to_formatted_dataset(daily_period_path, vol_ratio_csv, ['open', 'vwap', 'close'])
import os
import pandas as pd
import datetime
import urllib
import requests
import time
import sys

sys.path.append(
    os.getcwd())  # current directory must be the root project directory

from Intraday_fetch_scripts.get_daily_vol_leaders import get_working_days
from Data_Access.Find_file_path import find_file
from api_keys import tda_key

historical_intraday_dir = "Intraday_formated"
historical_intraday_dir = find_file(historical_intraday_dir)
raw_folder_to_read = "Intraday_raw"
raw_folder_to_read = find_file(raw_folder_to_read)
intraday_path_formatted = historical_intraday_dir + "Intra_"
'''
Reads data from the intraday csv files collected in the Intraday_raw folder.

The keys to writing these functions are:
1) I am capable of doing the same thing again next time with other data.
   This function creates a new csv OR adds new rows of the same columns to the file.
   If I want to merge two csv files I can use another function for that.
   Now the problem is that if I modify the script that I already have, I will just add
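# A minimal sketch of the two operations the note above describes, using pandas.
# The function names and file paths here are placeholders, not the project's real files:
def append_rows_same_columns_sketch(new_rows_df, csv_path):
    # create the csv on the first write, append without a duplicate header afterwards
    new_rows_df.to_csv(csv_path, mode="a", header=not os.path.exists(csv_path), index=False)

def merge_two_csv_sketch(first_csv, second_csv, merged_csv):
    # stack two csv files with the same columns into one
    merged_df = pd.concat([pd.read_csv(first_csv), pd.read_csv(second_csv)], ignore_index=True)
    merged_df.to_csv(merged_csv, index=False)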