def match_changes_w_current():

    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")
    changes_df = pd.read_csv(final_path + ACTION_TYPE[0] + ".csv")
    hash_map = {}

    # map each old ticker symbol to its new symbol and company name
    for index_label, changes_row in changes_df.iterrows():
        old_ticker = changes_row["Old Symbol"]
        hash_map[old_ticker] = [
            changes_row["New Symbol"], changes_row["New Company Name"]
        ]

    for index_label, current_row in current_df.iterrows():
        current_ticker = current_row["Ticker"]
        if current_ticker in hash_map:
            print("Changing " + current_ticker + " for " +
                  hash_map[current_row["Ticker"]][0])
            current_df.at[index_label,
                          "Ticker"] = hash_map[current_row["Ticker"]][0]
            current_df.at[index_label,
                          "Name"] = hash_map[current_row["Ticker"]][1]

    current_df[["Ticker", "Name"]].to_csv(final_path + "current.csv",
                                          index=False)
    return
def get_missing_float():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")
    list_missing = current_df.index[current_df["Float"] < 0.01].tolist()
    td_fund = "https://api.tdameritrade.com/v1/instruments"

    ticker_missing = []
    for index in list_missing:
        ticker_missing.append(current_df["Ticker"][index])

    params = {
        "apikey": tda_key,
        "symbol": ticker_missing,
        "projection": "fundamental",
    }

    content = requests.get(td_fund, params=params)
    data = content.json()

    # create ticker float dict from the response
    ticker_float_dict = {}
    for ticker in data:
        ticker_float_dict[ticker] = data[ticker]['fundamental'][
            'marketCapFloat']

    print(ticker_float_dict)
def Harvest_selected_stocks(stock_type):
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    name_of_file = final_path_to_save + stock_type + ".csv"
    if os.path.exists(name_of_file):
        print(name_of_file + " already exists.")
        return

    page_html = requests.get(SELECTED_STOCKS_PARTIAL_URL + stock_type).text
    parse = BeautifulSoup(page_html, 'lxml')

    # find the table with the list of stocks
    table = parse.tbody

    # determine columns for the file : first tr in the table is the names
    column_names = []
    header_row = table.find("tr")
    for col_name in header_row.find_all(["th", "td"]):
        col_name_string = col_name.get_text()
        column_names.append(col_name_string)

    # iterate over every row
    selected_list = []
    for table_row in table.find_all("tr"):
        stock_dict = {}
        # parse each cell in the row and attribute it to its column name
        for col_index, col_string in enumerate(table_row.find_all(["th", "td"])):
            cell_text = col_string.get_text()
            name_of_column = column_names[col_index]
            stock_dict[name_of_column] = cell_text
        selected_list.append(stock_dict)

    # initializing a dataframe with a list of dictionaries as rows
    ticker_and_name_df = pd.DataFrame(selected_list[1:], columns=column_names)
    ticker_and_name_df.to_csv(name_of_file, index=False)
def Harvest_all_stocks():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    # check if file already exists
    name_of_file = final_path_to_save + "current.csv"
    if os.path.exists(name_of_file):
        print(name_of_file + " already exists.")
        return

    # get the page html content and filter it through bs4
    page_html = requests.get(ALL_STOCKS_URL).text
    parse = BeautifulSoup(page_html, 'lxml')

    # find the table with the list of stocks
    table = parse.find(class_="no-spacing")

    # retrieve ticker and name as text from every table entry tag "li"
    ticker_and_name_list = []
    for ticker_name_tag in table.find_all("li"):
        ticker_and_name = ticker_name_tag.a.get_text()
        name_split = ticker_and_name.split(" - ")
        dict_row = {"Ticker" : name_split[0], "Name" : name_split[1]}
        ticker_and_name_list.append(dict_row)

    # names based on individual dict's keys
    column_names = list(ticker_and_name_list[0].keys())

    # initializing a dataframe with a list of dictionaries as rows
    ticker_and_name_df = pd.DataFrame(ticker_and_name_list, columns=column_names)
    ticker_and_name_df.to_csv(name_of_file)
def check_all_current_for_growth():

    full_path_to_income_dir = find_file(local_path_to_income_dir)
    directory_to_save_in = find_file(directory)

    file_winners = directory_to_save_in + "Growing revenue " + str(TOTAL_QUARTERS) + " quarters.txt"
    files = os.listdir(full_path_to_income_dir)

    for index, income_file in enumerate(files):
        path_to_file = full_path_to_income_dir + income_file
        split_list = income_file.split("_")
        ticker_name = split_list[0]

        if is_ticker_growing_rev(path_to_file):
            with open(file_winners, "a+") as writer:
                writer.write(ticker_name + "\n")

        if index % 100 == 0:
            print(index)
def get_csv_based_on_float(start, end=datetime.date(datetime.now())):

    final_dir_with_tickers = find_file(DIRECTORY_WITH_TICKERS)
    final_historical = find_file(historical_path)

    current_df = pd.read_csv(final_dir_with_tickers + types[0] + ".csv")
    indices_small_floats = current_df.index[
        current_df["Float"] < MAX_FLOAT].tolist()

    ticker_missing = []
    for index in indices_small_floats:
        ticker_missing.append(current_df["Ticker"][index])

    list_of_empty = []
    for index, ticker in enumerate(ticker_missing):
        file_name = final_historical + "/" + ticker + "_" + start + "_" + str(
            end) + ".csv"
        if os.path.exists(file_name):
            print("skip " + ticker)
            continue

        url = "https://fmpcloud.io/api/v3/historical-price-full/" + ticker + "?"

        params = {
            "from": start,
            "to": end,
            "datatype": "csv",
            "apikey": fmp_key
        }

        content_url = url + urllib.parse.urlencode(params)
        downloaded_csv = pd.read_csv(content_url, encoding="iso-8859-1")
        if len(downloaded_csv) < 2:
            print(ticker)
            list_of_empty.append(ticker)
        downloaded_csv.to_csv(file_name)
        if index % 100 == 0:
            print(index)

    with open("Empty historical " + str(start) + "_" + str(end),
              "a+") as writer:
        writer.write("\n".join(list_of_empty))
def Harvest_ALL_ticker_names():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)
    
    if not os.path.isdir(final_path_to_save):
        print("Directory " + DIRECTORY_TO_SAVE_IN + " does not exist")
        print("Please create one manually")
        return

    Harvest_all_stocks()
    for stock_type in ACTION_TYPE:
        Harvest_selected_stocks(stock_type)
def change_date_format():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)

    for action_path in ACTION_TYPE:
        path = final_path + action_path + ".csv"
        df = pd.read_csv(path)

        for index in range(len(df["Date"])):
            old_time_version = df["Date"][index]
            new_time_version = datetime.datetime.strptime(
                old_time_version, "%b %d, %Y").strftime("%Y-%m-%d")
            df["Date"][index] = new_time_version

        df.to_csv(path, index=False)
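The per-row loop above can also be expressed as one vectorized pandas call. A minimal alternative sketch (the helper name change_date_format_vectorized is hypothetical; it assumes the same "Date" column and the "%b %d, %Y" source format):

import pandas as pd

def change_date_format_vectorized(path):
    df = pd.read_csv(path)
    # convert e.g. "Jan 05, 2020" to "2020-01-05" in one pass
    df["Date"] = pd.to_datetime(df["Date"], format="%b %d, %Y").dt.strftime("%Y-%m-%d")
    df.to_csv(path, index=False)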
def get_leaders_for_one_day(date, symbols, metric):

    final_path_to_write = find_file(PATH_TO_WRITE_TO_LEADERS)

    url = "https://fmpcloud.io/api/v3/historical-price-full/"

    params = {
        "from": date,
        "to": date,
        "apikey": fmp_key,
    }

    sorted_list = []

    track_empty_results = []
    for index, ticker in enumerate(symbols):
        # url = url + ticker + "?"
        content = requests.get(url + ticker + "?", params=params)
        data = content.json()
        # empty results
        if len(data) == 0:
            track_empty_results.append(ticker)
            continue
        try:
            attribute_compared = data["historical"][0][metric]
        except (KeyError, IndexError):
            print(data)
            continue

        # keep the top TOP_ELEMENTS entries in a small sorted list
        # (ascending, so the smallest kept value is always at index 0)
        if len(sorted_list) < TOP_ELEMENTS:
            sorted_list.append([attribute_compared, ticker])
            sorted_list = sorted(sorted_list, key=lambda x: x[0])
        # if the new element is larger and the list is full, replace the smallest
        elif sorted_list[0][0] < attribute_compared:
            sorted_list[0] = [attribute_compared, ticker]
            sorted_list = sorted(sorted_list, key=lambda x: x[0])

        print("Ticker in a day: " + str(index) + " " + str(attribute_compared))

    sorted_list = [str(i[1]) for i in sorted_list]
    # insert date as index at first place
    sorted_list.insert(0, date)
    one_line = ","
    one_line = one_line.join(sorted_list)

    FILE_NAME = "Top " + TOP_ELEMENTS + " volume leaders.csv"
    with open(final_path_to_write + FILE_NAME, "a") as writer:
        writer.write(one_line + "\n")
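The loop above keeps the running top list by re-sorting it on every update. A minimal alternative sketch using the standard-library heapq module (the helper name keep_top_n is hypothetical; it assumes the same TOP_ELEMENTS constant and (value, ticker) pairs), where the smallest kept value always sits at heap[0]:

import heapq

def keep_top_n(pairs, n=TOP_ELEMENTS):
    # pairs: iterable of (value, ticker); heapq maintains a min-heap,
    # so heap[0] is the smallest of the values kept so far
    heap = []
    for value, ticker in pairs:
        if len(heap) < n:
            heapq.heappush(heap, (value, ticker))
        elif value > heap[0][0]:
            heapq.heapreplace(heap, (value, ticker))
    return sorted(heap)  # ascending by value, like sorted_list above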
def Get_ticker_dir_map():
    final_path_to_save = find_file(DIRECTORY_TO_SAVE_IN)

    # add directory path to their string
    current_path = os.getcwd() + "/" + final_path_to_save

    files = os.listdir(current_path)
    csv_files = [single_csv for single_csv in files if single_csv.endswith(".csv")]

    hash_map = {}
    for csv_file in csv_files:
        formatted_name = csv_file.replace(".csv", "")
        formatted_name = formatted_name.capitalize()
        hash_map[formatted_name] = current_path + csv_file

    return hash_map
def get_leaders_for_period(metric, start, end=datetime.date(datetime.now())):

    final_dir_with_tickers = find_file(DIRECTORY_WITH_TICKERS)

    current_df = pd.read_csv(final_dir_with_tickers + types[0] + ".csv")
    # get all tickers that were not delisted before this date
    delisted_df = pd.read_csv(final_dir_with_tickers + types[1] + ".csv")
    # ticker symbol changes, used to map old symbols to new ones
    changes_df = pd.read_csv(final_dir_with_tickers + types[2] + ".csv")

    business_days = get_working_days(start, end)

    # from earliest to latest
    for index, date in enumerate(business_days):
        todays_ticker_series = get_tickers_for_the_day(current_df, delisted_df,
                                                       changes_df, date)
        get_leaders_for_one_day(date, todays_ticker_series, metric)
        print("day " + str(index))
def find_peak_time(intra_file, stock_criteria, time_criteria, chart_title):

    if not os.path.exists(intra_file):
        print(intra_file)
        print("File does not exist.")
        return

    all_intra_df = pd.read_csv(intra_file)
    median_eod = all_intra_df["EOD"].median()
    print(median_eod)
    crit_name = stock_criteria[0]
    crit_val = stock_criteria[1]
    run_w_vol_df = all_intra_df[(all_intra_df[crit_name] == crit_val)
                                & (all_intra_df["EOD"] >= median_eod)].copy()

    run_w_vol_df[time_criteria] = [
        get_time_of_day(tmstp) for tmstp in run_w_vol_df[time_criteria]
    ]

    x_y_dict = {}
    # graph from 9:30 to 4pm as x axis
    for time in run_w_vol_df[time_criteria]:
        x_y_dict[str(time)] = x_y_dict.get(str(time), 0) + 1

    sorted_x_y_dict = collections.OrderedDict(sorted(x_y_dict.items()))

    y_vals = list(sorted_x_y_dict.values())
    x_vals = list(sorted_x_y_dict.keys())

    fig, ax = plt.subplots()
    ax.bar(x_vals, y_vals)

    fig.autofmt_xdate()
    ax.fmt_xdata = mdates.DateFormatter("%H:%M")
    plt.xlabel("Time between open and close")
    plt.ylabel("Number of occurences")

    plt.title(chart_title)
    plt.show()
    graph_path = find_file("Graphs")
    fig.savefig(graph_path + chart_title + ".png")

    pass
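The occurrence counting above can also be done with collections.Counter. A minimal sketch (the helper name count_peak_times is hypothetical; the input would be run_w_vol_df[time_criteria] from the function above):

import collections

def count_peak_times(times):
    # times: iterable of time-of-day values, e.g. run_w_vol_df[time_criteria]
    counts = collections.Counter(str(t) for t in times)
    return collections.OrderedDict(sorted(counts.items()))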
def get_float_for_current():
    final_path = find_file(DIRECTORY_TO_SAVE_IN)
    current_df = pd.read_csv(final_path + "current.csv")
    # create column if necessary
    if "Float" not in current_df:
        current_df["Float"] = 0

    for ticker_index in range(0, len(current_df), PER_REQUEST):
        td_fund = "https://api.tdameritrade.com/v1/instruments"

        ticker_window = current_df["Ticker"][ticker_index:ticker_index +
                                             PER_REQUEST]
        ticker_window = list(ticker_window)

        params = {
            "apikey": tda_key,
            "symbol": ticker_window,
            "projection": "fundamental",
        }

        content = requests.get(td_fund, params=params)
        data = content.json()

        # create ticker float dict from the response
        ticker_float_dict = {}
        for ticker in data:
            ticker_float_dict[ticker] = data[ticker]['fundamental'][
                'marketCapFloat']

        # map each float onto its ticker for this window of rows,
        # writing through .loc to avoid chained assignment
        window = current_df.index[ticker_index:ticker_index + PER_REQUEST]
        current_df.loc[window, "Float"] = current_df.loc[window, "Ticker"].map(
            ticker_float_dict)

    # manual verification
    print(current_df.tail(30))

    current_df.to_csv(final_path + "current.csv", index=False)
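If the ticker-to-float dictionary were accumulated across all of the PER_REQUEST batches instead of being rebuilt per window, the column could be filled with a single map call. A minimal sketch under that assumption (the helper name fill_float_column is hypothetical):

def fill_float_column(current_df, ticker_float_dict):
    # ticker_float_dict is assumed to cover every ticker in the frame;
    # tickers missing from the dict fall back to 0
    current_df["Float"] = current_df["Ticker"].map(ticker_float_dict).fillna(0)
    return current_df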
from Data_Access.Find_file_path import find_file
from Intraday_analysis_scripts import vf_linear_reg, runners_peak_time
from Intraday_fetch_scripts import get_intraday_data

# write here the programs to run
# csv files
intra_all_csv = find_file("Intra_All.csv")
intra_vol_csv = find_file("Intra_Volume.csv")
intra_hl_csv = find_file("Intra_Highs_Lows.csv")
new_vol_intra_csv = find_file("Intra_Vol_Float.csv")
vol_ratio_csv = find_file("Intra_vol_ratio_direc.csv")

# directories
intraday_vol_path_csv = find_file("Intraday_formated")
vf_w_float_path = find_file("VF_with_float")
vf_pure_path = find_file("VF_with_volume_only")
daily_period_path = find_file("Daily_period")

# stock_criteria = ["Direction", 1]
# time_criteria = "Time of high"
# chart_title = time_criteria + " of day for low float runners"
# runners_peak_time.find_peak_time(intra_all_csv, stock_criteria, time_criteria, chart_title)

# vf_linear_reg.get_vf_vs_real_every_period(intra_vol_csv, intra_hl_csv, vol_ratio_csv, vf_pure_path)
categories = ["1 min", "5 min", "15 min", "30 min", "60 min", "EOD"]
vf_linear_reg.vf_real_ratio_describe(vol_ratio_csv, categories)

# get_intraday_data.add_cols_to_formatted_dataset(daily_period_path, vol_ratio_csv, ['open', 'vwap', 'close'])
import os
import pandas as pd
import datetime
import urllib
import requests
import time

import sys
sys.path.append(
    os.getcwd())  # current directory must be root project directory
from Intraday_fetch_scripts.get_daily_vol_leaders import get_working_days
from Data_Access.Find_file_path import find_file
from api_keys import tda_key

historical_intraday_dir = "Intraday_formated"
historical_intraday_dir = find_file(historical_intraday_dir)

raw_folder_to_read = "Intraday_raw"
raw_folder_to_read = find_file(raw_folder_to_read)

intraday_path_formatted = historical_intraday_dir + "Intra_"
'''
Reading data from the intraday csv files collected in the Intraday_raw folder

the keys to writing these functions are:
1) I am capable of doing the same thing again next time with other data

this function creates a new csv OR adds new rows with the same columns to the file;
if I want to merge two csv files I can use another function for that

now the problem is that if i modify the script that i already have, i will just add