Example #1
def getData():
    sf.set_api_key('free')

    # Set the local directory where data-files are stored.
    # The dir will be created if it does not already exist.
    sf.set_data_dir('~/simfin_data/')

    # Load daily share-prices for all companies in USA.
    # The data is automatically downloaded if you don't have it already.
    df_prices = sf.load_shareprices(market='us', variant='daily')

    # Select all share-price rows for ticker MSFT and inspect the columns.
    df_msft = df_prices.loc['MSFT']

    print(df_msft.columns)

    list_of_stocks = ['AAPL', 'MSFT', 'CLDR', 'CRM', 'TSLA', 'NVDA', 'DAL']

    big_df = pd.DataFrame()
    for stock in list_of_stocks:
        temp_df = df_prices.loc[stock].tail(100)
        temp_df = temp_df[["SimFinId", "Close", "Open", "High", "Low"]]
        big_df = pd.concat([big_df, temp_df])

    return big_df
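
# A minimal usage sketch; getData() assumes `import simfin as sf` and
# `import pandas as pd` at the top of the module.
df = getData()  # last 100 trading days for each of the listed tickers
print(df.head())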
Example #2
def main():
    log.info("Started...")
    
    # Remove any previously downloaded SimFin data so the load starts fresh
    # (path and rmtree come from os.path and shutil).
    if path.exists('/home/scanlom/simfin_data/'):
        rmtree('/home/scanlom/simfin_data/')
    
    # Set your API-key for downloading data.
    # If the API-key is 'free' then you will get the free data,
    # otherwise you will get the data you have paid for.
    # See www.simfin.com for what data is free and how to buy more.
    set_api_key('free')
    
    # Set the local directory where data-files are stored.
    # The dir will be created if it does not already exist.
    set_data_dir('~/simfin_data/')

    rpt = report()
    rpt.add_string( simfin_load("income", load_income, simfin_income_by_ticker, delete_simfin_income_by_id, post_simfin_income) )
    rpt.add_string( simfin_load("balance", load_balance, simfin_balance_by_ticker, delete_simfin_balance_by_id, post_simfin_balance) )
    rpt.add_string( simfin_load("cashflow", load_cashflow, simfin_cashflow_by_ticker, delete_simfin_cashflow_by_id, post_simfin_cashflow) )
    subject = 'Blue Lion - Simfin Load - Financials'
    send_mail_html_self(subject, rpt.get_html())

    rpt2 = report()
    rpt2.add_string( simfin_load_ref_data() )
    rpt2.add_string( simfin_load_market_data() )
    rpt2.add_string( simfin_load_market_data_historical() )
    subject = 'Blue Lion - Simfin Load - Market Data'
    send_mail_html_self(subject, rpt2.get_html())

    log.info("Completed")
Example #3
    def __init__(self, which_API: str = 'yahoo', token: str = None) -> None:
        """Initializes a new instance of the DataAPI and connects to the specified API platform.

        Args:
            which_API (str, optional): currently three options
                                        - 'IEX': https://iexcloud.io/
                                        - 'SimFin': https://simfin.com/
                                        - 'yahoo': Yahoo Finance
            token (str, optional): token for the API. Defaults to None.
        """

        # Set the attributes depending on different API platform
        if which_API == "IEX":
            self.API = which_API
            self.token = token

        elif which_API == "SimFin":
            self.API = which_API

            if token is None:
                # If you are a SimFin+ user, save your key in the simfin_api_key
                # file; otherwise the API-key falls back to 'free'.
                self.token = simfin.load_api_key(path='~/simfin_api_key.txt',
                                                 default_key='free')
            else:
                self.token = token
                simfin.set_api_key(api_key=self.token)

            simfin.set_data_dir('~/simfin_data/')

        elif which_API == "yahoo":
            self.API = which_API

        self.base_url = self._base_url()
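
# A hypothetical instantiation (the class name DataAPI is taken from the docstring):
api_free = DataAPI(which_API='SimFin')  # key read from file, or 'free'
api_plus = DataAPI(which_API='SimFin', token='your-api-key')  # placeholder token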
    def __init__(self, API: str = "yahoo", token: str = None) -> None:
        """Initialize an instance of Robo_Data.

        Args:
            API (str, optional): name of the API. Defaults to "yahoo".
                            Two options:
                                - simfin API: simfin
                                - yahoo finance API: yahoo
            token (str, optional): API token; not needed for the free service. Defaults to None.
        """

        if API == "yahoo":
            self.API = API
            self.token = token
        elif API == "simfin":
            self.API = API
            self.token = token

            # Required settings before using the sf API; fall back to the
            # free key when no token is given.
            sf.set_api_key(api_key=self.token if self.token else 'free')
            sf.set_data_dir('~/simfin_data/')
        else:
            self.token = token
Example #5
def ingest():
    sf.set_api_key(os.getenv('API_KEY_SIMFIN'))

    # Set the local directory where data-files are stored.
    # The directory will be created if it does not already exist.
    sf.set_data_dir('~/simfin_data/')

    df = sf.load_shareprices(variant='daily', market='us')

    for y in range(2010, 2021):
        df_y = df.xs(slice(f'{y}-01-01', f'{y}-12-31'), level='Date', drop_level=False)
        df_y.to_csv(f'data/daily_simfin_{y}.csv')
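
# An equivalent year slice via pd.IndexSlice on the (Ticker, Date)
# MultiIndex, in case xs with a slice feels opaque (a sketch, assuming
# pandas is imported as pd):
idx = pd.IndexSlice
df = df.sort_index()  # label slicing needs a sorted MultiIndex
for y in range(2010, 2021):
    df_y = df.loc[idx[:, f'{y}-01-01':f'{y}-12-31'], :]
    df_y.to_csv(f'data/daily_simfin_{y}.csv')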
Example #6
    def __init__(self):

        # Set your API-key for downloading data.
        # If the API-key is 'free' then you will get the free data,
        # otherwise you will get the data you have paid for.
        # See www.simfin.com for what data is free and how to buy more.
        sf.set_api_key('free')

        # Set the local directory where data-files are stored.
        # The dir will be created if it does not already exist.
        sf.set_data_dir(RAW_DATA_PATH)

        self.report_name = 'SimFin Data Coverage Report'
    def __init__(self, dir: str = os.getcwd()):
        self._dir = dir

        # Load environment variables from a local .env file.
        dotenv_path = '.env'
        load_dotenv(dotenv_path)

        # Read the API-key from the environment rather than hard-coding it
        # (the variable name SIMFIN_API_KEY is an assumed convention).
        sf.set_api_key(os.getenv('SIMFIN_API_KEY', 'free'))
        sf.set_data_dir(os.path.join(self._dir, 'simfin'))

        self._industries = sf.load_industries()
        self._prices = sf.load_shareprices(refresh_days=0)
        self._balance = sf.load_balance(variant="quarterly")
        self._income = sf.load_income(variant="quarterly")
        self._companies = sf.load_companies()
Example #8
    def __init__(self):
        # Set your API-key for downloading data. This key gets the free data.
        simfin.set_api_key('free')

        # Set the local directory where data-files are stored.
        # The directory will be created if it does not already exist.
        simfin.set_data_dir('~/simfin_data/')

        # Download the data from the SimFin server and load into a Pandas DataFrame.
        df = simfin.load_companies(market='us')

        # Print the first rows of the data.
        print(df.head())


Example #9
def load_financial_data(path: str):

    sf.set_api_key('free')

    sf.set_data_dir(path)

    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')

    # Load all the industries that are available.
    df_industries = sf.load_industries()

    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')

    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')

    # Load the quarterly Cash-Flow Statements for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')

    return df_companies, df_industries, df_income, df_balance, df_cashflow
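
# A minimal usage sketch, assuming `import simfin as sf` at module level:
(df_companies, df_industries,
 df_income, df_balance, df_cashflow) = load_financial_data('~/simfin_data/')

print(df_companies.shape)
print(df_income.loc['MSFT'].head())  # quarterly income statements for MSFT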
Example #10
def load_shareprices(refresh_days=1,
                     simfin_api_key='free',
                     simfin_directory='simfin_data/'):

    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    # Used by all datasets
    shareprices_df = sf.load_shareprices(variant='daily',
                                         market='us',
                                         refresh_days=refresh_days)

    # Smooth the closing prices with a rolling 30-day mean per ticker,
    # then downsample to end-of-month values.
    shareprices_df = shareprices_df[['Close']].groupby('Ticker').rolling(
        30, min_periods=1).mean().reset_index(0, drop=True)
    shareprices_df = sf.resample(df=shareprices_df,
                                 rule='M',
                                 method=lambda x: x.last())

    return shareprices_df
        bigquery.SchemaField("symbol", "STRING"),
        bigquery.SchemaField("open", "FLOAT"),
        bigquery.SchemaField("high", "FLOAT"),
        bigquery.SchemaField("low", "FLOAT"),
        bigquery.SchemaField("close", "FLOAT"),
        bigquery.SchemaField("volume", "FLOAT"),
        bigquery.SchemaField("volume_weighted_price", "FLOAT"),
    ]

    full_table_id = get_full_table_id('{t}_{y}'.format(t=TABLE_ID_DAILY,
                                                       y=year))
    table = bigquery.Table(full_table_id, schema=schema)
    table = _bigquery_client.create_table(table)  # Make an API request.


import os

import simfin as sf

sf.set_api_key(os.getenv('API_KEY_SIMFIN'))

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

#df = sf.load_shareprices(variant='daily', market='us')

df_l = sf.load_shareprices(variant='latest', market='us')

# The index is a (Ticker, Date) MultiIndex, so index[0] is the ticker
# and index[1] the date.
for index, row in df_l.iterrows():
    print(index, index[0], index[1].date(), row['Close'])
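
# A plausible glue step (a sketch, not the original pipeline): flatten the
# MultiIndex into plain columns matching the BigQuery schema above.
# volume_weighted_price has no direct SimFin column, so it is omitted here.
records = (df_l.reset_index()
               .rename(columns={'Ticker': 'symbol', 'Open': 'open',
                                'High': 'high', 'Low': 'low',
                                'Close': 'close', 'Volume': 'volume'})
               [['symbol', 'open', 'high', 'low', 'close', 'volume']])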
Example #12
tickers = ['GOOG', 'MSFT', 'FB', 'AAPL', 'AMZN', 'DIS', 'NVDA', 'ADBE', 'JNJ']
# Narrow the list down while testing.
tickers = ['GOOG', 'MSFT', 'FB']

periods = ["q1", "q2", "q3", "q4"]
year_start = 2012
year_end = 2020
request_url = 'https://simfin.com/api/v2/companies/statements'

# variable to store the names of the columns
columns = []
# variable to store our data
output = []

#%%

sf.set_data_dir('./data2/')
sf.set_api_key(api_key=API_KEY)
data = sf.load(dataset='income',
               variant='annual',
               market='us',
               index=[TICKER, REPORT_DATE],
               refresh_days=0)

#%%

# If you don't have a SimFin+ subscription, you can only request data for
# single companies and one period at a time (with SimFin+, you can request
# multiple tickers and periods at once). A sketch follows the commented loop.
# for ticker in tickers:
#     # loop through years:
#     for year in range(year_start, year_end + 1):
#         # loop through periods
#         for period in periods:
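
# A sketch of the loop above against the v2 statements endpoint (parameter
# names follow the public SimFin web-API docs; treat them as assumptions):
import requests

for ticker in tickers:
    for year in range(year_start, year_end + 1):
        for period in periods:
            params = {
                'ticker': ticker,
                'statement': 'pl',  # profit & loss, i.e. the income statement
                'period': period,
                'fyear': year,
                'api-key': API_KEY,
            }
            data = requests.get(request_url, params=params).json()
            for report in data:
                if report.get('found'):
                    columns = report['columns']
                    output += report['data']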
Example #13
def setup_simfin():
    sf.set_api_key('free')
    sf.set_data_dir(os.path.join(ROOT_DIR, 'simfin_data'))

    def __load_share_data__(self):
        sf.set_data_dir('~/simfin_data/')
        # SimFin market codes are lowercase: 'us', not 'US'.
        self.df_prices = sf.load_shareprices(variant='daily', market='us')
Example #15
from simfin.names import *
import calendar
from datetime import timedelta


def getDate(d):
    if d.weekday() == calendar.SATURDAY:
        return d - timedelta(days=1)
    elif d.weekday() == calendar.SUNDAY:
        return d - timedelta(days=2)
    else:
        return d
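
# getDate shifts weekend dates back to the preceding Friday, which helps
# when aligning report dates with trading days. For example:
from datetime import date

print(getDate(date(2021, 5, 8)))   # Saturday -> 2021-05-07 (Friday)
print(getDate(date(2021, 5, 9)))   # Sunday   -> 2021-05-07 (Friday)
print(getDate(date(2021, 5, 10)))  # Monday   -> unchanged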


# Where the temporary data files should be stored.
sf.set_data_dir('pfad/zu/den/simfin/daten')
# Use the free variant of the API-key.
sf.set_api_key(api_key='free')

# The available datasets.
datasets = ['income', 'balance', 'cashflow']

df_markets = sf.load_markets()
# A list of all available markets.
market_list = df_markets.index.values.tolist()

df_list = list()
# Loop over all datasets ...
for ds in datasets:
    frames = list()
    # ... and over all markets; the loop body is continued in the sketch below.
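
# A hedged completion of the truncated loop above (assuming pandas as pd):
# load each dataset for every market and collect the frames.
for ds in datasets:
    frames = list()
    for market in market_list:
        try:
            # Not every dataset exists for every market, hence the guard.
            df = sf.load(dataset=ds, variant='annual', market=market)
            frames.append(df)
        except Exception:
            continue
    df_list.append(pd.concat(frames))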
import simfin as sf
from simfin.names import CLOSE, NET_INCOME, REVENUE,\
                         NET_PROFIT_MARGIN, SALES_GROWTH,\
                         ROA, ROE, TOTAL_EQUITY, TOTAL_ASSETS,\
                         REPORT_DATE, EARNINGS_GROWTH, SHARES_DILUTED,\
                         NET_INCOME_COMMON, FCF, NET_CASH_OPS, CAPEX, \
                         PSALES, PE, PFCF


# Set your API-key for downloading data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The dir will be created if it does not already exist.
sf.set_data_dir('C:/Users/user/Desktop/Systematic Trading/Strategies/MLStrategies/FundamentalDataPred/simfin_data/')

## Features
# Market Cap: OK with df_prices
# Enterprise Value: OK with df_prices and df_balance
# Trailing P/E: OK with df_income and df_prices
# Forward P/E: impossible to have
# PEG Ratio: OK with df_income and df_prices
# Price/Sales: OK with df_prices and df_income
# Price/Book: OK with df_prices and df_balance
# Enterprise Value/Revenue: OK with df_prices and df_balance and df_income
# Enterprise Value/EBITDA: OK with df_prices and df_balance and df_income
# Profit Margin: OK with df_income
# Operating Margin: OK with df_income
# Return on Assets: OK with df_income and df_balance
# Return on Equity: OK with df_income and df_balance
# @author: ArmelFabrice

import simfin as sf
import pandas as pd
from datetime import datetime
import numpy as np
import ta

# Set your API-key for downloading data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The dir will be created if it does not already exist.
sf.set_data_dir('YourSimDataDirPath')

# Data for USA.
market = 'us'

# Daily Share-Prices.
df_prices = sf.load_shareprices(variant='daily', market=market)
        
# Obtain S&P 500 tickers
import urllib.request
from html_table_parser import HTMLTableParser

url_snp500 = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

def obtain_parse_wiki_stocks_sp500(url):
  """Download and parse the Wikipedia list of S&P500 
Example #18
def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):

    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')

    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')

    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    if dataset == 'general':

        # Load Data from Simfin
        income_df = sf.load_income(variant='ttm',
                                   market='us',
                                   refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_quarterly_df = sf.load_income(variant='quarterly',
                                             market='us',
                                             refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'general_income.csv')

        balance_df = sf.load_balance(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_quarterly_df = sf.load_balance(variant='quarterly',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'general_balance.csv')

        cashflow_df = sf.load_cashflow(variant='ttm',
                                       market='us',
                                       refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_quarterly_df = sf.load_cashflow(variant='quarterly',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_quarterly_df = cashflow_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'general_cashflow.csv')

        derived_df = sf.load_derived(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }

        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)

        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterly_df,
            **cache_args)

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns, growth_signal_df.columns, fin_signal_df.
                columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'general'

    elif dataset == 'banks':

        # Load Data from Simfin
        income_df = sf.load_income_banks(variant='ttm',
                                         market='us',
                                         refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'banks_income.csv')

        balance_df = sf.load_balance_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'banks_balance.csv')

        cashflow_df = sf.load_cashflow_banks(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'banks_cashflow.csv')

        derived_df = sf.load_derived_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'banks'

    elif dataset == 'insurance':

        # Load Data from Simfin
        income_df = sf.load_income_insurance(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'insurance_income.csv')

        balance_df = sf.load_balance_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'insurance_balance.csv')

        cashflow_df = sf.load_cashflow_insurance(variant='ttm',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'insurance_cashflow.csv')

        derived_df = sf.load_derived_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'insurance'

    # Drop columns with more than a (1 - thresh) fraction of NaN values.
    fundamental_df = fundamental_df.dropna(thresh=int(thresh *
                                                      len(fundamental_df)),
                                           axis=1)

    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]

    # Replace the Report Date with the Publish Date, because the Publish Date
    # is when the fundamentals become known to the public.
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])

    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # General
    # Clean Up
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ],
                 axis=1)

    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]

        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    else:
        df = df.dropna(
            subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)

    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]

    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)

    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])

    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]

    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)

    df = encoder.transform(df)

    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])

    return df
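
# A sketch of wiring this together with load_shareprices from Example #10
# (assuming both helpers live in the same module):
shareprices_df = load_shareprices(refresh_days=1)
df_general = load_dataset(dataset='general', shareprices_df=shareprices_df)
print(df_general.shape)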
Example #19
    def __init__(self, key):
        sf.set_data_dir('~/stock_data/')
        # Note: the key argument is unused here; the free API-key is set.
        sf.set_api_key(api_key='free')
Example #20
#-----------------------------------Set up-----------------------------
#import packages for data analysis
import pandas as pd
import matplotlib as plt
import seaborn as sns
import numpy as np
# Import the main functionality from the SimFin Python API.
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

# Set the local directory where data-files are stored.
sf.set_data_dir(
    'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone/python code/simfin_data/'
)

# Set up API key
sf.set_api_key(api_key='free')

# Set plotting style
sns.set_style("whitegrid")

#--------------------------Load dataframe-----------------------------
df_prices = sf.load_shareprices(variant='daily', market='us')

#--------------------------Add Columns-----------------------------
# Simple way: the ratio of raw close to adjusted close reflects
# dividend and split adjustments.
df2 = pd.DataFrame()
df2[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE]
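
# A common next column (a sketch using the same names): daily returns per
# ticker, computed from the dividend/split-adjusted close.
df2['Return'] = df_prices[ADJ_CLOSE].groupby('Ticker').pct_change()
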
def get_book(marketvalue, data_source=None):
    """
    Get fundamental data from simfin, Finnhub.io or ychart.
    simfin:
    https://github.com/SimFin/simfin-tutorials
    Finnhub:
    https://finnhub.io/docs/api
    https://github.com/Finnhub-Stock-API/finnhub-python
    ychart:
    https://ycharts.com/dashboard/
    """
    if data_source == 'simfin':
        books = {}
        books_miss = []

        final_tickers = [i for i in marketvalue]

        sf.set_data_dir(os.getcwd())
        balance = sf.load_balance(variant='quarterly', market='us')
        required_columns = [
            'Total Assets', 'Total Liabilities', 'Total Equity'
        ]

        for ticker in final_tickers:
            try:
                ticker_book = balance.loc[ticker][required_columns]
                # rename() returns a new frame, so the result must be kept.
                ticker_book = ticker_book.rename(
                    columns={'Total Equity': '{}_book'.format(ticker)})
                books.update({ticker: ticker_book})
            except KeyError:
                books_miss.append(ticker)
        return books, books_miss

    elif data_source == 'finnhub':
        # Read the API-key from the environment rather than hard-coding it
        # (the variable name FINNHUB_API_KEY is an assumed convention).
        api = os.getenv('FINNHUB_API_KEY')
        finnhub_client = finnhub.Client(api_key=api)
        books = {}
        books_miss = []

        if isinstance(marketvalue, dict):
            final_tickers = [i for i in marketvalue]
        elif isinstance(marketvalue, list):
            final_tickers = marketvalue.copy()

        for ticker in final_tickers:
            data = finnhub_client.financials_reported(symbol=ticker,
                                                      freq='quarterly')['data']
            if len(data) != 0:
                books.update({ticker: data})
            else:
                books_miss.append(ticker)
        return books, books_miss

    elif data_source == 'ychart':
        book = {}
        headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/85.0.4183.83 Safari/537.36"
        }
        error = {}
        # Initialize the accumulators once, outside the ticker loop, so
        # misses and errors are not reset on every iteration.
        book_miss = []
        index_error = []
        value_error = []

        for ticker in marketvalue:
            date_text = []
            asset_text = []
            liabilities_text = []
            equity_text = []

            try:
                for page in range(1, 3):
                    url = 'https://ycharts.com/financials/{}/balance_sheet/quarterly/'.format(
                        ticker) + str(page)
                    data = requests.get(url, headers=headers)
                    soup = BeautifulSoup(data.text, 'html.parser')
                    if len(soup) != 0:
                        table = soup.findAll('table', {'id': 'report'})[0]
                        # Get date data:
                        date_sub = soup_format(table, 'ychart', 'date')
                        date_text.extend(date_sub)

                        # Get asset data
                        asset_sub = soup_format(table, 'ychart', 'asset')
                        asset_text.extend(asset_sub)

                        # Get liabilities
                        liabilities_sub = soup_format(table, 'ychart',
                                                      'liabilities')
                        liabilities_text.extend(liabilities_sub)

                        # Get equity
                        equity_sub = soup_format(table, 'ychart', 'equity')
                        equity_text.extend(equity_sub)

                    else:
                        book_miss.append(ticker)
                        continue
            except IndexError:
                index_error.append(ticker)

            date_text = pd.Series(date_text)
            asset_text = pd.Series(format_unit(asset_text))
            liabilities_text = pd.Series(format_unit(liabilities_text))
            equity_text = pd.Series(format_unit(equity_text))

            try:
                df = pd.concat(
                    [date_text, asset_text, liabilities_text, equity_text],
                    axis=1)
                df.rename(columns={
                    0: 'date',
                    1: 'Total Assets',
                    2: 'Total Liabilities',
                    3: 'Total Equity'
                },
                          inplace=True)
                df['date'] = df['date'].astype('datetime64[ns]')
                df.set_index('date', inplace=True)
                df.sort_index(axis=0, inplace=True)
                book.update({ticker: df})
            except ValueError:
                value_error.append(ticker)

            error.update({
                'IndexError': index_error,
                'ValueError': value_error
            })

        return book, book_miss, error
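
# The three branches return differently shaped tuples; a usage sketch for
# the simfin branch:
books, books_miss = get_book(['AAPL', 'MSFT'], data_source='simfin')
for ticker, frame in books.items():
    print(ticker, frame.tail(1))
print('No balance-sheet data for:', books_miss)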