Exemple #1
0
def test_load():
    """Test simfin.bulk.load()"""
    # Exercise sf.load() once for every available combination of
    # dataset, variant and market.
    for (dataset, variant, market) in iter_all_datasets():
        load_args = dict(dataset=dataset,
                         variant=variant,
                         market=market,
                         refresh_days=refresh_days)
        sf.load(**load_args)
Exemple #2
0
def load_all_datasets(*args, **kwargs):
    """
    Download every dataset/variant/market combination and collect the
    results in a nested dict for fast lookup by name.

    All positional and keyword arguments are forwarded to `sf.load()`
    (except dataset, variant and market, which this function supplies).
    For instance, passing `refresh_days=0` forces every dataset to be
    downloaded again, which is handy for testing purposes.

    :return:
        Nested dict `dfs` keyed as dfs[dataset][variant][market].

        Example: dfs['income']['annual']['us'] holds the annual
                 Income Statements for the US market.
    """

    # Refresh the list of markets known to the SimFin server.
    _load_market_list()

    # Arbitrarily-nestable dict: missing keys materialize automatically.
    dfs = defaultdict(lambda: defaultdict(dict))

    # Try every possible combination of dataset, variant and market.
    for dataset, variant, market in iter_all_datasets():
        try:
            # Forward any extra arguments straight to sf.load().
            df = sf.load(*args,
                         dataset=dataset,
                         variant=variant,
                         market=market,
                         **kwargs)
        except Exception as e:
            # Loading can fail for many reasons (e.g. invalid API key,
            # another server error, or no internet connection).
            # Report the problem and keep going; None marks this
            # combination as one that could not be loaded.
            print(e, file=sys.stderr)
            df = None

        # Record the result (DataFrame or None) in the nested dict.
        dfs[dataset][variant][market] = df

    # Return the defaultdict as-is; converting it to a plain dict is
    # fiddly and callers do not need the distinction.
    return dfs
Exemple #3
0
# command line statement to start local server
# python -m SimpleHTTPServer 8857
# NOTE(review): SimpleHTTPServer is the Python 2 module name; on Python 3
# the equivalent is `python -m http.server 8857` — confirm target version.

# import data
import pandas as pd
import simfin as sf

# set up api connection to simfin
# 'free' uses SimFin's free API tier; bulk files are cached locally.
sf.set_data_dir('~/simfin_data/')
sf.set_api_key(api_key='free')

# download share prices, cash flow statements, and company info. Select columns of interest
# Daily US share prices, re-indexed by (Ticker, Date).
shares = sf.load(dataset='shareprices', variant='daily', market='us')
shares = shares.set_index(['Ticker', 'Date'])
shares = shares[['Close', 'Shares Outstanding', 'Volume', 'Dividend']]

# Annual US cash-flow statements, re-indexed by (Ticker, Fiscal Year).
cash = sf.load(dataset='cashflow', variant='annual', market='us')
cash = cash.set_index(['Ticker', 'Fiscal Year'])
cash = cash[[
    'Shares (Basic)', 'Net Income/Starting Line',
    'Net Cash from Operating Activities', 'Net Change in Cash'
]]

# Company metadata (indexed by ticker) and the industry lookup table.
companies = sf.load(dataset='companies', market='us', index='Ticker')
industries = sf.load(dataset='industries')

# perform basic transformations
# 16 polarizing stock tickers selected for mock portfolio
bchips = [
    'GOOG', 'DIS', 'AAPL', 'BSX', 'CAT', 'CBS', 'FOX', 'LYFT', 'MDT', 'MDB',
    'MS', 'NEO', 'NVDA', 'PFG', 'STX', 'TDOC', 'TSLA', 'V'
Exemple #4
0
# Range of fiscal years to request from the SimFin web API (inclusive).
year_start = 2012
year_end = 2020
# Endpoint of the SimFin v2 statements REST API.
request_url = 'https://simfin.com/api/v2/companies/statements'

# variable to store the names of the columns
columns = []
# variable to store our data
output = []

#%%

# Cache bulk downloads under ./data2/ and authenticate.
# NOTE(review): API_KEY, TICKER and REPORT_DATE are not defined in this
# snippet — presumably API_KEY is assigned earlier and TICKER/REPORT_DATE
# come from simfin.names; verify against the full script.
sf.set_data_dir('./data2/')
sf.set_api_key(api_key=API_KEY)
# Annual US income statements indexed by (ticker, report date);
# refresh_days=0 forces a fresh download on every run.
data = sf.load(dataset='income',
               variant='annual',
               market='us',
               index=[TICKER, REPORT_DATE],
               refresh_days=0)

#%%

# if you don't have a SimFin+ subscription, you can only request data for single companies and one period at a time (with SimFin+, you can request multiple tickers and periods at once)
# for ticker in tickers:
#     # loop through years:
#     for year in range(year_start, year_end + 1):
#         # loop through periods
#         for period in periods:

#             # define the parameters for the query
#             parameters = {"statement": "pl", "ticker": ticker, "period": period, "fyear": year, "api-key": API_KEY}
#             # make the request
Exemple #5
0
# The dataset names to download.
datasets = ['income', 'balance', 'cashflow']

df_markets = sf.load_markets()
# A list of all available markets.
market_list = df_markets.index.values.tolist()

df_list = list()
# Iterate over all datasets.
for ds in datasets:
    frames = list()
    # ... and over all markets.
    for mkt in market_list:
        # Load the dataset for the current market.
        # NOTE(review): SIMFIN_ID, REPORT_DATE and PUBLISH_DATE are not
        # defined in this snippet — presumably imported from simfin.names.
        frames.append(sf.load(dataset=ds, variant='annual',
                              market=mkt, index=[SIMFIN_ID, REPORT_DATE],
                              parse_dates=[REPORT_DATE, PUBLISH_DATE]))
    # One concatenated DataFrame per dataset, spanning all markets.
    df_list.append(pd.concat(frames))

companies_list = list()
for mkt in market_list:
    # Load all companies for this market.
    companies_list.append(sf.load_companies(index=SIMFIN_ID, market=mkt))

df_companies = pd.concat(companies_list)
# Mirror the index into a regular column for later merging.
df_companies[SIMFIN_ID] = df_companies.index

# Load all industries.
df_industries = sf.load_industries()

# Join the individual pieces together side by side.
Exemple #6
0
    def get_data_of_all_assets(self, source, verbose=True):
        """Gather the data for every known asset, from the web or from disk.

        :param source: 'web' to download quarterly fundamentals and daily
            share prices via simfin, format each ticker's data and save it
            to CSV — NOTE: this branch ends with ``sys.exit()`` and never
            returns; 'local' to read the previously saved CSVs instead.
        :param verbose: if True, print progress information.
        :return: dict mapping ticker -> DataFrame. Only reached when
            ``source == 'local'``; the 'web' and invalid-argument branches
            terminate the process.
        """

        if source == 'web':

            # verify the website has the same number of assets as local (or local has 0)
            if verbose:
                print(
                    '\nVerifying the website has the same number of assets as local (or local has 0) ...'
                )
            assets_on_web = self.get_all_asset_names_and_locations('web')
            assets_on_local = self.get_all_asset_names_and_locations('local')
            if len(set(assets_on_local)) == 0:
                pass  # this IF block is used to check if local has 0 before checking if their equal
            elif len(set(assets_on_web)) != len(set(assets_on_local)):
                # Count mismatch: report the difference in both directions,
                # then abort the whole run.
                print(
                    'Number of Assets on Web does not equal number of assets on local. Aborting data gathering.'
                )
                print('%d assets on the web, %d asset on local' %
                      (len(set(assets_on_web)), len(set(assets_on_local))))
                print('Asset(s) on web but not on local:')
                for aow in assets_on_web.keys():
                    if aow not in assets_on_local.keys():
                        print(aow)
                print('Asset(s) on local but not on web')
                for ail in assets_on_local.keys():
                    if ail not in assets_on_web.keys():
                        print(ail)
                sys.exit()
            if verbose:
                print('Verification complete.\n')

            # get the data for each asset from online
            if verbose:
                print('\nDownloading the data for each asset from the web ...')
                start_time = datetime.now()
            # Quarterly fundamentals plus daily share prices, US market.
            income_df = sf.load('income', variant='quarterly', market='us')
            balance_df = sf.load('balance', variant='quarterly', market='us')
            cashflow_df = sf.load('cashflow', variant='quarterly', market='us')
            shareprices_df = sf.load('shareprices',
                                     variant='daily',
                                     market='us')
            if verbose:
                end_time = datetime.now()
                print('Downloads complete. Duration: %.1f minutes\n' %
                      ((end_time - start_time).total_seconds() / 60.0))

            # format the data
            if verbose:
                print('\nFormatting the data for each asset ...')
                start_time = datetime.now()
            n = len(assets_on_web.keys())
            bp = BlockPrinter()
            assets = {}
            for i, (ticker, url) in enumerate(assets_on_web.items()):
                # Reuse the known local path when one exists; otherwise
                # derive a fresh CSV path under FMTD_DATA_PATH.
                filepath = assets_on_local[ticker] if assets_on_local != {} else \
                 os.path.join(FMTD_DATA_PATH, ticker+'.csv')
                # Slice each bulk DataFrame down to this ticker's rows and
                # hand them to the single-asset formatter (which also saves).
                assets[ticker] = self.get_data_of_1_asset(
                    ticker,
                    income_df[income_df['Ticker'] == ticker],
                    balance_df[balance_df['Ticker'] == ticker],
                    cashflow_df[cashflow_df['Ticker'] == ticker],
                    shareprices_df[shareprices_df['Ticker'] == ticker],
                    save=True,
                    filepath=filepath,
                    append=True)
                if verbose:
                    bp.print(
                        'Ticker %s:\tasset %d out of %d, %.1f %% complete.' %
                        (ticker, (i + 1), n, 100 * (i + 1) / n))
            if verbose:
                end_time = datetime.now()
                print('Formatting complete. Duration: %.1f minutes\n' %
                      ((end_time - start_time).total_seconds() / 60.0))
            # NOTE(review): the process exits here, so 'web' mode never
            # returns the `assets` dict built above — confirm intentional.
            sys.exit()

        elif source == 'local':

            if verbose:
                print(
                    '\nGetting the data for each asset from the local file system ...'
                )
            assets_on_local = self.get_all_asset_names_and_locations('local')
            assets = {}
            # One formatted CSV per ticker; first column is the index.
            for ticker, filepath in assets_on_local.items():
                assets[ticker] = pd.read_csv(filepath, index_col=[0])
            if verbose:
                print('Data aquired.\n')

        else:
            # Unknown `source` value: explain and terminate.
            print(
                'Invalid Argument: input into get_all_asset_names_and_location'
            )
            print('Valid values: [\"web\", \"local\"]')
            print('Argument: \"source\" = %s' % source)
            sys.exit()

        return assets
Exemple #7
0
# pip install simfin
# https://simfin.com/

import pandas as pd
import simfin as sf
import matplotlib.pyplot as plt
from simfin.names import TICKER, REPORT_DATE, PUBLISH_DATE, REVENUE

# Configure the SimFin client: free API tier, local download cache.
sf.set_api_key(api_key='free')
sf.set_data_dir('~/simfin_data/')

# Annual income statements for the US market, indexed by
# (ticker, report date) with the date columns parsed as datetimes.
df_income = sf.load(dataset='income',
                    variant='annual',
                    market='us',
                    index=[TICKER, REPORT_DATE],
                    parse_dates=[REPORT_DATE, PUBLISH_DATE])

# Slice out Microsoft's rows and inspect them.
msft = df_income.loc['MSFT']
print(msft.head())
# Print the column names on one semicolon-separated line.
print(";".join(msft))

# Plot Microsoft's revenue over time and block until the window closes.
msft[REVENUE].plot(grid=True)
plt.show(block=True)
Exemple #8
0
import dash
import dash_html_components as html
import dash_core_components as dcc
import pandas as pd
import simfin as sf
from simfin.names import *
import dash_table
from dash.dependencies import Output, Input, State

# Browser tab title for the Dash app.
tabtitle = 'Financial Statements'
sf.set_data_dir('~/simfin_data/')
# SECURITY NOTE(review): API key hard-coded in source — it should be read
# from an environment variable or a config file instead of being committed.
api_key = "ZxGEGRnaTpxMF0pbGQ3JLThgqY2HBL17"

# Annual US income statements indexed by ticker
# (TICKER comes from the star-import of simfin.names).
df_income = sf.load(dataset='income',
                    variant='annual',
                    market='us',
                    index=[TICKER])
# Drop columns not displayed in the app.
# NOTE(review): 'Abnormal Gains (Losses)' is listed twice; pandas' drop
# tolerates the duplicate label, but one entry could be removed.
df_income = df_income.drop([
    'Currency', 'SimFinId', 'Fiscal Period', 'Publish Date', 'Shares (Basic)',
    'Abnormal Gains (Losses)', 'Abnormal Gains (Losses)',
    'Net Extraordinary Gains (Losses)',
    'Income (Loss) from Continuing Operations', 'Net Income (Common)',
    'Pretax Income (Loss), Adj.', 'Report Date'
],
                           axis=1)
# Replace missing values with 0 for display purposes.
df_income = df_income.fillna(0)

df_income[[
    'Shares (Diluted)', 'Revenue', 'Cost of Revenue', 'Gross Profit',
    'Operating Expenses', 'Selling, General & Administrative',
    'Research & Development', 'Operating Income (Loss)',