def test_load():
    """Test simfin.bulk.load()"""
    for dataset, variant, market in iter_all_datasets():
        sf.load(dataset=dataset, variant=variant, market=market,
                refresh_days=refresh_days)
def load_all_datasets(*args, **kwargs):
    """
    Load all datasets and variants. Create and return a nested
    dict for fast lookup given dataset, variant and market names.

    Accepts the same args as the `sf.load()` function, except for
    dataset, variant and market. For example, `refresh_days` can be
    set to 0 to ensure all datasets are downloaded again, which is
    useful for testing purposes.

    :return:
        Nested dict `dfs` with all datasets, variants and markets.

        Example: dfs['income']['annual']['us'] is the dataset for
        annual Income Statements for the US market.
    """

    # Update the list of available markets from the SimFin server.
    _load_market_list()

    # Initialize a dict that can be nested to any depth.
    dfs = defaultdict(lambda: defaultdict(dict))

    # For all possible datasets, variants and markets.
    for dataset, variant, market in iter_all_datasets():
        try:
            # Load the dataset and variant as a Pandas DataFrame.
            df = sf.load(dataset=dataset, variant=variant, market=market,
                         *args, **kwargs)

            # Add the Pandas DataFrame to the nested dict.
            dfs[dataset][variant][market] = df
        except Exception as e:
            # Exceptions can occur e.g. if the API key is invalid, or if
            # there is another server error, or if there is no internet
            # connection. Print the exception and continue.
            print(e, file=sys.stderr)

            # Set the Pandas DataFrame to None in the nested dict,
            # to indicate that it could not be loaded.
            dfs[dataset][variant][market] = None

    # Return the nested dict. It is a bit tricky to convert the
    # defaultdict to a normal dict, and it is not really needed,
    # so just return the defaultdict as it is.
    return dfs
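# A minimal usage sketch of load_all_datasets() (assumption: sf.set_api_key()
# and sf.set_data_dir() have already been called, as in the other snippets).
dfs = load_all_datasets(refresh_days=30)

# Fast lookup by dataset, variant and market, as described in the docstring.
df_income = dfs['income']['annual']['us']

# Datasets that failed to load are stored as None, so check before use.
if df_income is not None:
    print(df_income.head())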
# command line statement to start local server
# python -m SimpleHTTPServer 8857

# import data
import pandas as pd
import simfin as sf

# set up api connection to simfin
sf.set_data_dir('~/simfin_data/')
sf.set_api_key(api_key='free')

# download share prices, cash flow statements, and company info.
# Select columns of interest.
shares = sf.load(dataset='shareprices', variant='daily', market='us')
shares = shares.set_index(['Ticker', 'Date'])
shares = shares[['Close', 'Shares Outstanding', 'Volume', 'Dividend']]

cash = sf.load(dataset='cashflow', variant='annual', market='us')
cash = cash.set_index(['Ticker', 'Fiscal Year'])
cash = cash[[
    'Shares (Basic)', 'Net Income/Starting Line',
    'Net Cash from Operating Activities', 'Net Change in Cash'
]]

companies = sf.load(dataset='companies', market='us', index='Ticker')
industries = sf.load(dataset='industries')

# perform basic transformations
# 18 polarizing stock tickers selected for mock portfolio
bchips = [
    'GOOG', 'DIS', 'AAPL', 'BSX', 'CAT', 'CBS', 'FOX', 'LYFT', 'MDT',
    'MDB', 'MS', 'NEO', 'NVDA', 'PFG', 'STX', 'TDOC', 'TSLA', 'V'
]
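# Hypothetical follow-up sketch: restrict the daily share prices to the mock
# portfolio and derive a market-cap column. The column names come from the
# selection above; 'MarketCap' is an illustrative name, not a SimFin column.
portfolio = shares[shares.index.get_level_values('Ticker').isin(bchips)]
portfolio = portfolio.assign(
    MarketCap=portfolio['Close'] * portfolio['Shares Outstanding'])
print(portfolio.head())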
year_start = 2012
year_end = 2020
request_url = 'https://simfin.com/api/v2/companies/statements'

# variable to store the names of the columns
columns = []
# variable to store our data
output = []

#%%
sf.set_data_dir('./data2/')
sf.set_api_key(api_key=API_KEY)

data = sf.load(dataset='income', variant='annual', market='us',
               index=[TICKER, REPORT_DATE], refresh_days=0)

#%%
# If you don't have a SimFin+ subscription, you can only request data for
# single companies and one period at a time (with SimFin+, you can request
# multiple tickers and periods at once).
# for ticker in tickers:
#     # loop through years:
#     for year in range(year_start, year_end + 1):
#         # loop through periods
#         for period in periods:
#             # define the parameters for the query
#             parameters = {"statement": "pl", "ticker": ticker,
#                           "period": period, "fyear": year,
#                           "api-key": API_KEY}
#             # make the request
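# A minimal sketch of the per-company loop hinted at above, using the
# `requests` library. Assumptions: `tickers` and `periods` (e.g.
# ['q1', 'q2', 'q3', 'q4']) are defined elsewhere, and the response is a
# list whose first element holds 'columns' and 'data', as in SimFin's own
# Web-API examples.
import requests

for ticker in tickers:
    for year in range(year_start, year_end + 1):
        for period in periods:
            parameters = {"statement": "pl", "ticker": ticker,
                          "period": period, "fyear": year,
                          "api-key": API_KEY}
            response = requests.get(request_url, params=parameters)
            data = response.json()
            # Collect the column names once and accumulate the rows.
            if data and 'data' in data[0]:
                columns = data[0]['columns']
                output += data[0]['data']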
# The possible datasets.
datasets = ['income', 'balance', 'cashflow']

df_markets = sf.load_markets()
# A list of all available markets.
market_list = df_markets.index.values.tolist()

df_list = list()
# Iterate over all datasets.
for ds in datasets:
    frames = list()
    # Iterate over all markets.
    for mkt in market_list:
        # Load the dataset for the current market.
        frames.append(sf.load(dataset=ds, variant='annual', market=mkt,
                              index=[SIMFIN_ID, REPORT_DATE],
                              parse_dates=[REPORT_DATE, PUBLISH_DATE]))
    df_list.append(pd.concat(frames))

companies_list = list()
for mkt in market_list:
    # Load all companies.
    companies_list.append(sf.load_companies(index=SIMFIN_ID, market=mkt))
df_companies = pd.concat(companies_list)
df_companies[SIMFIN_ID] = df_companies.index

# Load all industries.
df_industries = sf.load_industries()

# Join the individual pieces side by side.
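# A sketch of that join (assumptions: INDUSTRY_ID comes from simfin.names,
# the statement frames in df_list share the (SIMFIN_ID, REPORT_DATE) index,
# and pandas matches df_companies' single index against the SimFinId level;
# duplicate columns from the three statement frames are left as-is).
from simfin.names import INDUSTRY_ID

df_all = pd.concat(df_list, axis=1)
df_all = df_all.join(df_companies.drop(columns=[SIMFIN_ID]),
                     rsuffix='_company')
df_all = df_all.join(df_industries, on=INDUSTRY_ID)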
def get_data_of_all_assets(self, source, verbose=True):
    if source == 'web':
        # Verify the website has the same number of assets as local
        # (or local has 0).
        if verbose:
            print('\nVerifying the website has the same number of assets '
                  'as local (or local has 0) ...')
        assets_on_web = self.get_all_asset_names_and_locations('web')
        assets_on_local = self.get_all_asset_names_and_locations('local')
        if len(set(assets_on_local)) == 0:
            # Local has 0 assets, so skip the equality check below.
            pass
        elif len(set(assets_on_web)) != len(set(assets_on_local)):
            print('Number of assets on web does not equal number of '
                  'assets on local. Aborting data gathering.')
            print('%d assets on the web, %d assets on local'
                  % (len(set(assets_on_web)), len(set(assets_on_local))))
            print('Asset(s) on web but not on local:')
            for aow in assets_on_web.keys():
                if aow not in assets_on_local.keys():
                    print(aow)
            print('Asset(s) on local but not on web:')
            for ail in assets_on_local.keys():
                if ail not in assets_on_web.keys():
                    print(ail)
            sys.exit()
        if verbose:
            print('Verification complete.\n')

        # Get the data for each asset from online.
        if verbose:
            print('\nDownloading the data for each asset from the web ...')
            start_time = datetime.now()
        income_df = sf.load('income', variant='quarterly', market='us')
        balance_df = sf.load('balance', variant='quarterly', market='us')
        cashflow_df = sf.load('cashflow', variant='quarterly', market='us')
        shareprices_df = sf.load('shareprices', variant='daily', market='us')
        if verbose:
            end_time = datetime.now()
            print('Downloads complete. Duration: %.1f minutes\n'
                  % ((end_time - start_time).total_seconds() / 60.0))

        # Format the data.
        if verbose:
            print('\nFormatting the data for each asset ...')
            start_time = datetime.now()
        n = len(assets_on_web.keys())
        bp = BlockPrinter()
        assets = {}
        for i, (ticker, url) in enumerate(assets_on_web.items()):
            filepath = assets_on_local[ticker] if assets_on_local != {} else \
                os.path.join(FMTD_DATA_PATH, ticker + '.csv')
            assets[ticker] = self.get_data_of_1_asset(
                ticker,
                income_df[income_df['Ticker'] == ticker],
                balance_df[balance_df['Ticker'] == ticker],
                cashflow_df[cashflow_df['Ticker'] == ticker],
                shareprices_df[shareprices_df['Ticker'] == ticker],
                save=True, filepath=filepath, append=True)
            if verbose:
                bp.print('Ticker %s:\tasset %d out of %d, %.1f %% complete.'
                         % (ticker, (i + 1), n, 100 * (i + 1) / n))
        if verbose:
            end_time = datetime.now()
            print('Formatting complete. Duration: %.1f minutes\n'
                  % ((end_time - start_time).total_seconds() / 60.0))
        sys.exit()

    elif source == 'local':
        if verbose:
            print('\nGetting the data for each asset from the local '
                  'file system ...')
        assets_on_local = self.get_all_asset_names_and_locations('local')
        assets = {}
        for ticker, filepath in assets_on_local.items():
            assets[ticker] = pd.read_csv(filepath, index_col=[0])
        if verbose:
            print('Data acquired.\n')

    else:
        print('Invalid Argument: input into get_data_of_all_assets')
        print('Valid values: ["web", "local"]')
        print('Argument: "source" = %s' % source)
        sys.exit()

    return assets
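# Hypothetical call site for the method above. The enclosing class is not
# shown in this excerpt, so the name `AssetManager` is an assumption.
if __name__ == '__main__':
    manager = AssetManager()
    assets = manager.get_data_of_all_assets(source='local', verbose=True)
    print('Loaded %d assets.' % len(assets))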
# pip install simfin
# https://simfin.com/
import pandas as pd
import simfin as sf
import matplotlib.pyplot as plt
from simfin.names import TICKER, REPORT_DATE, PUBLISH_DATE, REVENUE

sf.set_api_key(api_key='free')
sf.set_data_dir('~/simfin_data/')

df1 = sf.load(dataset='income', variant='annual', market='us',
              index=[TICKER, REPORT_DATE],
              parse_dates=[REPORT_DATE, PUBLISH_DATE])

msft = df1.loc['MSFT']
print(msft.head())
# Print the column names, separated by semicolons.
print(";".join((*msft,)))

msft[REVENUE].plot(grid=True)
plt.show(block=True)
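# A small extension sketch of the same idea: compare annual revenue across
# a few tickers on one chart (assumes df1 as loaded above; the ticker list
# is illustrative and each ticker must exist in the dataset).
for t in ['MSFT', 'AAPL', 'AMZN']:
    df1.loc[t, REVENUE].plot(grid=True, label=t)
plt.legend()
plt.show(block=True)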
import dash
import dash_html_components as html
import dash_core_components as dcc
import pandas as pd
import simfin as sf
from simfin.names import *
import dash_table
from dash.dependencies import Output, Input, State

tabtitle = 'Financial Statements'

sf.set_data_dir('~/simfin_data/')
api_key = "ZxGEGRnaTpxMF0pbGQ3JLThgqY2HBL17"

df_income = sf.load(dataset='income', variant='annual', market='us',
                    index=[TICKER])
df_income = df_income.drop([
    'Currency', 'SimFinId', 'Fiscal Period', 'Publish Date',
    'Shares (Basic)', 'Abnormal Gains (Losses)',
    'Net Extraordinary Gains (Losses)',
    'Income (Loss) from Continuing Operations', 'Net Income (Common)',
    'Pretax Income (Loss), Adj.', 'Report Date'
], axis=1)
df_income = df_income.fillna(0)
df_income[[
    'Shares (Diluted)', 'Revenue', 'Cost of Revenue', 'Gross Profit',
    'Operating Expenses', 'Selling, General & Administrative',
    'Research & Development', 'Operating Income (Loss)',
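# Hypothetical continuation sketch: render the prepared income statement of
# one company in a Dash DataTable. Assumes the (truncated) column-selection
# step above has completed and df_income is ready; 'MSFT' is illustrative.
df_msft = df_income.loc[['MSFT']].reset_index()

app = dash.Dash(__name__)
app.title = tabtitle
app.layout = html.Div([
    dash_table.DataTable(
        columns=[{'name': c, 'id': c} for c in df_msft.columns],
        data=df_msft.to_dict('records'),
    )
])

if __name__ == '__main__':
    app.run_server(debug=True)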