def get_all_asset_names_and_locations(self, source, verbose=False):
    if source == 'web':
        df = sf.load_companies(market='us')  # download data
        assets_on_web = df.index.tolist()    # get list of assets on web
        assets = {ticker: '' for ticker in assets_on_web}
    elif source == 'local':
        base_filepath = FMTD_DATA_PATH
        assets = {}
        for filename in os.listdir(base_filepath):
            if os.path.isfile(os.path.join(base_filepath, filename)):
                ticker_name = '.'.join(filename.split('.')[:-1])
                filepath = os.path.join(base_filepath, filename)
                assets[ticker_name] = filepath
    else:
        print('Invalid Argument: input into get_all_asset_names_and_locations')
        print('Valid values: ["web", "local"]')
        print('Argument: "source" = %s' % source)
        sys.exit()
    if verbose:
        print(len(assets))
        for tn, loc in assets.items():
            print('%s\t%s' % (tn, loc))
        input()
    return assets
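# Hypothetical usage sketch: `loader` stands in for an instance of the
# enclosing class (its name is not shown above), and FMTD_DATA_PATH is
# assumed to point at a directory of per-ticker data files.
local_assets = loader.get_all_asset_names_and_locations(source='local')
web_assets = loader.get_all_asset_names_and_locations(source='web', verbose=True)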
def __init__(self):
    # Set your API-key for downloading data. This key gets the free data.
    simfin.set_api_key('free')
    # Set the local directory where data-files are stored.
    # The directory will be created if it does not already exist.
    simfin.set_data_dir('~/simfin_data/')
    # Download the data from the SimFin server and load into a Pandas DataFrame.
    df = simfin.load_companies(market='us')
    # Print the first rows of the data.
    print(df.head())
def simfin_load_ref_data():
    log.info("Called simfin_load_ref_data...")
    df = load_companies(market='us')
    json_companies = frame_to_json(df)
    df = load_industries()
    json_industries = frame_to_json(df)
    industries_by_id = {i['industryId']: i for i in json_industries}
    num_inserted = 0
    num_updated = 0
    for c in json_companies:
        log.info("Processing %s" % (c['ticker']))
        sector = ""
        industry = ""
        if c['industryId'] in industries_by_id:
            sector = industries_by_id[c['industryId']]['sector']
            industry = industries_by_id[c['industryId']]['industry']
        cur = ref_data_by_symbol(c['ticker'])
        if cur is None:
            num_inserted += 1
            post_ref_data(c['ticker'], c['companyName'], sector, industry)
        else:
            num_updated += 1
            put_ref_data(cur['id'], cur['symbol'], cur['symbolAlphaVantage'],
                         c['companyName'], sector, industry, cur['active'],
                         cur['focus'])
    ret = "ref_data: Inserted %d records, Updated %d records" % (num_inserted, num_updated)
    log.info(ret)
    return ret
def _bookCheck(input_list):
    # Keep only the tickers that exist in the SimFin 'us' companies index.
    companies = sf.load_companies(market='us')
    book_confirm = []
    for i in input_list:
        if i in companies.index.values:
            book_confirm.append(i)
    return book_confirm
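# Minimal usage sketch for _bookCheck (the tickers are hypothetical; assumes
# the SimFin API key and data directory have already been configured):
confirmed = _bookCheck(['AAPL', 'MSFT', 'NOT_A_TICKER'])
print(confirmed)  # only the tickers found in the companies index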
def __init__(self, dir: str = os.getcwd()):
    self._dir = dir
    dotenv_path = os.path.join('.env')
    load_dotenv(dotenv_path)
    sf.set_api_key('d5I8fvwmF29HUbsOwa8l3bUovp6L1NcX')
    sf.set_data_dir(os.path.join(self._dir, 'simfin'))
    self._industries = sf.load_industries()
    self._prices = sf.load_shareprices(refresh_days=0)
    self._balance = sf.load_balance(variant="quarterly")
    self._income = sf.load_income(variant="quarterly")
    self._companies = sf.load_companies()
def load_financial_data(path: str):
    sf.set_api_key('free')
    sf.set_data_dir(path)
    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')
    # Load all the industries that are available.
    df_industries = sf.load_industries()
    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')
    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')
    # Load the quarterly Cash Flow statements for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')
    return df_companies, df_industries, df_income, df_balance, df_cashflow
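# Usage sketch (the directory path is an assumption; any writable path works):
df_companies, df_industries, df_income, df_balance, df_cashflow = \
    load_financial_data('~/simfin_data/')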
def test_load_companies():
    """Test simfin.bulk.load_companies()"""
    for dataset, variant, market in iter_all_datasets(datasets='companies'):
        kwargs = _create_kwargs(variant=variant, market=market)
        sf.load_companies(**kwargs)
df_list = list()
# Iterate over all datasets
for ds in datasets:
    frames = list()
    # All markets
    for mkt in market_list:
        # Load the dataset for the current market
        frames.append(sf.load(dataset=ds, variant='annual', market=mkt,
                              index=[SIMFIN_ID, REPORT_DATE],
                              parse_dates=[REPORT_DATE, PUBLISH_DATE]))
    df_list.append(pd.concat(frames))
companies_list = list()
for mkt in market_list:
    # Load all companies
    companies_list.append(sf.load_companies(index=SIMFIN_ID, market=mkt))
df_companies = pd.concat(companies_list)
df_companies[SIMFIN_ID] = df_companies.index
# Load all industries
df_industries = sf.load_industries()
# Concatenate the individual pieces side by side
df_all = pd.concat(df_list, axis=1)
df_all[REPORT_DATE] = df_all.index.get_level_values(REPORT_DATE)
# Remove possible duplicate columns
df_all = df_all.loc[:, ~df_all.columns.duplicated()]
# Attach the industry. The original snippet is truncated here; the completion
# below is an assumption: join the companies with the industries on
# INDUSTRY_ID, then attach the result to df_all by SIMFIN_ID.
df_all = df_all.merge(
    df_companies.merge(df_industries, left_on=INDUSTRY_ID, right_index=True),
    on=SIMFIN_ID)
def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):
    # Set SimFin settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest', market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')
    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')
    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    if dataset == 'general':
        # Load data from SimFin
        income_df = sf.load_income(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        income_quarterly_df = sf.load_income(variant='quarterly', market='us', refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'general_income.csv')

        balance_df = sf.load_balance(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        balance_quarterly_df = sf.load_balance(variant='quarterly', market='us', refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'general_balance.csv')

        cashflow_df = sf.load_cashflow(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        cashflow_quarterly_df = sf.load_cashflow(variant='quarterly', market='us', refresh_days=refresh_days)
        cashflow_quarterly_df = cashflow_quarterly_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'general_cashflow.csv')

        derived_df = sf.load_derived(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        derived_df.groupby('Ticker').last().to_csv(data_directory / 'general_fundamental_derived.csv')

        cache_args = {'cache_name': 'financial_signals', 'cache_refresh': refresh_days}
        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)
        growth_signal_df = sf.growth_signals(df_income_ttm=income_df,
                                             df_income_qrt=income_quarterly_df,
                                             df_balance_ttm=balance_df,
                                             df_balance_qrt=balance_quarterly_df,
                                             df_cashflow_ttm=cashflow_df,
                                             df_cashflow_qrt=cashflow_quarterly_df,
                                             **cache_args)

        # Remove columns that exist in other fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(
            set().union(income_df.columns, growth_signal_df.columns, fin_signal_df.columns))]

        # Merge the fundamental data into a single DataFrame
        fundamental_df = income_df.join(balance_df[balance_columns]) \
                                  .join(cashflow_df[cashflow_columns]) \
                                  .join(fin_signal_df) \
                                  .join(growth_signal_df) \
                                  .join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'general'
    elif dataset == 'banks':
        # Load data from SimFin
        income_df = sf.load_income_banks(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'banks_income.csv')
        balance_df = sf.load_balance_banks(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'banks_balance.csv')
        cashflow_df = sf.load_cashflow_banks(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'banks_cashflow.csv')
        derived_df = sf.load_derived_banks(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        derived_df.groupby('Ticker').last().to_csv(data_directory / 'banks_fundamental_derived.csv')

        # Remove columns that exist in other fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single DataFrame
        fundamental_df = income_df.join(balance_df[balance_columns]) \
                                  .join(cashflow_df[cashflow_columns]) \
                                  .join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'banks'
    elif dataset == 'insurance':
        # Load data from SimFin
        income_df = sf.load_income_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_income.csv')
        balance_df = sf.load_balance_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_balance.csv')
        cashflow_df = sf.load_cashflow_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_cashflow.csv')
        derived_df = sf.load_derived_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[True, True])
        derived_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_fundamental_derived.csv')

        # Remove columns that exist in other fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single DataFrame
        fundamental_df = income_df.join(balance_df[balance_columns]) \
                                  .join(cashflow_df[cashflow_columns]) \
                                  .join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'insurance'

    # Drop columns with more than 1 - thresh NaN values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh * len(fundamental_df)), axis=1)
    # Drop duplicate index entries
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(keep='first')]
    # Replace the Report Date with the Publish Date, because the Publish Date
    # is when the fundamentals become known to the public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(['Ticker', 'Publish Date'])
    df = sf.reindex(df_src=fundamental_df, df_target=shareprices_df,
                    group_index=TICKER, method='ffill').dropna(how='all').join(shareprices_df)

    # Clean up
    df = df.drop(['SimFinId', 'Currency', 'Fiscal Year', 'Report Date',
                  'Restated Date', 'Fiscal Period', 'Published Date'], axis=1)

    if dataset == 'general':
        # Remove share prices above Amazon's maximum share price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        df = df.dropna(subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'])
        non_per_share_cols = ['Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
                              'Restated Date', 'Shares (Basic)', 'Shares (Diluted)',
                              'Close', 'Dataset'] \
            + fin_signal_df.columns.tolist() \
            + growth_signal_df.columns.tolist() \
            + derived_df_columns.difference(['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    else:
        df = df.dropna(subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])
        non_per_share_cols = ['Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
                              'Restated Date', 'Shares (Basic)', 'Shares (Diluted)',
                              'Close', 'Dataset'] \
            + derived_df_columns.difference(['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]
    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)

    # Add company and industry information and encode the categorical features
    df = df.join(company_df) \
           .merge(industry_df, left_on='IndustryId', right_index=True) \
           .drop(columns=['IndustryId', 'Company Name', 'SimFinId'])
    categorical_features = [col for col in df.columns if df[col].dtype == 'object']
    encoder = OrdinalEncoder(cols=categorical_features, handle_unknown='ignore',
                             return_df=True).fit(df)
    df = encoder.transform(df)

    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[True, True])
    return df
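# Hypothetical usage sketch for load_dataset (the share-price variant is an
# assumption; shareprices_df must carry the Ticker/Date index that sf.reindex
# and the final join expect):
shareprices_df = sf.load_shareprices(variant='daily', market='us')
df = load_dataset(dataset='general', shareprices_df=shareprices_df)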
import os
import simfin as sf

# Print the default home directory.
print(os.path.expanduser('~'))

# Set your API-key for downloading data. This key gets the free data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# Download the data from the SimFin server and load into Pandas DataFrames.
df_companies = sf.load_companies(market='us')
df_industries = sf.load_industries()

# Non-financial companies
df_income = sf.load_income(variant='quarterly', market='us')
df_balance = sf.load_balance(variant='quarterly', market='us')
df_cashflow = sf.load_cashflow(variant='quarterly', market='us')

# Banks
df_income = sf.load_income_banks(variant='quarterly', market='us')
df_balance = sf.load_balance_banks(variant='quarterly', market='us')
df_cashflow = sf.load_cashflow_banks(variant='quarterly', market='us')

# Insurance
df_income = sf.load_income_insurance(variant='quarterly', market='us')
df_balance = sf.load_balance_insurance(variant='quarterly', market='us')
# Plot Microsoft's revenue across years. Quotation marks are not needed
# when using the simfin.names shortcut constants in Python.
df_q.loc['MSFT'][REVENUE].plot(grid=True)

# Load the income statement for banks and the balance sheet for insurance companies
df = sf.load_income_banks(variant='annual', market='us')
df = sf.load_balance_insurance(variant='annual', market='us')

# Load share prices
df_prices_latest = sf.load_shareprices(variant='latest', market='us')
df_prices = sf.load_shareprices(variant='daily', market='us')
df_prices_latest.head()

# Load company details
df_companies = sf.load_companies(index=TICKER, market='us')

# Load sector and industry details
df_industries = sf.load_industries()

# Look up the industry details of Microsoft
industry_id = df_companies.loc['MSFT'][INDUSTRY_ID]
df_industries.loc[industry_id]

# Try to load the full income statement data
try:
    df = sf.load_income(variant='annual-full', market='us')
except sf.ServerException as e:
    print(e)

# Get info about certain datasets