コード例 #1
0
def test_load_income():
    """Smoke-test the income-statement loaders of simfin.bulk.

    For every (dataset, variant, market) combination produced by
    ``iter_all_datasets(datasets='income')`` the general, banks and
    insurance income loaders are each called once with the same keyword
    arguments. Nothing is asserted about the returned data.
    """
    # The dataset name itself is not needed, only variant and market.
    for _, variant, market in iter_all_datasets(datasets='income'):
        load_args = _create_kwargs(variant=variant, market=market)

        for loader in (sf.load_income,
                       sf.load_income_banks,
                       sf.load_income_insurance):
            loader(**load_args)
コード例 #2
0
def get_income() -> pd.DataFrame:
    """Return the bulk quarterly US income statements from the SimFin API.

    ``setup_simfin()`` is called first, then the data is loaded with
    ``sf.load_income``; per the original note, the data is downloaded if it
    is not already available locally.
    """
    setup_simfin()
    return sf.load_income(variant='quarterly', market='us')
コード例 #3
0
    def __init__(self, dir: str = None):
        """Configure SimFin and load the datasets used by this object.

        Args:
            dir: Base directory; SimFin data files are stored in its
                ``simfin`` sub-directory. Defaults to the current working
                directory at the time of the call (previously the default
                was frozen when the class was defined).
        """
        # Resolve the default lazily so a later os.chdir() is respected.
        self._dir = os.getcwd() if dir is None else dir

        # Load environment variables from a '.env' file, resolved relative
        # to the current working directory.
        load_dotenv('.env')

        # Prefer a key supplied through the environment (e.g. via '.env').
        # NOTE(review): the literal fallback keeps existing behaviour, but a
        # credential should not live in source control -- rotate it and
        # drop the fallback once SIMFIN_API_KEY is provided everywhere.
        api_key = os.environ.get('SIMFIN_API_KEY',
                                 'd5I8fvwmF29HUbsOwa8l3bUovp6L1NcX')
        sf.set_api_key(api_key)
        sf.set_data_dir(os.path.join(self._dir, 'simfin'))

        self._industries = sf.load_industries()
        # refresh_days=0 is passed only for share prices; the other loads
        # use SimFin's default refresh interval.
        self._prices = sf.load_shareprices(refresh_days=0)
        self._balance = sf.load_balance(variant="quarterly")
        self._income = sf.load_income(variant="quarterly")
        self._companies = sf.load_companies()
コード例 #4
0
def load_financial_data(path: str):
    """Load company, industry and quarterly US statement data from SimFin.

    SimFin is configured with the 'free' API key and ``path`` as its data
    directory before anything is loaded.

    Args:
        path: Directory handed to ``sf.set_data_dir``.

    Returns:
        A 5-tuple ``(companies, industries, income, balance, cashflow)``
        holding the results of the corresponding ``sf.load_*`` calls, in
        that order.
    """
    sf.set_api_key('free')
    sf.set_data_dir(path)

    # All three financial statements use the same variant and market.
    statement_args = {'variant': 'quarterly', 'market': 'us'}

    # Companies in the selected market (United States) and all industries.
    companies = sf.load_companies(market='us')
    industries = sf.load_industries()

    # Quarterly Income Statements, Balance Sheets and Cash-Flow Statements.
    income = sf.load_income(**statement_args)
    balance = sf.load_balance(**statement_args)
    cashflow = sf.load_cashflow(**statement_args)

    return companies, industries, income, balance, cashflow
コード例 #5
0
    def SF_income_statement(self,
                            sec_id: str = 'AAPL',
                            period: str = 'quarterly',
                            market: str = 'us') -> pd.DataFrame:
        """Load the income statements of a single security.

        Args:
            sec_id (str, optional): Stock ticker. Defaults to 'AAPL'.
            period (str, optional): Dataset variant, e.g. 'annual' or
                'quarterly'. Defaults to 'quarterly'.
            market (str, optional): Market id such as 'us' or 'de'.
                Defaults to 'us'.

        Returns:
            pd.DataFrame: The rows selected by ``sec_id`` from the loaded
            income-statement data.
        """
        index_columns = [TICKER, REPORT_DATE, FISCAL_PERIOD]
        date_columns = [REPORT_DATE, PUBLISH_DATE, RESTATED_DATE]

        statements = simfin.load_income(variant=period,
                                        market=market,
                                        index=index_columns,
                                        parse_dates=date_columns)

        # The ticker is the first index level, so .loc picks one security.
        return statements.loc[sec_id]
コード例 #6
0
import requests
import pandas as pd
import os

# Work from the project directory.
os.chdir(r"C:\Users\think\Desktop\UVA\2020 Spring\STAT 4996 Capstone\python code")

# Set the local directory where data-files are stored.
# The backslash is escaped explicitly: "\p" is not a valid escape sequence
# and triggers a warning on current Python versions. The resulting path is
# unchanged.
sf.set_data_dir('C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\\python code/simfin_data/')

# Set API-key for downloading data.
sf.set_api_key('free')
# NOTE(review): this key is not used anywhere in the visible code (the
# 'free' key is set above) and should not be committed to source control.
api_key = "xCc24BXcpHP6KWBZmERIE4vA95ialBuU"


# Download the data from the SimFin server and load into a Pandas DataFrame.
df = sf.load_income(variant='quarterly', market='us')

# Print the first rows of the data.
print(df.head())

# Print all column names of income statement data
print(df.columns)

# Print all Revenue and Net Income for Microsoft (ticker MSFT).
print(df.loc['MSFT', [REVENUE, NET_INCOME]])


# Load daily share-prices for all companies in USA.
df_prices = sf.load_shareprices(market='us', variant='daily')

# Plot the closing share-prices for ticker MSFT.
コード例 #7
0
# Set your API-key for downloading data. This key gets the free data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# Ticker symbol of the company on the stock exchange.
company = 'AAPL'

# Download the data from the SimFin server and load into Pandas DataFrames.
# Available variants: annual/quarterly/ttm
# Each selection is copied explicitly so that adding the 'Date' column below
# modifies an independent frame instead of a possible view of the full
# dataset (avoids pandas' SettingWithCopyWarning).
BALANCE = sf.load_balance(variant='annual', market='us').loc[company, ].copy()
CASH_FLOW = sf.load_cashflow(variant='annual', market='us').loc[company, ].copy()
INCOME = sf.load_income(variant='annual', market='us').loc[company, ].copy()

# Prices come from yfinance (monthly bars over ten years) rather than from
# sf.load_shareprices(variant='daily', market='us').
PRICE = yf.download(tickers=company, period='10y', interval='1mo')
PRICE = PRICE.reset_index()
# Keep only the December rows, selecting rows and columns in one step.
PRICE = PRICE.loc[PRICE['Date'].dt.month == 12, ['Close', 'Date']].copy()

# Express every date as 'MM-YYYY' so the frames share a common key.
INCOME['Date'] = INCOME.index.strftime('%m-%Y')
BALANCE['Date'] = BALANCE.index.strftime('%m-%Y')
CASH_FLOW['Date'] = CASH_FLOW.index.strftime('%m-%Y')
PRICE['Date'] = PRICE['Date'].dt.strftime('%m-%Y')

PRICE = PRICE.set_index('Date')
INCOME = INCOME.set_index('Date')
BALANCE = BALANCE.set_index('Date')
CASH_FLOW = CASH_FLOW.set_index('Date')
コード例 #8
0
ファイル: load.py プロジェクト: gardnmi/valuation-dashboard
def _load_sorted(loader, variant, refresh_days):
    """Load a US-market SimFin dataset and sort it by ticker and report date."""
    df = loader(variant=variant, market='us', refresh_days=refresh_days)
    return df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])


def _save_latest(df, path):
    """Write the last row of every ticker in ``df`` to ``path`` as CSV."""
    df.groupby('Ticker').last().to_csv(path)


def _columns_not_in(df, *others):
    """Return the columns of ``df`` that appear in none of the ``others``."""
    taken = set().union(*(other.columns for other in others))
    return df.columns[~df.columns.isin(taken)]


def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):
    """Build the per-share fundamentals dataset for one SimFin company group.

    Loads the TTM income, balance, cash-flow and derived datasets for the
    US market, writes the latest row per ticker of each to CSV files in
    ``data_directory``, merges them (plus financial and growth signals for
    the 'general' dataset), aligns the result with ``shareprices_df``,
    divides the fundamental columns by the diluted share count, attaches
    company and industry information and ordinal-encodes object columns.

    Args:
        refresh_days: Passed as ``refresh_days`` to the statement and
            derived loaders and as ``cache_refresh`` to the signal
            functions. Companies and industries are always loaded with
            ``refresh_days=1``.
        dataset: One of 'general', 'banks' or 'insurance'.
        thresh: Fraction of rows that must be non-NaN for a column of the
            merged fundamentals to be kept.
        simfin_api_key: Key passed to ``sf.set_api_key``.
        simfin_directory: Directory passed to ``sf.set_data_dir``.
        data_directory: Path-like directory (must support the ``/``
            operator) that receives the CSV snapshots.
        shareprices_df: Share-price DataFrame used as the reindex target
            and joined onto the result; it must provide a 'Close' column.
            NOTE(review): the '' default is a placeholder and presumably
            cannot work with ``sf.reindex`` -- callers should always pass
            a DataFrame.

    Returns:
        The encoded DataFrame sorted by the 'Ticker' and 'Date' index
        levels.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names. This
            is raised before any data is loaded or written; previously an
            unknown name failed later with ``UnboundLocalError``.
    """
    if dataset not in ('general', 'banks', 'insurance'):
        raise ValueError(
            "dataset must be 'general', 'banks' or 'insurance', "
            "got {!r}".format(dataset))

    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')

    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')

    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    # The three datasets differ only in which SimFin loaders they use.
    loaders = {
        'general': (sf.load_income, sf.load_balance, sf.load_cashflow,
                    sf.load_derived),
        'banks': (sf.load_income_banks, sf.load_balance_banks,
                  sf.load_cashflow_banks, sf.load_derived_banks),
        'insurance': (sf.load_income_insurance, sf.load_balance_insurance,
                      sf.load_cashflow_insurance, sf.load_derived_insurance),
    }
    load_income, load_balance, load_cashflow, load_derived = loaders[dataset]

    # Load the TTM data from Simfin and snapshot the latest row per ticker.
    income_df = _load_sorted(load_income, 'ttm', refresh_days)
    _save_latest(income_df, data_directory / (dataset + '_income.csv'))

    balance_df = _load_sorted(load_balance, 'ttm', refresh_days)
    _save_latest(balance_df, data_directory / (dataset + '_balance.csv'))

    cashflow_df = _load_sorted(load_cashflow, 'ttm', refresh_days)
    _save_latest(cashflow_df, data_directory / (dataset + '_cashflow.csv'))

    derived_df = _load_sorted(load_derived, 'ttm', refresh_days)
    _save_latest(derived_df,
                 data_directory / (dataset + '_fundamental_derived.csv'))

    # Signals are only computed for the 'general' dataset; they need the
    # quarterly statements in addition to the TTM ones.
    signal_dfs = []
    if dataset == 'general':
        income_quarterly_df = _load_sorted(load_income, 'quarterly',
                                           refresh_days)
        balance_quarterly_df = _load_sorted(load_balance, 'quarterly',
                                            refresh_days)
        cashflow_quarterly_df = _load_sorted(load_cashflow, 'quarterly',
                                             refresh_days)

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }

        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)

        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterly_df,
            **cache_args)

        signal_dfs = [fin_signal_df, growth_signal_df]

    # Remove Columns that exist in other Fundamental DataFrames
    balance_columns = _columns_not_in(balance_df, income_df)
    cashflow_columns = _columns_not_in(cashflow_df, income_df)
    derived_df_columns = _columns_not_in(derived_df, income_df, *signal_dfs)

    # Merge the fundamental data into a single dataframe. Join order:
    # income, balance, cashflow, signals (if any), derived.
    fundamental_df = income_df.join(balance_df[balance_columns]).join(
        cashflow_df[cashflow_columns])
    for signal_df in signal_dfs:
        fundamental_df = fundamental_df.join(signal_df)
    fundamental_df = fundamental_df.join(derived_df[derived_df_columns])

    fundamental_df['Dataset'] = dataset

    # Keep only columns that have at least thresh * n_rows non-NaN values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh *
                                                      len(fundamental_df)),
                                           axis=1)

    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]

    # Replace Report Date with the Publish Date because the Publish Date is
    # when the Fundamentals are known to the Public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])

    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # Clean Up
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ],
                 axis=1)

    required_cols = ['Shares (Basic)', 'Shares (Diluted)', 'Revenue']
    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        required_cols.append('Earnings Growth')

    df = df.dropna(subset=required_cols)

    # Columns that must not be divided by the share count: metadata, share
    # counts, price, signals and derived figures (except the three derived
    # amounts listed in the difference() call).
    signal_cols = [col for signal_df in signal_dfs for col in signal_df.columns]
    non_per_share_cols = [
        'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
        'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
        'Dataset'
    ] + signal_cols + derived_df_columns.difference(
        ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)

    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]

    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)

    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])

    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]

    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)

    df = encoder.transform(df)

    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])

    return df
コード例 #9
0
ファイル: tutorial_basic.py プロジェクト: chengxinru/code
# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

# Set the local directory where data-files are stored.
# The backslash is escaped explicitly: "\p" is not a valid escape sequence
# and triggers a warning on current Python versions. The resulting path is
# unchanged.
sf.set_data_dir(
    'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\\python code/simfin_data/'
)

# Set up API key
sf.set_api_key(api_key='free')

#---------------------------------General Load Function--------------------

# Load the entire annual, quarterly and trailing-twelve-months (TTM) US
# income statement data.
df_a = sf.load_income(variant='annual', market='us')
df_q = sf.load_income(variant='quarterly', market='us')
df_m = sf.load_income(variant='ttm', market='us')

# Check what the DataFrames look like (output is only shown when run
# interactively).
df_a.head()
df_q.head()
df_m.head()

# Plot Microsoft's quarterly revenue over time.
# No quotation marks are needed when using the column-name constants
# imported from simfin.names.
df_q.loc['MSFT'][REVENUE].plot(grid=True)

# Load statements for banks and insurance companies.
# NOTE(review): the second call loads the *balance sheet* for insurance
# companies and overwrites df from the first call -- confirm whether
# load_income_insurance and a separate variable were intended.
df = sf.load_income_banks(variant='annual', market='us')
df = sf.load_balance_insurance(variant='annual', market='us')
コード例 #10
0
import simfin as sf

# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

# Set the local directory where data-files are stored.
# The backslash is escaped explicitly: "\p" is not a valid escape sequence
# and triggers a warning on current Python versions. The resulting path is
# unchanged.
sf.set_data_dir('C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\\python code/simfin_data/')

# Set up API key
sf.set_api_key(api_key='free')

# Set plotting style
sns.set_style("whitegrid")
#---------------------------------Load Datasets-----------------------------
market = 'us'
df_income = sf.load_income(variant='annual', market=market)
df_prices = sf.load_shareprices(variant='daily', market=market)
df_prices_latest = sf.load_shareprices(variant='latest', market=market)

# Restrict every dataset to three tickers and the columns of interest.
tickers = ['AAPL', 'AMZN', 'MSFT']
df_income = df_income.loc[tickers, [REVENUE, NET_INCOME]].copy()
df_prices = df_prices.loc[tickers, [CLOSE, ADJ_CLOSE]].copy()
df_prices_latest = df_prices_latest.loc[tickers, [CLOSE, ADJ_CLOSE]].copy()

#-----------------------------Start resampling------------------------------
# Upsample to daily frequency, forward-filling each missing value from the
# value before it (the bare expressions only display output when run
# interactively).
df_income.loc['MSFT']
df_income.loc['MSFT'].asfreq(freq='D', method='ffill')
df_income.loc['MSFT'].asfreq(freq='D', method='ffill').plot()

# asfreq on a MultiIndex
コード例 #11
0
from simfin.names import *

# Set the local directory where data-files are stored.
# The backslash is escaped explicitly: "\p" is not a valid escape sequence
# and triggers a warning on current Python versions. The resulting path is
# unchanged.
sf.set_data_dir(
    'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\\python code/simfin_data/'
)

# Set up API key
sf.set_api_key(api_key='free')

# Set plotting style
sns.set_style("whitegrid")

#---------------------------------Load Datasets-----------------------------
market = 'us'
df_income_ann = sf.load_income(variant='annual', market=market)
df_income_qrt = sf.load_income(variant='quarterly', market=market)
df_prices = sf.load_shareprices(variant='daily', market=market)

# Restrict every dataset to three tickers and the columns of interest.
tickers = ['AAPL', 'AMZN', 'MSFT']
df_income_ann = df_income_ann.loc[tickers, [REVENUE, NET_INCOME]].copy()
df_income_qrt = df_income_qrt.loc[tickers, [REVENUE, NET_INCOME]].copy()
df_prices = df_prices.loc[tickers, [CLOSE, ADJ_CLOSE]].copy()

#-----------------------------Start calculating growth rate--------------
# NOTE(review): `df` is not defined in the visible part of this script --
# confirm which DataFrame (e.g. one ticker's income data) it should be.

# Move the data one step forward.
df.shift(periods=1).head()
# When we perform arithmetic operations with the original and shifted
# DataFrames, they are first aligned by the index-dates. This allows us to
# calculate relative changes over time.
# The two commands below are equivalent.
((df / df.shift(1)) - 1).head()
df.pct_change(periods=1).head()
コード例 #12
0
# Checklist of metrics and where each can be obtained:
# 200-day moving average: available from df_prices
# Average 3-month volume: available from df_prices
# Shares outstanding: available from df_prices
# Float (number of shares publicly available to trade): no access
# % held by insiders: no access
# % held by institutions: no access
# Shares short (as of): no access
# Short ratio: no access
# Short % of float: no access
# Shares short (prior month): no access

# Data for USA.
market = 'us'

# TTM Income Statements.
df_income_ttm = sf.load_income(variant='ttm', market=market)
# Used for: Revenue, Shares (Diluted), Revenue Per Share, Gross Profit,
#           Net Income (Common)
#           Diluted Earnings Per Share (Diluted EPS) = Net Income (Common) / Shares (Diluted)
#           Operating Margin = Operating Income (Loss) * 100 / Revenue
#           Profit Margin = (Revenue - Cost of Revenue) * 100 / Revenue
#           Net Profit Margin = Net Income * 100 / Revenue

# Quarterly Income Statements.
df_income_qrt = sf.load_income(variant='quarterly', market=market)
# Used for: Quarterly Earnings Growth = amount by which this quarter's
#           earnings exceed the same quarter's earnings of the past year

# TTM Balance Sheets.
df_balance_ttm = sf.load_balance(variant='ttm', market=market)
#Contains: Total Debt = Short Term Debt + Long Term Debt,