コード例 #1
0
    def get_all_asset_names_and_locations(self, source, verbose=False):
        """Return a mapping of ticker name -> data-file location.

        source == 'web'   : download the US company list; every ticker maps
                            to an empty string (no local file yet).
        source == 'local' : scan FMTD_DATA_PATH and map each ticker (file
                            name without its extension) to its full path.
        Any other value prints usage information and exits the process.

        When verbose is true, prints the asset count and then each
        (ticker, location) pair, pausing for Enter between items.
        """
        if source == 'web':
            company_df = sf.load_companies(market='us')  # download data
            # Every downloaded ticker gets an empty location string.
            assets = dict.fromkeys(company_df.index.tolist(), '')

        elif source == 'local':
            assets = {}
            for entry in os.listdir(FMTD_DATA_PATH):
                full_path = os.path.join(FMTD_DATA_PATH, entry)
                if not os.path.isfile(full_path):
                    continue  # skip sub-directories
                # Ticker name is the file name minus its last extension.
                ticker = '.'.join(entry.split('.')[:-1])
                assets[ticker] = full_path

        else:
            print(
                'Invalid Argument: input into get_all_asset_names_and_location'
            )
            print('Valid values: [\"web\", \"local\"]')
            print('Argument: \"source\" = %s' % source)
            sys.exit()

        if verbose:
            print(len(assets))
            for ticker, location in assets.items():
                print('%s\t%s' % (ticker, location))
                input()

        return assets
コード例 #2
0
    def __init__(self):
        """Configure the SimFin client and download the US company list."""
        # Set your API-key for downloading data. This key gets the free data.
        simfin.set_api_key('free')

        # Set the local directory where data-files are stored.
        # The directory will be created if it does not already exist.
        simfin.set_data_dir('~/simfin_data/')

        # Download the data from the SimFin server and load into a Pandas DataFrame.
        # (The original code called load_companies a second time afterwards
        # and discarded the result — that redundant call has been removed.)
        df = simfin.load_companies(market='us')

        # Print the first rows of the data.
        print(df.head())
コード例 #3
0
ファイル: job_simfin_load.py プロジェクト: ajmal017/Kumamon
def simfin_load_ref_data():
    """Sync SimFin company reference data into the ref_data store.

    Downloads the US company list and the industry table, then inserts a
    new ref_data record for each unknown ticker and updates the record of
    each known one (preserving its existing symbol/active/focus fields).

    Returns:
        A human-readable summary string with insert/update counts.
    """
    log.info("Called simfin_load_ref_data...")

    df = load_companies(market='us')
    json_companies = frame_to_json(df)
    df = load_industries()
    json_industries = frame_to_json(df)
    # Index industries by id for O(1) sector/industry lookups below.
    industries_by_id = {i['industryId']: i for i in json_industries}

    num_inserted = 0
    num_updated = 0
    for c in json_companies:
        log.info("Processing %s" % (c['ticker']))
        # Companies with an unknown industry get blank sector/industry.
        info = industries_by_id.get(c['industryId'])
        sector = info['sector'] if info is not None else ""
        industry = info['industry'] if info is not None else ""
        cur = ref_data_by_symbol(c['ticker'])
        if cur is None:  # PEP 8: compare to None with 'is', not '=='
            num_inserted += 1
            post_ref_data(c['ticker'], c['companyName'], sector, industry)
        else:
            num_updated += 1
            put_ref_data(cur['id'], cur['symbol'], cur['symbolAlphaVantage'], c['companyName'], sector, industry, cur['active'], cur['focus'])

    ret = "ref_data: Inserted %d records, Updated %d records" % (num_inserted, num_updated)
    log.info(ret)
    return ret
def _bookCheck(input_list):
    """Return the tickers from *input_list* that SimFin knows about.

    Order of *input_list* is preserved; unknown tickers are dropped.
    """
    companies = sf.load_companies(market='us')
    # Build the membership set once instead of scanning the whole index
    # array for every input ticker (was O(n*m), now O(n+m)).
    known_tickers = set(companies.index.values)
    return [ticker for ticker in input_list if ticker in known_tickers]
コード例 #5
0
    def __init__(self, dir: "str | None" = None):
        """Load SimFin datasets, caching files under *dir*/simfin.

        Args:
            dir: Base directory for the SimFin cache. Defaults to the
                current working directory *at call time*. (The original
                default ``os.getcwd()`` was evaluated once when the class
                was defined, freezing whatever cwd the importing process
                happened to have.)
        """
        self._dir = dir if dir is not None else os.getcwd()

        dotenv_path = os.path.join('.env')
        load_dotenv(dotenv_path)

        # SECURITY: hard-coded API key checked into source. Since the .env
        # file is loaded above, prefer os.getenv('SIMFIN_API_KEY') instead.
        sf.set_api_key('d5I8fvwmF29HUbsOwa8l3bUovp6L1NcX')
        sf.set_data_dir(os.path.join(self._dir, 'simfin'))

        self._industries = sf.load_industries()
        self._prices = sf.load_shareprices(refresh_days=0)
        self._balance = sf.load_balance(variant="quarterly")
        self._income = sf.load_income(variant="quarterly")
        self._companies = sf.load_companies()
コード例 #6
0
def load_financial_data(path: str):
    """Download the free SimFin US datasets into *path* and return them.

    Returns a 5-tuple of DataFrames:
    (companies, industries, income, balance, cashflow) — the statement
    data are all quarterly variants for the US market.
    """
    sf.set_api_key('free')

    sf.set_data_dir(path)

    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')

    # Load all the industries that are available.
    df_industries = sf.load_industries()

    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')

    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')

    # Load the quarterly Cash-Flow data for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')

    return df_companies, df_industries, df_income, df_balance, df_cashflow
コード例 #7
0
def test_load_companies():
    """Test simfin.bulk.load_companies()"""
    # Exercise every (variant, market) combination offered for the
    # 'companies' dataset; load_companies raises on any failure.
    for _dataset, variant, market in iter_all_datasets(datasets='companies'):
        sf.load_companies(**_create_kwargs(variant=variant, market=market))
コード例 #8
0
df_list = list()
# Iterate over all datasets
for ds in datasets:
    frames = list()
    # All markets
    for mkt in market_list:
        # Load this dataset for the current market
        frames.append(sf.load(dataset=ds, variant='annual',
                              market=mkt, index=[SIMFIN_ID, REPORT_DATE],
                              parse_dates=[REPORT_DATE, PUBLISH_DATE]))
    df_list.append(pd.concat(frames))

companies_list = list()
for mkt in market_list:
    # Load all companies
    companies_list.append(sf.load_companies(index=SIMFIN_ID, market=mkt))

df_companies = pd.concat(companies_list)
# Keep the SimFin id both as index and as a regular column (for merging).
df_companies[SIMFIN_ID] = df_companies.index

# Load all industries
df_industries = sf.load_industries()

# Concatenate the individual pieces side by side (column-wise)
df_all = pd.concat(df_list, axis=1)
df_all[REPORT_DATE] = df_all.index.get_level_values(REPORT_DATE)
# Remove possible duplicate columns
df_all = df_all.loc[:, ~df_all.columns.duplicated()]
# Attach the industry information
df_all = df_all.merge(
    df_companies.merge(
コード例 #9
0
ファイル: load.py プロジェクト: gardnmi/valuation-dashboard
def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):
    """Build a model-ready DataFrame of per-share fundamentals and prices.

    Downloads the requested SimFin dataset family, writes the raw pieces
    to CSV under *data_directory*, merges income/balance/cashflow/derived
    data into one fundamentals frame, forward-fills it onto the dates of
    *shareprices_df*, scales value columns to per-share figures and
    ordinal-encodes categorical columns.

    Args:
        refresh_days: Re-download statement data older than this many days.
        dataset: One of 'general', 'banks' or 'insurance'.
        thresh: Keep only columns with at least ``thresh`` fraction of
            non-NaN values.
        simfin_api_key: SimFin API key ('free' for the free tier).
        simfin_directory: Local cache directory for SimFin downloads.
        data_directory: ``Path``-like output directory for the CSV files.
        shareprices_df: Daily share-price DataFrame to align against.
            NOTE(review): the default '' cannot be used by ``sf.reindex``;
            callers are expected to pass a real DataFrame.

    Returns:
        The combined, cleaned pandas DataFrame indexed by (Ticker, Date).

    Raises:
        ValueError: If *dataset* is not one of the supported names.
    """
    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')

    # Company/industry reference data is refreshed daily regardless of the
    # refresh_days argument — presumably intentional (cheap downloads);
    # TODO confirm this should not use refresh_days.
    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')

    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    if dataset == 'general':

        # Load Data from Simfin
        income_df = sf.load_income(variant='ttm',
                                   market='us',
                                   refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_quarterly_df = sf.load_income(variant='quarterly',
                                             market='us',
                                             refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'general_income.csv')

        balance_df = sf.load_balance(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_quarterly_df = sf.load_balance(variant='quarterly',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'general_balance.csv')

        cashflow_df = sf.load_cashflow(variant='ttm',
                                       market='us',
                                       refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        # (local variable renamed from the original 'cashflow_quarterlay_df'
        # typo)
        cashflow_quarterly_df = sf.load_cashflow(variant='quarterly',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_quarterly_df = cashflow_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'general_cashflow.csv')

        derived_df = sf.load_derived(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }

        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)

        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterly_df,
            **cache_args)

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(
            set(income_df.columns) | set(growth_signal_df.columns) |
            set(fin_signal_df.columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'general'

    elif dataset == 'banks':

        # Load Data from Simfin
        income_df = sf.load_income_banks(variant='ttm',
                                         market='us',
                                         refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'banks_income.csv')

        balance_df = sf.load_balance_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'banks_balance.csv')

        cashflow_df = sf.load_cashflow_banks(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'banks_cashflow.csv')

        derived_df = sf.load_derived_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        # (the original wrote this CSV twice in a row; the duplicate call
        # has been removed)
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[
            ~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'banks'

    elif dataset == 'insurance':

        # Load Data from Simfin
        income_df = sf.load_income_insurance(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'insurance_income.csv')

        balance_df = sf.load_balance_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'insurance_balance.csv')

        cashflow_df = sf.load_cashflow_insurance(variant='ttm',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'insurance_cashflow.csv')

        derived_df = sf.load_derived_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[
            ~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'insurance'

    else:
        # Fail fast instead of hitting a NameError on fundamental_df below.
        raise ValueError(
            "dataset must be 'general', 'banks' or 'insurance', got %r" %
            (dataset,))

    # Drop Columns with more then 1-thresh nan values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh *
                                                      len(fundamental_df)),
                                           axis=1)

    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]

    # Replace Report Date with the Publish Date because the Publish Date is when the Fundamentals are known to the Public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])

    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # General
    # Clean Up
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ],
                 axis=1)

    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]

        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    else:
        df = df.dropna(
            subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)

    # Everything not in non_per_share_cols is divided by diluted shares.
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]

    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)

    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])

    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]

    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)

    df = encoder.transform(df)

    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])

    return df
コード例 #10
0
import os
import simfin as sf

# Show the user's home directory (where '~/simfin_data/' will expand to).
print(os.path.expanduser('~'))

# Set your API-key for downloading data. This key gets the free data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# Download the data from the SimFin server and load into a Pandas DataFrame.
df_companies = sf.load_companies(market='us')
df_industries = sf.load_industries()

# Non financial
# NOTE: the groups below reuse the same variable names, so only the last
# assignment of df_income/df_balance/df_cashflow survives — this is a
# download demo, not a data pipeline.
df_income = sf.load_income(variant='quarterly', market='us')
df_balance = sf.load_balance(variant='quarterly', market='us')
df_cashflow = sf.load_cashflow(variant='quarterly', market='us')

# Banks
df_income = sf.load_income_banks(variant='quarterly', market='us')
df_balance = sf.load_balance_banks(variant='quarterly', market='us')
df_cashflow = sf.load_cashflow_banks(variant='quarterly', market='us')

# Insurance
df_income = sf.load_income_insurance(variant='quarterly', market='us')
df_balance = sf.load_balance_insurance(variant='quarterly', market='us')
コード例 #11
0
ファイル: tutorial_basic.py プロジェクト: chengxinru/code
#Plot Microsoft's revenue across years
#don't have to add quotation marks when using the python shortcut constants
df_q.loc['MSFT'][REVENUE].plot(grid=True)

#Load income statement for banks and balance sheet for insurance companies
#(each assignment overwrites df; these are download demos)
df = sf.load_income_banks(variant='annual', market='us')
df = sf.load_balance_insurance(variant='annual', market='us')

#load share prices
df_prices_latest = sf.load_shareprices(variant='latest', market='us')
df_prices = sf.load_shareprices(variant='daily', market='us')
df_prices_latest.head()

#Load Companies Detail
df_companies = sf.load_companies(index=TICKER, market='us')

#Load sector and industry details
df_industries = sf.load_industries()

#Look up industry detail of Microsoft
industry_id = df_companies.loc['MSFT'][INDUSTRY_ID]
df_industries.loc[industry_id]

#try to load full income statement data (requires a paid plan; the free
#key makes the server reject the request, which is caught and printed)
try:
    df = sf.load_income(variant='annual-full', market='us')
except sf.ServerException as e:
    print(e)

#Get info about certain datasets