Esempio n. 1
0
def simfin_load_market_data_historical():
    """Load daily US share prices and insert/update historical market data.

    The frame returned by ``load_shareprices(variant='daily', market='us')``
    is converted with ``frame_to_json`` and each resulting record is handled
    as follows:

    * skipped when its ``close`` or ``adjClose`` value is ``None``;
    * skipped when ``ref_data_by_symbol`` returns ``None`` for its ticker;
    * inserted with ``post_market_data_historical`` when
      ``mdh_by_ref_data_id_date`` finds no existing row for the
      (reference-data id, date) pair;
    * otherwise updated with ``put_market_data_historical``.

    Returns:
        str: A one-line summary of the inserted / updated / skipped counts
        (the same text is written to the log).
    """
    log.info("Called simfin_load_market_data_historical...")
    df = load_shareprices(variant='daily', market='us')
    # Named ``records`` rather than ``json`` so the stdlib module name is
    # not shadowed inside this function.
    records = frame_to_json(df)

    num_inserted = 0
    num_updated = 0
    num_no_ref_data = 0
    num_no_close = 0
    for record in records:
        ticker = record['ticker']
        log.info("Processing %s", ticker)

        if record['close'] is None or record['adjClose'] is None:
            num_no_close += 1
            log.info("No close found, skipping")
            continue

        ref = ref_data_by_symbol(ticker)
        if ref is None:
            num_no_ref_data += 1
            log.info("No ref data found, skipping")
            continue

        cur = mdh_by_ref_data_id_date(ref['id'], record['date'])
        if cur is None:
            num_inserted += 1
            post_market_data_historical(
                record['date'], ref['id'], record['close'], record['adjClose'])
        else:
            num_updated += 1
            # The existing row keeps its own id/date/refDataId; only the
            # two price fields come from the freshly loaded record.
            put_market_data_historical(
                cur['id'], cur['date'], cur['refDataId'],
                record['close'], record['adjClose'])

    ret = "market_data_historical: Inserted %d records, Updated %d records, %d no ref data and %d no close" % (num_inserted, num_updated, num_no_ref_data, num_no_close)
    log.info(ret)
    return ret
Esempio n. 2
0
def getData():
    """Return the last 100 daily price rows for a fixed list of US tickers.

    Configures SimFin with the ``'free'`` API key and the
    ``~/simfin_data/`` data directory, loads the daily US share-price
    dataset, prints the column names of the MSFT slice, and then stacks
    the trailing 100 rows of each ticker in ``list_of_stocks``.

    Returns:
        pandas.DataFrame: The per-ticker slices concatenated in list order,
        restricted to the columns ``SimFinId``, ``Close``, ``Open``,
        ``High`` and ``Low``.
    """
    sf.set_api_key('free')

    # Set the local directory where data-files are stored.
    sf.set_data_dir('~/simfin_data/')

    # Load daily share-prices for all companies in USA.
    df_prices = sf.load_shareprices(market='us', variant='daily')

    # Show which columns are available, using MSFT as a sample ticker.
    print(df_prices.loc['MSFT'].columns)

    list_of_stocks = ['AAPL', 'MSFT', 'CLDR', 'CRM', 'TSLA', 'NVDA', 'DAL']
    wanted_columns = ["SimFinId", "Close", "Open", "High", "Low"]

    # Collect the slices first and concatenate once: concatenating inside
    # the loop re-copies the accumulated frame on every iteration.
    frames = [
        df_prices.loc[stock].tail(100)[wanted_columns]
        for stock in list_of_stocks
    ]
    return pd.concat(frames)
Esempio n. 3
0
def get_share_prices() -> pd.DataFrame:
    """Return the bulk daily US share prices loaded through SimFin.

    ``setup_simfin()`` is called first, then the dataset is fetched with
    ``sf.load_shareprices``. Per the original note, the data is downloaded
    if it is not already available locally.
    """
    setup_simfin()
    return sf.load_shareprices(market='us', variant='daily')
Esempio n. 4
0
def export_simfin(date_str, table_id=_TABLE_ID_DAILY_SIMFIN_TEMP):
    """Export the SimFin daily share prices of a single date.

    Loads the daily US share-price dataset (``refresh_days=0``), takes the
    cross-section whose second index level equals ``date_str`` and hands it
    to ``_export_simfin_df`` together with ``_DATASET_ID_EQUITY_DAILY`` and
    ``table_id``.

    Args:
        date_str: Date key looked up on index level 1 of the price frame.
        table_id: Destination table id passed through to
            ``_export_simfin_df``.

    Any exception raised while selecting or exporting is logged with its
    traceback and swallowed; the function always returns ``None``.
    """
    logging.info('daily export simfin data, date: %s, table_id: %s',
                 date_str, table_id)
    df = sf.load_shareprices(variant='daily', market='us', refresh_days=0)
    try:
        df_date = df.xs(date_str, level=1, drop_level=False)
        if len(df_date) == 0:
            logging.info('can not find any data for the given date %s',
                         date_str)
        # NOTE(review): the export still runs for an empty selection, as in
        # the original code - confirm whether it should be skipped instead.
        _export_simfin_df(df_date, _DATASET_ID_EQUITY_DAILY, table_id)
    except Exception:
        # logging.exception records the traceback, which a plain
        # logging.error(ex) would drop.
        logging.exception('simfin export failed for date %s', date_str)
Esempio n. 5
0
def ingest():
    """Dump the SimFin daily US share prices to one CSV file per year.

    The API key is read from the ``API_KEY_SIMFIN`` environment variable and
    data files are stored under ``~/simfin_data/``. For every year from 2010
    through 2020 the rows whose ``Date`` index level falls inside that
    calendar year are written to ``data/daily_simfin_<year>.csv``.
    """
    sf.set_api_key(os.getenv('API_KEY_SIMFIN'))

    # Set the local directory where data-files are stored.
    sf.set_data_dir('~/simfin_data/')

    df = sf.load_shareprices(variant='daily', market='us')

    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('data', exist_ok=True)

    for y in range(2010, 2021):
        year_span = slice(f'{y}-01-01', f'{y}-12-31')
        df_y = df.xs(year_span, level='Date', drop_level=False)
        df_y.to_csv(f'data/daily_simfin_{y}.csv')
Esempio n. 6
0
    def SF_pricing_data(self,
                        sec_id='AAPL',
                        period: str = 'Daily',
                        market: str = 'us',
                        start_date=None,
                        end_date=None) -> pd.DataFrame:
        """Return price rows for one ticker from the SimFin share-price data.

        Per the original note, a token is needed for the most recent data
        and the call downloads a csv file with the pricing data.

        Args:
            sec_id (str, optional): Stock ticker. Defaults to 'AAPL'.
            period (str, optional): Passed to SimFin as the dataset
                ``variant``. Defaults to 'Daily'.
            market (str, optional): Stock market ('us', 'de', ...).
                Defaults to 'us'.
            start_date (optional): 'xxxx-xx-xx' inclusive lower bound on the
                date. Defaults to None (no lower bound).
            end_date (optional): 'xxxx-xx-xx' inclusive upper bound on the
                date. Defaults to None (no upper bound).

        Returns:
            pd.DataFrame: Columns ``date``, ``open``, ``close``, ``high``,
            ``low`` and ``volume``. When at least one bound is given the
            frame is re-indexed with ``reset_index()``, which also adds an
            ``index`` column holding the pre-filter row labels.
        """
        # NOTE(review): SimFin variants are usually spelled lowercase
        # ('daily'); confirm the capitalised default is accepted.
        prices = simfin.load_shareprices(variant=period, market=market)

        lowercase_names = {
            "Date": "date",
            'Open': 'open',
            'Close': 'close',
            'High': 'high',
            'Low': 'low',
            'Volume': 'volume'
        }
        prices = prices.loc[sec_id].reset_index().rename(columns=lowercase_names)

        prices = prices[['date', 'open', 'close', 'high', 'low', 'volume']]
        prices['date'] = pd.to_datetime(prices['date'])

        # pd.to_datetime(None) yields None, so a missing bound stays None.
        lower = pd.to_datetime(start_date)
        upper = pd.to_datetime(end_date)

        if lower is None and upper is None:
            return prices

        if lower is not None:
            prices = prices[prices['date'] >= lower]
        if upper is not None:
            prices = prices[prices['date'] <= upper]

        return prices.reset_index()
    def simfin_historical_data(self,
                               sec_id='AAPL',
                               period: str = 'Daily',
                               market: str = 'us',
                               start_date=None,
                               end_date=None) -> pd.DataFrame:
        """Return historical price rows for one ticker loaded through SimFin.

        Per the original note, the data is downloaded as a csv file and a
        token (paid membership, see https://simfin.com/) is needed for the
        most recent data.

        Args:
            sec_id (str, optional): Stock ticker. Defaults to 'AAPL'.
            period (str, optional): Passed to SimFin as the dataset
                ``variant``. Defaults to 'Daily'.
            market (str, optional): Stock market ('us', 'de', ...).
                Defaults to 'us'.
            start_date (optional): 'xxxx-xx-xx' inclusive first date to
                keep. Defaults to None (no lower bound).
            end_date (optional): 'xxxx-xx-xx' inclusive last date to keep.
                Defaults to None (no upper bound).

        Returns:
            pd.DataFrame: Columns ``date``, ``open``, ``close``, ``high``,
            ``low`` and ``volume``. If either bound is supplied, the result
            goes through ``reset_index()`` and therefore carries an extra
            ``index`` column with the row labels from before filtering.
        """
        # NOTE(review): SimFin variants are usually spelled lowercase
        # ('daily'); confirm the capitalised default is accepted.
        raw = sf.load_shareprices(variant=period, market=market)

        # Single ticker -> flat frame with the date as an ordinary column.
        frame = raw.loc[sec_id]
        frame = frame.reset_index()
        frame = frame.rename(columns={
            "Date": "date",
            'Open': 'open',
            'Close': 'close',
            'High': 'high',
            'Low': 'low',
            'Volume': 'volume'
        })

        keep = ['date', 'open', 'close', 'high', 'low', 'volume']
        frame = frame[keep]
        frame['date'] = pd.to_datetime(frame['date'])

        # pd.to_datetime(None) yields None, so absent bounds remain None.
        first_day = pd.to_datetime(start_date)
        last_day = pd.to_datetime(end_date)

        has_lower = first_day is not None
        has_upper = last_day is not None
        if not (has_lower or has_upper):
            return frame

        if has_lower:
            frame = frame[frame['date'] >= first_day]
        if has_upper:
            frame = frame[frame['date'] <= last_day]

        frame = frame.reset_index()
        return frame
Esempio n. 8
0
    def __init__(self, dir: str = os.getcwd()):
        """Configure SimFin and eagerly load the datasets used by this object.

        Args:
            dir: Base directory; SimFin data files are stored in its
                ``simfin`` sub-directory. The default is the working
                directory at the time this method was *defined*, because
                default values are evaluated once.

        Loads ``.env`` via ``load_dotenv``, sets the SimFin API key and data
        directory, then loads industries, share prices
        (``refresh_days=0``), quarterly balance sheets, quarterly income
        statements and companies into private attributes.
        """
        self._dir = dir

        dotenv_path = os.path.join('.env')
        load_dotenv(dotenv_path)

        # Take the key from the environment (which load_dotenv has just
        # populated from .env) instead of relying on a literal in source.
        # TODO(security): the fallback literal is a credential committed to
        # source control - rotate it and remove the fallback once
        # API_KEY_SIMFIN is configured everywhere.
        api_key = os.getenv('API_KEY_SIMFIN',
                            'd5I8fvwmF29HUbsOwa8l3bUovp6L1NcX')
        sf.set_api_key(api_key)
        sf.set_data_dir(os.path.join(self._dir, 'simfin'))

        self._industries = sf.load_industries()
        self._prices = sf.load_shareprices(refresh_days=0)
        self._balance = sf.load_balance(variant="quarterly")
        self._income = sf.load_income(variant="quarterly")
        self._companies = sf.load_companies()
Esempio n. 9
0
def load_shareprices(refresh_days=1,
                     simfin_api_key='free',
                     simfin_directory='simfin_data/'):
    """Load daily US closing prices, smooth them and resample to months.

    Args:
        refresh_days: Forwarded to ``sf.load_shareprices`` as
            ``refresh_days``.
        simfin_api_key: API key handed to ``sf.set_api_key``.
        simfin_directory: Directory handed to ``sf.set_data_dir``.

    Returns:
        The result of ``sf.resample`` with rule ``'M'`` taking the last
        value of each period, applied to the per-ticker rolling mean
        (window of 30 rows, at least 1 row required) of the ``Close``
        column.
    """
    # SimFin configuration shared by all dataset loads.
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    daily_prices = sf.load_shareprices(refresh_days=refresh_days,
                                       market='us',
                                       variant='daily')

    # Rolling mean of the close, computed separately for every ticker.
    close_only = daily_prices[['Close']]
    per_ticker = close_only.groupby('Ticker')
    smoothed = per_ticker.rolling(30, min_periods=1).mean()

    # Drop index level 0 of the rolling result before resampling.
    smoothed = smoothed.reset_index(0, drop=True)

    # Keep the last smoothed value of each 'M' period.
    return sf.resample(df=smoothed,
                       rule='M',
                       method=lambda x: x.last())
Esempio n. 10
0
def simfin_load_market_data():
    """Load the latest US share prices and insert/update market data.

    The frame returned by ``load_shareprices(variant='latest', market='us')``
    is converted with ``frame_to_json`` and each resulting record is handled
    as follows:

    * skipped when its ``close`` value is ``None``;
    * skipped when its date (first 10 characters, ``YYYY-MM-DD``) is more
      than 10 days before the current time;
    * updated with ``put_market_data`` when ``market_data_by_symbol``
      returns an existing row for the ticker;
    * otherwise inserted with ``post_market_data`` if
      ``ref_data_by_symbol`` finds reference data, and skipped if not.

    Returns:
        str: A one-line summary of the inserted / updated / skipped counts
        (the same text is written to the log).
    """
    log.info("Called simfin_load_market_data...")

    df = load_shareprices(variant='latest', market='us')
    # Named ``records`` rather than ``json`` so the stdlib module name is
    # not shadowed inside this function.
    records = frame_to_json(df)

    # The staleness cutoff does not depend on the record, so compute it once.
    oldest_allowed = datetime.now() - timedelta(days=10)

    num_inserted = 0
    num_updated = 0
    num_price_to_old = 0
    num_no_ref_data = 0
    num_no_close = 0
    for record in records:
        ticker = record['ticker']
        log.info("Processing %s", ticker)

        if record['close'] is None:
            num_no_close += 1
            log.info("No close found, skipping")
            continue

        if datetime.strptime(record['date'][:10], '%Y-%m-%d') < oldest_allowed:
            num_price_to_old += 1
            log.info("Price too old (%s), skipping", record['date'])
            continue

        cur = market_data_by_symbol(ticker)
        if cur is not None:
            num_updated += 1
            put_market_data(cur['id'], cur['refDataId'], record['close'])
            continue

        # No market-data row yet: reference data is needed to create one.
        ref = ref_data_by_symbol(ticker)
        if ref is None:
            num_no_ref_data += 1
            log.info("No ref data found, skipping")
            continue
        num_inserted += 1
        post_market_data(ref['id'], record['close'])

    ret = "market_data: Inserted %d records, Updated %d records, %d price to old and %d no ref data and %d no close" % (num_inserted, num_updated, num_price_to_old, num_no_ref_data, num_no_close)
    log.info(ret)
    return ret
Esempio n. 11
0
# Import names used for easy access to SimFin's data-columns.
from simfin.names import *

# Set the local directory where data-files are stored.
# Raw string: the path contains a backslash ("Capstone\python"), and "\p" in
# a normal string literal is an invalid escape sequence that newer Python
# versions warn about. The runtime value of the path is unchanged.
sf.set_data_dir(
    r'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/'
)

# Set up API key
sf.set_api_key(api_key='free')

# Set plotting style
sns.set_style("whitegrid")

# -------------------------- Load dataframe -----------------------------
df_prices = sf.load_shareprices(variant='daily', market='us')

# -------------------------- Add columns --------------------------------
# NOTE(review): FOO, BAR and QUX are not defined in this excerpt -
# presumably placeholder column names; confirm where they come from.

# Simple way: start from an empty frame and assign the derived columns.
df2 = pd.DataFrame()
df2[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE]
df2[BAR] = df_prices[CLOSE] * df_prices[ADJ_CLOSE]
df2[QUX] = df_prices[CLOSE] * df_prices[VOLUME]

# Per the original note: adding new columns is much faster when the target
# frame is created with the same index as the source.
df3 = pd.DataFrame(index=df_prices.index)
df3[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE]
df3[BAR] = df_prices[CLOSE] * df_prices[ADJ_CLOSE]
df3[QUX] = df_prices[CLOSE] * df_prices[VOLUME]

#another way: use dict
        bigquery.SchemaField("symbol", "STRING"),
        bigquery.SchemaField("open", "FLOAT"),
        bigquery.SchemaField("high", "FLOAT"),
        bigquery.SchemaField("low", "FLOAT"),
        bigquery.SchemaField("close", "FLOAT"),
        bigquery.SchemaField("volume", "FLOAT"),
        bigquery.SchemaField("volume_weighted_price", "FLOAT"),
    ]

    full_table_id = get_full_table_id('{t}_{y}'.format(t=TABLE_ID_DAILY,
                                                       y=year))
    table = bigquery.Table(full_table_id, schema=schema)
    table = _bigquery_client.create_table(table)  # Make an API request.


import simfin as sf

# API key comes from the API_KEY_SIMFIN environment variable.
sf.set_api_key(os.getenv('API_KEY_SIMFIN'))

# Local directory where data-files are stored.
sf.set_data_dir('~/simfin_data/')

df_l = sf.load_shareprices(market='us', variant='latest')

# Each index entry is indexed by position: element 0 is printed as-is and
# element 1 is reduced to its calendar date.
for index, row in df_l.iterrows():
    symbol, stamp = index[0], index[1]
    print(index, symbol, stamp.date(), row['Close'])
Esempio n. 13
0
def test_load_shareprices():
    """Smoke-test simfin.bulk.load_shareprices() on every share-price dataset.

    Iterates the (dataset, variant, market) combinations yielded by
    ``iter_all_datasets(datasets='shareprices')`` and loads each one; the
    test passes as long as no call raises.
    """
    for _dataset, variant, market in iter_all_datasets(datasets='shareprices'):
        load_args = _create_kwargs(market=market, variant=variant)
        sf.load_shareprices(**load_args)
Esempio n. 14
0
# Fetch the quarterly US income statements from SimFin as a DataFrame.
df = sf.load_income(market='us', variant='quarterly')

# Show the first rows, then every available column name.
print(df.head())
print(df.columns)

# Revenue and net income reported for Microsoft (ticker MSFT).
print(df.loc['MSFT', [REVENUE, NET_INCOME]])


# Daily share prices for all US companies.
df_prices = sf.load_shareprices(variant='daily', market='us')

# Chart the MSFT closing price.
df_prices.loc['MSFT', CLOSE].plot(title='MSFT Close', figsize=(20,10), grid=True)


------------------------------------------------------------------------------------


# Tickers whose SimFin ids are looked up through the v1 "find-id" endpoint.
tickers = ["AAPL", "NVDA", "WMT"]

# NOTE(review): nothing is appended to sim_ids in the visible code -
# presumably the parsed response is stored here further on; confirm.
sim_ids = []
for ticker in tickers:
    request_url = f'https://simfin.com/api/v1/info/find-id/ticker/{ticker}?api-key={api_key}'
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server; bound the wait so the loop cannot hang.
    content = requests.get(request_url, timeout=30)
    data = content.json()
 def __load_share_data__(self):
     """Load the daily share prices and keep them on ``self.df_prices``.

     SimFin data files are stored under ``~/simfin_data/``.
     """
     sf.set_data_dir('~/simfin_data/')
     # NOTE(review): the market is spelled 'US' here; SimFin market names
     # are usually lowercase ('us') - confirm this spelling is accepted.
     daily_prices = sf.load_shareprices(market='US', variant='daily')
     self.df_prices = daily_prices
Esempio n. 16
0
# Peek at the first rows of each frame.
# (df_a, df_q and df_m are created outside this excerpt.)
df_a.head()
df_q.head()
df_m.head()

# Plot Microsoft's revenue over time.
# REVENUE is a bare name, not a quoted string - presumably one of the
# column-name shortcuts from simfin.names; confirm against the imports.
df_q.loc['MSFT'][REVENUE].plot(grid=True)

# Load the annual income statements for banks, then the annual balance
# sheets for insurance companies.
# NOTE: both results are bound to `df`, so the second load replaces the first.
df = sf.load_income_banks(variant='annual', market='us')
df = sf.load_balance_insurance(variant='annual', market='us')

# Load share prices: the 'latest' variant and the full 'daily' variant.
df_prices_latest = sf.load_shareprices(variant='latest', market='us')
df_prices = sf.load_shareprices(variant='daily', market='us')
df_prices_latest.head()

# Load company details, indexed by ticker.
df_companies = sf.load_companies(index=TICKER, market='us')

# Load sector and industry details.
df_industries = sf.load_industries()

# Look up Microsoft's industry id, then the matching industry row.
industry_id = df_companies.loc['MSFT'][INDUSTRY_ID]
df_industries.loc[industry_id]

#try to load full income statement data
try: