def parse_items(years=years):
    """Extract 10-K business description and MD&A items, and save to text files"""
    ed = EdgarClone(config.datapath['10X'], zipped=False)
    sql = SQL(**config.credentials['sql'])
    bday = BusDay(sql)
    pstat = PSTAT(sql, bday)
    to_permno = pstat.build_lookup(target='lpermno', source='cik', fillna=0)
    items = {'10-K': ['bus10K', 'mda10K']}   # '10-Q': ['mda10Q']
    logger = []
    for year in years:                       # 10-X filings start 1998
        rows = ed.open(date=year)
        for i, row in rows.iterrows():
            permno = to_permno(int(row['cik']))   # map filer CIK to CRSP permno
            if row['form'] in items and permno:
                filing = ed[row['pathname']]
                for item in items[row['form']]:
                    extract = Edgar.extract_item(filing, item)
                    s = ed.to_path(form=row['form'], permno=permno, item=item,
                                   basename=os.path.basename(row['pathname']))
                    with open(s, 'wt') as g:      # save extracted item text
                        g.write(extract)
                    r = {'year': year, 'permno': permno, 'item': item,
                         'text_c': len(filing), 'item_c': len(extract),
                         'text_w': len(filing.split()),
                         'item_w': len(extract.split())}
                    logger.append(r)
                    print(", ".join(f"{k}: {v}" for k, v in r.items()))
    return DataFrame.from_records(logger)    # summary of text and item lengths
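# Hypothetical driver for parse_items above, assuming the surrounding module
# already defines `years`, `config`, `EdgarClone` and `Edgar`: run the
# extraction and summarize how many items were saved per year.
logger = parse_items(years=range(1998, 2021))
print(logger.groupby(['year', 'item']).size())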
import os
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from numpy.ma import masked_invalid as valid
import matplotlib.pyplot as plt
import igraph   # pip3 install cairocffi
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from finds.pyR import PyR
from finds.busday import BusDay
from finds.database import SQL, Redis
from finds.structured import CRSP, PSTAT
from finds.sectors import Sectoring, BEA
from finds.graph import igraph_draw
from settings import settings
ECHO = True
sql = SQL(**settings['sql'])
bd = BusDay(sql)
rdb = Redis(**settings['redis'])
crsp = CRSP(sql, bd, rdb)
pstat = PSTAT(sql, bd)
bea = BEA(rdb, **settings['bea'], echo=ECHO)
logdir = None   # os.path.join(settings['images'], 'bea')
years = np.arange(1947, 2020)
vintages = [1997, 1963, 1947]   # when sectoring schemes were revised

# Read IO-Use tables from the BEA website: each year under every vintage's
# sectoring scheme that was in effect by that year
ioUses = dict()
for vintage in vintages:
    for year in [y for y in years if y >= vintage]:
        df = bea.read_ioUse(year, vintage=vintage)
        ioUses[(vintage, year)] = df
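# Illustrative check (not in the original script): confirm each (vintage,
# year) IO-Use table was retrieved, and show its dimensions.
for (vintage, year), df in ioUses.items():
    print(f"vintage {vintage}, year {year}: shape {df.shape}")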
import numpy as np
import pandas as pd
import time
import os
from pandas import DataFrame, Series
from matplotlib import colors
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from finds.database import SQL, Redis
from finds.structured import CRSP
from finds.busday import BusDay
from finds.taq import opentaq, itertaq, open_t, close_t, bin_trades, bin_quotes
from finds.display import plot_time, row_formatted
from finds.solve import weighted_average
from settings import settings
sql = SQL(**settings['sql'])
user = SQL(**settings['user'])
bday = BusDay(sql)
rdb = Redis(**settings['redis'])
crsp = CRSP(sql, bday, rdb=rdb)
logdir = os.path.join(settings['images'], 'micro')   # None
taqdir = os.path.join(settings['remote'], 'TAQ')
_open = pd.to_datetime('1900-01-01T9:30')    # exclude trades/quotes at or before
_close = pd.to_datetime('1900-01-01T16:00')  # exclude trades/quotes after

# Loop through the sample TAQ data dates available from NYSE and collect info
shareclass = []
daily = []
bins = {k: {} for k in ['effective', 'realized', 'impact', 'quoted', 'volume',
                        'offersize', 'bidsize', 'ret', 'retq', 'counts']}
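# The first four keys of `bins` are standard liquidity measures. A minimal
# sketch of their textbook definitions (not the finds.taq implementation),
# given trade price p, trade sign d (+1 buyer-initiated, -1 seller-initiated),
# the prevailing bid and ask, and the midquote m5 five minutes after the trade:
def spread_measures(p, d, bid, ask, m5):
    m = (bid + ask) / 2                      # prevailing midquote
    return {'quoted': ask - bid,             # quoted spread
            'effective': 2 * d * (p - m),    # effective spread
            'realized': 2 * d * (p - m5),    # realized (temporary) spread
            'impact': 2 * d * (m5 - m)}      # (permanent) price impact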
import os
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import igraph   # pip3 install cairocffi
from igraph import Graph
from pandas.api import types
import numpy.ma as ma
from numpy.ma import masked_invalid as valid
from itertools import chain
from finds.graph import igraph_draw, igraph_info, igraph_path
from finds.graph import igraph_centrality, igraph_community
from finds.database import SQL
from settings import settings
sql = SQL(**settings['sql'])
logdir = os.path.join(settings['images'], 'supplychain')   # None

# Retrieve principal customers info
year = 2016
cust = sql.read_dataframe(
    f"select gvkey, cgvkey, stic, ctic, conm, cconm from customer"
    f" where srcdate >= {year}0101 and srcdate <= {year}1231")

# To look up company full name from ticker (using pd.concat, since
# Series.append was removed in pandas 2.0)
lookup = pd.concat([Series(cust['conm'].values, cust['stic'].values),
                    Series(cust['cconm'].values, cust['ctic'].values)])\
    .drop_duplicates()

# Construct directed graph
vertices = np.array(list(set(cust['stic']).union(set(cust['ctic']))))
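# A minimal sketch of one way to build the directed supplier->customer graph
# with igraph (the original may construct it differently): each edge points
# from the supplier ticker (stic) to its principal customer's ticker (ctic).
edges = list(cust[['stic', 'ctic']].itertuples(index=False, name=None))
g = Graph.TupleList(edges, directed=True)
print(g.vcount(), 'vertices,', g.ecount(), 'edges')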
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import os
import time
from finds.database import SQL
from finds.busday import BusDay
from finds.structured import PSTAT, CRSP, Benchmarks
from finds.backtesting import EventStudy
from settings import settings
LAST_DATE = settings['crsp_date']
ECHO = True
sql = SQL(**settings['sql'], echo=ECHO)
user = SQL(**settings['user'], echo=ECHO)
bd = BusDay(sql)
keydev = PSTAT(sql, bd)
crsp = CRSP(sql, bd, rdb=None)
bench = Benchmarks(sql, bd)
eventstudy = EventStudy(user, bench, LAST_DATE)
outdir = os.path.join(settings['images'], 'events')

# event window parameters
end = 20201201
beg = 19890101        # 20020101
minobs = 250          # minimum number of observations required
left, right, post = -1, 1, 21

# str formatter to pretty-print event and role descriptions given their ids
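# Illustration (not from the original script) of what the window parameters
# mean: day 0 is the event date, the announcement window spans trading days
# `left` through `right`, and post-event drift is measured through day `post`.
announce_window = range(left, right + 1)    # days -1, 0, +1 around the event
drift_window = range(right + 1, post + 1)   # days +2 through +21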
        return _to_values(self.weeks['beg'].iloc[self.numwk(date) + weeks])

    def endwk(self, date, weeks=0):
        """Return ending business week date/s"""
        return _to_values(self.weeks['end'].iloc[self.numwk(date) + weeks])

    def ismonthend(self, date):
        """If date/s in last complete week in any month"""
        return _to_values(self.weeks['ismonthend'].iloc[self.numwk(date)])


if False:   # create custom busday trading dates
    from settings import settings
    from finds.database import SQL
    from finds.busday import BusDay
    sql = SQL(**settings['sql'], echo=True)
    busday = BusDay(sql, create=True)   # set create flag as True

if False:   # some unit tests
    from settings import settings
    from finds.database import SQL
    from finds.busday import Weekly
    sql = SQL(**settings['sql'], echo=True)
    wd = Weekly(sql, day=3, end=20201231)   # derive weekly trading calendar
    print(wd.numwk(20201230))
    print(wd.numwk(20210130))
    print(wd.numwk(20201231))
    print(wd.endwk([20201209, 20201219]))
    print(wd.endwk(20201209))
    print(wd.endmo([20201209, 20201219]))
""" from settings import settings import os import glob import time import numpy as np import pandas as pd from pandas import DataFrame, Series import matplotlib.pyplot as plt from finds.database import SQL, Redis from finds.busday import BusDay, Weekly from finds.structured import PSTAT, CRSP, IBES, Benchmarks from finds.structured import famafrench_sorts, chunk_signal from finds.readers import fetch_FamaFrench from finds.display import plot_date sql = SQL(**settings['sql'], echo=True) user = SQL(**settings['user'], echo=True) rdb = Redis(**settings['redis']) imgdir = os.path.join(settings['images'], 'monitor') # Real-time updates """Monthly: update busdays and Fama-French research factors bd = BusDay(sql, create=False) # create=True to update busdays bd = BusDay(sql) bench = Benchmarks(sql, bd) datasets = fetch_FamaFrench() print("\n".join(f"[{i}] {d}" for i, d in enumerate(datasets))) for name, item, suffix in datasets: df = fetch_FamaFrench(name=name, item=item, suffix=suffix, index_formatter=bd.offset)
for pathname in paths[1:]:
    # append prices for tickers not already seen in earlier paths
    df = pd.read_csv(os.path.join(pathname, 'prices.csv.gz'), sep='|')
    new = set(np.unique(df['ticker'])).difference(
        set(np.unique(prices['ticker'])))
    df = df[df['ticker'].isin(new)]
    prices = pd.concat([prices, df], sort=False)   # .append removed in pandas 2.0
    print(pathname, 'added prices', new)

    # likewise, append dividends for new tickers only
    df = pd.read_csv(os.path.join(pathname, 'dividends.csv.gz'), sep='|')
    new = set(np.unique(df['ticker'])).difference(
        set(np.unique(dividends['ticker'])))
    df = df[df['ticker'].isin(new)]
    dividends = pd.concat([dividends, df], sort=False)
    print(pathname, 'added dividends', new)

sql = SQL(**config.credentials['sql'], echo=config.ECHO)
bd = BusDay(sql)
crsp = CRSP(sql, bd, rdb=None)
date = bd.offset(crsp_date)

# get price and shrout as of last date
price = crsp.get_section('daily', ['prc', 'shrout'], 'date', date, start=None)

# get tickers to look up permno
tickers = crsp.get_section('names', ['tsymbol', 'date'], 'date', date,
                           start=0).reindex(price.index)
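# A hypothetical next step: invert the names table (indexed by permno after
# the reindex above) into a ticker -> permno lookup Series.
to_permno = Series(tickers.index.values, index=tickers['tsymbol'].values)
print(to_permno.head())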