def parse_items(years=years):
    """Extract and save 10-K items (business description, MD&A) by year.

    For each EDGAR filing in the given years whose CIK maps to a CRSP
    permno, extract the configured items, write each extract to the
    clone's item path, and log character/word counts per extract.

    Args:
        years: iterable of filing years to process (default is the
            module-level ``years``, bound at definition time)

    Returns:
        DataFrame of per-item extraction statistics (year, permno, item,
        and character/word counts of the full filing and the extract).
    """
    ed = EdgarClone(config.datapath['10X'], zipped=False)
    sql = SQL(**config.credentials['sql'])
    bday = BusDay(sql)
    pstat = PSTAT(sql, bday)
    # cik -> permno lookup; unmatched ciks map to 0 (falsy, filtered below)
    to_permno = pstat.build_lookup(target='lpermno', source='cik', fillna=0)

    items = {'10-K': ['bus10K', 'mda10K']}  # '10-Q': ['mda10Q']}
    logger = []
    for year in years:    # e.g. 1998 onward
        rows = ed.open(date=year)
        for i, row in rows.iterrows():
            permno = to_permno(int(row['cik']))
            if row['form'] in items and permno:
                filing = ed[row['pathname']]
                for item in items[row['form']]:
                    extract = Edgar.extract_item(filing, item)
                    s = ed.to_path(form=row['form'], permno=permno, item=item,
                                   basename=os.path.basename(row['pathname']))
                    with open(s, 'wt') as g:
                        g.write(extract)
                    r = {'year': year, 'permno': permno, 'item': item,
                         'text_c': len(filing),
                         'item_c': len(extract),
                         'text_w': len(filing.split()),
                         'item_w': len(extract.split())}
                    logger.append(r)
                    print(", ".join([f"{k}: {v}" for k,v in r.items()]))
    # BUGFIX: the stats frame was built but never returned to the caller
    return DataFrame.from_records(logger)
# ---- Example #2 (scrape-separator artifact; commented out so the file parses) ----
from numpy.ma import masked_invalid as valid
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import igraph  # pip3 install cairocffi
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from finds.pyR import PyR    
from finds.busday import BusDay
from finds.database import SQL, Redis
from finds.structured import CRSP, PSTAT
from finds.sectors import Sectoring, BEA
from finds.graph import igraph_draw
from settings import settings
# --- Connections and parameters for the BEA input-output sector analysis ---
ECHO = True   # echo SQL statements for debugging
sql = SQL(**settings['sql'])       # main SQL connection
bd = BusDay(sql)                   # business-day calendar helper
rdb = Redis(**settings['redis'])   # Redis connection (used as cache by BEA reader)
crsp = CRSP(sql, bd, rdb)          # CRSP stock data accessor
pstat = PSTAT(sql, bd)             # Compustat accessor
bea = BEA(rdb, **settings['bea'], echo=ECHO)   # BEA website reader
logdir = None # os.path.join(settings['images'], 'bea')
years = np.arange(1947, 2020)   # NOTE(review): `np` is imported further down in this file -- confirm import order
vintages = [1997, 1963, 1947]   # when sectoring schemes were revised
# Read IOUse tables from the BEA website, one per (vintage, year) pair
ioUses = {}
for vintage in vintages:
    for year in years:
        if year < vintage:
            continue  # a sectoring vintage only applies from its start year
        df = bea.read_ioUse(year, vintage=vintage)
        ioUses[(vintage, year)] = df
import numpy as np
import pandas as pd
import time
import os
from pandas import DataFrame, Series
from matplotlib import colors
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from finds.database import SQL, Redis
from finds.structured import CRSP
from finds.busday import BusDay
from finds.taq import opentaq, itertaq, open_t, close_t, bin_trades, bin_quotes
from finds.display import plot_time, row_formatted
from finds.solve import weighted_average
from settings import settings
# --- Connections and parameters for TAQ microstructure analysis ---
sql = SQL(**settings['sql'])      # main read connection
user = SQL(**settings['user'])    # user scratch database
bday = BusDay(sql)                # business-day calendar
rdb = Redis(**settings['redis'])  # Redis cache
crsp = CRSP(sql, bday, rdb=rdb)   # CRSP accessor with Redis caching
logdir = os.path.join(settings['images'], 'micro')  # None
taqdir = os.path.join(settings['remote'], 'TAQ')    # location of TAQ sample files
_open = pd.to_datetime('1900-01-01T9:30')    # exclude <= 
_close = pd.to_datetime('1900-01-01T16:00')  # exclude >

# Loop through the sample TAQ data dates available from NYSE and collect info
shareclass = []   # accumulator: share-class records
daily = []        # accumulator: daily summary records

# per-bin accumulators, one empty dict per liquidity/activity measure
bins = {k:{} for k in ['effective', 'realized', 'impact', 'quoted', 'volume',
                       'offersize', 'bidsize', 'ret', 'retq', 'counts']}
import os
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import igraph  # pip3 install cairocffi
from igraph import Graph
from pandas.api import types
import numpy.ma as ma
from numpy.ma import masked_invalid as valid
from itertools import chain
from finds.graph import igraph_draw, igraph_info, igraph_path
from finds.graph import igraph_centrality, igraph_community
from finds.database import SQL
from settings import settings
sql = SQL(**settings['sql'])   # SQL connection for the customer-supplier table
logdir = os.path.join(settings['images'], 'supplychain')  # None

# Retrieve principal customers info
year = 2016   # sample year of customer-supplier disclosures
cust = sql.read_dataframe(
    f"select gvkey, cgvkey, stic, ctic, conm, cconm from customer"
    f" where srcdate >= {year}0101 and srcdate <= {year}1231")
    
# To lookup company full name from ticker: stack supplier (stic/conm) and
# customer (ctic/cconm) pairs, keeping the first occurrence of each name.
# BUGFIX: Series.append was removed in pandas 2.0 -- use pd.concat instead.
lookup = pd.concat([Series(cust['conm'].values, cust['stic'].values),
                    Series(cust['cconm'].values, cust['ctic'].values)])\
         .drop_duplicates()

# Construct Directed Graph: vertex set is the union of all tickers
vertices = np.array(list(set(cust['stic']).union(set(cust['ctic']))))
# ---- Example #5 (scrape-separator artifact; commented out so the file parses) ----
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import os
import time
from finds.database import SQL
from finds.busday import BusDay
from finds.structured import PSTAT, CRSP, Benchmarks
from finds.backtesting import EventStudy
from settings import settings

LAST_DATE = settings['crsp_date']   # most recent CRSP date available

ECHO = True   # echo SQL statements for debugging
sql = SQL(**settings['sql'], echo=ECHO)    # main read connection
user = SQL(**settings['user'], echo=ECHO)  # user scratch database
bd = BusDay(sql)                  # business-day calendar
keydev = PSTAT(sql, bd)           # key-developments events source
crsp = CRSP(sql, bd, rdb=None)    # CRSP accessor without Redis caching
bench = Benchmarks(sql, bd)       # benchmark returns for abnormal-return calc
eventstudy = EventStudy(user, bench, LAST_DATE)
outdir = os.path.join(settings['images'], 'events')

# event window parameters
end = 20201201
beg = 19890101  # 20020101
minobs = 250                   # minimum number of observations required
left, right, post = -1, 1, 21  # event window edges and post-event horizon

# str formatter to pretty print event and role description given their id's
        return _to_values(self.weeks['beg'].iloc[self.numwk(date) + weeks])

    def endwk(self, date, weeks=0):
        """Return ending business week date/s, offset by `weeks` whole weeks."""
        position = self.numwk(date) + weeks
        week_ends = self.weeks['end']
        return _to_values(week_ends.iloc[position])

    def ismonthend(self, date):
        """Whether date/s fall in the last complete week of any month."""
        flags = self.weeks['ismonthend']
        return _to_values(flags.iloc[self.numwk(date)])


if False:  # create custom busday trading dates
    # One-off setup: rebuilds the trading-date calendar tables in SQL.
    # Deliberately dead (guarded by `if False`); run manually when needed.
    from settings import settings
    from finds.database import SQL
    from finds.busday import BusDay
    sql = SQL(**settings['sql'], echo=True)
    busday = BusDay(sql, create=True)  # set create flag as True

if False:  # some unit tests
    # Ad-hoc smoke tests for the Weekly calendar; guarded by `if False`
    # so they never run on import -- execute manually when needed.
    from settings import settings
    from finds.database import SQL
    from finds.busday import Weekly
    sql = SQL(**settings['sql'], echo=True)
    wd = Weekly(sql, day=3, end=20201231)  # derive weekly trading calendar

    print(wd.numwk(20201230))
    print(wd.numwk(20210130))
    print(wd.numwk(20201231))
    print(wd.endwk([20201209, 20201219]))
    print(wd.endwk(20201209))
    print(wd.endmo([20201209, 20201219]))
"""
from settings import settings
import os
import glob
import time
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
from finds.database import SQL, Redis
from finds.busday import BusDay, Weekly
from finds.structured import PSTAT, CRSP, IBES, Benchmarks
from finds.structured import famafrench_sorts, chunk_signal
from finds.readers import fetch_FamaFrench
from finds.display import plot_date
sql = SQL(**settings['sql'], echo=True)    # main read connection
user = SQL(**settings['user'], echo=True)  # user scratch database
rdb = Redis(**settings['redis'])           # Redis cache
imgdir = os.path.join(settings['images'], 'monitor')   # output folder for figures

# Real-time updates
"""Monthly: update busdays and Fama-French research factors
bd = BusDay(sql, create=False)    # create=True to update busdays

bd = BusDay(sql)
bench = Benchmarks(sql, bd)
datasets = fetch_FamaFrench()
print("\n".join(f"[{i}] {d}" for i, d in enumerate(datasets)))
for name, item, suffix in datasets:
    df = fetch_FamaFrench(name=name, item=item, suffix=suffix,
                          index_formatter=bd.offset)
    for pathname in paths[1:]:
        df = pd.read_csv(os.path.join(pathname, 'prices.csv.gz'), sep='|')
        new = set(np.unique(df['ticker'])).difference(
            set(np.unique(prices['ticker'])))
        df = df[df['ticker'].isin(new)]
        prices = prices.append(df, sort=False)
        print(pathname, 'added prices', new)

        df = pd.read_csv(os.path.join(pathname, 'dividends.csv.gz'), sep='|')
        new = set(np.unique(df['ticker'])).difference(
            set(np.unique(dividends['ticker'])))
        df = df[df['ticker'].isin(new)]
        dividends = dividends.append(df, sort=False)
        print(pathname, 'added dividends', new)

    sql = SQL(**config.credentials['sql'], echo=config.ECHO)
    bd = BusDay(sql)
    crsp = CRSP(sql, bd, rdb=None)
    date = bd.offset(crsp_date)

    # get price and shrout as of last date
    price = crsp.get_section('daily', ['prc', 'shrout'],
                             'date',
                             date,
                             start=None)

    # get tickers to lookup permno
    tickers = crsp.get_section('names', ['tsymbol', 'date'],
                               'date',
                               date,
                               start=0).reindex(price.index)