Esempio n. 1
0
    def get_10k(self):
        """Return the local path to this ticker's most recent 10-K filing.

        If a filing is already cached under ``<cwd>/filings/<ticker>/10-k/``,
        the newest cached file (by modification time) is returned without
        hitting the network.  Otherwise up to ``self.NUM_10Ks`` filings are
        downloaded via secedgar and the newest downloaded file is returned.

        Returns:
            str or None: Path to the filing file, or None when the download
            fails (or produces no files).
        """

        def _newest_file(directory):
            # os.listdir order is arbitrary, so relying on the last entry
            # (as the original loop did) returned a random file.  Pick the
            # file that was actually written last instead.
            paths = [os.path.join(directory, name) for name in os.listdir(directory)]
            files = [p for p in paths if os.path.isfile(p)]
            if not files:
                return None
            return max(files, key=os.path.getmtime)

        cache_dir = os.path.join(os.getcwd(), 'filings', self.ticker, '10-k')
        if os.path.isdir(cache_dir):
            cached = _newest_file(cache_dir)
            if cached is not None:
                return cached

        try:
            base_dir = os.path.join(os.getcwd(), 'filings')
            my_filings = Filing(cik_lookup=self.ticker,
                                filing_type=FilingType.FILING_10K,
                                count=self.NUM_10Ks)
            my_filings.save(base_dir)
            print(self.ticker + " 10k downloaded")
            return _newest_file(os.path.join(base_dir, self.ticker, '10-k'))

        except OSError as err:
            print("OS error: {0}".format(err))
            print('Unable to download ' + self.ticker + ' 10k!')
            return None
Esempio n. 2
0
def filing(ctx, lookups, ftype, start, end, count, directory):
    r"""Click command for downloading filings. Run ``secedgar filing --help`` for info.

    \f

    Args:
        ctx (click.core.Context): Click context (supplies the user agent).
        lookups (str): Companies/tickers whose filings should be fetched.
        ftype (str): Name of a :class:`FilingType` enum member.
        start (str): Earliest filing date, ``YYYYMMDD``; defaults to the
            first available filing when omitted.
        end (str): Latest filing date, ``YYYYMMDD``; defaults to today.
        count (int): How many filings to save per ticker/company.
        directory (str): Destination directory; defaults to the current
            working directory.

    Returns:
        None
    """
    # Translate the raw string into a FilingType member; anything that is
    # not a valid member name is surfaced as a FilingTypeError.
    try:
        filing_type = FilingType[ftype]
    except KeyError:
        raise FilingTypeError()

    Filing(cik_lookup=lookups,
           filing_type=filing_type,
           start_date=date_cleanup(start),
           end_date=date_cleanup(end),
           count=count,
           user_agent=ctx.obj['user_agent']).save(directory=directory)
Esempio n. 3
0
def get_most_recent_10_K(firms, fp):
    """Download the single most recent 10-K for each firm into *fp*."""
    for company in firms:
        latest_10k = Filing(cik_lookup=company,
                            filing_type=FilingType.FILING_10K,
                            count=1)
        latest_10k.save(fp)
Esempio n. 4
0
def filing(lookups, ftype, start, end, count, directory):
    """Click command for downloading filings. Run ``secedgar filing --help`` for info."""
    # Map the user-supplied string onto the FilingType enum, rejecting
    # anything that is not a valid member name.
    try:
        requested_type = FilingType[ftype]
    except KeyError:
        raise FilingTypeError()

    Filing(cik_lookup=lookups,
           filing_type=requested_type,
           start_date=date_cleanup(start),
           end_date=date_cleanup(end),
           count=count).save(directory=directory)
Esempio n. 5
0
def run(df):
    """Download the latest 10-K for every company listed in *df*.

    Args:
        df (pandas.DataFrame): Must contain ``CIK`` and ``Name`` columns.

    Filings are saved under ``./filings/``; companies for which secedgar
    raises ``ValueError`` (no filings found) are reported and skipped.
    """
    cik = list(df['CIK'])
    names = list(df['Name'])
    for c, n in zip(cik, names):
        # Bug fix: ``temp`` used to be assigned only inside the length
        # check, so a 10-digit CIK reused the previous iteration's value
        # (and the very first iteration could raise NameError).  zfill
        # pads unconditionally and is a no-op on full-length CIKs.
        temp = str(c).zfill(10)
        if len(str(c)) < 10:
            print("SCRAPING {} ...".format(temp))
        my_filings = Filing(cik=temp, filing_type=FilingType.FILING_10K
                            )  # 10-Q filings for Apple (NYSE: AAPL)
        try:
            my_filings.save(
                './filings/'
            )  # Saves last 15 10Q reports from AAPL to ~/path/to/dir
        except ValueError:
            print("No {}".format(n))
Esempio n. 6
0
 def test_filing_raises_warning_when_less_filings_than_count(self,
                                                             recwarn,
                                                             count,
                                                             raises_error,
                                                             tmp_data_directory,
                                                             mock_cik_validator_get_single_cik,
                                                             mock_single_cik_filing_limited_responses):  # noqa:E501
     # Saving fewer filings than requested should emit a UserWarning.
     filings = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
                      filing_type=FilingType.FILING_10Q,
                      count=count,
                      client=NetworkClient(batch_size=10))
     filings.save(tmp_data_directory)
     if raises_error:
         warning = recwarn.pop(UserWarning)
         assert issubclass(warning.category, UserWarning)
         return
     # No warning expected: popping must fail with an AssertionError.
     try:
         recwarn.pop(UserWarning)
     except AssertionError:
         pass
     else:
         pytest.fail("Expected no UserWarning, but received one.")
Esempio n. 7
0
def download_filing_4(symbol, data_filings_path,
                      start_date=datetime(2019, 7, 1),
                      end_date=datetime(2020, 6, 30)):
    """Fetch SEC Form 4 filings for *symbol* into *data_filings_path*.

    The download is skipped when a folder for the symbol already exists
    under *data_filings_path*.

    TODO: check whether existing files get overwritten; persist metadata
    about date ranges already fetched; create per-symbol / per-filing
    folders.
    """
    ticker = symbol.lower()
    symbol_path = os.path.join(data_filings_path, ticker)
    if os.path.exists(symbol_path):
        print("Already downloaded")
        return
    form4 = Filing(cik_lookup=ticker,
                   filing_type=FilingType.FILING_4,
                   start_date=start_date,
                   end_date=end_date)
    form4.save(data_filings_path)
Esempio n. 8
0
 def test_filing_raises_warning_when_less_filings_than_count(
         self, monkeypatch, recwarn, count, raises_error,
         tmp_data_directory):
     # Patch out network access: CIK resolution and EDGAR responses are
     # replaced with canned mocks limited to 10 filings.
     monkeypatch.setattr(_CIKValidator, "get_ciks",
                         MockCIKValidatorGetCIKs.get_ciks)
     monkeypatch.setattr(NetworkClient, "get_response",
                         MockSingleCIKFilingLimitedResponses(10))
     filings = Filing(cik_lookup=['aapl', 'msft', 'amzn'],
                      filing_type=FilingType.FILING_10Q,
                      count=count,
                      client=NetworkClient(batch_size=10))
     filings.save(tmp_data_directory)
     if raises_error:
         warning = recwarn.pop(UserWarning)
         assert issubclass(warning.category, UserWarning)
         return
     # No warning expected: popping must fail with an AssertionError.
     try:
         recwarn.pop(UserWarning)
     except AssertionError:
         pass
     else:
         pytest.fail("Expected no UserWarning, but received one.")
Esempio n. 9
0
 def test_save_no_filings_raises_error(self, tmp_data_directory,
                                       monkeypatch, no_urls):
     # With no URLs to fetch, save() must raise ValueError.
     monkeypatch.setattr(Filing, "get_urls", lambda _self: no_urls)
     filing = Filing(cik_lookup='aapl', filing_type=FilingType.FILING_10K)
     with pytest.raises(ValueError):
         filing.save(tmp_data_directory)
Esempio n. 10
0
 def test_filing_save_multiple_ciks(self, tmp_data_directory,
                                    mock_cik_validator_get_multiple_ciks,
                                    mock_single_cik_filing,
                                    mock_filing_response):
     # Saving should succeed end-to-end for a multi-ticker lookup.
     filings = Filing(["aapl", "amzn", "msft"], FilingType.FILING_10Q, count=3)
     filings.save(tmp_data_directory)
nest_asyncio.apply()

# In[19]:

# 10-K filings for GameStop (ticker "gme"); the huge count simply asks
# for every filing available.  (The duplicate secedgar import and the
# stale "10Q filings for Apple" comment were removed.)
from secedgar.filings import Filing, FilingType

my_filings = Filing(cik_lookup=['gme'],
                    filing_type=FilingType.FILING_10K,
                    count=1326380,
                    user_agent='deeptendies')

my_filings.save('filings')

# # Parse Data

# In[1]:

import glob
from pathlib import Path

from secedgar.parser import MetaParser

out_dir = 'parsed_filings'
Path(out_dir).mkdir(parents=True, exist_ok=True)

# List every downloaded filing document under filings/<ticker>/<type>/.
directory = 'filings'
for filepath in glob.iglob(directory + '/*/*/*.txt'):
    print(filepath)
Esempio n. 12
0
from secedgar.filings import Filing, FilingType
import os
from tqdm import tqdm

# Read one ticker symbol per line.  Using ``strip()`` instead of slicing
# off the last character also handles a final line without a trailing
# newline (the old ``ticker[:-1]`` silently truncated that ticker), and
# the ``with`` block guarantees the file is closed.
with open('tickers.txt', 'r') as ticker_file:
    tickers = [line.strip() for line in ticker_file if line.strip()]

for ticker in tqdm(tickers):
    try:
        file_dir = os.getcwd() + '/filings/'
        my_filings = Filing(cik_lookup=ticker,
                            filing_type=FilingType.FILING_10K,
                            count=1)
        my_filings.save(file_dir)
        print(ticker + " 10k downloaded")

    except OSError as err:
        print("OS error: {0}".format(err))
        print('Unable to download ' + ticker + ' 10k!')
Esempio n. 13
0
from secedgar.utils import get_cik_map
from secedgar.filings import Filing, FilingType

# Show a small sample of the ticker -> CIK mapping.
print(list(get_cik_map().items())[:5])

# Fetch the single most recent 10-Q for Apple.
my_filings = Filing(cik_lookup='aapl',
                    filing_type=FilingType.FILING_10Q,
                    count=1)
my_filings.save('/home/sroot/kaizha/temp')
Esempio n. 14
0
from secedgar.filings import Filing, FilingType

# Fetch the fifteen most recent 10-Q filings for Apple (ticker "aapl")
# and store them under ./data.
my_filings = Filing(cik_lookup='aapl',
                    filing_type=FilingType.FILING_10Q,
                    count=15)
my_filings.save('./data')
    def __get_data(self, cik, filing_type, data_set):
        """Collect covenant-violation data from one company's SEC filings.

        Downloads (or reuses a cached copy of) the company's filings,
        scans every file for covenant violations, and builds one result
        row per qualifying filing (10-K/10-Q from 2007 onwards).

        Args:
            cik: Company CIK identifier.
            filing_type (FilingType): Filing type to fetch.
            data_set: Label recorded in the ``dataset`` column.

        Returns:
            tuple: ``(rows, word_counts)`` where ``rows`` is a
            pandas.DataFrame with one row per analyzed filing and
            ``word_counts`` maps words to their aggregate counts across
            all analyzed filings.
        """
        result = pd.DataFrame()
        filing_word_count = dict()
        my_filings = Filing(cik=str(cik), filing_type=filing_type)
        path = f'../data/company_filings/{cik}_{filing_type.value}/'
        if not os.path.exists(path):
            try:
                print(
                    f'Fetching data for cik={cik}, filing_type={filing_type}')
                my_filings.save(path)
            except Exception:
                # Download failed: remove any partial cache so the next
                # run retries from scratch.  (Was a bare ``except:``,
                # which also swallowed KeyboardInterrupt/SystemExit.)
                try:
                    if os.path.exists(path):
                        shutil.rmtree(path)
                except OSError as e:
                    print("Error: %s : %s" % (path, e.strerror))
        else:
            print(f'Skipping data fetching. Using cache at {path}')
        for subdir, dirs, files in os.walk(path):
            for file in files:
                file_metadata = self.__get_file_metadata(f'{subdir}/{file}')
                # Attach the source URL whose basename matches this file.
                for url in my_filings.get_urls():
                    if url.rsplit('/')[-1].strip() == file:
                        file_metadata['url'] = url
                        break
                assert len(
                    file_metadata
                ) == 8, "Could not get all relevant metadata: %r" % file_metadata
                # Only 10-K/10-Q filings from 2007 onwards are analyzed.
                if file_metadata['year'] < 2007 or \
                        (file_metadata['form_type'] != '10-K' and file_metadata['form_type'] != '10-Q'):
                    print(
                        f'Skipping file. year={file_metadata["year"]} form_type={file_metadata["form_type"]}'
                    )
                    continue
                violations_in_file, local_word_count = self.__get_violations_for_file(
                    f'{subdir}/{file}')

                file_info = {
                    'cik': cik,
                    'firm name': file_metadata['company_name'],
                    'firm address': file_metadata['address'],
                    'zip code': str(file_metadata['zip']),
                    'year': file_metadata['year'],
                    # Quarter only makes sense for 10-Q filings.
                    'quarter': (file_metadata['quarter']
                                if filing_type is FilingType.FILING_10Q else None),
                    'url': file_metadata['url'],
                    'filing type': filing_type.value,
                    'dataset': data_set,
                    'has covenant violation': 0 if violations_in_file == 0 else 1,
                    'total violations': violations_in_file
                }
                # DataFrame.append was removed in pandas 2.0; concat is
                # the supported equivalent.
                result = pd.concat([result, pd.DataFrame(file_info, index=[0])])
                # Merge this filing's word counts into the running total.
                for word in local_word_count:
                    filing_word_count[word] = (filing_word_count.get(word, 0)
                                               + local_word_count[word])
        return result, filing_word_count
Esempio n. 16
0
 def test_filing_simple_example(self, tmp_data_directory):
     # Smoke test: a basic 10-Q download for IBM should not raise.
     ibm_filings = Filing(cik_lookup='IBM', filing_type=FilingType.FILING_10Q)
     ibm_filings.save(tmp_data_directory)
Esempio n. 17
0
# https://github.com/coyo8/sec-edgar
# pip install secedgar
from secedgar.filings import CIKLookup, Filing, FilingType

# Apple's CIK, plus a lookup object that resolves several companies at once.
lookup = '0000320193'
lookups = CIKLookup(['aapl', 'msft', 'Facebook'])

# Download Apple's 10-Q filings into ./tempdir.
my_filings = Filing(cik_lookup=lookup, filing_type=FilingType.FILING_10Q)
my_filings.save('tempdir')
Esempio n. 18
0
 def test_filing_save_single_cik(self, tmp_data_directory,
                                 mock_cik_validator_get_single_cik,
                                 mock_single_cik_filing):
     # Saving should succeed end-to-end for a single-ticker lookup.
     filing = Filing('aapl', FilingType.FILING_10Q, count=3)
     filing.save(tmp_data_directory)
Esempio n. 19
0
 def test_filing_save_multiple_ciks(self, tmp_data_directory,
                                    mock_cik_validator_get_multiple_ciks,
                                    mock_single_cik_filing):
     # Saving should succeed end-to-end for a multi-ticker lookup.
     filings = Filing(['aapl', 'amzn', 'msft'], FilingType.FILING_10Q, count=3)
     filings.save(tmp_data_directory)
Esempio n. 20
0
 def test_filing_save_multiple_ciks(self, tmp_data_directory, monkeypatch):
     # Patch out network access before saving a multi-ticker lookup.
     monkeypatch.setattr(_CIKValidator, "get_ciks",
                         MockCIKValidatorMultipleCIKs.get_ciks)
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     filings = Filing(['aapl', 'amzn', 'msft'], FilingType.FILING_10Q, count=3)
     filings.save(tmp_data_directory)
Esempio n. 21
0
def get_mda(id):
    """Extract MD&A sections from 10-K filings for one batch of companies.

    Processes the 14 companies at positions ``id*14 .. (id+1)*14 - 1`` of
    ``df_company.pkl``: downloads each company's 10-K filings (2010-2020)
    into ``Corpus_10k/``, pulls the Item 7 (Management's Discussion and
    Analysis) text out of every filing, pickles the collected sections to
    ``Corpus_mda/<gvkey>_mda.pkl``, and deletes the downloaded filings.

    Args:
        id (int): Zero-based batch index; each batch covers 14 companies.
            (Shadows the ``id`` builtin, kept for interface compatibility.)

    Returns:
        int: Always 0.
    """
    def normalize_text(text):
        """Normalize Text
        """
        text = unicodedata.normalize("NFKD", text)  # Normalize
        text = '\n'.join(text.splitlines())  # Unicode break lines

        # Convert to upper
        text = text.upper()  # Convert to upper

        # Take care of breaklines & whitespaces combinations due to beautifulsoup parsing
        text = re.sub(r'[ ]+\n', '\n', text)
        text = re.sub(r'\n[ ]+', '\n', text)
        text = re.sub(r'\n+', '\n', text)

        # To find MDA section, reformat item headers
        text = text.replace('\n.\n', '.\n')  # Move Period to beginning

        text = text.replace('\nI\nTEM', '\nITEM')
        text = text.replace('\nITEM\n', '\nITEM ')
        text = text.replace('\nITEM  ', '\nITEM ')

        text = text.replace(':\n', '.\n')

        # Math symbols for clearer looks
        text = text.replace('$\n', '$')
        text = text.replace('\n%', '%')

        # Reformat
        text = text.replace('\n', '\n\n')  # Reformat by additional breakline

        return text

    def find_mda_from_text(text, start=0):
        """Find MDA (Management Discussion and Analysis) section from normalized text

        Args:
            text (str): Normalized filing text.
            start (int): Offset to search from; pass the previous match's
                ``end`` to retry past a table-of-contents hit.

        Returns:
            tuple: (mda text or "", end offset of the match or 0).
        """
        debug = False

        mda = ""
        end = 0

        # Define start & end signal for parsing
        item7_begins = [
            '\nITEM 7.', '\nITEM 7 –', '\nITEM 7:', '\nITEM 7 ', '\nITEM 7\n'
        ]
        item7_ends = ['\nITEM 7A']
        if start != 0:
            item7_ends.append('\nITEM 7')  # Case: ITEM 7A does not exist
        item8_begins = ['\nITEM 8']
        """
        Parsing code section
        """
        text = text[start:]

        # Get begin
        for item7 in item7_begins:
            begin = text.find(item7)
            if debug:
                print(item7, begin)
            if begin != -1:
                break

        if begin != -1:  # Begin found
            for item7A in item7_ends:
                end = text.find(item7A, begin + 1)
                if debug:
                    print(item7A, end)
                if end != -1:
                    break

            if end == -1:  # ITEM 7A does not exist
                for item8 in item8_begins:
                    end = text.find(item8, begin + 1)
                    if debug:
                        print(item8, end)
                    if end != -1:
                        break

            # Get MDA
            if end > begin:
                mda = text[begin:end].strip()
            else:
                end = 0

        return mda, end

    df_names = pd.read_pickle('df_company.pkl')

    gvkeys = df_names['gvkey'].values[id * 14:(id + 1) * 14]
    names = df_names['CoName'].values[id * 14:(id + 1) * 14]

    for j in range(len(gvkeys)):
        my_filings = Filing(cik_lookup=names[j],
                            filing_type=FilingType.FILING_10K,
                            start_date=dt.datetime(2010, 1, 1),
                            end_date=dt.datetime(2020, 12, 31))

        try:
            my_filings.save('Corpus_10k')
        except Exception:
            # Download failed for this company; skip it.  (Was a bare
            # ``except:``, which also swallowed KeyboardInterrupt.)
            continue

        company = names[j]

        files = glob.glob('Corpus_10k/' + company + '/10-k/*')
        files.sort()

        df = pd.DataFrame()
        for i in files:
            print(i)
            try:
                with open(i) as f:
                    content = f.read()
            except Exception:
                # Unreadable or undecodable file; skip.  (Was a bare except.)
                continue
            try:
                soup = bs4.BeautifulSoup(content, "html.parser")
            except Exception:
                # Parser failure; skip.  (Was a bare except.)
                continue
            text = soup.get_text("\n")
            text = normalize_text(text)
            mda, end = find_mda_from_text(text)
            # A very short match is likely a table-of-contents hit; retry
            # from just past the first match.
            if mda and len(mda.encode('utf-8')) < 1000:
                mda, _ = find_mda_from_text(text, start=end)
            if len(mda.encode('utf-8')) < 1000:
                continue
            # DataFrame.append was removed in pandas 2.0; concat is the
            # supported equivalent.
            df = pd.concat([
                df,
                pd.DataFrame({
                    'company': [company],
                    'filename': [i],
                    'mda': [mda]
                })
            ])

        df.to_pickle('Corpus_mda/' + str(gvkeys[j]) + '_mda.pkl')
        shutil.rmtree('Corpus_10k/' + company)

    return 0
Esempio n. 22
0
 def test_filing_save_single_cik(self, tmp_data_directory, monkeypatch):
     filing = Filing('aapl', FilingType.FILING_10Q, count=3)
     # Stub out the network-dependent pieces after construction, exactly
     # as the original test did.
     monkeypatch.setattr(_CIKValidator, "get_ciks",
                         MockCIKValidatorGetCIKs.get_ciks)
     monkeypatch.setattr(NetworkClient, "get_response", MockSingleCIKFiling)
     filing.save(tmp_data_directory)
Esempio n. 23
0
from secedgar.filings import Filing, FilingType
from secedgar.utils import get_cik_map
import pandas as pd
import os

# Raw string so the backslashes in the Windows path are taken literally:
# "\s" and "\F" are invalid escape sequences that only happened to pass
# through unchanged, and they raise warnings on modern Python.
path = r'E:\stockdata3\Filings'

# Download the ten most recent Microsoft 10-K filings.
filing = Filing(cik_lookup='msft', filing_type=FilingType.FILING_10K, count=10)
filing.save(path)