예제 #1
0
def handleMissingEdgarIndex(edgar_addresses_dirpath, start_year=1993):
    """Download the EDGAR index files into ``edgar_addresses_dirpath``.

    The target directory is created first when it does not exist yet,
    including any missing parent directories.

    Args:
        edgar_addresses_dirpath: Directory the index files are written to.
        start_year: Year handed to ``edgar.download_index`` as its second
            argument. Defaults to 1993.
    """
    print("Downloading EDGAR address tables to \"" + edgar_addresses_dirpath +
          "\"")
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also
    # creates missing parents and does not fail when the directory appears
    # between the check and the creation.
    os.makedirs(edgar_addresses_dirpath, exist_ok=True)
    # Function-scope import: `edgar` is only loaded when this function runs.
    import edgar
    edgar.download_index(edgar_addresses_dirpath, start_year)
예제 #2
0
def _download(year: int):
    """Download the large quarterly .tsv index files and list the data folder.

    Files are written into the module-level ``data_path`` directory, which
    is created when missing. The download is requested with
    ``skip_all_present_except_last=False``.

    Args:
        year: Year handed to ``edgar.download_index`` as its second argument.

    Returns:
        The entries of ``data_path`` as returned by ``os.listdir``.
    """
    # TODO: check data_path for existing files so that only the most
    # up-to-date files are downloaded.
    # exist_ok=True avoids the check-then-create race of exists()/makedirs().
    os.makedirs(data_path, exist_ok=True)
    # Announce the download before it starts rather than after it finished.
    print("Downloading financial data")
    edgar.download_index(data_path, year, skip_all_present_except_last=False)
    return os.listdir(data_path)
예제 #3
0
 def GetEdgarIndex(self):
     """Download the EDGAR index into the user-expanded ``self.edgar_dir``.

     ``self.start_year`` is passed as the year argument. Any exception
     raised by the download is printed instead of propagated.
     """
     target_dir = os.path.expanduser(self.edgar_dir)
     first_year = self.start_year  # TODO: validate the data type
     try:
         edgar.download_index(target_dir, first_year)
     except Exception as exc:
         print('Exception:', str(exc))
예제 #4
0
 def _download_indexes(self, since_year: float) -> List[str]:
     """Download the Edgar company indexes and list what landed in ``self.tmp_dir``.

     Only files directly inside ``self.tmp_dir`` are collected;
     sub-directories are not descended into.

     Returns:
         The sorted paths (``self.tmp_dir`` joined with each file name).
     """
     edgar.download_index(self.tmp_dir, since_year=since_year)
     # The first os.walk() entry describes the top level; fall back to an
     # empty file list when the walk yields nothing at all.
     _, _, top_level_names = next(os.walk(self.tmp_dir), (None, None, []))
     collected = sorted(
         os.path.join(self.tmp_dir, name) for name in top_level_names
     )
     self.logg.debug(f'Collected {len(collected)} files...')
     return collected
예제 #5
0
    def test_edgar(self):
        """Download the 2019 index into a temp dir and check the first QTR1 row."""
        with tempfile.TemporaryDirectory() as workdir:
            print("created temporary directory", workdir)
            edgar.download_index(workdir, 2019)
            index_path = f"{workdir}/2019-QTR1.tsv"

            with open(index_path, "r", encoding="utf-8") as index_file:
                leading_row = index_file.readline()

            # Compared after the file is closed; the row includes its newline.
            self.assertEqual(
                leading_row,
                "1000045|NICHOLAS FINANCIAL INC|10-Q|2019-02-14|edgar/data/1000045/0001193125-19-039489.txt|edgar/data/1000045/0001193125-19-039489-index.html\n",
            )
예제 #6
0
    def get_index(self, since_year):
        """Retrieve the complete index of filings from the EDGAR SEC archives.

        The files are saved under ``self.path``. The download is requested
        with ``skip_all_present_except_last=False``.

        Parameters
        ----------
        since_year : int
            The year, as int, since when the filings should be loaded
            (lowest is 1993)
        """
        destination = self.path
        edgar.download_index(
            destination, since_year, skip_all_present_except_last=False
        )
예제 #7
0
def main(argv):
    """Parse the command-line options, load the config and download the index.

    ``-h`` calls ``info()`` and exits; ``-c``/``--config_path`` sets the
    path handed to ``Config``. An option error calls ``info()`` and exits
    with status 2.

    Args:
        argv: Argument list passed straight to ``getopt.getopt``.
    """
    try:
        parsed_options, _ = getopt.getopt(argv, 'hc:', ['config_path='])
    except getopt.GetoptError:
        info()
        sys.exit(2)

    # Stays empty unless -c/--config_path is supplied.
    config_path = ''
    for flag, value in parsed_options:
        if flag == '-h':
            info()
            sys.exit()
        elif flag in ('-c', '--config_path'):
            config_path = value

    settings = Config(config_path)
    edgar.download_index(settings.master_path, settings.since_year)
예제 #8
0
    # The script relies on Python 3; stop early on older interpreters.
    if sys.version_info[0] < 3:
        raise Exception("Must be using Python 3")

    parser = ArgumentParser()
    parser.add_argument(
        "-y",
        "--from-year",
        type=int,
        dest="year",
        help="The year from which to start downloading "
        "the filing index. Default to current year",
        default=datetime.date.today().year,
    )

    # default=None instead of tempfile.mkdtemp(): the temporary directory is
    # only created when no --directory was given, so passing -d no longer
    # leaves an unused temp directory behind.
    parser.add_argument(
        "-d",
        "--directory",
        dest="directory",
        # Trailing space added: the help used to render as "willbe".
        help="A directory where the filing index files will "
        "be downloaded to. Default to a temporary directory",
        default=None,
    )

    args = parser.parse_args()
    if args.directory is None:
        args.directory = tempfile.mkdtemp()

    logger.debug("downloads will be saved to %s", args.directory)

    edgar.download_index(args.directory, args.year)
    logger.info("Files downloaded in %s", args.directory)
예제 #9
0
def main():
    """Download the EDGAR index files into ``./data/index``.

    2015 is passed as the year and ``skip_all_present_except_last`` is
    passed as ``False``.
    """
    import edgar

    index_dir = "./data/index"
    first_year = 2015
    edgar.download_index(index_dir, first_year,
                         skip_all_present_except_last=False)
# Install the edgar package first if it is not already installed, using "pip install edgar"
import edgar

# Download the index files into the hard-coded local folder below, passing 2018 as the year.
edgar.download_index('C:\\Users\\Jayashree RAMAN\\Documents\\DellDocs\\Capstone\\EDGAR_Downloads', 2018)
## Import required packages
## Install the packages first if required
import edgar
import pandas as pd
import requests
import re
import matplotlib.pyplot as plt

## Download the index files, which list the companies together with the urls
## of their filings, into the index_files folder.
## The files carry a .tsv extension but are read below as '|'-separated.
edgar.download_index("./index_files/", 2018)

## Convert the first-quarter file to a dataframe (the file has no header row)
edgar_directories = pd.read_csv('./index_files/2018-QTR1.tsv',
                                sep='|',
                                header=None)

## Select only the 10-K and 10-Q filings (column 2 holds the filing type)
edgar_10q_10k = edgar_directories[edgar_directories[2].isin(['10-Q', '10-K'])]

## Write the filtered filings list to a csv file for future use
filtered_csv_path = 'EDGAR_10K_10Q.csv'
edgar_10q_10k.to_csv(filtered_csv_path)

## Read the csv file back into a dataframe.
## The same path is used for writing and reading: the file used to be written
## to the working directory but looked up under ./index_files/, so the read
## could not find the file that had just been written.
Edgar_10K_DF = pd.read_csv(filtered_csv_path)

## Rename the column headers
Edgar_10K_DF.columns = [
    'S.No', 'CompanyID', 'CompanyName', 'TypeOfFiling', 'DateFiled', 'TextURL',
    'HTMLURL'
예제 #12
0
def purge_tsv_files():
    """Clean up the SEC tsv files once the relevant information is extracted.

    Removes every name listed in the module-level ``tsv_files`` from
    ``directory_path`` (plain string concatenation, so ``directory_path``
    must already end with a path separator) and prints one confirmation
    line per deleted file.
    """
    for tsv_name in tsv_files:
        target = directory_path + tsv_name
        os.remove(target)
        print(f'{tsv_name} deleted')


# Download the list of all company files, including the location of those
# files, unless the first-quarter file for the year is already on disk.
if not os.path.exists(f'{directory_path}{year}-QTR1.tsv'):
    edgar.download_index(directory_path,
                         year,
                         skip_all_present_except_last=False)
    print('Files downloaded successfully')
else:
    print('No new files downloaded')

# Both outcomes continue the same way: list the folder, then format the names.
tsv_files = os.listdir(directory_path)
tsv_name_formatter()

# This loop takes all the tsv files that were downloaded for the year and
# compiles the different financial reports; each entry of tsv_file_names is
# loaded into a dataframe.
# NOTE(review): the folder is hard-coded here instead of using
# directory_path, which the download step uses - confirm they are the same.
for file in tsv_file_names:
    # '.tsv' is appended to each name, so the names carry no extension here.
    csv = pd.read_csv('/home/user/Documents/python_webscraper/' + file +
                      '.tsv',
                      sep='\t',
                      lineterminator='\n',
                      names=None)

    # Relabel the first column as 'Item' by writing into the array returned
    # by columns.values.
    csv.columns.values[0] = 'Item'
예제 #13
0
def download_files(path):
    """Download the EDGAR index files into ``path``.

    2019 is passed as the year and ``skip_all_present_except_last`` is
    passed as ``False``.

    Returns:
        Whatever ``edgar.download_index`` returns.
    """
    outcome = edgar.download_index(
        path,
        2019,
        skip_all_present_except_last=False,
    )
    return outcome
예제 #14
0
def get_indices(since_year_arg):
  """Download the EDGAR index files into ``./indices/``.

  The directory is created when it does not exist yet.

  Args:
    since_year_arg: Year handed to ``edgar.download_index`` as its second
      argument.
  """
  download_directory = "./indices/"
  # exist_ok=True replaces the exists()/makedirs() pair and cannot fail when
  # the directory appears between the check and the creation.
  os.makedirs(download_directory, exist_ok=True)
  edgar.download_index(download_directory, since_year_arg)
예제 #15
0
    # The user agent has no default; its absence is rejected after parsing.
    parser.add_argument(
        "-ua",
        "--user-agent",
        dest="ua",
        help=(
            "The User Agent to set. This must be set properly "
            "else the SEC may temporarily ban you. See https://www.sec.gov/os/accessing-edgar-data"
        ),
    )

    # Boolean switch: False unless the flag is present on the command line.
    parser.add_argument(
        "-s",
        "--skip-all-present-except-last",
        action="store_true",
        dest="skip",
        help=(
            "Specify this flag to skip downloading filing index"
            " files that are already present. Only the most recent"
            " file is downloaded. If not specified all files are"
            " downloaded again."
        ),
    )

    args = parser.parse_args()

    user_agent = args.ua
    if user_agent is None:
        logger.error("A user agent is required. See https://www.sec.gov/os/accessing-edgar-data")
        sys.exit(1)

    target_dir = args.directory
    logger.debug("downloads will be saved to %s" % target_dir)

    edgar.download_index(target_dir, args.year, user_agent, args.skip)
    logger.info("Files downloaded in %s" % target_dir)
예제 #16
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: yxy
"""

import edgar

# Download the filing index starting from 1994
edgar.download_index("/Users/yxy/Downloads/secfilings", 1994)
예제 #17
0
import argparse
import edgar
import pdfkit

def update_index():
  """Download the EDGAR index through ``edgar.download_index``.

  NOTE(review): ``dir`` and ``since_year`` are neither parameters nor locals
  of this function, so they are looked up as globals when it is called
  (``dir`` falls back to the builtin of that name when the module defines
  none). Confirm where the target directory and the year should come from.
  """
  edgar.download_index(dir, since_year)



def __main__():
  """Build the command-line parser with its sub-commands and parse the arguments.

  Defines a global ``--config`` option plus the ``download`` and ``search``
  sub-commands; ``download`` takes a positional ``cik``.

  NOTE(review): this function looks unfinished - see the notes inline.
  """
  parser = argparse.ArgumentParser(description='Interact with EDGAR https://www.sec.gov/edgar.shtml')
  parser.add_argument('--config', help='Path to an edgarcli config file')

  # The chosen sub-command name is stored on the parsed args as `subcommand`.
  subparsers = parser.add_subparsers(dest='subcommand')
  download_parser = subparsers.add_parser('download', help='download filings from EDGAR')
  # NOTE(review): search_parser gets no arguments and is not used again here.
  search_parser = subparsers.add_parser('search', help='search stuff from EDGAR')

  download_parser.add_argument('cik', help='CIK number')
  # NOTE(review): add_argument() is called without a name or flags - looks
  # like a placeholder for an argument that still has to be defined.
  download_parser.add_argument()

  args = parser.parse_args()
  # NOTE(review): no add_argument call in this function defines `accumulate`
  # or `integers`; presumably left over from an argparse example - verify.
  print(args.accumulate(args.integers))
예제 #18
0
# Format records as "<timestamp> - <level> - <message>" on the `ch` handler.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
# Attach the handler to `logger` so its records go through the format above.
logger.addHandler(ch)


# Command-line entry point: parse the start year and the target directory,
# then download the filing index files.
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "-y",
        "--from-year",
        type=int,
        dest="year",
        help='The year from which to start downloading '
             'the filing index. Default to current year',
        default=datetime.date.today().year)

    # default=None instead of tempfile.mkdtemp(): the temporary directory is
    # only created when no --directory was given, so passing -d no longer
    # leaves an unused temp directory behind.
    parser.add_argument(
        "-d",
        "--directory",
        dest="directory",
        # Trailing space added: the help used to render as "willbe".
        help='A directory where the filing index files will '
             'be downloaded to. Default to a temporary directory',
        default=None)

    args = parser.parse_args()
    if args.directory is None:
        args.directory = tempfile.mkdtemp()

    logger.debug("downloads will be saved to %s", args.directory)

    edgar.download_index(args.directory, args.year)
    logger.info("Files downloaded in %s", args.directory)