예제 #1
0
    def read(self):
        """Score recent 8-K filings for a fixed list of companies.

        For every ``(name, CIK)`` pair the five most recent 8-K documents are
        fetched through the ``edgar`` package and each document is passed to
        ``phraseFreq``.  The first two entries of every ``phraseFreq`` result
        are summed per company, printed, and stored in ``self.companyMap``
        under the key returned by ``self.getName``.

        Returns:
            None.  Results are published through ``self.companyMap``.
        """
        companies = [('APPLE INC.', '0000320193'),
                     ('GOOGLE INC.', '0001288776')]

        for c in companies:
            company_name, cik = c
            name = self.getName(company_name)
            company = edgar.Company(company_name, cik)
            tree = company.getAllFilings(filingType='8-K')
            docs = edgar.getDocuments(tree, noOfDocuments=5)

            # Running totals of the two values phraseFreq reports per document.
            culm = [0, 0]
            for doc in docs:
                totals = phraseFreq(doc)
                culm[0] += totals[0]
                culm[1] += totals[1]

            print('SEC: ' + str(c) + ' Totals: ' + str(culm))
            # NOTE: a Buy/Hold/Sell label (sum of both totals >= 6 / <= -6)
            # used to be derived here but was never stored or printed, so it
            # was dropped as dead code.
            self.companyMap[name] = culm
예제 #2
0
def extract_docs(comp_name, cik):
    """Return the text of one recent 10-K document for a company.

    Up to three 10-K documents are requested from ``edgar``.  If the first
    one contains the "relies heavily on JavaScript" notice, the second
    document is returned instead; otherwise the first one is returned.

    Args:
        comp_name: Company name passed to ``edgar.Company``.
        cik: CIK identifier passed to ``edgar.Company``.

    Raises:
        IndexError: If the document the function wants to return is missing.
    """
    js_notice = "This application relies heavily on JavaScript, you"
    filings_tree = edgar.Company(comp_name, cik).getAllFilings(
        filingType="10-K")
    recent = edgar.getDocuments(filings_tree, noOfDocuments=3)
    return recent[1] if js_notice in recent[0] else recent[0]
예제 #3
0
def getUSFinanceData(cmd=None):
    """Download the latest 10-K XML documents for the selected US stocks.

    Bootstraps Django (settings module ``MainBoard.settings``), queries
    ``USStocks`` for listed rows whose security is ``'Oracle Corp.'`` and,
    for every enabled report type, saves each XML document returned by
    ``edgar.getXMLDocuments`` through ``saveFile`` into a dated folder.

    Args:
        cmd: Optional report-type key.  When truthy, only the report type
            whose key equals ``cmd`` is processed.
    """
    import sys
    import os
    import django
    import edgar

    # Django must be configured before the project models can be imported.
    getConfig()
    for extra_path in (django_path, main_path):
        sys.path.append(extra_path)
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "MainBoard.settings")
    django.setup()
    import detective_app.models as detective_db

    # Date part (YYYY-MM-DD) of the current timestamp; names the output folder.
    today = str(datetime.now())[:10]

    # Only the finance report (103) is enabled; 101 (snapshot) and
    # 104 (financeRatio) are the other known report keys.
    report_types = {103: 'financeReportUS'}

    stocks = detective_db.USStocks.objects.filter(security='Oracle Corp.',
                                                  listing='Y')

    for report_key, report_name in report_types.items():
        if cmd and report_key != cmd:
            continue

        out_dir = r'C:\Github\Waver\detective\reports\%s\%s' % (report_name,
                                                                 today)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        for stock in stocks:
            filings = edgar.Company(stock.security, stock.cik).getAllFilings(
                filingType="10-K")
            for xml in edgar.getXMLDocuments(filings, noOfDocuments=1):
                saveFile(out_dir, stock.cik,
                         stock.security.replace(' ', '_'), report_name, xml,
                         'w')
예제 #4
0
def get10KByNameAndCIK(companyName, CIKNumber, noOfDocuments=1):
    """
    @companyName: the name of the company
    @CIKNumber: the CIK number of the company (the caller must supply it)
    @noOfDocuments: how many 10-K documents to request
    @return: when ``edgar.getDocuments`` yields a list, a list with each
             document passed through ``clear_file``; otherwise the single
             result passed through ``clear_file``
    """
    filings = edgar.Company(companyName, CIKNumber).getAllFilings(
        filingType="10-K")
    fetched = edgar.getDocuments(filings, noOfDocuments=noOfDocuments)
    if not isinstance(fetched, list):
        return clear_file(fetched)
    return [clear_file(item) for item in fetched]
예제 #5
0
companies

ciks = df['CIK']
ciks

docs = df['DOC']
docs

# Column layouts of the two result frames: scored filings and filings whose
# sentiment came back as exactly (0, 0).
out_columns = ['DATE', 'CIK', 'DOC', 'POLARITY', 'SUBJECTIVITY']
null_columns = ['DATE', 'CIK', 'DOC']
dfout = pd.DataFrame(columns=out_columns)
dfnull = pd.DataFrame(columns=null_columns)
dfout
dfnull

# Rows are collected in plain lists and converted to DataFrames once after the
# loop: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
out_rows = []
null_rows = []

itlen = len(df.index)
for x in range(itlen):
    # `companies` and `dates` are expected to be defined earlier in the
    # notebook, indexable by the same labels as `ciks` and `docs`.
    company = edgar.Company(str(companies[x]), str(ciks[x]))
    tree = company.getAllFilings(filingType=str(docs[x]), priorTo=str(dates[x]))
    filings = edgar.getDocuments(tree, noOfDocuments=1)
    # TextBlob needs a single string, so the returned value is stringified.
    filingstr = str(filings)
    article = TextBlob(filingstr)
    articlepolarity = article.sentiment.polarity
    articlesubjectivity = article.sentiment.subjectivity
    if articlepolarity != 0 and articlesubjectivity != 0:
        out_rows.append({'DATE': dates[x], 'CIK': ciks[x], 'DOC': docs[x],
                         'POLARITY': articlepolarity,
                         'SUBJECTIVITY': articlesubjectivity})
    elif articlepolarity == 0 and articlesubjectivity == 0:
        null_rows.append({'DATE': dates[x], 'CIK': ciks[x], 'DOC': docs[x]})
    # NOTE: filings where exactly one of the two scores is zero end up in
    # neither frame, as before.

dfout = pd.DataFrame(out_rows, columns=out_columns)
dfnull = pd.DataFrame(null_rows, columns=null_columns)

dfout

dfnull
예제 #6
0
import edgar

"""https://pypi.org/project/edgar/

This url contains the documentation for the edgar package used to retrieve SEC filings.

**The code block below consists of three variable assignments which will retrieve the specified data.**

The first line of code uses the *Company* function which requires two parameters: company name and CIK. The variable *company* is set to the company of interest using these two fields.

The second line of code uses the *getAllFilings* function which returns a *lxml.html* form. There are four possible parameters for this function (filingType, priorTo, ownership, noOfEntries), however given the structure of the code block only the first two parameters are required in this instance. The *filingType* parameter can be specified for the type of document to be retrieved (10-K, S-8, 8-K). The *priorTo* parameter must be specified in a string as *YYYY-MM-DD*.

The third line of code uses the *getDocuments* function of the *Edgar*-class which returns a list of strings in which each string contains the body of a specified document. The first parameter of this function takes a *lxml.html* form which is returned from the *getAllFilings* function from the line prior. The second parameter is *noOfDocuments* which is the number of documents to be returned; for usage in this IDE, this number should always be set to 1 to avoid crashing.
"""

# Fetch the most recent Amazon 10-K filed before 2014-12-31.
company = edgar.Company("AMAZON COM INC", "0001018724")
tree = company.getAllFilings(filingType="10-K", priorTo='2014-12-31')
docs = edgar.getDocuments(tree, noOfDocuments=1)

print(tree)

print(str(docs))

docstr = str(docs)
type(docs)

# TextBlob only accepts a string; `docs` itself is the list returned by
# getDocuments, so the stringified form has to be analysed.
article = TextBlob(docstr)

articlepolarity = article.sentiment.polarity
articlesubjectivity = article.sentiment.subjectivity
print(articlepolarity)
예제 #7
0
def get_edgar_filing_text(comp_tuples,f_type,n_docs,file_dir,dates_dir):
    '''
    ---------------------------------------------------------------------------
    Scraping function to get the text from company filings from EDGAR.

    Each filing is written to
    <file_dir>/<f_type>/<company name>/<filing date>.txt. The filing dates are
    read from <dates_dir>/<f_type>/<company name>/<company name>.pickle.

    Inputs:

    - comp_tuples : A list with pairwise company tuples. The first element must
                    be a string with the company name as listed on the EDGAR
                    database. The second element must be a string with the CIK
                    identifier. See get_sp500_tickers_cik_industry(argin) to
                    easily get the tuples from the S&P500 listed firms.

    - f_type      : A string with the filing type.

    - n_docs      : Number of filings to be fetched, in descending order, i.e.
                    n_docs = 3 will fetch the three newest filings of type
                    f_type. As an integer.

    - file_dir    : The master directory where all filings are to be saved. As
                    a string.

    - dates_dir   : The master directory where all filing dates are saved. If
                    a company's directory is missing, the dates are first
                    scraped with get_edgar_filing_date(argin) and then loaded
                    from the folder it creates.

    Notes:

    - Paths are built with os.path.join and the working directory is no longer
      changed, so relative file_dir / dates_dir values work for every company.

    - A company whose dates file is still missing after the download attempt
      is reported and skipped instead of being written under the previous
      company's dates.

    Example:

    comp_tuples = [['APPLE INC'     , '0000320193'],
                   ['MCDONALDS CORP', '0000063908'],
                   ['MICROSOFT CORP', '0000789019']]

    f_type      = '10-K'     [Or '10-Q']

    n_docs      = 3

    file_dir    = 'C:\\Users\\Tobias\\Dropbox\\Master\\Text Reports U.S.'

    dates_dir   = 'C:\\Users\\Tobias\\Dropbox\\Master\\Dates Reports U.S'

    get_edgar_filing_text(comp_tuples,f_type,n_docs,file_dir,dates_dir)
    ---------------------------------------------------------------------------
    '''

    print('Fetching data...')
    print('-'*80+ '\n')
    for idx, comp_tuple in enumerate(comp_tuples):
        comp_name = comp_tuple[0]
        comp = edgar.Company(comp_name, comp_tuple[1])
        tree = comp.getAllFilings(filingType=f_type)
        docs = edgar.getDocuments(tree, noOfDocuments=n_docs)

        # Make sure the filing dates for this company exist on disk; scrape
        # them when the company's dates folder is missing.
        comp_dates_dir = os.path.join(dates_dir, f_type, comp_name)
        if not os.path.exists(comp_dates_dir):
            print(('\nCannot find the dates for ' + comp_name +
                   '. Attempts to download them...'))
            get_edgar_filing_date([comp_tuple], f_type, dates_dir)

        # Always (re)load the dates for *this* company.
        # NOTE(review): assumes get_edgar_filing_date writes the pickle to the
        # same location the cached branch reads from - confirm.
        dates_path = os.path.join(comp_dates_dir, comp_name + '.pickle')
        if os.path.isfile(dates_path):
            # The pickle is a local cache produced by this project; never point
            # dates_dir at untrusted files, since unpickling executes code.
            with open(dates_path, 'rb') as file:
                dates = pickle.load(file)[:n_docs]

            comp_file_dir = os.path.join(file_dir, f_type, comp_name)
            os.makedirs(comp_file_dir, exist_ok=True)
            # zip() pairs dates and documents positionally and stops at the
            # shorter of the two.
            for date, doc in zip(dates, docs):
                out_path = os.path.join(
                    comp_file_dir, date.replace('.pickle', '') + '.txt')
                with open(out_path, 'w', encoding='utf8') as f:
                    f.write(str(doc))
        else:
            print('\nNo filing dates found for ' + comp_name +
                  '; skipping its filings.')

        mes=('Status: '+str( int((idx+1)/len(comp_tuples)*100) )+ '% done')
        sys.stdout.write('\r'+mes)
예제 #8
0
파일: mf.py 프로젝트: charlesyuyue/AFP
def countStatesApperance(doc):
    """Return how many distinct whitespace-separated tokens of ``doc`` are in ``states``.

    Tokens are compared exactly as produced by ``str.split()``; repeated
    occurrences of the same token are counted once.
    """
    return len({token for token in doc.split() if token in states})


ed = edgar.Edgar()

# CIK identifiers are zero-padded to ten digits.
c = "785814"
c = c.zfill(10)

#n = ed.getCompanyNameByCik(c)
company = edgar.Company("INTEGRATED HEALTH SVCS INC", c)
tree = company.getAllFilings(filingType="10-K")
docs = edgar.getDocuments(tree, noOfDocuments=30)

# Only touch docs[0] when at least one document came back; previously the
# dump to disk ran before this check and raised IndexError on an empty result.
if docs:
    with io.open("C:/Users/William/Desktop/Output.txt", "w",
                 encoding="utf-8") as f:
        f.write(docs[0])
    print(countStatesApperance(docs[0]))
    print(extractYear(docs[0]))

# For each 10-K
# determine which fiscal year it is
예제 #9
0
import edgar
# Keep the name ``edgar`` bound to the package: rebinding it to an Edgar()
# instance would break the edgar.Company / edgar.getDocuments calls below.
edgar_index = edgar.Edgar()
possible_companies = edgar_index.findCompanyName("Cisco System")
print(possible_companies)

# Get Oracle Corp's last 5 form 10-K's
company = edgar.Company("Oracle Corp", "0001341439")
tree = company.getAllFilings(filingType="10-K")
docs = edgar.getDocuments(tree, noOfDocuments=5)
# docs is a list of strings, each one is the full text of a document

# SIC codes
url = "https://www.sec.gov/info/edgar/siccodes.htm"
# Developer page
# https://www.sec.gov/developer
예제 #10
0
# Change ticker into CIK
cik_dict = {}
cik = None  # CIK of the last ticker that could be resolved
for ticker in DEFAULT_TICKERS:
    f = requests.get(URL.format(ticker), stream=True)
    results = CIK_RE.findall(f.text)
    if len(results):
        cik = str(results[0])
        cik_dict[str(ticker).upper()] = cik
print(cik_dict)

# Everything below needs a resolved CIK. `results` only reflects the *last*
# ticker and may be empty, so `cik` is used instead of `results[0]`.
if cik is None:
    raise RuntimeError(
        "No CIK could be resolved for any ticker in DEFAULT_TICKERS")

# Use edgar to get text compilation of the lxml
# Get company name from CIK
edgar1 = edgar.Edgar()
cmp_name = edgar1.getCompanyNameByCik(cik)
print(cmp_name)
company = edgar.Company(cmp_name, cik)

# Creating filename and url structure
file_name = [
    f for f in os.listdir(out_path)
    if os.path.isfile(os.path.join(out_path, f))
]
# NOTE(review): raises IndexError when out_path contains no files - confirm
# that a filing has always been downloaded before this point.
switched_filename = file_name[0]
switched_filename = switched_filename.replace('-', '').replace(
    '.txt', '/index.json')
print(switched_filename)
print(file_name)

bare_url = r"https://www.sec.gov/Archives/edgar/data/"
base_url = r"https://www.sec.gov"
documents_url = bare_url + cik + "/" + switched_filename
예제 #11
0
def _fiscal_year_pattern(year):
    """Build the regex for a 10-K cover line that names fiscal year ``year``."""
    year = str(year)
    return (
        r'FOR THE FISCAL YEAR ENDED\s*[0-9]*\s*[A-Z]*\s*' + year +
        r'|FOR THE\s*(FISCAL)? YEAR ENDED\s*(Commission File Number)?\W?\s*[A-Z]*\s*[0-9]*,\s*'
        + year)


def annual_filings(name, ID, year, doc_num):
    '''Locate the 10-K for ``year`` and the "ITEM" chunks that bound its MD&A.

    Fetches up to 14 10-K documents for the company and, starting at index
    ``doc_num``, moves through them until one states the requested fiscal
    year on its cover.  That document is upper-cased and split on ``'ITEM '``;
    the indices of chunks that look like the Item 7 (MD&A) heading and the
    Item 9 "Controls and Procedures" heading are collected.

    Args:
        name: Company name passed to ``edgar.Company``.
        ID: CIK identifier passed to ``edgar.Company``.
        year: Fiscal year to look for (anything ``int()`` accepts).
        doc_num: Index of the document to inspect first.

    Returns:
        ``(name, None)`` when a document matches none of the known cover
        patterns, otherwise ``(name, ID, start, stop, d)`` where ``start`` and
        ``stop`` are lists of chunk indices and ``d`` is the index of the
        document that was used.
    '''
    # get filings from package
    company = edgar.Company(name, ID)
    tree = company.getAllFilings(filingType="10-K")
    doc = edgar.getDocuments(tree, noOfDocuments=14)
    year = int(year)

    # search for the right document year
    # NOTE(review): ``d`` may step below zero (negative indices wrap around)
    # and the loop may also end by running past the last document; in that
    # case ``filing`` is still a plain string (or unbound if the loop never
    # ran) when it reaches the chunk scan below - confirm whether callers
    # rely on this.
    d = doc_num
    while d < len(doc):

        filing = (re.sub('\\xa0|\\n', ' ', doc[d]))
        # see if the document is amended
        if '10-K/A' in filing[1:15]:
            d += 1

        # if in the right fiscal year, remove the new lines and break
        elif re.search(_fiscal_year_pattern(year), filing, re.IGNORECASE):
            filing = filing.replace('\n', '').replace('\t', '').replace(
                '\r', '').replace('Contents', ' ').upper().split('ITEM ')
            break

        # document is for the following year: look at the next document
        elif re.search(_fiscal_year_pattern(year + 1), filing,
                       re.IGNORECASE):
            d += 1

        # document is for the previous year: step back one document
        elif re.search(_fiscal_year_pattern(year - 1), filing,
                       re.IGNORECASE):
            d -= 1
        # for Google doc #
        elif re.search(
                r'FOR THE\s*(FISCAL)? YEAR ENDED\s*\W?\s*[A-Z]*\s*[0-9]*,\s*' +
                str(year - 2), filing, re.IGNORECASE):
            d -= 2

        else:
            return name, None

    start = []
    stop = []

    # remove non-breaking spaces and the possessive marker that follows a
    # 7 or 8, so the heading patterns below can match
    new_doc = [
        re.sub(r'\xa0*|(?<=[7-8])\W?s?', '', item) for item in filing
    ]
    for i, chunk in enumerate(new_doc):
        # The two MD&A heading variants are tested independently, so a chunk
        # matching both is recorded twice, as before.
        if re.search(
                r'7\s*\W?\.?\s*(AND 7A.)?(COMBINED)?\s*MANAGEMENT\W?\s*S\s*DISCUSSION\s*AND\s*ANALYSIS\s*OF\s*(CONSOLIDATED)?\s*FINANCIAL\s*CONDITION\S?\s*AND\s*RESULTS\s*OF\s*OPERATION\S?',
                chunk):
            start.append(i)
        if re.search(
                r'7\s*\W?\.?\s*MANAGEMENT\W?\s*S\s*DISCUSSION\s*AND\s*ANALYSIS\s*OF\s*RESULTS\s*OF\s*OPERATIONS\s*AND\s*FINANCIAL\s*CONDITION',
                chunk):
            start.append(i)
        if re.search(r'9\s*\W?\w?\.?\s*\W?\s*CONTROLS\s*AND\s*PROCEDURES',
                     chunk):
            stop.append(i)

    return name, ID, start, stop, d
예제 #12
0
def _tail_from_first_match(sections, patterns):
    """Return ``sections[i:]`` for the first section matching any pattern, else None."""
    for i, section in enumerate(sections):
        if any(re.search(pattern, section) for pattern in patterns):
            return sections[i:]
    return None


def MDA(name, ID, start_index, stop_index, doc_num, year):
    '''Return the MD&A chunks of a 10-K filing.

    Re-fetches the company's 10-K documents, takes document ``doc_num``,
    splits it into chunks the same way ``annual_filings`` does, and slices
    the chunk list with the supplied indices.  A few company-specific
    fallbacks locate the section by text when the indices are not usable.

    Args:
        name: Company name passed to ``edgar.Company``; also selects the
            company-specific branches ('General Electric', 'Wells Fargo').
        ID: CIK identifier passed to ``edgar.Company``.
        start_index: List of candidate start chunk indices.
        stop_index: List of candidate stop chunk indices.
        doc_num: Index of the document to use.
        year: Fiscal year as a number (compared with ``> 2013``).

    Returns:
        ``(name, MDA_text)`` where ``MDA_text`` is a list of chunks, or
        ``None`` when no rule applies.
    '''
    # Company-specific markers for the start of the financial section.
    jp_morgan = r'CONTENTS\s*FINANCIAL:\s+'
    chevron = r'FINANCIAL TABLE OF\s*|BLANK\)\s*INDEX TO MANAGEMENT\W?S DISCUSSION AND ANALYSIS'
    exxon_mobil = r'FINANCIAL SECTION\s+TABLE OF\s+CONTENTS\s+BUSINESS PROFILE'

    # get filings from package
    company = edgar.Company(name, ID)
    tree = company.getAllFilings(filingType="10-K")
    doc = edgar.getDocuments(tree, noOfDocuments=doc_num + 1)

    if name == 'General Electric' and year > 2013:
        # These filings are split on the page footer instead of on 'ITEM '
        # and are not upper-cased.
        ge_text = (re.sub('\\xa0|\\n', ' ', doc[doc_num]))
        ge_sections = ge_text.replace('\n', '').replace('\t', '').replace(
            '\r', '').replace('Contents',
                              ' ').split('GE ' + str(year) + ' FORM 10-K')

        new_doc = [
            re.sub(r'\xa0*|(?<=[7-8])\W?s?', '', item) for item in ge_sections
        ]

        # The last matching page wins for both boundaries.
        # NOTE(review): if a heading is never found, the caller-supplied
        # list is used as a slice bound below - confirm this cannot happen.
        for i, section in enumerate(new_doc):
            if re.search(
                    r'MANAGEMENT\W?\s*S\s*DISCUSSION\s*AND\s*ANALYSIS\s*OF\s*FINANCIAL\s*CONDITION\s*AND\s*RESULTS\s*OF\s*OPERATIONS\s*\(MD',
                    section):
                start_index = i
            if re.search(
                    r'MANAGEMENT\W?\s*S\s*ANNUAL\s*REPORT\s*ON\s*INTERNAL\s*CONTROL\s*OVER\s*FINANCIAL\s*REPORTING',
                    section):
                stop_index = i

        MDA_text = new_doc[start_index:stop_index]

        return name, MDA_text

    # create the same format used by annual_filings
    filing = (re.sub('\\xa0|\\n', ' ', doc[doc_num]))
    filing = filing.replace('\n', '').replace('\t', '').replace(
        '\r', '').replace('Contents', ' ').upper().split('ITEM ')
    new_doc = [re.sub(r'\xa0*|(?<=[7-8])\W?s?', '', item) for item in filing]

    # find the text using the indices
    if len(start_index) == 2 and len(stop_index) == 2:
        if stop_index[-1] - start_index[-1] > 7:
            MDA_text = new_doc[start_index[1]:stop_index[1]]
        else:
            # for JP Morgan, Chevron and Exxon Mobil
            MDA_text = _tail_from_first_match(
                new_doc, (jp_morgan, chevron, exxon_mobil))

    elif len(start_index) > 5:
        MDA_text = new_doc[start_index[-4]:stop_index[-1]]

    elif len(start_index) > 2:
        if stop_index[-1] - start_index[-1] > 4:
            MDA_text = new_doc[start_index[-1]:stop_index[-1]]
        elif stop_index[-1] > start_index[-1]:
            MDA_text = new_doc[start_index[-2]:stop_index[-1]]
        else:
            # for Exxon Mobil and Chevron 2013&2012
            MDA_text = _tail_from_first_match(new_doc, (exxon_mobil, chevron))

    elif len(start_index) == 1 and stop_index[0] - start_index[0] > 8:
        if name != 'Wells Fargo':
            MDA_text = new_doc[start_index[0]:stop_index[0]]
        else:
            MDA_text = None

    else:
        MDA_text = None

    return name, MDA_text