コード例 #1
0
def _litigation_footnote_unit_test(CIK, filing_year, corpus_file):
    """Parse one filing for litigation footnotes and run the character-count test.

    The cached processed text and the company name are looked up in the corpus
    and passed to ``Litigation10KParsing.parse`` with
    ``get_litigation_footnotes_only=True``.  Whichever of the two lookups
    returned ``None`` is then written back to the corpus from the parse result,
    and the extracted footnote mentions are handed to ``_character_count_test``.

    Args:
        CIK: company identifier, forwarded unchanged to the corpus and parser.
        filing_year: year of the filing to examine.
        corpus_file: forwarded unchanged to ``_character_count_test``.
    """
    cached_text = CorpusAccess.get_processed_website_data_from_corpus(CIK, filing_year)
    known_name = CorpusAccess.get_company_name_from_corpus(CIK)

    # Remember what the corpus lacked before parsing so it can be back-filled.
    text_not_cached = cached_text is None
    name_not_cached = known_name is None

    parsed = Litigation10KParsing.parse(
        CIK,
        filing_year,
        known_name,
        cached_text,
        get_litigation_footnotes_only=True
    )

    if text_not_cached:
        CorpusAccess.write_processed_url_data_to_file(
            data=parsed.processed_text,
            CIK=parsed.CIK,
            filing_year=parsed.filing_year
        )

    if name_not_cached:
        CorpusAccess.write_company_name_and_cik_mapping_to_corpus(parsed.CIK, parsed.company_name)

    _character_count_test(CIK, filing_year, parsed.legal_note_mentions, corpus_file)
コード例 #2
0
def main():
    
    CIK = Utilities.format_CIK('0000859475')
    
    for year in xrange(2004, 2012 + 1):
                
        print "Begin:\tCIK:%s\t%s" % (CIK, year)
        
        try:
            
            processed_data = CorpusAccess.get_processed_website_data_from_corpus(CIK, year)
            
            company_name = CorpusAccess.get_company_name_from_corpus(CIK)

            results = Litigation10KParsing.parse(CIK, year, company_name, processed_website_data=processed_data)

            print "Wrote mapping:",
            if CorpusAccess.get_company_name_from_corpus(CIK) is None:
                CorpusAccess.write_company_name_and_cik_mapping_to_corpus(CIK, results.company_name)
                print "\tYES"
            else:
                print "\tNO"
            
            print "Wrote Processed URL Data: ",
            if processed_data is None:        
                CorpusAccess.write_processed_url_data_to_file(data=results.processed_text, CIK=results.CIK, filing_year=results.filing_year)
                print "\tYES"
            else:
                print "\tNO"
            
            print "Wrote Legal Proceeding Data: ",
            if results.legal_proceeding_mention is not None:
                CorpusAccess.write_to_legal_proceeding_corpus(CIK=results.CIK, data=results.legal_proceeding_mention, filing_year=results.filing_year)
                print "\tYES"
            else:
                print "\tNO"
            
            print "Wrote Legal Footnote Data: ",    
            if len(results.legal_note_mentions) > 0:
                CorpusAccess.write_to_litigation_footnote_corpus(results.legal_note_mentions, results.CIK, results.filing_year)
                print "\tYES"
            else:
                print "\tNO"
            
        except Exception as exception:
            print "Exception: ", exception
            traceback.print_exc()
コード例 #3
0
def _get_results(cik, start_year, end_year):
    
    results = dict()

    cik = Utilities.format_CIK(cik)

    for year in xrange(start_year, end_year + 1):

        year = str(year)
        
        print "Processing %s %s" % (cik, year)
        
        lfp_path = os.path.join(Constants.PATH_TO_LEGAL_FOOTNOTE_CORPUS, cik, year + '.txt')
        lpp_path = os.path.join(Constants.PATH_TO_LEGAL_PROCEEDING_CORPUS, cik, year + '.txt')

        processed_data = CorpusAccess.get_processed_website_data_from_corpus(cik, year)    
        company_name = CorpusAccess.get_company_name_from_corpus(cik)

        get_lpp_only = False
        get_lfp_only = False

        if os.path.exists(lfp_path):
            get_lpp_only = True

        if os.path.exists(lpp_path):
            get_lfp_only = True

        try:
            result = Litigation10KParsing.parse(cik, year, company_name, processed_website_data=processed_data, \
                get_legal_proceeding_only=get_lpp_only, get_litigation_footnotes_only=get_lfp_only)

            if get_lpp_only:
                with open(lfp_path) as f:
                    result.legal_note_mentions = f.read()
            else:
                if result.legal_note_mentions is not None:
                    try:
                        CorpusAccess.write_to_litigation_footnote_corpus(result.legal_note_mentions, result.CIK, result.filing_year)
                    except Exception as exception:
                        print "Exception: ", exception
                        traceback.print_exc()
            if get_lfp_only:
                with open(lpp_path) as f:
                    result.legal_proceeding_mention = f.read()
            else:
                if result.legal_proceeding_mention is not None:
                    try:
                        CorpusAccess.write_to_legal_proceeding_corpus(CIK=result.CIK, \
                            data=result.legal_proceeding_mention, filing_year=result.filing_year)
                    except Exception as exception:
                        print "Exception: ", exception
                        traceback.print_exc()

            if company_name is None and result.company_name is not None:
                try:
                    CorpusAccess.write_company_name_and_cik_mapping_to_corpus(result.CIK, result.company_name)
                except Exception as exception:
                    print "Exception: ", exception
                    traceback.print_exc()

            if processed_data is None and result.processed_text is not None:  
                try:      
                    CorpusAccess.write_processed_url_data_to_file(data=result.processed_text, CIK=result.CIK, filing_year=result.filing_year)
                except Exception as exception:
                    print "Exception: ", exception
                    traceback.print_exc()
            
            results[year] = result

        except Exception as exception:
            print "Exception: ", exception
            traceback.print_exc()

    return results