def __init__(self, CIK, filing_year, company_name, processed_text):
    """Set up the record for one company filing.

    CIK and filing_year are stored after being passed through
    Utilities.format_CIK and Utilities.sanitize_filing_year respectively;
    company_name and processed_text are stored as given.  Both legal
    mention attributes start out as None.
    """
    # Identifiers are normalised before being stored.
    self.CIK = Utilities.format_CIK(CIK)
    self.filing_year = Utilities.sanitize_filing_year(filing_year)

    self.company_name = company_name

    # Nothing has been recorded for these yet.
    self.legal_proceeding_mention = self.legal_note_mentions = None

    self.processed_text = processed_text
예제 #2
0
def get_raw_website_data_from_corpus(CIK, filing_year):
    '''
    Look up the stored raw website data for one CIK / filing year.

    The file is expected at
        <Constants.PATH_TO_RAW_URL_DATA>/<CIK>/<filing_year>.txt
    where both CIK and filing_year are first normalised through Utilities.

    Returns the file contents read in binary mode, or None when no such
    file exists.
    '''
    normalized_cik = Utilities.format_CIK(CIK)
    normalized_year = Utilities.sanitize_filing_year(filing_year)

    corpus_root = Constants.PATH_TO_RAW_URL_DATA
    file_name = str(normalized_year) + ".txt"
    raw_file = os.path.join(corpus_root, normalized_cik, file_name)

    # Nothing stored for this filing.
    if not os.path.exists(raw_file):
        return None

    with open(raw_file, 'rb') as handle:
        return handle.read()
예제 #3
0
def write_to_legal_proceeding_corpus(data, CIK, filing_year):
    '''
    Store legal proceeding data for one filing as a text file.

    The resulting layout under Constants.PATH_TO_LEGAL_PROCEEDING_CORPUS is:
        CIK_1
            filing_year_1.txt
            filing_year_2.txt
    and so on.

    The actual write is delegated to write_data_to_corpus.
    '''
    normalized_cik = Utilities.format_CIK(CIK)
    normalized_year = Utilities.sanitize_filing_year(filing_year)

    # One directory per company inside the legal proceeding corpus.
    cik_directory = os.path.join(
        Constants.PATH_TO_LEGAL_PROCEEDING_CORPUS, normalized_cik)

    write_data_to_corpus(data, normalized_cik, normalized_year, cik_directory)
예제 #4
0
def write_processed_url_data_to_file(data, CIK, filing_year):
    '''
    Write processed URL data for one filing to
        <Constants.PATH_TO_PROCESSED_URL_DATA>/<CIK>/<filing_year>.txt

    The file is only written when it does not exist yet; an existing file
    is left untouched.

    data        -- passed as-is to file.writelines()
    CIK         -- normalised with Utilities.format_CIK
    filing_year -- normalised with Utilities.sanitize_filing_year
    '''
    CIK = Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)

    path = os.path.join(Constants.PATH_TO_PROCESSED_URL_DATA, CIK)

    # makedirs (as write_data_to_corpus does) rather than mkdir, so a missing
    # corpus root directory is created instead of raising OSError.
    if not os.path.exists(path):
        os.makedirs(path)

    # str() matches get_raw_website_data_from_corpus and keeps this working
    # should the sanitised year not be a string.
    path_with_file = os.path.join(path, str(filing_year) + ".txt")

    if not os.path.exists(path_with_file):
        with open(path_with_file, 'w') as f:
            f.writelines(data)
예제 #5
0
def write_data_to_corpus(data, CIK, filing_year, path):
    '''
    Write data to <path>/<filing_year>.txt, creating the directory `path`
    (including missing parents) when necessary.  An existing file is
    overwritten.

    data        -- non-empty; passed as-is to file.writelines()
    CIK         -- only run through Utilities.format_CIK; it does not
                   influence the target location, which `path` fully decides
    filing_year -- normalised with Utilities.sanitize_filing_year and used
                   as the file name
    path        -- directory that will contain the file

    Raises Exception("Nothing to write!") when data is None or empty.
    '''
    if data is None or len(data) == 0:
        raise Exception("Nothing to write!")

    # The formatted CIK is not needed here; the call is kept so that whatever
    # checking format_CIK performs still happens before anything is written.
    Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)

    if not os.path.exists(path):
        os.makedirs(path)

    # str() matches get_raw_website_data_from_corpus and keeps this working
    # should the sanitised year not be a string.
    path_with_file = os.path.join(path, str(filing_year) + ".txt")

    # Always (over)write: the former "exists or not exists" guard was a
    # tautology that only cost two extra filesystem lookups.
    with open(path_with_file, 'w') as f:
        f.writelines(data)
예제 #6
0
def write_raw_url_data_to_file(data, CIK, filing_year):
    '''
    Write raw URL data for one filing to
        <Constants.PATH_TO_RAW_URL_DATA>/<CIK>/<filing_year>.txt

    The directory check/creation and the file write happen while holding
    _raw_data_writing_mutex.  The file is only written when it does not
    exist yet; an existing file is left untouched.

    data        -- passed as-is to file.writelines()
    CIK         -- normalised with Utilities.format_CIK
    filing_year -- normalised with Utilities.sanitize_filing_year
    '''
    CIK = Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)

    path = os.path.join(Constants.PATH_TO_RAW_URL_DATA, CIK)
    # str() matches get_raw_website_data_from_corpus and keeps this working
    # should the sanitised year not be a string.
    path_with_file = os.path.join(path, str(filing_year) + ".txt")

    _raw_data_writing_mutex.acquire()
    try:
        # makedirs (as write_data_to_corpus does) rather than mkdir, so a
        # missing corpus root directory is created instead of raising OSError.
        if not os.path.exists(path):
            os.makedirs(path)

        if not os.path.exists(path_with_file):
            with open(path_with_file, 'w') as f:
                f.writelines(data)
    finally:
        # Release even when the filesystem calls raise; otherwise every
        # later caller would block forever on acquire().
        _raw_data_writing_mutex.release()