def __init__(self, CIK, filing_year, company_name, processed_text):
    """Initialise the record for one company filing.

    CIK is stored after passing through Utilities.format_CIK and
    filing_year after passing through Utilities.sanitize_filing_year.
    company_name and processed_text are stored exactly as given.
    """
    # Normalised identifiers first, in the same order as the arguments.
    self.CIK = Utilities.format_CIK(CIK)
    self.filing_year = Utilities.sanitize_filing_year(filing_year)

    # Values taken verbatim from the caller.
    self.company_name = company_name
    self.processed_text = processed_text

    # Nothing is computed for these here; both start out as None.
    self.legal_proceeding_mention = None
    self.legal_note_mentions = None
def get_raw_website_data_from_corpus(CIK, filing_year):
    """Read the stored raw URL data for one filing.

    The file looked up is
    Constants.PATH_TO_RAW_URL_DATA/<formatted CIK>/<sanitized year>.txt.

    Returns the file contents as read in binary mode, or None when no
    such file exists.
    """
    cik_dir = Utilities.format_CIK(CIK)
    year = Utilities.sanitize_filing_year(filing_year)
    file_name = str(year) + ".txt"
    corpus_file = os.path.join(Constants.PATH_TO_RAW_URL_DATA, cik_dir, file_name)

    # Guard clause: a missing file is reported as None rather than an error.
    if not os.path.exists(corpus_file):
        return None

    with open(corpus_file, 'rb') as handle:
        return handle.read()
def write_to_legal_proceeding_corpus(data, CIK, filing_year):
    """Write data into the legal-proceeding corpus for one filing.

    The target directory is
    Constants.PATH_TO_LEGAL_PROCEEDING_CORPUS/<formatted CIK>; the actual
    write is delegated to write_data_to_corpus, which creates
    <sanitized filing_year>.txt inside that directory. The resulting
    layout is:

        <corpus root>/
            CIK_1/
                filing_year_1.txt
                filing_year_2.txt
            ...
    """
    normalized_cik = Utilities.format_CIK(CIK)
    normalized_year = Utilities.sanitize_filing_year(filing_year)
    company_dir = os.path.join(
        Constants.PATH_TO_LEGAL_PROCEEDING_CORPUS, normalized_cik
    )
    write_data_to_corpus(data, normalized_cik, normalized_year, company_dir)
def write_processed_url_data_to_file(data, CIK, filing_year):
    """Write processed URL data for one filing, unless it already exists.

    The target file is
    Constants.PATH_TO_PROCESSED_URL_DATA/<formatted CIK>/<sanitized year>.txt.
    If that file is already present it is left untouched; otherwise it is
    created and data is written with writelines.

    Raises:
        OSError: if the per-CIK directory cannot be created.
    """
    CIK = Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)
    path = os.path.join(Constants.PATH_TO_PROCESSED_URL_DATA, CIK)

    # makedirs (as write_data_to_corpus uses) also creates a missing parent
    # directory, which plain mkdir would fail on. Attempt the create and only
    # re-raise when the directory really is absent afterwards, so a directory
    # that appeared in the meantime is not treated as an error.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise

    path_with_file = os.path.join(path, filing_year + ".txt")
    # Existing files are never overwritten.
    if not os.path.exists(path_with_file):
        with open(path_with_file, 'w') as f:
            f.writelines(data)
def write_data_to_corpus(data, CIK, filing_year, path):
    """Write data to <path>/<sanitized filing_year>.txt, replacing any old file.

    Args:
        data: the content to write; passed to file.writelines.
        CIK: company identifier. It is run through Utilities.format_CIK but
            does not take part in building the target path.
        filing_year: year used (after Utilities.sanitize_filing_year) as the
            file name stem.
        path: directory to write into; created, including parents, if absent.

    Raises:
        Exception: with message "Nothing to write!" if data is None or empty.
        OSError: if the directory cannot be created.
    """
    if data is None or len(data) == 0:
        raise Exception("Nothing to write!")

    # The formatted CIK is not needed below; the call is kept so that any
    # error format_CIK raises for a bad CIK still surfaces from this function.
    Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)

    # Attempt the create and only re-raise when the directory really is
    # absent afterwards, so a directory that appeared in the meantime is
    # not treated as an error.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise

    path_with_file = os.path.join(path, filing_year + ".txt")
    # The file is always (re)written: the previous
    # "exists or not exists" condition was true in every case.
    with open(path_with_file, 'w') as f:
        f.writelines(data)
def write_raw_url_data_to_file(data, CIK, filing_year):
    """Write raw URL data for one filing, unless it already exists.

    The target file is
    Constants.PATH_TO_RAW_URL_DATA/<formatted CIK>/<sanitized year>.txt.
    Directory creation, the existence check and the write all happen while
    _raw_data_writing_mutex is held. An existing file is left untouched.

    Raises:
        OSError: if the directory or file cannot be created. The mutex is
            released before the error propagates.
    """
    CIK = Utilities.format_CIK(CIK)
    filing_year = Utilities.sanitize_filing_year(filing_year)
    path = os.path.join(Constants.PATH_TO_RAW_URL_DATA, CIK)

    # Explicit acquire/release is kept (rather than a `with` block) because
    # the mutex's type is not visible here; only the two methods the code
    # already relied on are used.
    _raw_data_writing_mutex.acquire()
    try:
        # makedirs (as write_data_to_corpus uses) also creates a missing
        # parent directory, which plain mkdir would fail on.
        if not os.path.exists(path):
            os.makedirs(path)
        path_with_file = os.path.join(path, filing_year + ".txt")
        # Existing files are never overwritten.
        if not os.path.exists(path_with_file):
            with open(path_with_file, 'w') as f:
                f.writelines(data)
    finally:
        # Always release, otherwise one failed write would leave the mutex
        # held and block every later caller.
        _raw_data_writing_mutex.release()