def test_extract_report_owner_form5(test_form5):
    """
    Check the reporting-owner record Form5 extracts from one known filing.

    :param test_form5: sample filing handed to ``Form5``; only its ``name``
        attribute is read directly here (presumably a path-like fixture)
    :return: None
    """
    parsed = Form5(test_form5)
    assert parsed.accession_num == "0000011544-20-000013"
    assert parsed.filename == test_form5.name

    owners = parsed.report_owners
    assert len(owners) == 1
    owner = owners[0]
    assert len(owner) == 19

    # Fields that must carry a concrete value for this filing.
    expected_values = {
        "filename": test_form5.name,
        "accession_num": parsed.accession_num,
        "order": "1",
        "type": "reportingOwner",
        "index": "reportingOwner1",
        "rpt_owner_cik": "0001794251",
        "rpt_owner_name": "Talisman Jonathan",
        "rpt_owner_street1": "W. R. BERKLEY CORPORATION",
        "rpt_owner_street2": "475 STEAMBOAT ROAD",
        "rpt_owner_city": "GREENWICH",
        "rpt_owner_state": "CT",
        "rpt_owner_zip_code": "06830",
        "is_director": "1",
        "is_officer": "0",
        "is_ten_percent_owner": "0",
        "is_other": "0",
    }
    for key, expected in expected_values.items():
        assert owner[key] == expected

    # Fields that must be absent (None) for this filing.
    for key in ("rpt_owner_state_descr", "officer_title", "other_text"):
        assert owner[key] is None
def test_extract_doclevel_form5(test_form5):
    """
    Check the document-level fields Form5 extracts from one known filing.

    :param test_form5: sample filing handed to ``Form5``; only its ``name``
        attribute is read directly here (presumably a path-like fixture)
    :return: None
    """
    parsed = Form5(test_form5)
    assert parsed.accession_num == "0000011544-20-000013"
    assert parsed.filename == test_form5.name

    info = parsed.doc_info
    assert len(info) == 18

    # Fields that must carry a concrete value for this filing.
    expected_values = {
        "filename": test_form5.name,
        "accession_num": parsed.accession_num,
        "sec_accept_datetime": "20200213162819",
        "sec_file_num": "001-15202",
        "doc_count": "1",
        "filed_date": "20200213",
        "conformed_period_of_report": "20191231",
        "change_date": "20200213",
        "schema_version": "X0306",
        "document_type": "5",
        "period_of_report": "2019-12-31",
        "issuer_cik": "0000011544",
        "issuer_name": "BERKLEY W R CORP",
        "issuer_trading_symbol": "WRB",
        "regcik": "0000011544",
        "regsic": "6331",
    }
    for key, expected in expected_values.items():
        assert info[key] == expected

    # Fields that must be absent (None) for this filing.
    for key in ("not_subject_to_section_16", "remarks"):
        assert info[key] is None
def test_extract_report_owner_form5_collection(test_form5_collection, doc_num: int):
    """
    Validate Form5 reporting-owner extraction against one document of a sample collection.

    :param test_form5_collection: object whose ``glob("*.txt")`` yields the
        sample filings (presumably a directory path fixture)
    :param doc_num: zero-based position of the filing to check, counted in
        sorted path order
    :return: None
    """
    # glob() yields entries in an unspecified order; sorting makes ``doc_num``
    # refer to the same filing on every run and every machine, so a failure
    # can be reproduced from its test id.
    file = sorted(test_form5_collection.glob("*.txt"))[doc_num]
    doc = Form5(file)
    assert doc.filename == file.name

    fields_list = doc.report_owners
    assert len(fields_list) > 0

    for idx, fields in enumerate(fields_list):
        assert len(fields) == 19
        assert fields["filename"] == file.name
        # ``order`` and ``index`` are one-based, while enumerate is zero-based.
        assert fields["order"] == f"{idx+1}"
        assert fields["type"] == "reportingOwner"
        assert fields["index"] == f"reportingOwner{idx+1}"
        assert validate(file, fields["rpt_owner_cik"], r"\d{10}")
        assert validate(file, fields["rpt_owner_name"], r".+")
        assert validate(file, fields["rpt_owner_city"], r".+")
        assert validate(file, fields["is_director"], r"[10]", none_allowed=True)
        assert validate(file, fields["is_officer"], r"[10]", none_allowed=True)
        assert validate(
            file, fields["is_ten_percent_owner"], r"[10]", none_allowed=True
        )
        assert validate(file, fields["is_other"], r"[10]", none_allowed=True)
def test_extract_nonderivative_trans_form5(test_form5):
    """
    Check the non-derivative transaction Form5 extracts from one known filing.

    :param test_form5: sample filing handed to ``Form5``; only its ``name``
        attribute is read directly here (presumably a path-like fixture)
    :return: None
    """
    parsed = Form5(test_form5)
    assert parsed.accession_num == "0000011544-20-000013"
    assert parsed.filename == test_form5.name

    transactions = parsed.nonderivatives
    assert len(transactions) == 1
    transaction = transactions[0]

    # Fields that must carry a concrete value for this filing.
    expected_values = {
        "filename": test_form5.name,
        "accession_num": parsed.accession_num,
        "order": "1",
        "type": "nonDerivTrans",
        "index": "nonDerivTrans1",
        "security_title": "Common Stock",
        "transaction_date": "2019-12-11",
        "transaction_acquired_disposed_code": "A",
        "transaction_price_per_share": "70.27",
        "transaction_shares": "3",
        "direct_or_indirect_ownership": "D",
        "equity_swap_involved": "0",
        "transaction_form_type": "5",
        "shares_owned_following_transaction": "359",
        "transaction_code": "P",
    }
    for key, expected in expected_values.items():
        assert transaction[key] == expected

    # Fields that must be absent (None) for this filing.
    absent_keys = (
        "deemed_execution_date",
        "transaction_timeliness",
        "nature_of_ownership",
        "value_owned_following_transaction",
    )
    for key in absent_keys:
        assert transaction[key] is None
def test_extract_doclevel_form5_collection(test_form5_collection):
    """
    Check document-level extraction for every ``*.txt`` filing in a sample collection.

    :param test_form5_collection: object whose ``glob("*.txt")`` yields the
        sample filings (presumably a directory path fixture)
    :return: None
    """
    # (field name, pattern handed to validate, whether None is accepted),
    # listed in the order the checks are performed.
    pattern_checks = (
        ("accession_num", r"[\d-]+", False),
        ("sec_accept_datetime", r"\d{14,14}", False),
        ("sec_file_num", r"[\d-]+", False),
        ("doc_count", r"\d+", False),
        ("filed_date", r"\d{8}", False),
        ("conformed_period_of_report", r"\d{8}", False),
        ("change_date", r"\d{8}", False),
        ("period_of_report", r"\d\d\d\d-\d\d-\d\d", False),
        ("not_subject_to_section_16", r"[10]", True),
        ("issuer_cik", r"\d+", False),
        ("issuer_name", r".+", False),
        ("issuer_trading_symbol", r"[A-Z\.;\s\.,]+", False),
        ("regcik", r"\d+", False),
        ("regsic", r"\d\d\d\d", True),
    )

    for path in test_form5_collection.glob("*.txt"):
        parsed = Form5(path)
        assert parsed.filename == path.name

        info = parsed.doc_info
        assert len(info) == 18
        assert info["filename"] == path.name
        assert info["accession_num"] is not None
        assert info["schema_version"] == "X0306"
        assert info["document_type"] == "5"

        for key, pattern, nullable in pattern_checks:
            if nullable:
                assert validate(path, info[key], pattern, none_allowed=True)
            else:
                # Leave none_allowed at whatever default validate declares.
                assert validate(path, info[key], pattern)
def test_extract_derivative_trans_form5_collection(test_form5_collection):
    """
    Check derivative transaction and holding records for every ``*.txt`` filing in a sample collection.

    :param test_form5_collection: object whose ``glob("*.txt")`` yields the
        sample filings (presumably a directory path fixture)
    :return: None
    """
    # Pattern checks applied to both record types, in check order.
    shared_checks = (
        ("accession_num", r"[\d-]+"),
        ("security_title", r".+"),
        ("direct_or_indirect_ownership", r"[DI]"),
    )
    # Additional pattern checks applied to transaction records only.
    transaction_checks = (
        ("transaction_date", r"(\d\d\d\d-\d\d-\d\d)?"),
        ("transaction_acquired_disposed_code", r"[AD]"),
        ("transaction_price_per_share", r"[\d+\.]*"),
        ("transaction_shares", r"[\d+\.]+"),
        ("equity_swap_involved", r"[10]"),
        ("transaction_form_type", r"[45]"),
        ("shares_owned_following_transaction", r"[\d\.]+"),
        ("transaction_code", r"[A-Z]"),
    )
    # Transactions are examined before holdings.
    checks_by_type = (
        ("derivTrans", shared_checks + transaction_checks),
        ("derivHolding", shared_checks),
    )

    for path in test_form5_collection.glob("*.txt"):
        parsed = Form5(path)
        assert parsed.filename == path.name
        records = parsed.derivatives

        for record_type, checks in checks_by_type:
            matching = [rec for rec in records if rec["type"] == record_type]
            # ``order`` and ``index`` are numbered from 1 within each record type.
            for position, record in enumerate(matching, start=1):
                assert len(record) == 26
                assert record["filename"] == path.name
                assert record["accession_num"] == parsed.accession_num
                assert record["order"] == f"{position}"
                assert record["type"] == record_type
                assert record["index"] == f"{record_type}{position}"
                for key, pattern in checks:
                    assert validate(path, record[key], pattern)
def test_extract_derivative_trans_form5(test_form5):
    """
    Check that the known sample filing yields no derivative records.

    :param test_form5: sample filing handed to ``Form5``; only its ``name``
        attribute is read directly here (presumably a path-like fixture)
    :return: None
    """
    parsed = Form5(test_form5)
    assert parsed.accession_num == "0000011544-20-000013"
    assert parsed.filename == test_form5.name

    derivative_records = parsed.derivatives
    record_count = len(derivative_records)
    assert record_count == 0
def create_doc(file: Path) -> Union[None, Document]:
    """
    Build the Document subclass matching the form type declared in a file.

    The form type is the text of the first element whose tag name is
    ``Document.xml_document_fields["document_type"]``.

    :param file: path of the file to read
    :return: a Form3, Form4 or Form5 instance, or None (after printing a
        warning) when the form type is missing or not supported
    """
    xmlpath = Document.xml_document_fields["document_type"]
    # Only the first occurrence is needed, so stop at the first match instead
    # of collecting every match in the file.
    match = re.search(f"<{xmlpath}>(.+)</{xmlpath}>", file.read_text())
    if match is None:
        # Previously this case surfaced as a bare IndexError; report it the
        # same way as an unsupported form type instead.
        typer.secho(
            f"WARNING: {file.name} has no document type element",
            fg=typer.colors.RED,
        )
        return None

    form_type = match.group(1)
    form_classes = {"3": Form3, "4": Form4, "5": Form5}
    form_class = form_classes.get(form_type)
    if form_class is None:
        typer.secho(
            f"WARNING: {form_type} not a supported form type", fg=typer.colors.RED
        )
        return None
    return form_class(file)
def create_doc(file: Path) -> Union[None, Document]:
    """
    A factory function that returns a Document object with the right subtype.

    The form type is the text of the first element whose tag name is
    ``Document.xml_document_fields["document_type"]``.

    :param file: The file object that is the source of the Document
    :return: A Form3, Form4 or Form5 object, or None (after printing a
        warning) when the form type is missing or not supported
    """
    xmlpath = Document.xml_document_fields["document_type"]
    # Only the first occurrence matters; search() stops there rather than
    # scanning the whole file for every match.
    found = re.search(f"<{xmlpath}>(.+)</{xmlpath}>", file.read_text())
    if found is None:
        # Without this guard a file lacking the element raised IndexError.
        typer.secho(
            f"WARNING: {file.name} has no document type element",
            fg=typer.colors.RED,
        )
        return None

    form_type = found.group(1)
    if form_type == "3":
        return Form3(file)
    if form_type == "4":
        return Form4(file)
    if form_type == "5":
        return Form5(file)

    typer.secho(
        f"WARNING: {form_type} not a supported form type", fg=typer.colors.RED
    )
    return None
def test_extract_signature_form5(test_form5):
    """
    Check the signature record Form5 extracts from one known filing.

    :param test_form5: sample filing handed to ``Form5``; only its ``name``
        attribute is read directly here (presumably a path-like fixture)
    :return: None
    """
    parsed = Form5(test_form5)
    assert parsed.accession_num == "0000011544-20-000013"
    assert parsed.filename == test_form5.name

    signatures = parsed.signatures
    assert len(signatures) == 1
    signature = signatures[0]
    assert len(signature) == 7

    expected_values = {
        "filename": test_form5.name,
        "accession_num": parsed.accession_num,
        "order": "1",
        "type": "signature",
        "index": "signature1",
        "signature_name": "Jonathan Talisman",
        "signature_date": "2020-02-13",
    }
    for key, expected in expected_values.items():
        assert signature[key] == expected
def test_extract_signature_form5_collection(test_form5_collection, doc_num: int):
    """
    Validate Form5 signature extraction against one document of a sample collection.

    :param test_form5_collection: object whose ``glob("*.txt")`` yields the
        sample filings (presumably a directory path fixture)
    :param doc_num: zero-based position of the filing to check, counted in
        sorted path order
    :return: None
    """
    # glob() yields entries in an unspecified order; sorting makes ``doc_num``
    # refer to the same filing on every run and every machine, so a failure
    # can be reproduced from its test id.
    file = sorted(test_form5_collection.glob("*.txt"))[doc_num]
    doc = Form5(file)
    assert doc.filename == file.name

    fields_list = doc.signatures
    assert len(fields_list) > 0

    for idx, fields in enumerate(fields_list):
        assert len(fields) == 7
        assert fields["filename"] == file.name
        assert fields["accession_num"] == doc.accession_num
        # ``order`` and ``index`` are one-based, while enumerate is zero-based.
        assert fields["order"] == f"{idx+1}"
        assert fields["type"] == "signature"
        assert fields["index"] == f"signature{idx+1}"
        assert validate(file, fields["signature_name"], r".+")
        assert validate(file, fields["signature_date"], r"\d\d\d\d-\d\d-\d\d")