def __init__(self, file: Path, replace: Dict[str, str] = {"true": "1", "false": "0"}):
    """
    Initialize the Form5 object from the contents of the file parameter.

    Note: Form 5 shares the same underlying schema as Form 4, so this class is
    not technically required, but is supplied for convenience, and in case
    Form 5 specific info needs to be added in the future.

    :param file: File source of the document
    :param replace: A dictionary that can be used to replace and normalize
        extracted values, e.g. "true" => "1". A shallow copy is handed to
        Form4, so neither the caller's dictionary nor the shared default
        object is exposed to the parent initializer.
    """
    import copy

    # The default dictionary is created once, when the function is defined,
    # and is shared by every call that omits ``replace``. Passing a shallow
    # copy keeps any later mutation from leaking between instances.
    # copy.copy preserves the mapping's type and passes None through as-is.
    Form4.__init__(self, file, copy.copy(replace))
def test_extract_report_owner_form4_collection(test_form4_collection, doc_num: int):
    """
    Validate Form4 reporting-owner extraction against one document of the sample collection

    :param test_form4_collection: Location whose ``*.txt`` entries are the sample documents
    :param doc_num: Zero-based position of the document in the sorted ``*.txt`` listing
    :return:
    """
    # glob() gives no ordering guarantee, so sort the listing to make doc_num
    # select the same document on every run and on every machine.
    file = sorted(test_form4_collection.glob("*.txt"))[doc_num]
    doc = Form4(file)
    assert doc.filename == file.name
    fields_list = doc.report_owners
    assert len(fields_list) > 0
    for order, fields in enumerate(fields_list, start=1):
        assert len(fields) == 19
        assert fields["filename"] == file.name
        assert fields["order"] == f"{order}"
        assert fields["type"] == "reportingOwner"
        assert fields["index"] == f"reportingOwner{order}"
        assert validate(file, fields["rpt_owner_cik"], r"\d{10}")
        assert validate(file, fields["rpt_owner_name"], r".+")
        assert validate(file, fields["rpt_owner_city"], r".+")
        # NOTE(review): none_allowed=True presumably lets validate accept a
        # missing (None) relationship flag - confirm against validate().
        for flag in ("is_director", "is_officer", "is_ten_percent_owner", "is_other"):
            assert validate(file, fields[flag], r"[10]", none_allowed=True)
def test_extract_nonderivative_trans_form4(test_form4):
    """
    Check the non-derivative transaction extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    records = parsed.nonderivatives
    assert len(records) == 1
    record = records[0]

    # Expected value per field, in checking order; None means "must be None"
    expected = {
        "filename": test_form4.name,
        "accession_num": accession,
        "order": "1",
        "type": "nonDerivTrans",
        "index": "nonDerivTrans1",
        "security_title": "Common Stock, par value $0.0001",
        "transaction_date": "2017-10-17",
        "deemed_execution_date": None,
        "transaction_acquired_disposed_code": "D",
        "transaction_timeliness": None,
        "transaction_price_per_share": "0",
        "transaction_shares": "1278471",
        "direct_or_indirect_ownership": "I",
        "equity_swap_involved": "0",
        "nature_of_ownership": "See Footnote",
        "transaction_form_type": "4",
        "shares_owned_following_transaction": "0",
        "value_owned_following_transaction": None,
        "transaction_code": "J",
    }
    for key, want in expected.items():
        if want is None:
            assert record[key] is None
        else:
            assert record[key] == want
def test_extract_doclevel_form4_collection(test_form4_collection):
    """
    Run document-level Form4 extraction over every sample document and check each field's format

    :param test_form4_collection: Location whose ``*.txt`` entries are the sample documents
    :return:
    """
    # (field, pattern, extra keyword arguments for validate), in checking order
    format_checks = [
        ("accession_num", r"[\d-]+", {}),
        ("sec_accept_datetime", r"\d{14,14}", {}),
        ("sec_file_num", r"[\d-]+", {}),
        ("doc_count", r"\d+", {}),
        ("filed_date", r"\d{8,8}", {}),
        ("conformed_period_of_report", r"\d{8,8}", {}),
        ("change_date", r"\d{8,8}", {}),
        ("period_of_report", r"\d\d\d\d-\d\d-\d\d", {}),
        ("not_subject_to_section_16", r"[10]", {"none_allowed": True}),
        ("issuer_cik", r"\d+", {}),
        ("issuer_name", r".+", {}),
        ("issuer_trading_symbol", r"[A-Z\.]+", {}),
        ("regcik", r"\d+", {}),
        ("regsic", r"\d\d\d\d", {"none_allowed": True}),
    ]
    for path in test_form4_collection.glob("*.txt"):
        parsed = Form4(path)
        assert parsed.filename == path.name

        info = parsed.doc_info
        assert len(info) == 18
        assert info["filename"] == path.name
        assert info["accession_num"] is not None
        assert info["schema_version"] == "X0306"
        assert info["document_type"] == "4"

        for key, pattern, extra in format_checks:
            assert validate(path, info[key], pattern, **extra)
def test_extract_doclevel_form4(test_form4):
    """
    Check the document-level fields extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    info = parsed.doc_info
    assert len(info) == 18

    # Expected value per field, in checking order; None means "must be None"
    expected = {
        "filename": test_form4.name,
        "accession_num": accession,
        "sec_accept_datetime": "20171017200436",
        "sec_file_num": "001-32587",
        "doc_count": "1",
        "filed_date": "20171017",
        "conformed_period_of_report": "20171013",
        "change_date": "20171017",
        "schema_version": "X0306",
        "document_type": "4",
        "period_of_report": "2017-10-13",
        "not_subject_to_section_16": None,
        "issuer_cik": "0001326190",
        "issuer_name": "Altimmune, Inc.",
        "issuer_trading_symbol": "ALT",
        "remarks": None,
        "regcik": "0001326190",
        "regsic": "2834",
    }
    for key, want in expected.items():
        if want is None:
            assert info[key] is None
        else:
            assert info[key] == want
def test_extract_signature_form4(test_form4):
    """
    Check both signatures extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    records = parsed.signatures
    assert len(records) == 2
    # Only the first record's field count is pinned
    assert len(records[0]) == 7

    signers = (
        "/s/ Ori Solomon, Attorney in fact for Philip Hodges",
        "/s/ Ori Solomon, Attorney in fact for Redmont VAXN Capital Holdings, LLC",
    )
    for position, signer in enumerate(signers, start=1):
        record = records[position - 1]
        assert record["filename"] == test_form4.name
        assert record["accession_num"] == accession
        assert record["order"] == f"{position}"
        assert record["type"] == "signature"
        assert record["index"] == f"signature{position}"
        assert record["signature_name"] == signer
        assert record["signature_date"] == "2017-10-17"
def test_extract_footnotes_form4(test_form4):
    """
    Check the three footnotes extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    notes = parsed.footnotes
    assert len(notes) == 3
    # Only the first record's field count is pinned
    assert len(notes[0]) == 6

    # (footnote id, owning record index, annotated field, first 26 characters of the text)
    expected_rows = [
        ("F1", "nonDerivTrans1", "transactionCoding", "Redmont VAXN Capital Holdi"),
        ("F2", "nonDerivTrans1", "natureOfOwnership", "Consists of shares of Comm"),
        ("F3", "derivTrans1", "directOrIndirectOwnership", "Held by Philip Hodges."),
    ]
    for note, (label, owner, field, text_start) in zip(notes, expected_rows):
        assert note["filename"] == test_form4.name
        assert note["accession_num"] == accession
        assert note["footnote"] == label
        assert note["index"] == owner
        assert note["field"] == field
        assert note["text"][:26] == text_start
def test_extract_report_owner_form4(test_form4):
    """
    Check both reporting owners extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    owners = parsed.report_owners
    assert len(owners) == 2

    # (CIK, name) of each owner, in document order; every other field is
    # expected to be the same for both owners.
    identities = [
        ("0001705562", "Hodges Philip"),
        ("0001705638", "Redmont VAXN Capital Holdings, LLC"),
    ]
    for position, (cik, name) in enumerate(identities, start=1):
        owner = owners[position - 1]
        assert len(owner) == 19

        # Expected value per field, in checking order; None means "must be None"
        expected = {
            "filename": test_form4.name,
            "accession_num": accession,
            "order": f"{position}",
            "type": "reportingOwner",
            "index": f"reportingOwner{position}",
            "rpt_owner_cik": cik,
            "rpt_owner_name": name,
            "rpt_owner_street1": "C/O ALTIMMUNE, INC.",
            "rpt_owner_street2": "19 FIRSTFIELD ROAD, SUITE 200",
            "rpt_owner_city": "GAITHERSBURG",
            "rpt_owner_state": "MD",
            "rpt_owner_zip_code": "20878",
            "rpt_owner_state_descr": None,
            "is_director": "1",
            "is_officer": "0",
            "is_ten_percent_owner": "0",
            "is_other": "0",
            "officer_title": None,
            "other_text": None,
        }
        for key, want in expected.items():
            if want is None:
                assert owner[key] is None
            else:
                assert owner[key] == want
def test_extract_derivative_trans_form4_collection(test_form4_collection):
    """
    Run derivative extraction over every sample document and check transactions and holdings

    :param test_form4_collection: Location whose ``*.txt`` entries are the sample documents
    :return:
    """
    # Format checks applied to both derivative transactions and holdings
    shared_patterns = [
        ("accession_num", r"[\d-]+"),
        ("security_title", r".+"),
        ("direct_or_indirect_ownership", r"[DI]"),
    ]
    # Additional format checks applied to transactions only
    transaction_patterns = [
        ("transaction_date", r"(\d\d\d\d-\d\d-\d\d)?"),
        ("transaction_acquired_disposed_code", r"[AD]"),
        ("transaction_price_per_share", r"[\d+\.]*"),
        ("transaction_shares", r"[\d+\.]+"),
        ("equity_swap_involved", r"[10]"),
        ("transaction_form_type", r"4"),
        ("shares_owned_following_transaction", r"[\d\.]+"),
        ("transaction_code", r"[ACMPS]"),
    ]
    # Transactions are checked before holdings, each kind numbered from 1
    plan = (
        ("derivTrans", shared_patterns + transaction_patterns),
        ("derivHolding", shared_patterns),
    )

    for path in test_form4_collection.glob("*.txt"):
        parsed = Form4(path)
        assert parsed.filename == path.name
        rows = parsed.derivatives

        for kind, patterns in plan:
            selected = [row for row in rows if row["type"] == kind]
            for position, row in enumerate(selected, start=1):
                assert len(row) == 26
                assert row["filename"] == path.name
                assert row["accession_num"] == parsed.accession_num
                assert row["order"] == f"{position}"
                assert row["type"] == kind
                assert row["index"] == f"{kind}{position}"
                for key, pattern in patterns:
                    assert validate(path, row[key], pattern)
def create_doc(file: Path) -> Document:
    """
    A factory function that returns a Document object with the right subtype.

    :param file: The file object that is the source of the Document
    :return: A Form3, Form4 or Form5 object, or None (after printing a
        warning) when the document type is missing or not supported
    """
    xmlpath = Document.xml_document_fields["document_type"]
    # search() stops at the first occurrence and yields None when the tag is
    # absent, instead of the bare IndexError that findall(...)[0] raised.
    found = re.search(f"<{xmlpath}>(.+)</{xmlpath}>", file.read_text())
    if found is None:
        typer.secho(
            f"WARNING: no document type found in {file.name}", fg=typer.colors.RED
        )
        return None

    form_type = found.group(1)
    if form_type == "3":
        return Form3(file)
    if form_type == "4":
        return Form4(file)
    if form_type == "5":
        return Form5(file)

    typer.secho(
        f"WARNING: {form_type} not a supported form type", fg=typer.colors.RED
    )
    return None
def test_extract_derivative_trans_form4(test_form4):
    """
    Check the derivative transaction extracted from the detailed Form4 example

    :param test_form4: The example document handed to Form4 (exposes ``.name``)
    :return:
    """
    parsed = Form4(test_form4)
    accession = parsed.accession_num
    assert accession == "0001012975-17-000759"
    assert parsed.filename == test_form4.name

    records = parsed.derivatives
    assert len(records) == 1
    record = records[0]
    assert len(record) == 26

    # Expected value per field, in checking order; None means "must be None"
    expected = {
        "filename": test_form4.name,
        "accession_num": accession,
        "order": "1",
        "type": "derivTrans",
        "index": "derivTrans1",
        "security_title": "Stock Options (option to buy)",
        "transaction_date": "2017-10-13",
        "deemed_execution_date": None,
        "transaction_acquired_disposed_code": "A",
        "transaction_timeliness": None,
        "transaction_price_per_share": "0",
        "transaction_shares": "20000",
        "direct_or_indirect_ownership": "D",
        "equity_swap_involved": "0",
        "nature_of_ownership": "",
        "transaction_form_type": "4",
        "shares_owned_following_transaction": "20000",
        "value_owned_following_transaction": None,
        "transaction_code": "A",
        "conversion_or_exercise_price": "2.50",
        "transaction_total_value": None,
        "exercise_date": "2018-04-30",
        "expiration_date": "2027-10-13",
        "underlying_security_title": "Common Stock, par value $0.0001",
        "underlying_security_shares": "20000",
        "underlying_security_value": None,
    }
    for key, want in expected.items():
        if want is None:
            assert record[key] is None
        else:
            assert record[key] == want
def create_doc(file: Path) -> Union[None, Document]:
    """
    A factory function that returns a Document object with the right subtype.

    :param file: The file object that is the source of the Document
    :return: A Form3, Form4 or Form5 object, or None (after printing a
        warning) when the document type is missing or not supported
    """
    xmlpath = Document.xml_document_fields["document_type"]
    # search() stops at the first occurrence and yields None when the tag is
    # absent, instead of the bare IndexError that findall(...)[0] raised.
    found = re.search(f"<{xmlpath}>(.+)</{xmlpath}>", file.read_text())
    if found is None:
        typer.secho(
            f"WARNING: no document type found in {file.name}", fg=typer.colors.RED
        )
        return None

    form_type = found.group(1)
    if form_type == "3":
        return Form3(file)
    if form_type == "4":
        return Form4(file)
    if form_type == "5":
        return Form5(file)

    typer.secho(
        f"WARNING: {form_type} not a supported form type", fg=typer.colors.RED
    )
    return None
def test_extract_signature_form4_collection(test_form4_collection, doc_num: int):
    """
    Validate Form4 signature extraction against one document of the sample collection

    :param test_form4_collection: Location whose ``*.txt`` entries are the sample documents
    :param doc_num: Zero-based position of the document in the sorted ``*.txt`` listing
    :return:
    """
    # glob() gives no ordering guarantee, so sort the listing to make doc_num
    # select the same document on every run and on every machine.
    file = sorted(test_form4_collection.glob("*.txt"))[doc_num]
    doc = Form4(file)
    assert doc.filename == file.name
    fields_list = doc.signatures
    assert len(fields_list) > 0
    for order, fields in enumerate(fields_list, start=1):
        assert len(fields) == 7
        assert fields["filename"] == file.name
        assert fields["accession_num"] == doc.accession_num
        assert fields["order"] == f"{order}"
        assert fields["type"] == "signature"
        assert fields["index"] == f"signature{order}"
        assert validate(file, fields["signature_name"], r".+")
        assert validate(file, fields["signature_date"], r"\d\d\d\d-\d\d-\d\d")
def test_extract_many_footnotes_example(test_form4_collection):
    """
    Check that all 39 footnotes are extracted from one specific sample document

    :param test_form4_collection: Location that holds the sample documents
    :return:
    """
    sample = test_form4_collection / "1363364_2_0001638599-20-000500.txt"
    assert sample.exists()
    parsed = Form4(sample)
    assert len(parsed.footnotes) == 39