def test_with_wo_apostrophe(self):
    """Expect exactly one company whether or not the name has an apostrophe."""
    variants = (
        'McDonalds Inc.: Burgers, Fries & More. Quality Ingredients.',
        "McDonald's Inc.: Burgers, Fries & More. Quality Ingredients.",
    )
    for sentence in variants:
        found = list(get_company_annotations(sentence))
        self.assertEqual(1, len(found))
def test_with_apostrophe(self):
    """Check the first company name for "and" versus "&" in the same sentence.

    Note: despite the method name, the two inputs differ only in the
    conjunction ("and" vs "&"); neither contains an apostrophe.
    """
    expectations = (
        ('DTC is a wholly-owned subsidiary of The Depository Trust '
         'and Clearing Corporation ("DTCC").',
         'The Depository Trust'),
        ('DTC is a wholly-owned subsidiary of The Depository Trust '
         '& Clearing Corporation ("DTCC").',
         'The Depository Trust & Clearing'),
    )
    for sentence, expected_name in expectations:
        found = list(get_company_annotations(sentence))
        self.assertEqual(expected_name, found[0].name)
def test_banlisted(self):
    """Compare annotation counts with and without the default ban list.

    The same sentence is expected to give two annotations with default
    settings and three when ``use_default_banlist`` is False.
    """
    sentence = ('Depository Bank is a wholly-owned subsidiary of '
                'The Depository Trust and Clearing Corporation ("DTCC").')

    with_default = list(get_company_annotations(sentence))
    self.assertEqual(len(with_default), 2)

    no_default_usage = BanListUsage(use_default_banlist=False)
    without_default = list(
        get_company_annotations(sentence, banlist_usage=no_default_usage))
    self.assertEqual(len(without_default), 3)
def test_custom_banlisted(self):
    """Check annotation counts when a custom ban list entry is supplied.

    A single ``EntityBanListItem('Clearing')`` is passed twice: appended to
    the default ban list (one annotation expected) and on its own with the
    default ban list disabled (two annotations expected).
    """
    sentence = ('Depository Bank is a wholly-owned subsidiary of '
                'The Depository Trust and Clearing Corporation ("DTCC").')
    extra_items = [EntityBanListItem('Clearing')]

    appended_usage = BanListUsage(banlist=extra_items, append_to_default=True)
    found = list(get_company_annotations(sentence, banlist_usage=appended_usage))
    self.assertEqual(1, len(found))

    custom_only_usage = BanListUsage(
        banlist=extra_items,
        use_default_banlist=False,
        append_to_default=False)
    found = list(
        get_company_annotations(sentence, banlist_usage=custom_only_usage))
    self.assertEqual(len(found), 2)
def test_reg_back(self):
    """Check that extraction on a long hex-like token finishes and finds nothing.

    The point of the test is that the call returns at all (does not hang).
    """
    sample = """ /NOR <FEFF004200720075006b00200064006900730073006500200069006e006e007300740069006c006c0069006e00670065006e006500. """
    annotations = list(get_company_annotations(sample))
    self.assertEqual(0, len(annotations))
def test_copyright(self):
    """Check the two company names extracted from a copyright notice."""
    notice = (
        "Copyright (c) 2019, Moody's Corporation, "
        "Moody's Investors Service, Inc., "
        "Moody's Analytics, Inc. and/or their licensors and affiliates "
        "(collectively, \"MOODY's\")."
    )
    found = list(get_company_annotations(notice))
    self.assertEqual(2, len(found))
    self.assertEqual(
        "Moody's Corporation, Moody's Investors Service", found[0].name)
    self.assertEqual("Moody's Analytics", found[1].name)
def test_get_unpreffixed_companies(self):
    """Expect one annotation in total across three invoice-like snippets.

    Every snippet is processed with the default ban list switched off.
    """
    snippets = (
        "MI 48226 From: Company City of Detroit, Contact ROMONA JONES Address COLEMAN",
        "Order Amount 51.000.00 USD, Sold To City of Detroit, COLEMAN A YOUNG MUNICIPAL CENTER 2",
        "MI 48226  . Supplier GAYANGA Inc. / CO AMERIFACTORS LACRESHA",
    )
    collected = []
    for snippet in snippets:
        usage = BanListUsage(use_default_banlist=False)
        collected.extend(get_company_annotations(snippet, banlist_usage=usage))
    self.assertEqual(1, len(collected))
def test_wrong_pos(self):
    """Check names, coordinates and type labels in a lease sentence.

    Four annotations are expected; the first two are inspected in detail.
    """
    lease = (
        'This Commercial Lease Agreement ("Lease") is made and effective '
        'June 1, 2010, by and between Powdermet, inc. ("Landlord/Tenant") '
        'and Mesocoat, inc ("Sub- Tenant"). This is a sublease to the '
        'current lease held by Powdermet, Inc. with Sherman Properties, LLC..'
    )
    found = list(get_company_annotations(lease))
    self.assertEqual(4, len(found))

    landlord = found[0]
    subtenant = found[1]
    self.assertEqual('Powdermet', landlord.name)
    self.assertEqual('Mesocoat', subtenant.name)
    self.assertEqual((94, 108), landlord.coords)
    self.assertEqual((134, 147), subtenant.coords)
    self.assertEqual('Corporation', landlord.company_type_label)
    self.assertEqual('Corporation', subtenant.company_type_label)
def test_with_colon(self):
    """Check name and type parsing for company mentions with varied endings.

    Each case feeds one text to ``get_company_annotations`` and compares the
    first annotation's ``(name, company_type_full, company_type_abbr,
    company_type_label, name_abbr, description)`` with the expected tuple.
    The texts use "Inc", "Incorporated" and "Corp", bare or followed by a
    period, a colon or a dash. A final text without such a suffix must
    produce no annotations.

    The cases are table-driven and run under ``subTest`` so a failing case
    is reported with its input text and does not hide the remaining cases.
    """
    # (input text, expected name, expected company_type_full); every case
    # shares the same abbr/label and has no name_abbr/description.
    cases = (
        ("this is McDonald's Incorporated: Burgers, blah-blah",
         "McDonald's", 'Incorporated'),
        ('Sitwell Housing Inc', 'Sitwell Housing', 'Inc'),
        ('Sitwell Marketing Inc', 'Sitwell Marketing', 'Inc'),
        ('Sitwell Housing Inc.', 'Sitwell Housing', 'Inc'),
        ('Sitwell Housing Incorporated.', 'Sitwell Housing', 'Incorporated'),
        ('Sitwell Housing Incorporated', 'Sitwell Housing', 'Incorporated'),
        ('Sitwell Housing Corp', 'Sitwell Housing', 'Corp'),
        ('Sitwell Housing Corp.', 'Sitwell Housing', 'Corp'),
        ('Sitwell Housing Corp: good old company.',
         'Sitwell Housing', 'Corp'),
        ('Sitwell Housing Corp - good old company.',
         'Sitwell Housing', 'Corp'),
        ('Sitwell Housing Incorporated: good old company',
         'Sitwell Housing', 'Incorporated'),
        ('Sitwell Housing Incorporated - good old company',
         'Sitwell Housing', 'Incorporated'),
    )
    for text, expected_name, expected_type_full in cases:
        with self.subTest(text=text):
            res = list(get_company_annotations(text))[0]
            self.assertEqual(
                (res.name, res.company_type_full, res.company_type_abbr,
                 res.company_type_label, res.name_abbr, res.description),
                (expected_name, expected_type_full, 'CORP', 'Corporation',
                 None, None))

    # No company-type suffix at all: nothing should be extracted.
    res = list(get_company_annotations('Sitwell Housing Busted'))
    self.assertEqual(0, len(res))
def test_with_forwardslash(self):
    """Check the first company extracted from text containing " / "."""
    # TODO: this should eventually extract both companies
    found = list(get_company_annotations('Supplier GAYANGA CO / CO AMERIFACTORS'))
    first = found[0]
    self.assertEqual('Supplier GAYANGA', first.name)
    self.assertEqual('CO', first.company_type)
def test_default_banlisted(self):
    """Expect a single annotation for the "Clearing Agency" sentence by default."""
    sentence = ('Depository Bank is a wholly-owned subsidiary of '
                'The Depository Trust and Clearing Agency ("DTCA").')
    found = list(get_company_annotations(sentence))
    self.assertEqual(1, len(found))
def test_mixed_banlisted(self):
    """Expect two annotations for the "Issuing Bank" sentence by default."""
    sentence = ('Hereinafter, the Issuing Bank is a wholly-owned subsidiary of '
                'The Depository Trust and Clearing Corporation ("DTCC").')
    found = list(get_company_annotations(sentence))
    self.assertEqual(len(found), 2)