def test_check_sdn_fallback_other_fields(self):
    """
    Verify that country, type and source all need to match for checkSDNFallback to report a hit.

    Every scenario imports a single record whose name and address match the
    search, but breaks exactly one of the remaining criteria, and expects
    checkSDNFallback to return a hit count of 0.
    """
    # One-record CSV; only the source and type columns vary between scenarios.
    # pylint: disable=line-too-long
    csv_template = """_id,source,entity_number,type,programs,name,title,addresses,federal_register_notice,start_date,end_date,standard_order,license_requirement,license_policy,call_sign,vessel_type,gross_tonnage,gross_registered_tonnage,vessel_flag,vessel_owner,remarks,source_list_url,alt_names,citizenships,dates_of_birth,nationalities,places_of_birth,source_information_url,ids
94734218,{source},96663868,{record_type},material,Juan M. de la Cruz,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI"""
    # pylint: enable=line-too-long
    sdn_source = 'Specially Designated Nationals (SDN) - Treasury Department'

    scenarios = (
        # (what is wrong, record source, record type, country searched for)
        ('wrong country', sdn_source, 'Individual', 'AB'),
        ('wrong type', sdn_source, 'foo', 'SN'),
        ('wrong source', 'bar', 'Individual', 'SN'),
    )
    for reason, source, record_type, country in scenarios:
        csv_string = csv_template.format(source=source, record_type=record_type)
        populate_sdn_fallback_data_and_metadata(csv_string)
        # msg identifies the failing scenario, since all three share one assertion.
        self.assertEqual(checkSDNFallback("Juan", "Kristinaport", country), 0, msg=reason)
def test_file_format(self, csv_string):
    """
    Import csv_string and check the resulting current SDN 'Individual' record.

    Exactly one record is expected; its names and addresses are compared
    after being normalised with process_text, its countries verbatim.
    """
    populate_sdn_fallback_data_and_metadata(csv_string)

    sdn_source = 'Specially Designated Nationals (SDN) - Treasury Department'
    matching = SDNFallbackData.get_current_records_and_filter_by_source_and_type(sdn_source, 'Individual')
    self.assertEqual(len(matching), 1)

    actual_names = process_text(matching.first().names)
    self.assertEqual(actual_names, process_text('Victor Conrad Wendy Brock'))

    actual_addresses = process_text(matching.first().addresses)
    self.assertEqual(
        actual_addresses,
        process_text('17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN'),
    )

    self.assertEqual(matching.first().countries, 'SN')
def test_sdn_fallback_multiple_hits(self):
    """
    Verify that checkSDNFallback counts every matching record, not just the first one.

    The fixture holds two 'Juan ... Cruz' records in North Kristinaport (SN)
    and one unrelated record, so the expected hit count is 2.
    """
    # pylint: disable=line-too-long
    three_record_csv = """_id,source,entity_number,type,programs,name,title,addresses,federal_register_notice,start_date,end_date,standard_order,license_requirement,license_policy,call_sign,vessel_type,gross_tonnage,gross_registered_tonnage,vessel_flag,vessel_owner,remarks,source_list_url,alt_names,citizenships,dates_of_birth,nationalities,places_of_birth,source_information_url,ids
94734218,Specially Designated Nationals (SDN) - Treasury Department,96663868,Individual,material,Juan M. de la Cruz,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.cruz.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://juan.org/,CI
94734219,Specially Designated Nationals (SDN) - Treasury Department,96663869,Individual,material,Juan Cruz,Dr.,"123 Main Street North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI
37539856,Specially Designated Nationals (SDN) - Treasury Department,55159852,Individual,hotel,Sarah Jones,Mrs.,"3699 Daniel Highway Port Andrewport, OR 39456, EE",,,,,,,,,,,,,,http://douglas.com/,Misty Johnson,CV,1998-02-15,Ukraine,BO,https://townsend.com/,TM"""
    # pylint: enable=line-too-long
    populate_sdn_fallback_data_and_metadata(three_record_csv)

    self.assertEqual(checkSDNFallback('Juan Cruz', 'North Kristinaport', 'SN'), 2)
def test_checksum_check(self):
    """
    Verify that files with the same checksum are not imported.
    Verify that files with different checksums are imported.
    """
    # The two fixtures are identical except for the _id column (94734218 vs 94734219).
    # pylint: disable=line-too-long
    first_csv = self.csv_header + """94734218,Specially Designated Nationals (SDN) - Treasury Department,96663868,Individual,material,Victor Conrad,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI"""
    second_csv = self.csv_header + """94734219,Specially Designated Nationals (SDN) - Treasury Department,96663868,Individual,material,Victor Conrad,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI"""
    # pylint: enable=line-too-long

    # Initial import: a metadata entry with a timestamp is returned.
    metadata = populate_sdn_fallback_data_and_metadata(first_csv)
    first_import_timestamp = metadata.import_timestamp
    self.assertIsNotNone(first_import_timestamp)

    # Importing the very same content again returns None and leaves the
    # "Current" metadata entry with its original timestamp.
    self.assertIsNone(populate_sdn_fallback_data_and_metadata(first_csv))
    self.assertEqual(
        first_import_timestamp,
        SDNFallbackMetadata.objects.get(import_state="Current").import_timestamp,
    )

    # Different content: a new entry is returned and the "Current" timestamp changes.
    metadata = populate_sdn_fallback_data_and_metadata(second_csv)
    self.assertIsNotNone(metadata.import_timestamp)
    self.assertNotEqual(
        first_import_timestamp,
        SDNFallbackMetadata.objects.get(import_state="Current").import_timestamp,
    )
def test_on_examples(self):
    """
    Import a twelve-record sample file and check what ends up in SDNFallbackData.

    Each stored record is compared as a triple of word sets
    (names, addresses, countries), so neither record order nor word order matters.
    """
    # Show the complete diff if the record comparison at the end fails.
    self.maxDiff = None

    # pylint: disable=line-too-long
    sample_csv = """_id,source,entity_number,type,programs,name,title,addresses,federal_register_notice,start_date,end_date,standard_order,license_requirement,license_policy,call_sign,vessel_type,gross_tonnage,gross_registered_tonnage,vessel_flag,vessel_owner,remarks,source_list_url,alt_names,citizenships,dates_of_birth,nationalities,places_of_birth,source_information_url,ids
94734218,Specially Designated Nationals (SDN) - Treasury Department,96663868,Individual,material,Victor Conrad,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI
37539856,Specially Designated Nationals (SDN) - Treasury Department,55159852,Individual,hotel,Sarah Jones,Mrs.,"3699 Daniel Highway Port Andrewport, OR 39456, EE",,,,,,,,,,,,,,http://douglas.com/,Misty Johnson,CV,1998-02-15,Ukraine,BO,https://townsend.com/,TM
12650118,Specially Designated Nationals (SDN) - Treasury Department,06283056,Individual,west,Jordan King,Dr.,"0916 Matthew Stream Nathanhaven, KS 70796, NE",,,,,,,,,,,,,,https://edwards.com/,Luke Soto,VA,2013-12-23,Liechtenstein,AG,http://www.garner.org/,ET
83041181,Specially Designated Nationals (SDN) - Treasury Department,68151959,Individual,together,Joseph Rodriguez,Miss,"Unit 7378 Box 6650 DPO AA 56444, BD",,,,,,,,,,,,,,http://rodriguez.com/,Debra Mcdonald,LS,1990-10-16,Niger,SN,http://howard.net/,BD
53500519,Specially Designated Nationals (SDN) - Treasury Department,96028582,Individual,so,Joshua Weaver,Dr.,"6378 Robin River Conniechester, AK 67491, CH",,,,,,,,,,,,,,http://www.rose.org/,Chad Carter,PE,1952-02-22,Netherlands Antilles,HN,https://www.fox.com/,SR
02372174,Specially Designated Nationals (SDN) - Treasury Department,28705260,Individual,remain,Joseph Knight,Dr.,"19789 Sims Lodge North Kara, IL 18127, MX",,,,,,,,,,,,,,http://rodriguez.org/,Katelyn Weaver,MZ,1963-09-28,Germany,BN,https://www.williams.com/,KH
01157291,Specially Designated Nationals (SDN) - Treasury Department,26428901,Individual,happen,Derek Washington,Ms.,"6343 James Circle New Timothyton, WV 09301, BN",,,,,,,,,,,,,,http://www.davis.com/,Andrew Cordova,PW,1949-10-02,South Africa,LV,http://www.crawford.com/,SR
54139046,Specially Designated Nationals (SDN) - Treasury Department,84007582,Individual,morning,Michelle Fletcher,Dr.,"9402 Nathan Points Apt. 735 Kelleyfort, CA 29232, BB",,,,,,,,,,,,,,http://www.buck.com/,Christopher Tanner,EE,1965-05-06,Israel,BF,http://www.richardson-hill.com/,MA
62040891,Specially Designated Nationals (SDN) - Treasury Department,39940476,Individual,affect,Christopher Adams,Mx.,"1106 Collins Path Masonfurt, CO 94809, TT",,,,,,,,,,,,,,https://www.lee.com/,Rebecca Romero,TL,1953-12-13,Nepal,HU,https://robinson.org/,OM
09853119,Specially Designated Nationals (SDN) - Treasury Department,89203866,Individual,write,Corey Jacobs,Mx.,"410 Carroll Station Suite 723 Claytonshire, ID 19778, AE",,,,,,,,,,,,,,http://swanson-richardson.com/,Mark Hancock,AM,1977-03-14,Isle of Man,NZ,https://www.mccormick.info/,TV
43915637,Specially Designated Nationals (SDN) - Treasury Department,98733927,Individual,certainly,Ronald Gallagher,Mx.,"9699 Joseph Hill North Marcusburgh, KS 75288, DJ",,,,,,,,,,,,,,http://brown.biz/,Julie Miller,IR,1971-05-06,Montserrat,HR,https://smith.info/,BZ
45297281,Specially Designated Nationals (SDN) - Treasury Department,00673440,Individual,movement,Bobby Drake,Dr.,"441 Jennifer Brooks Joshuafort, MD 72104, TH",,,,,,,,,,,,,,https://banks-bender.com/,Michael Anderson,BI,1914-11-05,French Guiana,ST,https://henry.info/,CD"""
    # pylint: enable=line-too-long
    populate_sdn_fallback_data_and_metadata(sample_csv)

    # NOTE(review): two metadata rows are expected after this single import --
    # presumably one already exists from the test fixture setup; confirm.
    self.assertEqual(len(SDNFallbackMetadata.objects.filter()), 2)

    expected = [
        ({'victor', 'brock', 'wendy', 'conrad'},
         {'91033', '976', 'hi', 'christie', 'apt', 'north', 'kristinaport', 'sn', 'stream', '17472'},
         {'SN'}),
        ({'misty', 'sarah', 'johnson', 'jones'},
         {'port', 'ee', 'daniel', 'andrewport', '39456', 'highway', 'or', '3699'},
         {'EE'}),
        ({'soto', 'luke', 'jordan', 'king'},
         {'0916', 'matthew', 'nathanhaven', 'ne', '70796', 'ks', 'stream'},
         {'NE'}),
        ({'joseph', 'rodriguez', 'debra', 'mcdonald'},
         {'bd', 'dpo', 'unit', '7378', 'box', '56444', '6650', 'aa'},
         {'BD'}),
        ({'chad', 'carter', 'joshua', 'weaver'},
         {'67491', '6378', 'conniechester', 'ak', 'river', 'ch', 'robin'},
         {'CH'}),
        ({'weaver', 'knight', 'joseph', 'katelyn'},
         {'il', '19789', 'north', 'kara', 'mx', 'lodge', 'sims', '18127'},
         {'MX'}),
        ({'andrew', 'washington', 'derek', 'cordova'},
         {'james', 'circle', 'wv', '09301', 'timothyton', 'bn', 'new', '6343'},
         {'BN'}),
        ({'christopher', 'michelle', 'tanner', 'fletcher'},
         {'points', '735', 'bb', 'apt', 'nathan', '29232', 'kelleyfort', 'ca', '9402'},
         {'BB'}),
        ({'adams', 'christopher', 'romero', 'rebecca'},
         {'collins', '94809', 'tt', 'co', 'path', 'masonfurt', '1106'},
         {'TT'}),
        ({'mark', 'corey', 'jacobs', 'hancock'},
         {'claytonshire', '723', '19778', 'station', 'id', '410', 'suite', 'ae', 'carroll'},
         {'AE'}),
        ({'gallagher', 'miller', 'ronald', 'julie'},
         {'north', 'dj', 'joseph', '9699', 'hill', '75288', 'ks', 'marcusburgh'},
         {'DJ'}),
        ({'anderson', 'michael', 'bobby', 'drake'},
         {'md', 'joshuafort', '72104', 'brooks', 'jennifer', '441', 'th'},
         {'TH'}),
    ]

    def word_set(value):
        # Whitespace-separated tokens of the stored field, as a set.
        return set(str(value).split())

    actual = []
    for stored in SDNFallbackData.objects.all():
        actual.append((word_set(stored.names), word_set(stored.addresses), word_set(stored.countries)))

    self.assertCountEqual(expected, actual)
def handle(self, *args, **options):
    """
    Download the consolidated screening list CSV and import it as SDN fallback data.

    The file is only imported when its size is strictly greater than
    options['threshold'], compared in MB (1 MB counted as 10**6 bytes).

    Raises:
        Timeout: re-raised (after logging) when the download times out.
        Exception: any other download error is logged and re-raised; also
            raised when the response status code is not 200, or when the
            downloaded file does not exceed the threshold.
    """
    # download the csv locally, to check size and pass along to import
    threshold = options['threshold']
    url = 'http://api.trade.gov/static/consolidated_screening_list/consolidated.csv'
    timeout = settings.SDN_CHECK_REQUEST_TIMEOUT
    with requests.Session() as s:
        try:
            download = s.get(url, timeout=timeout)
            status_code = download.status_code
        except Timeout as e:
            logger.warning(
                "SDNFallback: DOWNLOAD FAILURE: Timeout occurred trying to download SDN csv. "
                "Timeout threshold (in seconds): %s", timeout)
            raise
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(
                "SDNFallback: DOWNLOAD FAILURE: Exception occurred: [%s]", e)
            raise
        # Any non-200 response is treated as a failed download.
        if download.status_code != 200:
            logger.warning(
                "SDNFallback: DOWNLOAD FAILURE: Status code was: [%s]", status_code)
            raise Exception(
                "CSV download url got an unsuccessful response code: ", status_code)
        # The temporary file is only used to measure the size of the payload;
        # the content handed to the import comes from download.content.
        with tempfile.TemporaryFile() as temp_csv:
            temp_csv.write(download.content)
            file_size_in_bytes = temp_csv.tell()  # get current position in the file (number of bytes)
            file_size_in_MB = file_size_in_bytes / 10**6
            if file_size_in_MB > threshold:
                sdn_file_string = download.content.decode('utf-8')
                with transaction.atomic():
                    metadata_entry = populate_sdn_fallback_data_and_metadata(
                        sdn_file_string)
                    # Only log an import success when the import returned a metadata entry.
                    if metadata_entry:
                        logger.info(
                            'SDNFallback: IMPORT SUCCESS: Imported SDN CSV. Metadata id %s',
                            metadata_entry.id)
                    logger.info(
                        'SDNFallback: DOWNLOAD SUCCESS: Successfully downloaded the SDN CSV.'
                    )
                    self.stdout.write(
                        self.style.SUCCESS(
                            'SDNFallback: Imported SDN CSV into the SDNFallbackMetadata and SDNFallbackData models.'
                        ))
            else:
                # File not larger than the threshold: log and abort instead of importing it.
                logger.warning(
                    "SDNFallback: DOWNLOAD FAILURE: file too small! "
                    "(%f MB vs threshold of %s MB)", file_size_in_MB, threshold)
                raise Exception(
                    "CSV file download did not meet threshold given")
def test_check_sdn_fallback_address(self, address, match):
    """
    Verify that the following properties are true for addresses:
    1. Order of words doesn't matter
    2. Number of times that a given word appears doesn't matter
    3. Punctuation between words or at the beginning/end of a given word doesn't matter
    4. If a subset of words match, it still counts as a match
    5. Capitalization doesn't matter

    Args:
        address: address text passed to checkSDNFallback.
        match: value checkSDNFallback is expected to return for that address.
    """
    # Single record whose address also contains non-ASCII words.
    # pylint: disable=line-too-long
    csv_string = """_id,source,entity_number,type,programs,name,title,addresses,federal_register_notice,start_date,end_date,standard_order,license_requirement,license_policy,call_sign,vessel_type,gross_tonnage,gross_registered_tonnage,vessel_flag,vessel_owner,remarks,source_list_url,alt_names,citizenships,dates_of_birth,nationalities,places_of_birth,source_information_url,ids
94734218,Specially Designated Nationals (SDN) - Treasury Department,96663868,Individual,material,Juan M. de la Cruz,Dr.,"17472 Christie Stream Apt. 976 North Kristinaport João 中国, HI 91033, SN",,,,,,,,,,,,,,https://www.juarez-collier.org/,Wendy Brock,DJ,1944-03-05,Faroe Islands,PK,http://richardson-richardson.org/,CI"""
    # pylint: enable=line-too-long
    populate_sdn_fallback_data_and_metadata(csv_string)

    self.assertEqual(checkSDNFallback("Juan", address, 'SN'), match)