def test_execute_zip(self, mock_after_chain_invoke):
    """Run the default chain and verify the files found in ``target_dir``.

    ``mock_after_chain_invoke`` is passed in from outside this excerpt
    (presumably by a ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    # The mocked hook has to be hit exactly twice.
    # NOTE(review): the earlier comment mentioned two GML files and a count
    # of 3, which does not match the asserted numbers -- confirm the fixture.
    self.assertTrue(mock_after_chain_invoke.called)
    self.assertEqual(2, mock_after_chain_invoke.call_count)

    # The configured target directory must exist after the run.
    out_dir = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 2), 'target_dir')
    self.assertTrue(os.path.exists(out_dir))

    # Three entries are expected, each named like '0221LIG*.xml'.
    entries = os.listdir(out_dir)
    self.assertEqual(3, len(entries))
    for entry in entries:
        entry_path = os.path.join(out_dir, entry)
        self.assertTrue(entry.startswith('0221LIG'))
        self.assertTrue(entry.endswith('.xml'))
        self.assertTrue(os.path.exists(entry_path))
        # Remove each file once it has been checked.
        os.remove(entry_path)
def test_class(self):
    """Check the ``class`` option of the config section looked up with index 1."""
    expected = 'filters.stringfilter.StringSubstitutionFilter'
    section_name = StetlTestCase.get_section(
        StetlTestCase.get_chain(self.etl), 1)
    configured = self.etl.configdict.get(section_name, 'class')
    self.assertEqual(expected, configured)
def test_execute(self, mock_call):
    """Run the default chain and verify the command handed to ``mock_call``.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm). The first positional argument of
    its single recorded call is parsed with ``self.parse_command`` and
    compared against the values in the chain's configuration.
    """
    chain = StetlTestCase.get_chain(self.etl)
    chain.run()

    self.assertTrue(mock_call.called)
    self.assertEqual(1, mock_call.call_count)

    # Check command line
    args, _ = mock_call.call_args
    cmd_args = self.parse_command(args[0])
    self.assertEqual(len(cmd_args), 5)
    self.assertEqual(cmd_args[0], 'ogr2ogr')

    # Compare command line arguments with config
    section = StetlTestCase.get_section(chain, -1)
    file_path = self.etl.configdict.get(
        StetlTestCase.get_section(chain), 'file_path')

    # Certain options should not occur
    self.assertNotIn('-spat', cmd_args)
    self.assertNotIn('-lco', cmd_args)

    # Destination format: the value right after '-f'
    self.assertIn('-f', cmd_args)
    f_idx = cmd_args.index('-f')
    dest_format = self.etl.configdict.get(section, 'dest_format')
    self.assertEqual(cmd_args[f_idx + 1], dest_format)

    # Destination datasource: the value two positions after '-f'
    dest_data_source = self.etl.configdict.get(section, 'dest_data_source')
    self.assertEqual(cmd_args[f_idx + 2], dest_data_source)

    # Source datasource: the last argument
    self.assertEqual(cmd_args[-1], file_path)
def test_class(self):
    """Check the ``class`` option of the section returned by ``get_section(chain)``."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    configured = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain), 'class')
    self.assertEqual(
        'stetl.inputs.fileinput.XmlElementStreamerFileInput', configured)
def test_class(self):
    """Check the ``class`` option of the config section looked up with index -1."""
    expected = 'stetl.outputs.execoutput.Ogr2OgrExecOutput'
    etl_chain = StetlTestCase.get_chain(self.etl)
    section_name = StetlTestCase.get_section(etl_chain, -1)
    self.assertEqual(expected, self.etl.configdict.get(section_name, 'class'))
def test_class(self):
    """Check the ``class`` option of the config section looked up with index 1."""
    section_name = StetlTestCase.get_section(
        StetlTestCase.get_chain(self.etl), 1)
    configured = self.etl.configdict.get(section_name, 'class')
    self.assertEqual('filters.zipfileextractor.ZipFileExtractor', configured)
def test_class(self):
    """Check the ``class`` option of the config section looked up with index 1."""
    expected = 'stetl.filters.sieve.AttrValueRecordSieve'
    etl_chain = StetlTestCase.get_chain(self.etl)
    configured = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 1), 'class')
    self.assertEqual(expected, configured)
def test_execute_vsizip(self, mock_after_chain_invoke):
    """Run the chain at index 1 and verify the files found in ``target_dir``.

    ``mock_after_chain_invoke`` is passed in from outside this excerpt
    (presumably by a ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl, index=1)
    etl_chain.run()

    # Two invocations of the mocked hook are expected. Per the original
    # author's note, the extra one is the final call in which the
    # VsiFileExtractor indicates that no more files can be found.
    self.assertTrue(mock_after_chain_invoke.called)
    self.assertEqual(2, mock_after_chain_invoke.call_count)

    # The configured target directory must exist after the run.
    out_dir = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 2), 'target_dir')
    self.assertTrue(os.path.exists(out_dir))

    # Two entries are expected, each named like '0221WPL*.xml'.
    entries = os.listdir(out_dir)
    self.assertEqual(2, len(entries))
    for entry in entries:
        entry_path = os.path.join(out_dir, entry)
        self.assertTrue(entry.startswith('0221WPL'))
        self.assertTrue(entry.endswith('.xml'))
        self.assertTrue(os.path.exists(entry_path))
        # Remove each file once it has been checked.
        os.remove(entry_path)
def test_class(self):
    """Check the ``class`` option of the config section looked up with index 1."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    section_name = StetlTestCase.get_section(etl_chain, 1)
    self.assertEqual(
        'stetl.filters.execfilter.CommandExecFilter',
        self.etl.configdict.get(section_name, 'class'))
def test_execute_no_cleanup(self, mock_call):
    """Run the chain at index 7 and check that the input file is still present.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl, 7)
    # Look up the input path before running the chain.
    input_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain), 'file_path')
    etl_chain.run()

    # The parsed command is expected to have five parts.
    call_args, _call_kwargs = mock_call.call_args
    command_parts = self.parse_command(call_args[0])
    self.assertEqual(len(command_parts), 5)

    # The input file must not have been removed.
    self.assertTrue(os.path.exists(input_path))
def test_execute(self):
    """Run the ETL and compare captured stdout with the input file contents.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    # Load the expected text from the configured input file.
    input_section = StetlTestCase.get_section(StetlTestCase.get_chain(self.etl))
    input_path = self.etl.configdict.get(input_section, 'file_path')
    with open(input_path, 'r') as handle:
        expected_text = handle.read()

    self.etl.run()

    captured = sys.stdout.getvalue()
    self.assertGreater(len(captured), 0)
    # The print function appends a final line break to the output.
    self.assertEqual(captured, expected_text + '\n')
def test_execute(self, mock_after_chain_invoke):
    """Run the default chain, check the hook call count and the temp file.

    ``mock_after_chain_invoke`` is passed in from outside this excerpt
    (presumably by a ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    # Per the original author's note: the ZIP holds two GML files that are
    # both extracted, and a third, final call lets the ZipFileExtractor
    # indicate that no more files can be found.
    self.assertTrue(mock_after_chain_invoke.called)
    self.assertEqual(3, mock_after_chain_invoke.call_count)

    # The temp file must exist; remove it afterwards.
    temp_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 1), 'file_path')
    self.assertTrue(os.path.exists(temp_path))
    os.remove(temp_path)
def test_execute(self, mock_tx_execute):
    """Run the ETL and check the data passed to ``mock_tx_execute``.

    ``mock_tx_execute`` is supplied from outside this excerpt (presumably by
    a ``mock.patch`` decorator -- confirm). The second positional argument
    of its single recorded call must equal the input file contents.
    """
    # Load the expected text from the configured input file.
    input_section = StetlTestCase.get_section(StetlTestCase.get_chain(self.etl))
    input_path = self.etl.configdict.get(input_section, 'file_path')
    with open(input_path, 'r') as handle:
        expected_text = handle.read()

    self.etl.run()

    self.assertTrue(mock_tx_execute.called)
    self.assertEqual(1, mock_tx_execute.call_count)
    call_args, _call_kwargs = mock_tx_execute.call_args
    self.assertEqual(expected_text, call_args[1])
def test_execute_all_xml(self, mock_after_chain_invoke):
    """Run the chain at index 2, check the hook call count and the temp file.

    ``mock_after_chain_invoke`` is passed in from outside this excerpt
    (presumably by a ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl, index=2)
    etl_chain.run()

    # Per the original author's note: 18 XML files spread over several zip
    # files are extracted, plus one final call in which the VsiFileExtractor
    # indicates that no more files can be found -- 19 calls in total.
    self.assertTrue(mock_after_chain_invoke.called)
    self.assertEqual(19, mock_after_chain_invoke.call_count)

    # The temp file must exist; remove it afterwards.
    temp_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 1), 'file_path')
    self.assertTrue(os.path.exists(temp_path))
    os.remove(temp_path)
def test_execute(self, mock_call):
    """Run the ETL and check the data passed to ``mock_call``.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm). The first positional argument of
    its single recorded call must equal the input file contents.
    """
    # Load the expected text from the configured input file.
    input_section = StetlTestCase.get_section(StetlTestCase.get_chain(self.etl))
    input_path = self.etl.configdict.get(input_section, 'file_path')
    with open(input_path, 'r') as handle:
        expected_text = handle.read()

    self.etl.run()

    self.assertTrue(mock_call.called)
    self.assertEqual(1, mock_call.call_count)
    call_args, _call_kwargs = mock_call.call_args
    self.assertEqual(expected_text, call_args[0])
def test_execute_extent(self, mock_call):
    """Run the chain at index 2 and verify the ``-spat`` option of the command.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm). The four arguments following
    ``-spat`` must equal the whitespace-split ``spatial_extent`` option of
    the config section looked up with index -1.
    """
    chain = StetlTestCase.get_chain(self.etl, 2)
    chain.run()

    # Check command line
    args, _ = mock_call.call_args
    cmd_args = self.parse_command(args[0])
    self.assertEqual(len(cmd_args), 10)

    # Check spatial extent
    section = StetlTestCase.get_section(chain, -1)
    self.assertIn('-spat', cmd_args)
    spat_idx = cmd_args.index('-spat')
    spatial_extent = self.etl.configdict.get(section, 'spatial_extent')
    self.assertEqual(
        spatial_extent.split(), cmd_args[spat_idx + 1:spat_idx + 5])
def test_execute(self):
    """Run the default chain and inspect the documents buffered by PacketBuffer.

    Packets whose ``data`` is not ``None`` are collected; four are expected.
    The first must contain two ``top10nl:FeatureMember`` elements and the
    last one exactly one. The first must have ``end_of_doc`` set but not
    ``end_of_stream``; the last must have both set.
    """
    chain = StetlTestCase.get_chain(self.etl)
    chain.run()

    packet_list = chain.get_by_class(PacketBuffer).packet_list

    # Most packets are empty; keep only the ones that carry a document
    doc_packet_list = [
        packet for packet in packet_list if packet.data is not None]

    # Assertion: we need to see 4 documents
    self.assertEqual(len(doc_packet_list), 4)

    namespaces = {
        'gml': 'http://www.opengis.net/gml/3.2',
        'top10nl': 'http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0',
    }
    feature_path = '/gml:FeatureCollectionT10NL/top10nl:FeatureMember'

    first_packet = doc_packet_list[0]
    last_packet = doc_packet_list[-1]

    # Assertion: first doc has two FeatureMember elements with proper namespaces
    feature_elms = first_packet.data.xpath(feature_path, namespaces=namespaces)
    self.assertEqual(len(feature_elms), 2)

    # Assertion: last doc has one FeatureMember with proper namespaces
    feature_elms = last_packet.data.xpath(feature_path, namespaces=namespaces)
    self.assertEqual(len(feature_elms), 1)

    # Assertion: first doc has end_of_doc but not end_of_stream set
    self.assertTrue(
        first_packet.end_of_doc, msg='doc1: end_of_doc is False')
    self.assertFalse(
        first_packet.end_of_stream, msg='doc1: end_of_stream is True')

    # Assertion: last doc has end_of_doc and end_of_stream set
    self.assertTrue(
        last_packet.end_of_doc, msg='doc2: end_of_doc is False')
    self.assertTrue(
        last_packet.end_of_stream, msg='doc2: end_of_stream is False')
def test_execute(self):
    """Run the default chain and count the lines captured on stdout.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    # Surrounding whitespace is stripped before splitting into lines.
    captured = sys.stdout.getvalue().strip()
    output_lines = captured.split('\n')
    self.assertEqual(len(output_lines), 431)
def test_execute(self):
    """Invoke only the chain's first component and compare its packet data.

    The data of the packet returned by ``invoke`` must equal the contents of
    the configured input file.
    """
    # Load the expected text from the configured input file.
    etl_chain = StetlTestCase.get_chain(self.etl)
    input_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain), 'file_path')
    with open(input_path, 'r') as handle:
        expected_text = handle.read()

    # Drive the first component by hand: init, prepare a packet, invoke.
    source = etl_chain.first_comp
    source.do_init()
    pkt = Packet()
    pkt.init()
    pkt.component = source
    source.before_invoke(pkt)
    pkt = source.invoke(pkt)

    self.assertEqual(pkt.data, expected_text)
def test_legacy_class(self):
    """Check that the deprecated class name in the config still resolves.

    Builds a fresh ``ETL`` from ``configs/zipfileextractordeprecated.cfg``
    (replacing ``self.etl``), verifies that the config names the deprecated
    class path, and that the component following the first one is an
    instance of the ``ZipFileExtractor`` imported by this module.
    """
    cfg_dict = {
        'config_file': os.path.join(
            self.curr_dir, 'configs/zipfileextractordeprecated.cfg'),
    }
    self.etl = ETL(cfg_dict)
    chain = StetlTestCase.get_chain(self.etl)
    section = StetlTestCase.get_section(chain, 1)
    class_name = self.etl.configdict.get(section, 'class')

    # Deprecated class name in config
    self.assertEqual(
        'stetl.filters.zipfileextractor.ZipFileExtractor', class_name)

    # Assigned to new version in fileextractor module!
    # assertIsInstance reports the actual object when the check fails.
    self.assertIsInstance(chain.first_comp.next, ZipFileExtractor)
def test_execute_cleanup(self, mock_call):
    """Run the chain at index 5 and check that the temporary copy is removed.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm).
    """
    # Work on a copy of the test file, because it will be cleaned up.
    default_chain = StetlTestCase.get_chain(self.etl)
    source_path = self.etl.configdict.get(
        StetlTestCase.get_section(default_chain), 'file_path')
    root, extension = os.path.splitext(source_path)
    temp_copy = root + "_temp" + extension
    shutil.copy(source_path, temp_copy)

    cleanup_chain = StetlTestCase.get_chain(self.etl, 5)
    cleanup_chain.run()

    # The parsed command is expected to have five parts.
    call_args, _call_kwargs = mock_call.call_args
    command_parts = self.parse_command(call_args[0])
    self.assertEqual(len(command_parts), 5)

    # The temporary copy must be gone after the run.
    self.assertFalse(os.path.exists(temp_copy))
def test_execute_gfs(self, mock_call):
    """Run the chain at index 4 and check for a ``.gfs`` file next to the input.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl, 4)
    etl_chain.run()

    # The parsed command is expected to have five parts.
    call_args, _call_kwargs = mock_call.call_args
    command_parts = self.parse_command(call_args[0])
    self.assertEqual(len(command_parts), 5)

    # The GFS file shares the input file's name, with a '.gfs' extension.
    input_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain), 'file_path')
    gfs_file = os.path.splitext(input_path)[0] + '.gfs'
    self.assertTrue(os.path.exists(gfs_file))

    # Clean it up and make sure it is really gone.
    os.remove(gfs_file)
    self.assertFalse(os.path.exists(gfs_file))
def test_execute(self):
    """Run the default chain and check the data of the first buffered packet."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    first_packet = etl_chain.get_by_class(PacketBuffer).packet_list[0]
    self.assertEqual(first_packet.data, 'Hello NLExtract!')
def test_execute(self):
    """Run the default chain and check the path held by the first buffered packet."""
    expected_path = '/vsizip/temp/bagobject.zip/pand.xml'

    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    buffered = etl_chain.get_by_class(PacketBuffer).packet_list
    self.assertEqual(buffered[0].data, expected_path)
def test_execute(self):
    """Run the default chain and check the stripped data of the first packet."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    first_packet = etl_chain.get_by_class(PacketBuffer).packet_list[0]
    # Surrounding whitespace is ignored in the comparison.
    stripped = first_packet.data.strip()
    self.assertEqual(stripped, "foo/bar")
def test_execute(self):
    """Run the default chain and compare the string form of the first packet's data."""
    expected_repr = "{'elemtype': 'BuildingInstallation', 'featurecount': '1162'}"

    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    first_packet = etl_chain.get_by_class(PacketBuffer).packet_list[0]
    self.assertEqual(str(first_packet.data), expected_repr)
def test_instance(self):
    """Verify the component structure of the two Merger chains.

    For chain 0 and chain 1 the first component must be a ``Merger`` with
    two children whose members have the expected types, and its
    ``end_count`` must be 2.
    """
    # Chain #1 - Simple Case
    chain = StetlTestCase.get_chain(self.etl)
    merger_comp = chain.first_comp

    # assertIsInstance reports the offending object when a check fails
    self.assertIsInstance(merger_comp, Merger)
    self.assertEqual(len(merger_comp.children), 2)
    self.assertIsInstance(
        merger_comp.children[0][0], LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.children[1][0], LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.children[0][1], LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.children[1][1], LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.children[0][1].next, StandardOutput,
        "Next is not StandardOutput")
    self.assertIsInstance(
        merger_comp.children[1][1].next, StandardOutput,
        "Next is not StandardOutput")

    # Flag for End-of-Stream 2 subcomps
    self.assertEqual(merger_comp.end_count, 2)

    # Chain #2 - SubChain Case
    chain = StetlTestCase.get_chain(self.etl, index=1)
    merger_comp = chain.first_comp
    children = merger_comp.children

    self.assertIsInstance(merger_comp, Merger)
    self.assertEqual(len(merger_comp.children), 2, "Child count is not 2")
    self.assertIsInstance(
        merger_comp.first(children[0]), LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.first(children[1]), LineStreamerFileInput,
        "Next is not LineStreamerFileInput")
    self.assertIsInstance(
        merger_comp.children[0][0].next, NullFilter,
        "Next is not NullFilter")
    self.assertIsInstance(
        merger_comp.first(children[1]).next, NullFilter,
        "Next is not NullFilter")
    self.assertIsInstance(
        merger_comp.last(children[1]).next, StandardOutput,
        "Next is not StandardOutput")
    self.assertIsInstance(
        merger_comp.children[1][1].next, StandardOutput,
        "Next is not StandardOutput")

    # Flag for End-of-Stream 2 subcomps
    self.assertEqual(merger_comp.end_count, 2)
def test_execute(self):
    """Run the default chain and check two entries of the first packet's data."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    record = etl_chain.get_by_class(PacketBuffer).packet_list[0].data
    self.assertEqual(record.get('elemtype'), "BuildingInstallation")
    # The feature count is compared numerically after conversion with int().
    self.assertEqual(int(record.get('featurecount')), 1162)
def test_format_args(self):
    """Invoke the first component of the chain at index 1 and check its data."""
    source = StetlTestCase.get_chain(self.etl, 1).first_comp

    # Drive the component by hand: init, prepare a packet, invoke.
    source.do_init()
    pkt = Packet()
    pkt.init()
    pkt.component = source
    source.before_invoke(pkt)
    pkt = source.invoke(pkt)

    self.assertEqual(pkt.data, 'Hello NLExtract!')
def test_instance(self):
    """Verify that a ``Splitter`` follows the first component and fans out.

    The splitter's ``next`` must hold two entries, both ``StandardOutput``
    instances.
    """
    chain = StetlTestCase.get_chain(self.etl)
    splitter_comp = chain.first_comp.next

    # assertIsInstance reports the offending object when a check fails
    self.assertIsInstance(splitter_comp, Splitter)

    # The next is a list of multiple Outputs
    self.assertEqual(len(splitter_comp.next), 2)
    self.assertIsInstance(splitter_comp.next[0], StandardOutput)
    self.assertIsInstance(splitter_comp.next[1], StandardOutput)
def test_xml_single_name_nested_filter(self):
    """Run the chain at index 5 and check the single path printed to stdout.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    expected_path = (
        '/vsizip/{/vsizip/{tests/data/vsizipinput.zip}/9999WPL15092020.zip}/0221WPL15092020-000001.xml'
    )

    etl_chain = StetlTestCase.get_chain(self.etl, 5)
    etl_chain.run()

    # Exactly one output line is expected.
    output_lines = sys.stdout.getvalue().strip().split('\n')
    self.assertEqual(len(output_lines), 1)
    self.assertEqual(output_lines[0], expected_path)
def test_no_namespace(self):
    """Run the chain at index 2 and validate the three lines printed to stdout.

    The element-level checks are delegated to ``self.check_execution_result``,
    which is defined outside this excerpt.
    """
    etl_chain = StetlTestCase.get_chain(self.etl, 2)
    etl_chain.run()

    # Number of elements written to stdout
    captured = sys.stdout.getvalue().strip()
    output_lines = captured.split('\n')
    self.assertEqual(len(output_lines), 3)

    # The actual elements
    self.check_execution_result(etl_chain, output_lines, False)
def test_vsizipfilter(self, mock_call):
    """Run the chain at index 1 and check the prefix of the last command argument.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl, 1)
    etl_chain.run()

    # Parse the command that was handed to the mock.
    call_args, _call_kwargs = mock_call.call_args
    command_parts = self.parse_command(call_args[0])

    # The input file (last argument) must carry the vsizip prefix.
    source_arg = command_parts[-1]
    self.assertTrue(
        source_arg.startswith('/vsizip/tests/data/bestuurlijkegrenzen.zip/'))
def test_ogrexecoutput_lco_first(self, mock_call):
    """Run the default chain and verify that no ``-lco`` option is passed.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm).
    """
    chain = StetlTestCase.get_chain(self.etl)
    chain.run()

    # Check command line
    args, _ = mock_call.call_args
    cmd_args = self.parse_command(args[0])

    # Check whether layer creation options are absent; assertNotIn prints
    # the full argument list on failure.
    self.assertNotIn('-lco', cmd_args)
def test_execute(self):
    """Run the default chain and compare the string form of the first packet's data."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    buffered = etl_chain.get_by_class(PacketBuffer).packet_list
    actual_repr = str(buffered[0].data)
    self.assertEqual(
        actual_repr,
        "{'elemtype': 'BuildingInstallation', 'featurecount': '1162'}")
def test_xml_single_name__nested_filter(self):
    """Run the chain at index 4 and check the single path printed to stdout.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    expected_path = (
        '/vsizip/{tests/data/vsizipinput.zip}/Leveringsdocument-BAG-Extract.xml'
    )

    etl_chain = StetlTestCase.get_chain(self.etl, 4)
    etl_chain.run()

    # Exactly one output line is expected.
    output_lines = sys.stdout.getvalue().strip().split('\n')
    self.assertEqual(len(output_lines), 1)
    self.assertEqual(output_lines[0], expected_path)
def test_execute(self):
    """Run the default chain and compare the written file with the input file.

    The file configured for the section at index 1 must exist and be
    byte-for-byte identical to the chain's input file; it is removed at the
    end of the test.
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    # The temp file must exist.
    written_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain, 1), 'file_path')
    self.assertTrue(os.path.exists(written_path))

    # Read both files in binary mode and compare them.
    source_path = self.etl.configdict.get(
        StetlTestCase.get_section(etl_chain), 'file_path')
    with open(written_path, 'rb') as handle:
        written_bytes = handle.read()
    with open(source_path, 'rb') as handle:
        source_bytes = handle.read()
    self.assertEqual(written_bytes, source_bytes)

    # Remove the temp file.
    os.remove(written_path)
def test_execute(self):
    """Run the default chain and count the newline-separated pieces of stdout.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    captured = sys.stdout.getvalue()
    pieces = captured.split('\n')

    # Per the original author's note: twice as many lines reach stdout as
    # the text file holds, because printing adds an extra line break to
    # every line. The split above yields one more element on top of that:
    # the last "line" is an empty string.
    self.assertEqual(len(pieces), 37)
def test_execute(self):
    """Run the default chain and check the two city records that remain."""
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    records = etl_chain.get_by_class(PacketBuffer).packet_list[0].data

    # Exactly two records are expected, in this order.
    self.assertEqual(len(records), 2)
    self.assertEqual(str(records[0]['city']), "amsterdam")
    self.assertEqual(str(records[1]['city']), "otterlo")
def test_execute(self):
    """Run the default chain and count the non-empty lines printed to stdout.

    Relies on ``sys.stdout`` offering ``getvalue()`` (presumably replaced by
    a buffering test runner -- confirm).
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    # The output should hold the merged lines from both files.
    raw_lines = sys.stdout.getvalue().split('\n')

    # Drop empty lines.
    non_empty = list(filter(None, raw_lines))

    # Per the original author's note: twice the number of non-empty lines
    # in the input file.
    self.assertEqual(len(non_empty), 36)
def test_execute_options(self, mock_call):
    """Run the chain at index 3 and verify extra options on the command line.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm). The parsed command must have 12
    parts and contain ``-append``, ``-gt`` and ``--config``.
    """
    chain = StetlTestCase.get_chain(self.etl, 3)
    chain.run()

    # Check command line
    args, _ = mock_call.call_args
    cmd_args = self.parse_command(args[0])
    self.assertEqual(len(cmd_args), 12)

    # Check that the additional options are present; assertIn prints the
    # full argument list on failure.
    self.assertIn('-append', cmd_args)
    self.assertIn('-gt', cmd_args)
    self.assertIn('--config', cmd_args)
def test_chain_assembly(self):
    """Assemble an unassembled chain and check how its components are linked.

    After ``assemble()`` the chain must expose a first and a current
    component; following the ``next`` links from the first component must
    end at the current component.
    """
    etl_chain = StetlTestCase.get_chain(self.etl, assemble=False)
    etl_chain.assemble()

    self.assertIsNotNone(etl_chain.first_comp)
    self.assertIsNotNone(etl_chain.cur_comp)
    # The first component has a successor, the current one has none.
    self.assertIsNotNone(etl_chain.first_comp.next)
    self.assertIsNone(etl_chain.cur_comp.next)

    # Walk the linked components until the end is reached.
    node = etl_chain.first_comp
    while True:
        if node.next is None:
            break
        node = node.next

    self.assertIs(node, etl_chain.cur_comp)
def test_execute_lco(self, mock_call):
    """Run the chain at index 1 and verify the layer creation options.

    ``mock_call`` is supplied from outside this excerpt (presumably by a
    ``mock.patch`` decorator -- confirm). The parsed command must have 9
    parts and contain two ``-lco`` flags, followed by ``LAUNDER=YES`` and
    ``PRECISION=NO`` respectively.
    """
    chain = StetlTestCase.get_chain(self.etl, 1)
    chain.run()

    # Check command line
    args, _ = mock_call.call_args
    cmd_args = self.parse_command(args[0])
    self.assertEqual(len(cmd_args), 9)

    # Check layer creation options
    self.assertIn('-lco', cmd_args)
    lco_indices = [i for i, arg in enumerate(cmd_args) if arg == '-lco']
    self.assertEqual(len(lco_indices), 2)
    self.assertEqual(cmd_args[lco_indices[0] + 1], 'LAUNDER=YES')
    self.assertEqual(cmd_args[lco_indices[1] + 1], 'PRECISION=NO')
def test_execute(self):
    """Run the default chain and inspect the XML-like text printed to stdout.

    Exactly one packet must be buffered, and the output must consist of six
    lines: a ``<dummy `` opening on the second line, three ``funcgeb``
    elements on lines three to five, and ``</dummy>`` on the last line.
    """
    etl_chain = StetlTestCase.get_chain(self.etl)
    etl_chain.run()

    buffered = etl_chain.get_by_class(PacketBuffer).packet_list
    self.assertEqual(len(buffered), 1)

    # Inspect what was printed.
    output_lines = sys.stdout.getvalue().strip().split('\n')
    self.assertEqual(len(output_lines), 6)

    funcgeb_pattern = r'<funcgeb id="[^"]+"/>'
    for line in output_lines[2:5]:
        self.assertIsNotNone(re.match(funcgeb_pattern, line.strip()))

    opening = output_lines[1].strip()
    closing = output_lines[5].strip()
    self.assertTrue(opening.startswith('<dummy '))
    self.assertEqual(closing, '</dummy>')
def test_execute(self):
    """Invoke the chain's first component and verify the dict it produces.

    The packet data must be a dict with a non-``None`` ``menu`` entry that
    has exactly three members: ``id``, ``value`` and ``popup``.
    """
    chain = StetlTestCase.get_chain(self.etl)

    # Drive the first component by hand: init, prepare a packet, invoke
    chain.first_comp.do_init()
    packet = Packet()
    packet.init()
    packet.component = chain.first_comp
    chain.first_comp.before_invoke(packet)
    packet = chain.first_comp.invoke(packet)

    self.assertIsNotNone(packet.data)
    self.assertIsInstance(packet.data, dict)
    # assertIn prints the container on failure, unlike assertTrue(x in y)
    self.assertIn('menu', packet.data)
    self.assertIsNotNone(packet.data['menu'])

    mydict = packet.data['menu']
    self.assertEqual(len(mydict), 3)
    self.assertIn('id', mydict)
    self.assertIn('value', mydict)
    self.assertIn('popup', mydict)
def test_instance(self):
    """Verify that the component at index 1 is a ``StringSubstitutionFilter``."""
    chain = StetlTestCase.get_chain(self.etl)

    # assertIsInstance reports the offending object when the check fails
    self.assertIsInstance(chain.get_by_index(1), StringSubstitutionFilter)
def test_instance(self):
    """Verify that the chain's current component is a ``StandardOutput``."""
    chain = StetlTestCase.get_chain(self.etl)

    # assertIsInstance reports the offending object when the check fails
    self.assertIsInstance(chain.cur_comp, StandardOutput)