def test_extract_with_tsv_snippets(tmpdir: py._path.local.LocalPath, base_data: typing.Dict[str, str]) -> None:
    """Check extraction of the ERS rating and the postal-code prefix.

    A one-row CSV is written whose ``RAW_XML`` column holds a minimal
    ``HouseFile`` document. The first record yielded by
    ``extractor.extract_data`` must expose ``ersRating`` as ``'257'`` and
    ``forwardSortationArea`` as ``'H0H'``.
    """
    xml_data = (
        " <HouseFile> <ProgramInformation> <Client> <StreetAddress> "
        "<PostalCode>H0H 0H0</PostalCode> </StreetAddress> </Client> "
        "</ProgramInformation> <Program> <Results> <Tsv> "
        "<ERSRating value='257' /> </Tsv> </Results> </Program> "
        "</HouseFile> "
    )
    base_data['RAW_XML'] = xml_data

    input_file = tmpdir.join('input.csv')
    # The csv module performs its own newline handling, so the file has to be
    # opened with newline=''; otherwise newlines inside quoted fields can be
    # mangled and an extra '\r' is emitted on platforms that use '\r\n'.
    with open(input_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(base_data))
        writer.writeheader()
        writer.writerow(base_data)

    output = next(extractor.extract_data(str(input_file)))
    assert output
    assert output['ersRating'] == '257'
    assert output['forwardSortationArea'] == 'H0H'
def energuide_zip_fixture(tmpdir: py._path.local.LocalPath, energuide_fixture: str) -> str:
    """Run the extractor over ``energuide_fixture`` and write the result.

    The extracted data is written to ``scrubbed_random_sample_xml.zip``
    inside ``tmpdir``; the path of that file is returned.
    """
    archive_path = os.path.join(tmpdir, 'scrubbed_random_sample_xml.zip')
    extractor.write_data(extractor.extract_data(energuide_fixture), archive_path)
    return archive_path
def test_extract_with_snippets(tmpdir: py._path.local.LocalPath, base_data: typing.Dict[str, str]) -> None:
    """Check that a ceiling snippet is extracted and no upgrades are reported.

    A one-row CSV is written whose ``RAW_XML`` column holds a ``HouseFile``
    document containing a single ``<Ceiling>`` component. The first record
    yielded by ``extractor.extract_data`` must have a truthy ``ceilings``
    entry and an empty ``upgrades`` list.
    """
    xml_data = (
        ' <HouseFile><House><Components><Ceiling> <Label>Attic</Label> '
        '<Construction> <Type> <English>Attic/gable</English> '
        '<French>Combles/pignon</French> </Type> '
        '<CeilingType idref="Code 3" rValue="2.9463" nominalInsulation="2.864">'
        '2401191000</CeilingType> </Construction> '
        '</Ceiling></Components></House></HouseFile> '
    )
    base_data['RAW_XML'] = xml_data

    input_file = tmpdir.join('input.csv')
    # The csv module performs its own newline handling, so the file has to be
    # opened with newline=''; otherwise newlines inside quoted fields can be
    # mangled and an extra '\r' is emitted on platforms that use '\r\n'.
    with open(input_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(base_data))
        writer.writeheader()
        writer.writerow(base_data)

    output = next(extractor.extract_data(str(input_file)))
    assert output
    assert output['ceilings']
    assert output['upgrades'] == []
def test_empty_to_none(
        tmpdir: py._path.local.LocalPath,
        nullable_data: typing.Dict[str, typing.Optional[str]]) -> None:
    """Check that ``MODIFICATIONDATE`` comes back as ``None`` after extraction.

    ``nullable_data`` is written to ``sample.csv`` under ``tmpdir`` with the
    ``_write_csv`` helper, and the first extracted row is inspected.
    """
    csv_path = os.path.join(tmpdir, 'sample.csv')
    _write_csv(csv_path, nullable_data)

    first_row = next(extractor.extract_data(csv_path))
    assert first_row
    assert first_row['MODIFICATIONDATE'] is None
def extract(infile: str, outfile: str, progress: bool) -> None:
    """Extract data from ``infile`` and write it to ``outfile``.

    Logs the start of the run, warns when ``outfile`` already exists (it is
    written to regardless), and finally logs the written/failed record counts
    returned by ``extractor.write_data``.

    Args:
        infile: Path handed to ``extractor.extract_data``.
        outfile: Path handed to ``extractor.write_data``.
        progress: Forwarded to ``extractor.extract_data`` as ``show_progress``.
    """
    LOGGER.info(f'Extracting data from {infile} into {outfile}')

    target_exists = os.path.exists(outfile)
    if target_exists:
        LOGGER.warning(f'Warning: file {outfile} exists. Overwriting.')

    records = extractor.extract_data(infile, show_progress=progress)
    written, failed = extractor.write_data(records, outfile)

    summary = (
        f'Finished extracting data into {outfile}. '
        f'Successfully written: {written}. Failed: {failed}'
    )
    LOGGER.info(summary)
def energuide_zip_fixture(tmpdir: py._path.local.LocalPath, energuide_fixture: str) -> str:
    """Run the extractor over ``energuide_fixture`` and write the result.

    The extracted data is written to ``randomized_energuide_data.zip``
    directly under ``tmpdir``; the path of that file is returned.
    """
    archive_name = 'randomized_energuide_data.zip'
    archive_path = f'{tmpdir}/{archive_name}'
    extracted = extractor.extract_data(energuide_fixture)
    extractor.write_data(extracted, archive_path)
    return archive_path
def test_extract_missing(missing_filepath: str) -> None:
    """Check that extracting ``missing_filepath`` yields exactly one ``None``."""
    records = list(extractor.extract_data(missing_filepath))
    assert records == [None]
def test_purge_unknown(extra_filepath: str) -> None:
    """Check that the first extracted record has no ``other_1`` key."""
    rows = extractor.extract_data(extra_filepath)
    first_row = next(rows)
    assert first_row
    assert 'other_1' not in dict(first_row)
def test_extract_valid(valid_filepath: str) -> None:
    """Check that the first extracted record contains an ``EVAL_ID`` key."""
    rows = extractor.extract_data(valid_filepath)
    first_row = next(rows)
    assert first_row
    assert 'EVAL_ID' in dict(first_row)