Example 1
def test_fastavro_compatibility_deserialize(
        schema_root: str, schema_identifier: str,
        schemata: cerializer.schemata.CerializerSchemata) -> None:
    """Check that Cerializer deserialization agrees with fastavro.

    Every example document under ``schema_root`` is serialized with
    fastavro and then deserialized with the Cerializer codec; both
    readers must produce equal values.

    Args:
        schema_root: directory containing ``schema.yaml`` and ``example.yaml``.
        schema_identifier: dotted ``namespace.schema_name`` identifier.
        schemata: pre-loaded Cerializer schemata registry.
    """
    # patch for not working avro codec
    cerializer.tests.dev_utils.init_fastavro()
    # Split once instead of twice; still raises IndexError on a
    # malformed identifier, same as the original double-split.
    parts = schema_identifier.split('.')
    namespace, schema_name = parts[0], parts[1]
    cerializer_codec = cerializer.cerializer.Cerializer(
        cerializer_schemata=schemata,
        namespace=namespace,
        schema_name=schema_name,
    )
    try:
        # Use context managers so the file handles are closed deterministically
        # (the original leaked both open() handles).
        with open(os.path.join(schema_root, 'schema.yaml')) as schema_file:
            SCHEMA_FAVRO = yaml.load(schema_file, Loader=yaml.Loader)
        with open(os.path.join(schema_root, 'example.yaml')) as example_file:
            # mypy thinks yaml has no attribute unsafe_load_all, which is not true
            for data in yaml.unsafe_load_all(example_file):  # type: ignore
                output_fastavro = io.BytesIO()
                fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
                deserialized = cerializer_codec.deserialize(
                    output_fastavro.getvalue())
                output_fastavro.seek(0)
                assert deserialized == fastavro.schemaless_reader(
                    output_fastavro, SCHEMA_FAVRO)
    except FileNotFoundError:
        logging.warning(
            'Missing schema or Example file for schema == %s',
            schema_name,
        )
        assert False
Example 2
    def loadHDF5(fn, **kwargs):
        """Load a Picasso-style HDF5 localization file into a Dataset.

        Args:
            fn: path to the ``.hdf5`` file. A sibling ``.yaml`` metadata
                file with the same stem must exist and provide
                ``Height``/``Width`` (image shape in pixels).
            **kwargs: forwarded to the ``Dataset`` constructor.

        Returns:
            A ``Dataset`` populated with positions, photon counts,
            background, CRLB values and frame indices; ``'locs_path'``
            is set to ``fn``.
        """
        with h5py.File(fn, 'r') as f:
            locs = f['locs'][:]

            info_fn = os.path.splitext(fn)[0] + ".yaml"
            with open(info_fn, "r") as file:
                # SECURITY: unsafe_load_all can execute arbitrary Python
                # from the YAML file — only open trusted metadata files.
                # Bug fix: the original probed hasattr(yaml, 'unsafe_load')
                # but then called unsafe_load_all; probe the attribute that
                # is actually called (presumably the fallback targets old
                # PyYAML versions that lack the unsafe_* API — confirm).
                if hasattr(yaml, 'unsafe_load_all'):
                    obj = yaml.unsafe_load_all(file)
                else:
                    obj = yaml.load_all(file)
                # Only the first YAML document carries the image metadata.
                obj = list(obj)[0]
                imgshape = np.array([obj['Height'], obj['Width']])

            # A 'z' column means 3D localizations, otherwise 2D.
            if 'z' in locs.dtype.fields:
                dims = 3
            else:
                dims = 2

            ds = Dataset(len(locs),
                         dims,
                         imgshape,
                         haveSigma='sx' in locs.dtype.fields,
                         **kwargs)
            ds.photons[:] = locs['photons']
            ds.background[:] = locs['bg']
            ds.pos[:, 0] = locs['x']
            ds.pos[:, 1] = locs['y']
            if dims == 3:
                ds.pos[:, 2] = locs['z']
                ds.crlb.pos[:, 2] = locs['lpz']

            # Cramér–Rao lower bounds for position, intensity and background.
            ds.crlb.pos[:, 0] = locs['lpx']
            ds.crlb.pos[:, 1] = locs['lpy']
            ds.crlb.photons = locs['lI']
            ds.crlb.bg = locs['lbg']

            if 'lsx' in locs.dtype.fields:  # picasso doesnt save crlb for the sigma fits
                ds.crlb.sigma[:, 0] = locs['lsx']
                ds.crlb.sigma[:, 1] = locs['lsy']

            if ds.hasPerSpotSigma():
                ds.data.estim.sigma[:, 0] = locs['sx']
                ds.data.estim.sigma[:, 1] = locs['sy']

            ds.frame[:] = locs['frame']

            # Optional per-spot fields: only copied when present.
            if 'chisq' in locs.dtype.fields:
                ds.data.chisq = locs['chisq']

            if 'group' in locs.dtype.fields:
                ds.data.group = locs['group']

        ds['locs_path'] = fn
        return ds
Example 3
def test_load_yaml_config_raises_error_if_unsafe_yaml():
    """Verify that unsafe YAML tags are rejected without being executed."""
    with open(YAML_PATH, "w") as handle:
        handle.write("- !!python/object/apply:os.system []")

    # The config loader must refuse the payload and never invoke os.system.
    with patch.object(os, "system") as system_mock:
        with contextlib.suppress(HomeAssistantError):
            config_util.load_yaml_config_file(YAML_PATH)

    assert not system_mock.mock_calls

    # Here we validate that the test above is a good test
    # since previously the syntax was not valid
    with open(YAML_PATH) as handle:
        with patch.object(os, "system") as system_mock:
            list(yaml.unsafe_load_all(handle))

    assert len(system_mock.mock_calls) == 1
Example 4
def benchmark(number: int = 1000, preheat_number: int = 10) -> str:
    """Benchmark Cerializer against fastavro and avro for every known schema.

    Each codec is timed twice (two interleaved halves of ``number``
    iterations) to average out ordering/warm-up effects.

    Args:
        number: total timeit iterations per codec, run as two halves.
        preheat_number: warm-up serialize/deserialize rounds executed in
            the timeit setup before measurement starts.

    Returns:
        A text report of per-schema timings normalised to the slowest
        codec, followed by a JSON comparison section (currently always
        empty — JSON timing is disabled below).
    """
    results = []
    report_json = []
    for schema_identifier, path in tqdm.tqdm(list(
            cerializer.utils.iterate_over_schemata()),
                                             desc='Benchmarking'):
        # NOTE: the setup/stmt snippets below are source code executed by
        # timeit in a fresh namespace — they must stay valid standalone
        # Python, so their content is preserved byte-for-byte.
        setup = f'''
import cerializer.tests.dev_utils as t
import cerializer.tests.benchmark as b
import yaml
import cerializer.cerializer
import fastavro
import os
import io
import json


from __main__ import schemata as CERIALIZER_SCHEMATA


t.init_fastavro()
schema_identifier = '{schema_identifier}'
#CERIALIZER_SCHEMATA = b.schemata()
cerializer_codec = cerializer.cerializer.Cerializer(
	cerializer_schemata = CERIALIZER_SCHEMATA,
	namespace = schema_identifier.split('.')[0],
	schema_name = schema_identifier.split('.')[1]
)
data = list(yaml.unsafe_load_all(open(os.path.join('{path}', 'example.yaml'))))[0]  # type: ignore
SCHEMA_FAVRO = fastavro.parse_schema(
	yaml.load(open(os.path.join('{path}', 'schema.yaml')), Loader = yaml.Loader)
)

for i in range({preheat_number}):
	output_cerializer = cerializer_codec.serialize(data)
	deserialized_data = cerializer_codec.deserialize(output_cerializer)
	output_fastavro = io.BytesIO()
	fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
	x = output_fastavro.getvalue()
	output_fastavro.seek(0)
	res = fastavro.schemaless_reader(output_fastavro, SCHEMA_FAVRO)


import io
import json
import yaml
import avro
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import datetime
schema = yaml.load(open(os.path.join('{path}', 'schema.yaml')), Loader = yaml.Loader)
try:
	data_avro = yaml.load(open(os.path.join('{path}', 'example_avro.yaml')), Loader = yaml.Loader)
except:
	data_avro = data
parsed_schema = avro.schema.parse(json.dumps(schema))


		'''

        stmt_avro = '''
output_avro = io.BytesIO()
writer = avro.io.DatumWriter(parsed_schema)
encoder = avro.io.BinaryEncoder(output_avro)
writer.write(data_avro, encoder)
raw_bytes = output_avro.getvalue()
bytes_reader = io.BytesIO(raw_bytes)
decoder = avro.io.BinaryDecoder(bytes_reader)
reader = avro.io.DatumReader(parsed_schema)
data_deserialized = reader.read(decoder)
		'''

        stmt_cerializer = '''
output_cerializer = cerializer_codec.serialize(data)
deserialized_data = cerializer_codec.deserialize(output_cerializer)
		'''

        stmt_fastavro = '''
output_fastavro = io.BytesIO()
fastavro.schemaless_writer(output_fastavro, SCHEMA_FAVRO, data)
x = output_fastavro.getvalue()
output_fastavro.seek(0)
res = fastavro.schemaless_reader(output_fastavro, SCHEMA_FAVRO)
		'''

        # Kept for reference: this is the statement the (disabled) JSON
        # benchmark would run.
        stmt_json = '''
json_encoded = json.dumps(data)
json_decoded = json.loads(json_encoded)
		'''
        # we do this since we do first one half and the second half
        half_number = number // 2
        result_cerializer_1 = timeit.timeit(stmt=stmt_cerializer,
                                            setup=setup,
                                            number=half_number)
        result_fastavro_1 = timeit.timeit(stmt=stmt_fastavro,
                                          setup=setup,
                                          number=half_number)
        result_avro_1 = timeit.timeit(stmt=stmt_avro,
                                      setup=setup,
                                      number=half_number)
        result_fastavro_2 = timeit.timeit(stmt=stmt_fastavro,
                                          setup=setup,
                                          number=half_number)
        result_avro_2 = timeit.timeit(stmt=stmt_avro,
                                      setup=setup,
                                      number=half_number)
        result_cerializer_2 = timeit.timeit(stmt=stmt_cerializer,
                                            setup=setup,
                                            number=half_number)

        data = list(
            yaml.unsafe_load_all(open(os.path.join(path, 'example.yaml'))))[0]
        try:
            import json
            # this will fail if the data has components that are not JSON serializable such as datetime
            json.dumps(data)
            # NOTE(review): JSON timing was disabled in the original via
            # `0 if True else timeit.timeit(...)`; it is kept at 0 here so
            # behaviour (an empty JSON report section) is unchanged.
            result_json_1 = 0
            result_json_2 = 0
            result_json = result_json_1 + result_json_2
        except (TypeError, ValueError):
            # Data is not JSON-serializable; skip the JSON comparison.
            # (Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.)
            result_json = 0

        result_cerializer = result_cerializer_1 + result_cerializer_2
        result_fastavro = result_fastavro_1 + result_fastavro_2
        result_avro = result_avro_1 + result_avro_2
        # Normalise every codec to the slowest one for a relative report.
        maximum = max(result_cerializer, result_fastavro, result_avro)
        max_json = max(result_cerializer, result_json)

        if result_json:
            report_json.append(
                f'{schema_identifier.ljust(36, " ")},{result_cerializer/max_json},{result_json/max_json}'
            )

        results.append(
            f'{schema_identifier.ljust(36, " ")},{result_cerializer/maximum},{result_fastavro/maximum},{result_avro/maximum}'
        )
    # `results` is used directly: the original copied it element by element
    # into a second list with no effect on the output.
    benchmark_header = '============================== BENCHMARK RESULTS =============================='
    benchmark_header_json = '=========================== BENCHMARK RESULTS JSON ============================'
    return benchmark_header + '\n' + '\n'.join(
        results) + '\n' + '\n' + benchmark_header_json + '\n' + '\n'.join(
            report_json)
Example 5
# NOTE(review): this appears to be a CodeQL inline-expectation test fixture —
# each trailing `# $ ...` comment encodes the analysis results expected for
# that line (decode input/output/format and whether the call may execute its
# input) and must not be edited or reformatted. `payload` is presumably
# defined earlier in the file — not visible here.
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=yaml.full_load(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.load(payload, yaml.SafeLoader)  # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML
yaml.load(payload, Loader=yaml.SafeLoader)  # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML
yaml.load(payload, yaml.BaseLoader)  # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=yaml.safe_load(..) decodeFormat=YAML

################################################################################
# load_all variants
################################################################################

# Unsafe:
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=yaml.load_all(..) decodeFormat=YAML decodeMayExecuteInput
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.unsafe_load_all(..) decodeFormat=YAML decodeMayExecuteInput
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.full_load_all(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=yaml.safe_load_all(..) decodeFormat=YAML

################################################################################
# C-based loaders with `libyaml`
################################################################################

# Unsafe:
yaml.load(payload, yaml.CLoader)  # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, yaml.CFullLoader)  # $ decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.load(payload, yaml.CSafeLoader)  # $decodeInput=payload decodeOutput=yaml.load(..) decodeFormat=YAML