Example #1
 def __init__(self, schema_json=None, key_json=None, value_json=None):
     """
     This class is initiated with a json string representation of a
     RADAR-base schema.
     Parameters
     __________
     schema_json: string (json)
         A json string representation of a key-value pair RADAR-base schema
     key_json: string (json)
         A json string representation of a key RADAR-base schema
     value_json: string (json)
         A json string representation of a value RADAR-base schema
     __________
     Either schema_json or value_json must be specified. key_json may also
     be given alongside value_json.
     """
     if schema_json:
         self.schema = schema.Parse(schema_json)
     elif value_json:
         if key_json:
             self.schema = schema.Parse(
                 combine_key_value_schemas(key_json, value_json))
         else:
             self.schema = self._FakeSchema()
             self.schema.fields = [self._FakeSchema()]
             self.schema.fields[0].type = schema.Parse(value_json)
             self.schema.fields[0].name = 'value'
     else:
         raise ValueError('Please provide a json representation of a '
                          'key-value schema, or a value schema with or '
                          'without a separate key schema.')
Example #2
 def testOtherAttributes(self):
     correct = 0
     props = {}
     for example in OTHER_PROP_EXAMPLES:
         original_schema = schema.Parse(example.schema_string)
         round_trip_schema = schema.Parse(str(original_schema))
         self.assertEqual(original_schema.other_props,
                          round_trip_schema.other_props)
         if original_schema.type == "record":
             field_props = 0
             for f in original_schema.fields:
                 if f.other_props:
                     props.update(f.other_props)
                     field_props += 1
             self.assertEqual(field_props, len(original_schema.fields))
         if original_schema.other_props:
             props.update(original_schema.other_props)
             correct += 1
     for k in props:
         v = props[k]
         if k == "cp_boolean":
             self.assertEqual(type(v), bool)
         elif k == "cp_int":
             self.assertEqual(type(v), int)
         elif k == "cp_object":
             self.assertEqual(type(v), dict)
         elif k == "cp_float":
             self.assertEqual(type(v), float)
         elif k == "cp_array":
             self.assertEqual(type(v), list)
     self.assertEqual(correct, len(OTHER_PROP_EXAMPLES))
Example #3
    def test_schema_versions(self):
        logger.info("Schemas")
        name = 'property'
        self.register = Registry(path_configs)

        r_schema_1 = self.register.get(name, version=1)
        r_schema_2 = self.register.get(name, version=2)
        r_schema_3 = self.register.get(name, version=3)
        r_schema_4 = self.register.get(name, version=4)
        r_schema_5 = self.register.get(name, version=5)

        _file_test_1 = schema.Parse(_avro_test_1)
        _file_test_2 = schema.Parse(_avro_test_2)
        _file_test_3 = schema.Parse(_avro_test_3)
        _file_test_4 = schema.Parse(_avro_test_4)
        _file_test_5 = schema.Parse(_avro_test_5)

        self.assertEqual(r_schema_1, _file_test_1)
        self.assertEqual(r_schema_2, _file_test_2)
        self.assertEqual(r_schema_3, _file_test_3)
        self.assertEqual(r_schema_4, _file_test_4)
        self.assertEqual(r_schema_5, _file_test_5)

        self.assertRaises(SchemaVersionNotFound,
                          lambda: self.register.get(name, version=6))
Example #4
    def testValidCastToStringAfterParse(self):
        """
    Test that the string generated by an Avro Schema object
    is, in fact, a valid Avro schema.
    """
        correct = 0
        for example in VALID_EXAMPLES:
            schema_data = schema.Parse(example.schema_string)
            schema.Parse(str(schema_data))
            correct += 1

        fail_msg = "Cast to string success on %d out of %d schemas" % \
          (correct, len(VALID_EXAMPLES))
        self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg)
Example #5
  def testUnknownSymbol(self):
    writer_schema = schema.Parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["FOO", "BAR"]}""")
    datum_to_write = 'FOO'

    reader_schema = schema.Parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["BAR", "BAZ"]}""")

    writer, encoder, datum_writer = write_datum(datum_to_write, writer_schema)
    reader = io.BytesIO(writer.getvalue())
    decoder = avro_io.BinaryDecoder(reader)
    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
    self.assertRaises(avro_io.SchemaResolutionException, datum_reader.read, decoder)
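Several of these examples unpack a (writer, encoder, datum_writer) triple from a write_datum helper that the listing never shows. A plausible definition, assuming the standard io module and avro-python3's avro.io imported as avro_io, mirroring the Avro test suite:

def write_datum(datum, writer_schema):
    # Serialize one datum into an in-memory buffer and return the
    # pieces the callers unpack.
    writer = io.BytesIO()
    encoder = avro_io.BinaryEncoder(writer)
    datum_writer = avro_io.DatumWriter(writer_schema)
    datum_writer.write(datum, encoder)
    return writer, encoder, datum_writer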
Example #6
def check_skip_number(number_type):
    logging.debug('Testing skip number for %s', number_type)
    correct = 0
    for value_to_skip, hex_encoding in BINARY_ENCODINGS:
        VALUE_TO_READ = 6253
        logging.debug('Value to Skip: %d', value_to_skip)

        # write the value to skip and a known value
        writer_schema = schema.Parse('"%s"' % number_type.lower())
        writer, encoder, datum_writer = write_datum(value_to_skip,
                                                    writer_schema)
        datum_writer.write(VALUE_TO_READ, encoder)

        # skip the value
        reader = io.BytesIO(writer.getvalue())
        decoder = avro_io.BinaryDecoder(reader)
        decoder.skip_long()

        # read data from string buffer
        datum_reader = avro_io.DatumReader(writer_schema)
        read_value = datum_reader.read(decoder)

        logging.debug('Read Value: %d', read_value)
        if read_value == VALUE_TO_READ: correct += 1
    return correct
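check_skip_number returns how many encodings survived the skip-then-read round trip, so a driver would compare it against len(BINARY_ENCODINGS); a hypothetical test method:

def test_skip_long(self):
    # Every encoding pair in BINARY_ENCODINGS should be skippable.
    self.assertEqual(check_skip_number('long'), len(BINARY_ENCODINGS))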
Example #7
    def testMetadata(self):
        file_path = self.NewTempFile()

        # Test the writer with a 'with' statement.
        with open(file_path, 'wb') as writer:
            datum_writer = io.DatumWriter()
            sample_schema, sample_datum = SCHEMAS_TO_VALIDATE[1]
            schema_object = schema.Parse(sample_schema)
            with datafile.DataFileWriter(writer, datum_writer,
                                         schema_object) as dfw:
                dfw.SetMeta('test.string', 'foo')
                dfw.SetMeta('test.number', '1')
                dfw.append(sample_datum)
            self.assertTrue(writer.closed)

        # Test the reader with a 'with' statement.
        datums = []
        with open(file_path, 'rb') as reader:
            datum_reader = io.DatumReader()
            with datafile.DataFileReader(reader, datum_reader) as dfr:
                self.assertEqual(b'foo', dfr.GetMeta('test.string'))
                self.assertEqual(b'1', dfr.GetMeta('test.number'))
                for datum in dfr:
                    datums.append(datum)
            self.assertTrue(reader.closed)
Example #8
    def test_container(self):
        writer = open('data.avro', 'wb')
        datum_writer = io.DatumWriter()
        schema_object = schema.Parse("""\
{ "type": "record",
  "name": "StringPair",
  "doc": "A pair of strings.",
  "fields": [
    {"name": "left", "type": "string"},
    {"name": "right", "type": "string"}
  ]
}
    """)
        dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
        datum = {'left': 'L', 'right': 'R'}
        dfw.append(datum)
        dfw.close()
        reader = open('data.avro', 'rb')
        datum_reader = io.DatumReader()
        dfr = datafile.DataFileReader(reader, datum_reader)
        data = []
        for datum in dfr:
            data.append(datum)
        dfr.close()
        self.assertEqual(1, len(data))
        self.assertEqual(datum, data[0])
Example #9
 def testSchemaPromotion(self):
   # note that checking writer_schema.type in read_data
   # allows us to handle promotion correctly
   promotable_schemas = ['"int"', '"long"', '"float"', '"double"']
   incorrect = 0
   for i, ws in enumerate(promotable_schemas):
     writer_schema = schema.Parse(ws)
     datum_to_write = 219
     for rs in promotable_schemas[i + 1:]:
       reader_schema = schema.Parse(rs)
       writer, enc, dw = write_datum(datum_to_write, writer_schema)
       datum_read = read_datum(writer, writer_schema, reader_schema)
       logging.debug('Writer: %s Reader: %s', writer_schema, reader_schema)
       logging.debug('Datum Read: %s', datum_read)
       if datum_read != datum_to_write: incorrect += 1
   self.assertEqual(incorrect, 0)
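The companion read_datum helper is also not shown in the listing; a plausible definition consistent with how it is called above (buffer, writer schema, optional reader schema):

def read_datum(buffer, writer_schema, reader_schema=None):
    # Decode a single datum from the bytes accumulated by write_datum.
    reader = io.BytesIO(buffer.getvalue())
    decoder = avro_io.BinaryDecoder(reader)
    datum_reader = avro_io.DatumReader(writer_schema, reader_schema)
    return datum_reader.read(decoder)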
Example #10
    def __init__(self, reader, datum_reader):
        """Initializes a new data file reader.

    Args:
      reader: Open file to read from.
      datum_reader: Avro datum reader.
    """
        self._reader = reader
        self._raw_decoder = avro_io.BinaryDecoder(reader)
        self._datum_decoder = None  # Maybe reset at every block.
        self._datum_reader = datum_reader

        # read the header: magic, meta, sync
        self._read_header()

        # ensure codec is valid
        avro_codec_raw = self.GetMeta('avro.codec')
        if avro_codec_raw is None:
            self.codec = "null"
        else:
            self.codec = avro_codec_raw.decode('utf-8')
        if self.codec not in VALID_CODECS:
            raise DataFileException('Unknown codec: %s.' % self.codec)

        self._file_length = self._GetInputFileLength()

        # get ready to read
        self._block_count = 0
        self.datum_reader.writer_schema = (schema.Parse(
            self.GetMeta(SCHEMA_KEY).decode('utf-8')))
Example #11
def update(topic, schema_config, force=False):
    """Given a topic, update (or create) a schema"""
    client = CachedSchemaRegistryClient(schema_config)

    if topic == 'all':
        schema_files = Path(__file__).parent.glob('**/*.avsc')
    else:
        schema_files = Path(__file__).parent.glob(f'**/{topic}-*.avsc')

    for schema_file in schema_files:
        with open(schema_file) as f:
            schema_str = f.read()
        schema_dict = json.loads(schema_str)
        avro_schema = schema.Parse(schema_str)

        subject = schema_dict['namespace'].replace('.', '-') + '-' + schema_dict['name']
        if force:
            client.update_compatibility('NONE', subject=subject)
        else:
            client.update_compatibility('BACKWARD', subject=subject)

        try:
            schema_id = client.register(subject, avro_schema)
            log.info(f'Added/updated {schema_file}\t Schema ID {schema_id}')
        except avro_error.ClientError as error:
            log.error(f'Error adding/updating {schema_file}: {error.message}')
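A sketch of how update might be driven, assuming a Confluent Schema Registry reachable at an illustrative local URL:

update('all', {'url': 'http://localhost:8081'})
# Hypothetical topic name; force=True relaxes compatibility to NONE first.
update('mytopic', {'url': 'http://localhost:8081'}, force=True)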
Example #12
    def testParse(self):
        correct = 0
        for iexample, example in enumerate(EXAMPLES):
            logging.debug('Testing example #%d\n%s', iexample,
                          example.schema_string)
            try:
                schema.Parse(example.schema_string)
                if example.valid:
                    correct += 1
                else:
                    self.fail('Invalid schema was parsed:\n%s' %
                              example.schema_string)
            except Exception as exn:
                if example.valid:
                    self.fail('Valid schema failed to parse: %r\n%s' %
                              (example.schema_string, traceback.format_exc()))
                else:
                    if logging.getLogger().getEffectiveLevel() <= 5:
                        logging.debug('Expected error:\n%s',
                                      traceback.format_exc())
                    else:
                        logging.debug('Expected error: %r', exn)
                    correct += 1

        self.assertEqual(
            correct,
            len(EXAMPLES),
            'Parse behavior correct on %d out of %d schemas.' %
            (correct, len(EXAMPLES)),
        )
Example #13
    def testValidateUnion(self):
        example_schema = """\
    ["int", "null"]
    """
        datum = None
        result = avro_io.Validate(schema.Parse(example_schema), datum)

        self.assertTrue(result)
Example #14
def deserialize(x):
    schema_path = "data/files/fb_scheam.avsc"
    schema1 = schema.Parse(open(schema_path).read())
    bytes_reader = io2.BytesIO(x)
    decoder = io.BinaryDecoder(bytes_reader)
    reader = io.DatumReader(schema1)
    message = reader.read(decoder)
    return message
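A self-contained sketch of the round trip this deserialize implements, with Example #26 below providing the producer half; the inline schema stands in for fb_scheam.avsc (an assumption), using avro-python3:

import io
from avro import schema, io as avro_io

MSG_SCHEMA = schema.Parse('{"type": "record", "name": "Msg",'
                          ' "fields": [{"name": "text", "type": "string"}]}')

def roundtrip(datum):
    # Encode the datum to raw bytes, as the producer side does...
    buf = io.BytesIO()
    avro_io.DatumWriter(MSG_SCHEMA).write(datum, avro_io.BinaryEncoder(buf))
    # ...then decode them back, as deserialize() does.
    buf.seek(0)
    return avro_io.DatumReader(MSG_SCHEMA).read(avro_io.BinaryDecoder(buf))

assert roundtrip({'text': 'hi'}) == {'text': 'hi'}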
Example #15
def _write_items(base_name, schema_str, items):
    avro_schema = schema.Parse(schema_str)
    avro_file = base_name + '.avro'
    # Container files are binary, so open in 'wb'; the with-statement
    # closes the writer, making an explicit close() redundant.
    with DataFileWriter(open(avro_file, "wb"), DatumWriter(), avro_schema) as writer:
        for i in items:
            writer.append(i)
    return avro_file
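An illustrative call; both the pair schema and the items are assumptions:

pair_schema = '''{"type": "record", "name": "Pair",
                  "fields": [{"name": "left", "type": "string"},
                             {"name": "right", "type": "string"}]}'''
avro_path = _write_items('pairs', pair_schema, [{'left': 'a', 'right': '1'}])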
Example #16
 def testTypeException(self):
   writer_schema = schema.Parse("""\
     {"type": "record", "name": "Test",
      "fields": [{"name": "F", "type": "int"},
                 {"name": "E", "type": "int"}]}""")
   datum_to_write = {'E': 5, 'F': 'Bad'}
   self.assertRaises(
       avro_io.AvroTypeException, write_datum, datum_to_write, writer_schema)
Example #17
    def testValidateUnionError(self):
        example_schema = """\
    ["int", "null"]
    """
        datum = "there should not be a string here"
        expected_regex = "datum should be one of following: \['int', 'null']"

        with self.assertRaisesRegex(avro_io.AvroTypeException, expected_regex):
            avro_io.Validate(schema.Parse(example_schema), datum)
Example #18
 def testValidate(self):
   passed = 0
   for example_schema, datum in SCHEMAS_TO_VALIDATE:
     logging.debug('Schema: %r', example_schema)
     logging.debug('Datum: %r', datum)
     validated = avro_io.Validate(schema.Parse(example_schema), datum)
     logging.debug('Valid: %s', validated)
     if validated: passed += 1
   self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
Example #19
 def testDuplicateRecordField(self):
     schema_string = """{
   "type": "record",
   "name": "Test",
   "fields": [{"name": "foo", "type": "int"}, {"name": "foo", "type": "string"}]
 }"""
     with self.assertRaises(schema.SchemaParseException) as e:
         schema.Parse(schema_string)
     self.assertRegex(str(e.exception), 'Duplicate.*field name.*foo')
Example #20
def generate(schema_file, output_path):
    interop_schema = schema.Parse(open(schema_file, 'r').read())
    datum_writer = io.DatumWriter()
    for codec in datafile.VALID_CODECS:
        filename = 'py3'
        if codec != 'null':
            filename += '_' + codec
        with Path(output_path, filename).with_suffix('.avro').open('wb') as writer, \
          datafile.DataFileWriter(writer, datum_writer, interop_schema, codec) as dfw:
            dfw.append(DATUM)
Example #21
def consumer2():
    consumer = KafkaConsumer('test')
    schema_path = "data/files/fb_scheam.avsc"
    schema1 = schema.Parse(open(schema_path).read())
    for msg in consumer:
        bytes_reader = io2.BytesIO(msg.value)
        decoder = io.BinaryDecoder(bytes_reader)
        reader = io.DatumReader(schema1)
        message = reader.read(decoder)
        return (message)
Example #22
def dict_to_json(data: Dict):
    # to JSON

    # avro_schema = schema.SchemaFromJSONData(schema_dict)
    avro_schema = schema.Parse(open("rate.avsc", "rb").read())

    serializer = AvroJsonSerializer(avro_schema)
    json_str = serializer.to_json(data)

    pretty_print(json_str)
Example #23
 def test_write_data(self):
     writer = open('pairs.avro', 'wb')
     datum_writer = io.DatumWriter()
     schema_object = schema.Parse(open('Pair.avsc').read())
     dfw = datafile.DataFileWriter(writer, datum_writer, schema_object)
     dfw.append({'left': 'a', 'right': '1'})
     dfw.append({'left': 'c', 'right': '2'})
     dfw.append({'left': 'b', 'right': '3'})
     dfw.append({'left': 'b', 'right': '2'})
     dfw.close()
Example #24
    def testValidateShouldRaiseFormattedError(self):
        example_schema = '{"type": "int"}'
        datum = "aaa"

        expected_regex = "datum should be int type," \
                         " but as value we got 'aaa'"

        with self.assertRaisesRegex(avro_io.AvroPrimitiveTypeException,
                                    expected_regex):
            avro_io.Validate(schema.Parse(example_schema), datum)
Example #25
    def __init__(
        self,
        writer,
        datum_writer,
        writer_schema=None,
        codec='null',
    ):
        """Constructs a new DataFileWriter instance.

    If the schema is not present, presume we're appending.

    Args:
      writer: File-like object to write into.
      datum_writer:
      writer_schema: Schema
      codec:
    """
        self._writer = writer
        self._encoder = avro_io.BinaryEncoder(writer)
        self._datum_writer = datum_writer
        self._buffer_writer = io.BytesIO()
        self._buffer_encoder = avro_io.BinaryEncoder(self._buffer_writer)
        self._block_count = 0
        self._meta = {}

        # Ensure we have a writer that accepts bytes:
        self._writer.write(b'')

        # Whether the header has already been written:
        self._header_written = False

        if writer_schema is not None:
            if codec not in VALID_CODECS:
                raise DataFileException('Unknown codec: %r' % codec)
            self._sync_marker = DataFileWriter.GenerateSyncMarker()
            self.SetMeta('avro.codec', codec)
            self.SetMeta('avro.schema', str(writer_schema).encode('utf-8'))
            self.datum_writer.writer_schema = writer_schema
        else:
            # open writer for reading to collect metadata
            dfr = DataFileReader(writer, avro_io.DatumReader())

            # TODO: collect arbitrary metadata
            # collect metadata
            self._sync_marker = dfr.sync_marker
            self.SetMeta('avro.codec', dfr.GetMeta('avro.codec'))

            # get schema used to write existing file
            schema_from_file = dfr.GetMeta('avro.schema').decode('utf-8')
            self.SetMeta('avro.schema', schema_from_file)
            self.datum_writer.writer_schema = schema.Parse(schema_from_file)

            # seek to the end of the file and prepare for writing
            writer.seek(0, 2)
            self._header_written = True
Example #26
    def serialize(self, items):
        schema_path = "fb_scheam.avsc"
        SCHEMA = schema.Parse(open(schema_path).read())
        writer = io.DatumWriter(SCHEMA)
        bytes_writer = io2.BytesIO()
        encoder = io.BinaryEncoder(bytes_writer)
        # There must be a better way of writing this item that isn't so long
        writer.write(get_as_json(items), encoder)
        raw_bytes = bytes_writer.getvalue()

        return raw_bytes
Example #27
 def callback(ch, method, properties, body):
     start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
     bytes_reader = BytesIO(body)
     decoder = avro_io.BinaryDecoder(bytes_reader)
     reader = avro_io.DatumReader(
         schema.Parse(open(f"schemas/{exchange}.avsc", "rb").read()))
     event_body = reader.read(decoder)
     time.sleep(0.1)  # Mock feature computing time
     print(f"Event received:"
           f"size: {sys.getsizeof(event_body)} bytes,"
           f"time: {time.clock() - start_time} secs")
Example #28
  def testAppend(self):
    correct = 0
    codecs_to_validate = get_codecs_to_validate()
    for iexample, (writer_schema, datum) in enumerate(SCHEMAS_TO_VALIDATE):
      for codec in codecs_to_validate:
        file_path = self.NewTempFile()

        logging.debug(
            'Performing append with codec %r in file %s for example #%d\n'
            'Writing datum: %r using writer schema:\n%s',
            codec, file_path, iexample,
            datum, writer_schema)

        logging.debug('Creating data file %r', file_path)
        with open(file_path, 'wb') as writer:
          datum_writer = io.DatumWriter()
          schema_object = schema.Parse(writer_schema)
          with datafile.DataFileWriter(
              writer=writer,
              datum_writer=datum_writer,
              writer_schema=schema_object,
              codec=codec,
          ) as dfw:
            dfw.append(datum)

        logging.debug('Appending data to %r', file_path)
        for i in range(9):
          with open(file_path, 'ab+') as writer:
            with datafile.DataFileWriter(writer, io.DatumWriter()) as dfw:
              dfw.append(datum)

        logging.debug('Reading appended data from %r', file_path)
        with open(file_path, 'rb') as reader:
          datum_reader = io.DatumReader()
          with datafile.DataFileReader(reader, datum_reader) as dfr:
            appended_data = list(dfr)

        logging.debug(
            'Appended data has %d items: %r',
            len(appended_data), appended_data)

        if ([datum] * 10) == appended_data:
          correct += 1
        else:
          logging.error(
              'Appended data does not match:\n'
              'Expect: %r\n'
              'Actual: %r',
              [datum] * 10,
              appended_data)

    self.assertEqual(
        correct,
        len(codecs_to_validate) * len(SCHEMAS_TO_VALIDATE))
Example #29
 def testDocAttributes(self):
     correct = 0
     for example in DOC_EXAMPLES:
         original_schema = schema.Parse(example.schema_string)
         if original_schema.doc is not None:
             correct += 1
         if original_schema.type == 'record':
             for f in original_schema.fields:
                 if f.doc is None:
                     self.fail("Failed to preserve 'doc' in fields: " +
                               example.schema_string)
     self.assertEqual(correct, len(DOC_EXAMPLES))
Example #30
 def testCorrectRecursiveExtraction(self):
     parsed = schema.Parse("""
   {
     "type": "record",
     "name": "X",
     "fields": [{
       "name": "y",
       "type": {
         "type": "record",
         "name": "Y",
         "fields": [{"name": "Z", "type": "X"}, {"name": "W", "type": "X"}]
       }
     }]
   }
 """)
     logging.debug('Parsed schema:\n%s', parsed)
     logging.debug('Fields: %s', parsed.fields)
     t = schema.Parse(str(parsed.fields[0].type))
     # If we've made it this far, the subschema was reasonably stringified;
     # it could be reparsed.
     self.assertEqual("X", t.fields[0].type.name)