Beispiel #1
0
    def test_es_sink_dynamic(self):
        """
        Builds an Elasticsearch 7 sink with a dynamic-index emitter, checks the class of the
        emitter held by the underlying Java object and attaches the sink to a data stream.
        """
        records = [
            {'name': 'ada', 'id': '1'},
            {'name': 'luna', 'id': '2'},
        ]
        ds = self.env.from_collection(
            records, type_info=Types.MAP(Types.STRING(), Types.STRING()))

        es_dynamic_index_sink = (
            Elasticsearch7SinkBuilder()
            .set_emitter(ElasticsearchEmitter.dynamic_index('name', 'id'))
            .set_hosts(['localhost:9200'])
            .build()
        )

        # The emitter is read from the 'emitter' field of the sink's Java object.
        expected_emitter_class = (
            'org.apache.flink.connector.elasticsearch.sink.SimpleElasticsearchEmitter')
        j_emitter = get_field_value(es_dynamic_index_sink.get_java_function(), 'emitter')
        self.assertTrue(is_instance_of(j_emitter, expected_emitter_class))

        ds.sink_to(es_dynamic_index_sink).name('es dynamic index sink')
Beispiel #2
0
def _create_parquet_map_row_and_data() -> Tuple[RowType, RowTypeInfo, List[Row]]:
    """
    Create matching fixtures for a row with a single ``map`` field whose keys are INT and
    whose values are STRING.

    :return: A tuple of the row data type, the equivalent named row type information and a
             one-element list containing a sample row.
    """
    map_fields = [
        DataTypes.FIELD('map', DataTypes.MAP(DataTypes.INT(), DataTypes.STRING())),
    ]
    row_type = DataTypes.ROW(map_fields)

    field_names = ['map']
    field_types = [Types.MAP(Types.INT(), Types.STRING())]
    row_type_info = Types.ROW_NAMED(field_names, field_types)

    sample_row = Row(map={0: 'a', 1: 'b', 2: 'c'})
    return row_type, row_type_info, [sample_row]
Beispiel #3
0
    def __init__(self,
                 name: str,
                 key_type_info: TypeInformation,
                 value_type_info: TypeInformation):
        """
        Create a MapStateDescriptor.

        The key and value type information are combined into a single map type information,
        which is handed to the parent constructor together with the state name.

        :param name: The name of the state.
        :param key_type_info: The type information of the key.
        :param value_type_info: The type information of the value.
        """
        map_type_info = Types.MAP(key_type_info, value_type_info)
        super(MapStateDescriptor, self).__init__(name, map_type_info)
Beispiel #4
0
    def __init__(self, name: str, key_type_info: TypeInformation,
                 value_type_info: TypeInformation):
        """
        Create a MapStateDescriptor.

        Both the key and the value type information must be instances of
        PickledBytesTypeInfo; the key is validated before the value.

        :param name: The name of the state.
        :param key_type_info: The type information of the key.
        :param value_type_info: The type information of the value.
        :raises ValueError: If the key or the value type information is not a
                            PickledBytesTypeInfo.
        """
        key_is_pickled = isinstance(key_type_info, PickledBytesTypeInfo)
        if not key_is_pickled:
            raise ValueError(
                "The type information of the key could only be PickledBytesTypeInfo "
                "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s" %
                type(key_type_info))

        value_is_pickled = isinstance(value_type_info, PickledBytesTypeInfo)
        if not value_is_pickled:
            raise ValueError(
                "The type information of the value could only be PickledBytesTypeInfo "
                "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s" %
                type(value_type_info))

        map_type_info = Types.MAP(key_type_info, value_type_info)
        super(MapStateDescriptor, self).__init__(name, map_type_info)
Beispiel #5
0
    def test_from_java_type(self):
        """
        Checks, for a range of type informations, that converting to the Java type
        information and back through ``_from_java_type`` yields an equal type information.
        """
        # Factories rather than instances: each type information is created immediately
        # before its own check, one after another in the listed order.
        type_info_factories = [
            Types.INT,
            Types.SHORT,
            Types.LONG,
            Types.FLOAT,
            Types.DOUBLE,
            Types.CHAR,
            Types.BYTE,
            Types.BIG_INT,
            Types.BIG_DEC,
            Types.SQL_DATE,
            Types.SQL_TIME,
            Types.SQL_TIMESTAMP,
            lambda: Types.ROW([Types.INT(), Types.STRING()]),
            lambda: Types.TUPLE([Types.CHAR(), Types.INT()]),
            lambda: Types.PRIMITIVE_ARRAY(Types.INT()),
            lambda: Types.OBJECT_ARRAY(Types.SQL_DATE()),
            Types.PICKLED_BYTE_ARRAY,
            # SQL_DATE is listed a second time on purpose, so it is checked twice.
            Types.SQL_DATE,
            lambda: Types.MAP(Types.INT(), Types.STRING()),
            lambda: Types.LIST(Types.INT()),
        ]

        for create_type_info in type_info_factories:
            type_info = create_type_info()
            round_tripped = _from_java_type(type_info.get_java_type_info())
            self.assertEqual(type_info, round_tripped)
Beispiel #6
0
    def test_es_sink(self):
        """
        Builds a fully configured Elasticsearch 7 sink with a static-index emitter and checks
        that every builder setting is reflected in the fields of the underlying Java object,
        then attaches the sink to a data stream.
        """
        records = [
            {'name': 'ada', 'id': '1'},
            {'name': 'luna', 'id': '2'},
        ]
        ds = self.env.from_collection(
            records, type_info=Types.MAP(Types.STRING(), Types.STRING()))

        es_sink = (
            Elasticsearch7SinkBuilder()
            .set_emitter(ElasticsearchEmitter.static_index('foo', 'id'))
            .set_hosts(['localhost:9200'])
            .set_delivery_guarantee(DeliveryGuarantee.AT_LEAST_ONCE)
            .set_bulk_flush_max_actions(1)
            .set_bulk_flush_max_size_mb(2)
            .set_bulk_flush_interval(1000)
            .set_bulk_flush_backoff_strategy(FlushBackoffType.CONSTANT, 3, 3000)
            .set_connection_username('foo')
            .set_connection_password('bar')
            .set_connection_path_prefix('foo-bar')
            .set_connection_request_timeout(30000)
            .set_connection_timeout(31000)
            .set_socket_timeout(32000)
            .build()
        )

        def java_field(field_name):
            # Reads a field of the Java object returned by es_sink.get_java_function().
            return get_field_value(es_sink.get_java_function(), field_name)

        # Top-level sink fields.
        expected_emitter_class = (
            'org.apache.flink.connector.elasticsearch.sink.SimpleElasticsearchEmitter')
        j_emitter = java_field('emitter')
        self.assertTrue(is_instance_of(j_emitter, expected_emitter_class))
        self.assertEqual(java_field('hosts')[0].toString(), 'http://localhost:9200')
        self.assertEqual(java_field('deliveryGuarantee').toString(), 'at-least-once')

        # Bulk processor settings.
        j_bulk_config = java_field('buildBulkProcessorConfig')
        self.assertEqual(j_bulk_config.getBulkFlushMaxActions(), 1)
        self.assertEqual(j_bulk_config.getBulkFlushMaxMb(), 2)
        self.assertEqual(j_bulk_config.getBulkFlushInterval(), 1000)
        self.assertEqual(j_bulk_config.getFlushBackoffType().toString(), 'CONSTANT')
        self.assertEqual(j_bulk_config.getBulkFlushBackoffRetries(), 3)
        self.assertEqual(j_bulk_config.getBulkFlushBackOffDelay(), 3000)

        # Network client settings.
        j_network_config = java_field('networkClientConfig')
        self.assertEqual(j_network_config.getUsername(), 'foo')
        self.assertEqual(j_network_config.getPassword(), 'bar')
        self.assertEqual(j_network_config.getConnectionRequestTimeout(), 30000)
        self.assertEqual(j_network_config.getConnectionTimeout(), 31000)
        self.assertEqual(j_network_config.getSocketTimeout(), 32000)
        self.assertEqual(j_network_config.getConnectionPathPrefix(), 'foo-bar')

        ds.sink_to(es_sink).name('es sink')