def test_chunks(self, chunk_size, input, expected_chunks):
    metrics = TestingMetricsBackend()
    table_name = "mysterious_inexistent_table"
    # Host, credentials and table are dummies: _prepare_chunks only batches
    # the payload and never opens a connection.
    writer = HTTPBatchWriter(
        table_name=table_name,
        host="0:0:0:0",
        port=9000,
        user="******",
        password="",
        database="default",
        chunk_size=chunk_size,
        metrics=metrics,
    )

    chunks = writer._prepare_chunks(input)
    for chunk, expected in zip(chunks, expected_chunks):
        assert chunk == expected

    # One chunk-size timing is recorded per chunk, followed by a single
    # total-size timing for the whole batch.
    assert metrics.calls == [
        Timing("writer.chunk.size", len(chunk), {"table_name": table_name})
        for chunk in expected_chunks
    ] + [
        Timing(
            "writer.total.size",
            sum(map(len, expected_chunks)),
            {"table_name": table_name},
        )
    ]
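# The signature above implies pytest parametrization. A hypothetical set of
# cases consistent with the assertions might look like the following; the
# actual fixtures live in the snuba test suite and may differ. chunk_size=0
# is assumed here to mean "no chunking" (one blob), chunk_size=n to mean
# "join n encoded rows per chunk".
CHUNK_TEST_CASES = [
    (1, [b"a", b"b", b"c"], [b"a", b"b", b"c"]),  # one row per chunk
    (2, [b"a", b"b", b"c"], [b"ab", b"c"]),       # pairs, plus a remainder
    (0, [b"a", b"b", b"c"], [b"abc"]),            # everything in one chunk
]
# Applied as:
# @pytest.mark.parametrize("chunk_size, input, expected_chunks", CHUNK_TEST_CASES)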
def get_writer(self, options=None, table_name=None) -> BatchWriter:
    from snuba import settings
    from snuba.clickhouse.http import HTTPBatchWriter

    def default(value):
        # Render datetimes with the shared format string; anything else is
        # rejected so json.dumps raises its usual TypeError.
        if isinstance(value, datetime):
            return value.strftime(DATETIME_FORMAT)
        else:
            raise TypeError

    return HTTPBatchWriter(
        self.get_dataset_schemas().get_write_schema_enforce(),
        settings.CLICKHOUSE_HOST,
        settings.CLICKHOUSE_HTTP_PORT,
        lambda row: json.dumps(row, default=default).encode("utf-8"),
        options,
        table_name,
    )
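# A minimal, self-contained sketch of what the `default` hook above does.
# DATETIME_FORMAT here is an assumed value for illustration; the real
# constant is defined elsewhere in snuba.
import json
from datetime import datetime

DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"  # assumed for illustration

def _default(value):
    if isinstance(value, datetime):
        return value.strftime(DATETIME_FORMAT)
    raise TypeError

row = {"event_id": "abc123", "timestamp": datetime(2020, 1, 1)}
print(json.dumps(row, default=_default).encode("utf-8"))
# b'{"event_id": "abc123", "timestamp": "2020-01-01T00:00:00"}'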
def get_batch_writer(
    self,
    table_name: str,
    metrics: MetricsBackend,
    options: ClickhouseWriterOptions,
    chunk_size: Optional[int],
) -> BatchWriter[JSONRow]:
    return HTTPBatchWriter(
        table_name,
        host=self.__query_node.host_name,
        port=self.__http_port,
        user=self.__user,
        password=self.__password,
        database=self.__database,
        metrics=metrics,
        options=options,
        chunk_size=chunk_size,
    )
def get_bulk_writer(self, options=None, table_name=None) -> BatchWriter:
    """
    This is a stripped down version of the writer designed for better
    performance when loading data in bulk.
    """
    # TODO: Consider using rapidjson to encode everywhere
    # once we are confident it is reliable enough.
    from snuba import settings
    from snuba.clickhouse.http import HTTPBatchWriter

    return HTTPBatchWriter(
        self.get_dataset_schemas().get_write_schema_enforce(),
        settings.CLICKHOUSE_HOST,
        settings.CLICKHOUSE_HTTP_PORT,
        lambda row: rapidjson.dumps(row).encode("utf-8"),
        options,
        table_name,
        chunk_size=settings.BULK_CLICKHOUSE_BUFFER,
    )
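# The bulk path skips the datetime `default` hook, so rows are assumed to
# already contain only JSON-native types. A minimal sketch of the per-row
# encoding (python-rapidjson emits compact JSON with no extra whitespace):
import rapidjson

row = {"project_id": 1, "timestamp": "2020-01-01T00:00:00"}
payload = rapidjson.dumps(row).encode("utf-8")
print(payload)  # compact JSON bytes, ready to be appended to a chunk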
def get_batch_writer(
    self,
    metrics: MetricsBackend,
    insert_statement: InsertStatement,
    encoding: Optional[str],
    options: ClickhouseWriterOptions,
    chunk_size: Optional[int],
    buffer_size: int,
) -> BatchWriter[JSONRow]:
    return HTTPBatchWriter(
        host=self.__query_node.host_name,
        port=self.__http_port,
        user=self.__user,
        password=self.__password,
        metrics=metrics,
        statement=insert_statement.with_database(self.__database),
        encoding=encoding,
        options=options,
        chunk_size=chunk_size,
        buffer_size=buffer_size,
    )
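# InsertStatement is only exercised here through with_database(), which
# suggests an immutable builder. A hypothetical, self-contained sketch of
# such a builder follows; the real class lives in snuba.clickhouse.http and
# may differ in shape.
from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class InsertStatementSketch:
    table_name: str
    database: Optional[str] = None

    def with_database(self, database: str) -> "InsertStatementSketch":
        # Return a copy rather than mutating, matching the chained call above.
        return replace(self, database=database)

    def get_statement(self) -> str:
        table = f"{self.database}.{self.table_name}" if self.database else self.table_name
        return f"INSERT INTO {table} FORMAT JSONEachRow"

print(InsertStatementSketch("errors_local").with_database("default").get_statement())
# INSERT INTO default.errors_local FORMAT JSONEachRow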
def get_writer(
    self, options=None, table_name=None, rapidjson_serialize=False
) -> BatchWriter:
    from snuba import settings
    from snuba.clickhouse.http import HTTPBatchWriter

    def default(value):
        if isinstance(value, datetime):
            return value.strftime(DATETIME_FORMAT)
        else:
            raise TypeError

    # Choose the row encoder once; both honor the datetime default hook.
    if rapidjson_serialize:
        encoder = lambda row: rapidjson.dumps(row, default=default).encode("utf-8")
    else:
        encoder = lambda row: json.dumps(row, default=default).encode("utf-8")

    return HTTPBatchWriter(
        self.__table_schema,
        settings.CLICKHOUSE_HOST,
        settings.CLICKHOUSE_HTTP_PORT,
        encoder,
        options,
        table_name,
        chunk_size=settings.CLICKHOUSE_HTTP_CHUNK_SIZE,
    )
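# Both branches of the rapidjson_serialize toggle accept the same `default`
# hook, so the flag only swaps the encoder implementation. The two libraries
# differ in whitespace (rapidjson is compact), so outputs are equivalent as
# JSON but not byte-identical. DATETIME_FORMAT is assumed for illustration.
import json
from datetime import datetime

import rapidjson

DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"  # assumed for illustration

def _default(value):
    if isinstance(value, datetime):
        return value.strftime(DATETIME_FORMAT)
    raise TypeError

row = {"timestamp": datetime(2020, 1, 1)}
assert json.loads(rapidjson.dumps(row, default=_default)) == json.loads(
    json.dumps(row, default=_default)
)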