Beispiel #1
0
    def test_chunks(self, chunk_size, input, expected_chunks):
        """Verify that the writer splits the input rows into the expected
        chunks and records a per-chunk size metric plus one total-size
        metric, all tagged with the table name."""
        metrics = TestingMetricsBackend()

        table_name = "mysterious_inexistent_table"
        writer = HTTPBatchWriter(
            table_name=table_name,
            host="0:0:0:0",
            port=9000,
            user="******",
            password="",
            database="default",
            chunk_size=chunk_size,
            metrics=metrics,
        )

        # Materialize the chunk generator and compare whole lists: the
        # previous zip()-based loop silently truncated, so a wrong *number*
        # of chunks could never fail the test.
        chunks = list(writer._prepare_chunks(input))
        assert chunks == expected_chunks

        assert metrics.calls == [
            Timing("writer.chunk.size", len(chunk), {"table_name": table_name})
            for chunk in expected_chunks
        ] + [
            Timing(
                "writer.total.size",
                sum(map(len, expected_chunks)),
                {"table_name": table_name},
            )
        ]
Beispiel #2
0
    def get_writer(self, options=None, table_name=None) -> BatchWriter:
        """Build an HTTP batch writer for this dataset's write schema.

        Rows are encoded with the stdlib ``json`` module; ``datetime``
        values are formatted with ``DATETIME_FORMAT``. Any other value
        the encoder cannot handle raises ``TypeError``.
        """
        from snuba import settings
        from snuba.clickhouse.http import HTTPBatchWriter

        def default(value):
            # json.dumps fallback: only datetimes get special handling.
            if isinstance(value, datetime):
                return value.strftime(DATETIME_FORMAT)
            # Name the offending type so serialization failures are
            # debuggable (a bare TypeError gives no clue what broke).
            raise TypeError(f"Cannot serialize value of type {type(value)}")

        return HTTPBatchWriter(
            self.get_dataset_schemas().get_write_schema_enforce(),
            settings.CLICKHOUSE_HOST,
            settings.CLICKHOUSE_HTTP_PORT,
            lambda row: json.dumps(row, default=default).encode("utf-8"),
            options,
            table_name,
        )
Beispiel #3
0
 def get_batch_writer(
     self,
     table_name: str,
     metrics: MetricsBackend,
     options: ClickhouseWriterOptions,
     chunk_size: Optional[int],
 ) -> BatchWriter[JSONRow]:
     """Create an HTTP batch writer for *table_name* against this
     cluster's query node, forwarding metrics, options and chunking
     configuration unchanged."""
     # Connection parameters all come from this cluster instance's
     # configuration; gather them in one place for readability.
     connection = {
         "host": self.__query_node.host_name,
         "port": self.__http_port,
         "user": self.__user,
         "password": self.__password,
         "database": self.__database,
     }
     return HTTPBatchWriter(
         table_name,
         metrics=metrics,
         options=options,
         chunk_size=chunk_size,
         **connection,
     )
Beispiel #4
0
    def get_bulk_writer(self, options=None, table_name=None) -> BatchWriter:
        """
        This is a stripped down version of the writer designed
        for better performance when loading data in bulk.

        Rows are encoded with rapidjson and no ``default`` fallback is
        supplied, so every row must already be JSON-serializable (e.g.
        datetimes pre-formatted as strings).
        """
        # TODO: Consider using rapidjson to encode everywhere
        # once we will be confident it is reliable enough.

        from snuba import settings
        from snuba.clickhouse.http import HTTPBatchWriter

        return HTTPBatchWriter(
            self.get_dataset_schemas().get_write_schema_enforce(),
            settings.CLICKHOUSE_HOST,
            settings.CLICKHOUSE_HTTP_PORT,
            # Encode each row to UTF-8 bytes, the format the writer sends.
            lambda row: rapidjson.dumps(row).encode("utf-8"),
            options,
            table_name,
            # Bulk loads use a dedicated, presumably larger, buffer size.
            chunk_size=settings.BULK_CLICKHOUSE_BUFFER,
        )
Beispiel #5
0
 def get_batch_writer(
     self,
     metrics: MetricsBackend,
     insert_statement: InsertStatement,
     encoding: Optional[str],
     options: ClickhouseWriterOptions,
     chunk_size: Optional[int],
     buffer_size: int,
 ) -> BatchWriter[JSONRow]:
     """Build an HTTP batch writer executing *insert_statement* against
     this cluster's query node, with the given encoding, writer options,
     chunking and buffering configuration."""
     # Qualify the insert statement with the configured database before
     # handing it to the writer.
     statement = insert_statement.with_database(self.__database)
     return HTTPBatchWriter(
         host=self.__query_node.host_name,
         port=self.__http_port,
         user=self.__user,
         password=self.__password,
         statement=statement,
         metrics=metrics,
         encoding=encoding,
         options=options,
         chunk_size=chunk_size,
         buffer_size=buffer_size,
     )
Beispiel #6
0
    def get_writer(self,
                   options=None,
                   table_name=None,
                   rapidjson_serialize=False) -> BatchWriter:
        """Build an HTTP batch writer for this table's schema.

        :param options: extra writer options passed through to
            HTTPBatchWriter.
        :param table_name: optional override of the target table.
        :param rapidjson_serialize: when True, encode rows with rapidjson
            instead of the stdlib json module.
        """
        from snuba import settings
        from snuba.clickhouse.http import HTTPBatchWriter

        def default(value):
            # Encoder fallback: only datetimes are supported; anything
            # else is a serialization bug, reported with its type.
            if isinstance(value, datetime):
                return value.strftime(DATETIME_FORMAT)
            raise TypeError(f"Cannot serialize value of type {type(value)}")

        # Choose the serializer once, instead of re-evaluating the
        # rapidjson_serialize flag inside the lambda on every single row.
        dumps = rapidjson.dumps if rapidjson_serialize else json.dumps

        return HTTPBatchWriter(
            self.__table_schema,
            settings.CLICKHOUSE_HOST,
            settings.CLICKHOUSE_HTTP_PORT,
            lambda row: dumps(row, default=default).encode("utf-8"),
            options,
            table_name,
            chunk_size=settings.CLICKHOUSE_HTTP_CHUNK_SIZE,
        )