def _build_csv_job(self, schema, lines):
    """Assemble a CSV-reading pipeline that ends in ``self.test_sink``.

    The given ``lines`` are written verbatim (no separator is appended) to
    ``self.csv_file_name``. That file is then read through a ``FileSource``
    whose record format is ``CsvReaderFormat.for_schema(schema)``, passed
    through ``PassThroughMapFunction`` and attached to ``self.test_sink``.

    This method only wires the pipeline on ``self.env``; it does not call
    ``execute`` itself.

    :param schema: Schema handed to ``CsvReaderFormat.for_schema``.
    :param lines: Iterable of strings written to the CSV input file.
    """
    # Materialize the input file that the source will read.
    with open(self.csv_file_name, 'w') as csv_file:
        csv_file.writelines(lines)

    reader_format = CsvReaderFormat.for_schema(schema)
    file_source = FileSource.for_record_stream_format(
        reader_format, self.csv_file_name
    ).build()

    stream = self.env.from_source(
        file_source, WatermarkStrategy.no_watermarks(), 'csv-source'
    )

    # Route records through a pass-through map with a pickled output type
    # before handing them to the test sink.
    passed_through = stream.map(
        PassThroughMapFunction(), output_type=Types.PICKLED_BYTE_ARRAY()
    )
    passed_through.add_sink(self.test_sink)
def _build_csv_job(self, schema: CsvSchema, lines):
    """Assemble a CSV round-trip pipeline: file source to bulk-format file sink.

    The given ``lines`` are written verbatim (no separator is appended) to
    ``self.csv_file_name``. That file is then read through a ``FileSource``
    whose record format is ``CsvReaderFormat.for_schema(schema)``, and the
    resulting stream is sent to a ``FileSink`` built for ``self.csv_dir_name``
    with ``CsvBulkWriters.for_schema(schema)``.

    This method only wires the pipeline on ``self.env``; it does not call
    ``execute`` itself.

    :param schema: Schema used for both the reader format and the bulk writer.
    :param lines: Iterable of strings written to the CSV input file.
    """
    # Materialize the input file that the source will read.
    with open(self.csv_file_name, 'w') as csv_file:
        csv_file.writelines(lines)

    reader_format = CsvReaderFormat.for_schema(schema)
    file_source = FileSource.for_record_stream_format(
        reader_format, self.csv_file_name
    ).build()

    stream = self.env.from_source(
        file_source, WatermarkStrategy.no_watermarks(), 'csv-source'
    )

    # The same schema drives the writer side, so output mirrors the input layout.
    bulk_writer_factory = CsvBulkWriters.for_schema(schema)
    file_sink = FileSink.for_bulk_format(
        self.csv_dir_name, bulk_writer_factory
    ).build()
    stream.sink_to(file_sink)