def run(self, cmd, config=None, state=None, catalog=None, **kwargs) -> Iterable[AirbyteMessage]:
    """Run ``cmd`` in a detached container of ``self._image`` and stream its output.

    This is a generator, so nothing below executes until the result is first
    iterated. Every line obtained from ``self.read`` is written (encoded to
    bytes) to ``<output_folder>/raw`` and then parsed as an Airbyte message.
    Lines that fail validation are reported with a warning and skipped.

    Args:
        cmd: Command handed to the container.
        config, state, catalog: Forwarded to ``self._prepare_volumes``.
        **kwargs: Extra keyword arguments forwarded to ``containers.run``.

    Yields:
        One ``AirbyteMessage`` per line that parses successfully.
    """
    self._runs += 1
    mounts = self._prepare_volumes(config, state, catalog)
    logging.debug(
        f"Docker run {self._image}: \n{cmd}\n"
        f"input: {self.input_folder}\noutput: {self.output_folder}")
    running_container = self._client.containers.run(
        image=self._image,
        command=cmd,
        working_dir="/data",
        volumes=mounts,
        auto_remove=True,
        detach=True,
        **kwargs,
    )
    raw_path = self.output_folder / "raw"
    # The raw dump stays open for as long as the caller keeps iterating.
    with open(raw_path, "wb+") as raw_dump:
        for line in self.read(running_container, command=cmd):
            # Persist the line before parsing so unparseable output is kept too.
            raw_dump.write(line.encode())
            try:
                message = AirbyteMessage.parse_raw(line)
            except ValidationError as exc:
                logging.warning(
                    "Unable to parse connector's output %s, error: %s", line, exc)
            else:
                yield message
def run(self, cmd, config=None, state=None, catalog=None, **kwargs) -> Iterable[AirbyteMessage]:
    """Run ``cmd`` in a container of ``self._image`` and yield the parsed output.

    This is a generator, so nothing below executes until the result is first
    iterated. The value returned by ``containers.run`` is written unchanged to
    ``<output_folder>/raw``, then decoded as UTF-8 and parsed line by line.
    Lines that fail validation are reported with a warning and skipped.

    Args:
        cmd: Command handed to the container.
        config, state, catalog: Forwarded to ``self._prepare_volumes``.
        **kwargs: Extra keyword arguments forwarded to ``containers.run``.

    Yields:
        One ``AirbyteMessage`` per line that parses successfully.
    """
    self._runs += 1
    mounts = self._prepare_volumes(config, state, catalog)
    container_output = self._client.containers.run(
        image=self._image,
        command=cmd,
        working_dir="/data",
        volumes=mounts,
        network="host",
        stdout=True,
        stderr=True,
        **kwargs,
    )
    logging.info("Docker run: \n%s\ninput: %s\noutput: %s", cmd, self.input_folder, self.output_folder)

    # Keep an untouched copy of everything the container printed.
    raw_path = str(self.output_folder / "raw")
    with open(raw_path, "wb+") as raw_dump:
        raw_dump.write(container_output)

    for text_line in container_output.decode("utf-8").splitlines():
        try:
            message = AirbyteMessage.parse_raw(text_line)
        except ValidationError as exc:
            logging.warning("Unable to parse connector's output %s", exc)
        else:
            yield message
def _parse_input_stream(self, input_stream: io.TextIOWrapper) -> Iterable[AirbyteMessage]:
    """Lazily turn each line of ``input_stream`` into an Airbyte message.

    Lines that cannot be deserialized are logged at info level through
    ``self.logger`` and skipped; they never abort the iteration.

    Args:
        input_stream: Text stream iterated line by line.

    Yields:
        One ``AirbyteMessage`` per line that parses successfully.
    """
    for line in input_stream:
        try:
            message = AirbyteMessage.parse_raw(line)
        except ValidationError:
            self.logger.info(f"ignoring input which can't be deserialized as Airbyte Message: {line}")
            continue
        yield message
def infer_schemas():
    """Infer one JSON schema per stream from Airbyte messages read on stdin.

    Every stdin line is parsed as an ``AirbyteMessage``. The data of each
    RECORD message is fed to a ``NoRequiredSchemaBuilder`` dedicated to the
    record's stream; messages of any other type are ignored. Once stdin is
    exhausted, each stream's schema is written to
    ``<cwd>/schemas/<stream_name>.json`` (indented, keys sorted).

    Raises:
        Whatever ``AirbyteMessage.parse_raw`` raises for a line it cannot
        parse; such errors are not caught here.
    """
    default_folder = os.path.join(os.getcwd(), "schemas")
    # exist_ok=True replaces the exists()+mkdir() pair, which could fail if the
    # folder appeared between the check and the creation.
    os.makedirs(default_folder, exist_ok=True)

    builders = {}
    for line in sys.stdin:
        message = AirbyteMessage.parse_raw(line)
        if message.type != Type.RECORD:
            continue
        stream_name = message.record.stream
        builder = builders.get(stream_name)
        if builder is None:
            # First record seen for this stream: start a fresh builder.
            builder = builders[stream_name] = NoRequiredSchemaBuilder()
        builder.add_object(message.record.data)

    for stream_name, builder in builders.items():
        schema = builder.to_schema()
        output_file_name = os.path.join(default_folder, stream_name + ".json")
        with open(output_file_name, "w") as outfile:
            json.dump(schema, outfile, indent=2, sort_keys=True)
def configure_catalog():
    """Build a configured catalog from one catalog message read on stdin.

    A single line is read with ``input()`` and parsed as an ``AirbyteMessage``.
    Each stream in its catalog has its ``json_schema`` reset to an empty dict
    and is wrapped in a ``ConfiguredAirbyteStream`` that uses the stream's
    first supported sync mode and the ``append`` destination sync mode. The
    result is written to ``<cwd>/integration_tests/configured_catalog.json``
    (indented, keys sorted).

    Raises:
        Whatever ``AirbyteMessage.parse_raw`` raises for unparseable input, and
        ``IndexError`` if a stream's ``supported_sync_modes`` is an empty
        sequence; neither is caught here.
    """
    record = AirbyteMessage.parse_raw(input())
    catalog_streams = record.catalog.streams

    # Blank the schemas in place before the streams are wrapped below.
    for stream in catalog_streams:
        stream.json_schema = {}

    streams = [
        ConfiguredAirbyteStream(
            stream=stream,
            sync_mode=stream.supported_sync_modes[0],
            destination_sync_mode=DestinationSyncMode.append,
        )
        for stream in catalog_streams
    ]
    configured_catalog = ConfiguredAirbyteCatalog(streams=streams)

    default_folder = os.path.join(os.getcwd(), "integration_tests")
    # exist_ok=True replaces the exists()+mkdir() pair, which could fail if the
    # folder appeared between the check and the creation.
    os.makedirs(default_folder, exist_ok=True)

    output_file_name = os.path.join(default_folder, "configured_catalog.json")
    with open(output_file_name, "w") as outfile:
        # Round-trip through the model's own JSON serialization so the file can
        # be re-dumped with indentation and sorted keys.
        json.dump(json.loads(configured_catalog.json()), outfile, indent=2, sort_keys=True)
def run(self, cmd, config=None, state=None, catalog=None, **kwargs) -> Iterable[AirbyteMessage]:
    """Run ``cmd`` in a container of ``self._image`` and yield the parsed output.

    This is a generator, so nothing below executes until the result is first
    iterated. The value returned by ``containers.run`` is written unchanged to
    ``<output_folder>/raw``, then decoded as UTF-8 and parsed line by line.
    Lines that fail validation are reported with a warning and skipped.

    Args:
        cmd: Command handed to the container.
        config, state, catalog: Forwarded to ``self._prepare_volumes``.
        **kwargs: Extra keyword arguments forwarded to ``containers.run``.

    Yields:
        One ``AirbyteMessage`` per line that parses successfully.

    Raises:
        ContainerError: Re-raised from the Docker client with ``stderr``
            converted to text when it was bytes, and without the original
            exception chained.
    """
    self._runs += 1
    volumes = self._prepare_volumes(config, state, catalog)
    logging.info("Docker run: \n%s\ninput: %s\noutput: %s", cmd, self.input_folder, self.output_folder)
    try:
        logs = self._client.containers.run(
            image=self._image,
            command=cmd,
            working_dir="/data",
            volumes=volumes,
            network="host",
            stdout=True,
            stderr=True,
            **kwargs,
        )
    except ContainerError as err:
        # beautify error from container
        stderr = err.stderr
        # stderr is not guaranteed to be bytes: the Docker client leaves it as
        # None when it did not collect the container's stderr (e.g. when
        # auto_remove is passed through kwargs). Decode only real bytes, and
        # tolerate non-UTF-8 output, so the container failure is never masked
        # by an AttributeError/UnicodeDecodeError raised from this handler.
        if isinstance(stderr, (bytes, bytearray)):
            stderr = stderr.decode("utf-8", errors="replace")
        patched_error = ContainerError(
            container=err.container,
            exit_status=err.exit_status,
            command=err.command,
            image=err.image,
            stderr=stderr,
        )
        raise patched_error from None  # get rid of any previous exception stack

    # Keep an untouched copy of everything the container printed.
    with open(str(self.output_folder / "raw"), "wb+") as f:
        f.write(logs)

    for line in logs.decode("utf-8").splitlines():
        try:
            yield AirbyteMessage.parse_raw(line)
        except ValidationError as exc:
            logging.warning("Unable to parse connector's output %s", exc)
def read_input_messages(records_path: str) -> Iterable[AirbyteMessage]:
    """Lazily parse every line of the file at ``records_path`` as an Airbyte message.

    The file is opened in binary mode and decoded as UTF-8. Parsing errors are
    not caught here and propagate to whoever is iterating the result.

    Args:
        records_path: Path of the file to read, one serialized message per line.

    Yields:
        One ``AirbyteMessage`` per line, in file order.
    """
    with open(records_path, "rb") as binary_file:
        text_lines = io.TextIOWrapper(binary_file, encoding="utf-8")
        # map() is lazy, so each line is parsed only when the consumer asks for it.
        yield from map(AirbyteMessage.parse_raw, text_lines)