Esempio n. 1
0
    def run(self,
            cmd,
            config=None,
            state=None,
            catalog=None,
            **kwargs) -> Iterable[AirbyteMessage]:
        """Start the connector image detached and yield its parsed output.

        This is a generator: nothing below runs until it is iterated.

        Args:
            cmd: command passed to the container.
            config, state, catalog: forwarded to ``self._prepare_volumes`` to
                build the volumes mounted into the container.
            **kwargs: extra keyword arguments forwarded to
                ``self._client.containers.run``.

        Yields:
            An ``AirbyteMessage`` for every line returned by ``self.read``
            that ``AirbyteMessage.parse_raw`` accepts. Lines rejected with a
            ``ValidationError`` are logged as warnings and skipped.

        Every line, parseable or not, is also written (encoded) to the
        ``raw`` file inside ``self.output_folder``.
        """
        self._runs += 1
        mounts = self._prepare_volumes(config, state, catalog)
        logging.debug(
            f"Docker run {self._image}: \n{cmd}\n"
            f"input: {self.input_folder}\noutput: {self.output_folder}")

        started = self._client.containers.run(image=self._image,
                                              command=cmd,
                                              working_dir="/data",
                                              volumes=mounts,
                                              auto_remove=True,
                                              detach=True,
                                              **kwargs)

        raw_path = self.output_folder / "raw"
        with open(raw_path, "wb+") as raw_file:
            for output_line in self.read(started, command=cmd):
                # Keep a verbatim copy before attempting to parse the line.
                raw_file.write(output_line.encode())
                try:
                    yield AirbyteMessage.parse_raw(output_line)
                except ValidationError as parse_error:
                    logging.warning(
                        "Unable to parse connector's output %s, error: %s",
                        output_line, parse_error)
Esempio n. 2
0
    def run(self,
            cmd,
            config=None,
            state=None,
            catalog=None,
            **kwargs) -> Iterable[AirbyteMessage]:
        """Run the connector image to completion and yield its parsed output.

        This is a generator: nothing below runs until it is iterated.

        Args:
            cmd: command passed to the container.
            config, state, catalog: forwarded to ``self._prepare_volumes`` to
                build the volumes mounted into the container.
            **kwargs: extra keyword arguments forwarded to
                ``self._client.containers.run``.

        Yields:
            An ``AirbyteMessage`` for every output line that
            ``AirbyteMessage.parse_raw`` accepts. Lines rejected with a
            ``ValidationError`` are logged as warnings and skipped.

        The value returned by the container run is written unchanged to the
        ``raw`` file inside ``self.output_folder`` before any parsing.
        """
        self._runs += 1
        mounts = self._prepare_volumes(config, state, catalog)
        raw_output = self._client.containers.run(
            image=self._image,
            command=cmd,
            working_dir="/data",
            volumes=mounts,
            network="host",
            stdout=True,
            stderr=True,
            **kwargs)
        logging.info("Docker run: \n%s\ninput: %s\noutput: %s", cmd,
                     self.input_folder, self.output_folder)

        raw_path = str(self.output_folder / "raw")
        with open(raw_path, "wb+") as raw_file:
            raw_file.write(raw_output)

        decoded_output = raw_output.decode("utf-8")
        for output_line in decoded_output.splitlines():
            try:
                yield AirbyteMessage.parse_raw(output_line)
            except ValidationError as parse_error:
                logging.warning("Unable to parse connector's output %s",
                                parse_error)
Esempio n. 3
0
 def _parse_input_stream(self, input_stream: io.TextIOWrapper) -> Iterable[AirbyteMessage]:
     """Lazily turn each line of ``input_stream`` into an ``AirbyteMessage``.

     Lines that ``AirbyteMessage.parse_raw`` rejects with a
     ``ValidationError`` are reported through ``self.logger.info`` and
     skipped; every other line is yielded as a parsed message.
     """
     for raw_line in input_stream:
         try:
             yield AirbyteMessage.parse_raw(raw_line)
         except ValidationError:
             notice = f"ignoring input which can't be deserialized as Airbyte Message: {raw_line}"
             self.logger.info(notice)
Esempio n. 4
0
def infer_schemas():
    """Infer one JSON schema per stream from Airbyte messages read on stdin.

    Every line of ``sys.stdin`` is parsed with ``AirbyteMessage.parse_raw``.
    The data of each RECORD message is added to a ``NoRequiredSchemaBuilder``
    kept per stream name; messages of any other type are ignored. Once stdin
    is exhausted, each builder's schema is written to
    ``<cwd>/schemas/<stream name>.json`` as indented JSON with sorted keys.

    Exceptions raised by ``AirbyteMessage.parse_raw`` are not caught here and
    propagate to the caller.
    """
    default_folder = os.path.join(os.getcwd(), "schemas")
    # exist_ok=True replaces the former exists()-then-mkdir() pair, which
    # could fail if the folder appeared between the check and the creation.
    os.makedirs(default_folder, exist_ok=True)

    builders = {}
    for line in sys.stdin:
        message = AirbyteMessage.parse_raw(line)
        if message.type != Type.RECORD:
            continue
        stream_name = message.record.stream
        if stream_name not in builders:
            # First record seen for this stream: start a fresh builder.
            builders[stream_name] = NoRequiredSchemaBuilder()
        builders[stream_name].add_object(message.record.data)

    for stream_name, builder in builders.items():
        schema = builder.to_schema()
        output_file_name = os.path.join(default_folder, stream_name + ".json")
        with open(output_file_name, "w") as outfile:
            json.dump(schema, outfile, indent=2, sort_keys=True)
Esempio n. 5
0
def configure_catalog():
    """Build a configured catalog from a catalog message read on stdin.

    Reads a single line with ``input()``, parses it with
    ``AirbyteMessage.parse_raw`` and, for every stream in
    ``record.catalog.streams``:

    * replaces the stream's ``json_schema`` with an empty dict;
    * wraps it in a ``ConfiguredAirbyteStream`` using the first entry of
      ``supported_sync_modes`` and ``DestinationSyncMode.append``.

    The resulting ``ConfiguredAirbyteCatalog`` is written to
    ``<cwd>/integration_tests/configured_catalog.json`` as indented JSON with
    sorted keys.
    """
    record = AirbyteMessage.parse_raw(input())
    for stream in record.catalog.streams:
        stream.json_schema = {}
    streams = [
        ConfiguredAirbyteStream(
            stream=stream,
            sync_mode=stream.supported_sync_modes[0],
            destination_sync_mode=DestinationSyncMode.append)
        for stream in record.catalog.streams
    ]
    configured_catalog = ConfiguredAirbyteCatalog(streams=streams)

    default_folder = os.path.join(os.getcwd(), "integration_tests")
    # exist_ok=True replaces the former exists()-then-mkdir() pair, which
    # could fail if the folder appeared between the check and the creation.
    os.makedirs(default_folder, exist_ok=True)
    output_file_name = os.path.join(default_folder, "configured_catalog.json")
    with open(output_file_name, "w") as outfile:
        # Round-trip through the model's own JSON serialisation so the dump
        # receives plain JSON-compatible data.
        json.dump(json.loads(configured_catalog.json()),
                  outfile,
                  indent=2,
                  sort_keys=True)
Esempio n. 6
0
    def run(self,
            cmd,
            config=None,
            state=None,
            catalog=None,
            **kwargs) -> Iterable[AirbyteMessage]:
        """Run the connector image to completion and yield its parsed output.

        This is a generator: nothing below runs until it is iterated.

        Args:
            cmd: command passed to the container.
            config, state, catalog: forwarded to ``self._prepare_volumes`` to
                build the volumes mounted into the container.
            **kwargs: extra keyword arguments forwarded to
                ``self._client.containers.run``.

        Yields:
            An ``AirbyteMessage`` for every output line that
            ``AirbyteMessage.parse_raw`` accepts. Lines rejected with a
            ``ValidationError`` are logged as warnings and skipped.

        Raises:
            ContainerError: re-raised, without the original traceback chain,
                with ``stderr`` decoded to text when it was captured as bytes.

        The value returned by the container run is written unchanged to the
        ``raw`` file inside ``self.output_folder`` before any parsing.
        """
        self._runs += 1
        volumes = self._prepare_volumes(config, state, catalog)
        logging.info("Docker run: \n%s\ninput: %s\noutput: %s", cmd,
                     self.input_folder, self.output_folder)
        try:
            logs = self._client.containers.run(image=self._image,
                                               command=cmd,
                                               working_dir="/data",
                                               volumes=volumes,
                                               network="host",
                                               stdout=True,
                                               stderr=True,
                                               **kwargs)
        except ContainerError as err:
            # beautify error from container
            # err.stderr is not guaranteed to be bytes: the Docker SDK can
            # leave it as None when it could not collect the container's logs
            # (e.g. auto_remove passed through kwargs). Decoding blindly would
            # raise AttributeError and hide the real container failure.
            stderr = err.stderr
            if isinstance(stderr, bytes):
                stderr = stderr.decode()
            patched_error = ContainerError(container=err.container,
                                           exit_status=err.exit_status,
                                           command=err.command,
                                           image=err.image,
                                           stderr=stderr)
            raise patched_error from None  # get rid of any previous exception stack

        with open(str(self.output_folder / "raw"), "wb+") as f:
            f.write(logs)

        for line in logs.decode("utf-8").splitlines():
            try:
                yield AirbyteMessage.parse_raw(line)
            except ValidationError as exc:
                logging.warning("Unable to parse connector's output %s", exc)
Esempio n. 7
0
def read_input_messages(records_path: str) -> Iterable[AirbyteMessage]:
    """Lazily yield one ``AirbyteMessage`` per line of the file at ``records_path``.

    The file is read as UTF-8 text and each line is passed to
    ``AirbyteMessage.parse_raw``; exceptions raised by the parser are not
    caught here and propagate to the consumer.
    """
    with open(records_path, "r", encoding="utf-8") as records_file:
        for raw_line in records_file:
            yield AirbyteMessage.parse_raw(raw_line)