Ejemplo n.º 1
0
    def from_proto(data_source):
        """
        Convert data source config in FeatureTable spec to a DataSource class object.
        """

        if data_source.file_options.file_format and data_source.file_options.file_url:
            data_source_obj = FileSource(
                field_mapping=data_source.field_mapping,
                file_format=FileFormat.from_proto(data_source.file_options.file_format),
                file_url=data_source.file_options.file_url,
                event_timestamp_column=data_source.event_timestamp_column,
                created_timestamp_column=data_source.created_timestamp_column,
                date_partition_column=data_source.date_partition_column,
            )
        elif data_source.bigquery_options.table_ref:
            data_source_obj = BigQuerySource(
                field_mapping=data_source.field_mapping,
                table_ref=data_source.bigquery_options.table_ref,
                event_timestamp_column=data_source.event_timestamp_column,
                created_timestamp_column=data_source.created_timestamp_column,
                date_partition_column=data_source.date_partition_column,
            )
        elif (
            data_source.kafka_options.bootstrap_servers
            and data_source.kafka_options.topic
            and data_source.kafka_options.message_format
        ):
            data_source_obj = KafkaSource(
                field_mapping=data_source.field_mapping,
                bootstrap_servers=data_source.kafka_options.bootstrap_servers,
                message_format=StreamFormat.from_proto(
                    data_source.kafka_options.message_format
                ),
                topic=data_source.kafka_options.topic,
                event_timestamp_column=data_source.event_timestamp_column,
                created_timestamp_column=data_source.created_timestamp_column,
                date_partition_column=data_source.date_partition_column,
            )
        elif (
            data_source.kinesis_options.record_format
            and data_source.kinesis_options.region
            and data_source.kinesis_options.stream_name
        ):
            data_source_obj = KinesisSource(
                field_mapping=data_source.field_mapping,
                record_format=StreamFormat.from_proto(
                    data_source.kinesis_options.record_format
                ),
                region=data_source.kinesis_options.region,
                stream_name=data_source.kinesis_options.stream_name,
                event_timestamp_column=data_source.event_timestamp_column,
                created_timestamp_column=data_source.created_timestamp_column,
                date_partition_column=data_source.date_partition_column,
            )
        else:
            raise ValueError("Could not identify the source type being added")

        return data_source_obj
Ejemplo n.º 2
0
 def from_proto(data_source: DataSourceProto):
     return FileSource(
         field_mapping=dict(data_source.field_mapping),
         file_format=FileFormat.from_proto(data_source.file_options.file_format),
         path=data_source.file_options.file_url,
         event_timestamp_column=data_source.event_timestamp_column,
         created_timestamp_column=data_source.created_timestamp_column,
         date_partition_column=data_source.date_partition_column,
     )
Ejemplo n.º 3
0
    def from_proto(cls, file_options_proto: DataSourceProto.FileOptions):
        """
        Creates a FileOptions from a protobuf representation of a file option

        args:
            file_options_proto: a protobuf representation of a datasource

        Returns:
            Returns a FileOptions object based on the file_options protobuf
        """
        file_options = cls(
            file_format=FileFormat.from_proto(file_options_proto.file_format),
            file_url=file_options_proto.file_url,
        )
        return file_options