예제 #1
0
    def write_dynamic_frame_from_catalog(self, frame, database = None, table_name = None, redshift_tmp_dir = "",
                                         transformation_ctx = "", additional_options = {}, catalog_id = None, **kwargs):
        """
        Writes a DynamicFrame through the catalog sink obtained for the given database and table
        :param frame: dynamic frame to be written
        :param database: database in catalog; mutually exclusive with the name_space keyword
        :param table_name: table name (required)
        :param redshift_tmp_dir: tmp dir
        :param transformation_ctx: transformation context
        :param additional_options: options converted with makeOptions and handed to the catalog sink
        :param catalog_id: catalog id of the DataCatalog being accessed (account id of the data catalog).
                Set to None by default (None defaults to the catalog id of the calling account in the service)
        :param kwargs: may carry name_space as an alternative way of naming the database
        :return: whatever DataSink.write returns for the frame
        :raises Exception: if both or neither of database / name_space are given, or if table_name is None
        """
        name_space_given = "name_space" in kwargs

        # Exactly one of database / name_space must identify the catalog database.
        if name_space_given:
            if database is not None:
                raise Exception("Parameter name_space and database are both specified, choose one.")
            catalog_db = kwargs.pop("name_space")
        elif database is None:
            raise Exception("Parameter name_space or database is missing.")
        else:
            catalog_db = database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")

        jvm_sink = self._ssql_ctx.getCatalogSink(
            catalog_db,
            table_name,
            redshift_tmp_dir,
            transformation_ctx,
            makeOptions(self._sc, additional_options),
            catalog_id,
        )
        sink = DataSink(jvm_sink, self)
        return sink.write(frame)
예제 #2
0
    def write_dynamic_frame_from_catalog(self,
                                         frame,
                                         database=None,
                                         table_name=None,
                                         redshift_tmp_dir="",
                                         transformation_ctx="",
                                         **kwargs):
        """
        Writes a DynamicFrame through the catalog sink obtained for the given database and table
        :param frame: dynamic frame to be written
        :param database: database in catalog; mutually exclusive with the name_space keyword
        :param table_name: table name (required)
        :param redshift_tmp_dir: tmp dir
        :param transformation_ctx: transformation context (accepted, but not forwarded to
                getCatalogSink by this implementation)
        :param kwargs: may carry name_space as an alternative way of naming the database
        :return: whatever DataSink.write returns for the frame
        :raises Exception: if both or neither of database / name_space are given, or if table_name is None
        """
        name_space_given = "name_space" in kwargs

        # Exactly one of database / name_space must identify the catalog database.
        if name_space_given:
            if database is not None:
                raise Exception(
                    "Parameter name_space and database are both specified, choose one."
                )
            catalog_db = kwargs.pop("name_space")
        elif database is None:
            raise Exception("Parameter name_space or database is missing.")
        else:
            catalog_db = database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")

        jvm_sink = self._ssql_ctx.getCatalogSink(catalog_db, table_name, redshift_tmp_dir)
        sink = DataSink(jvm_sink, self)
        return sink.write(frame)
예제 #3
0
    def write_from_jdbc_conf(self, frame_or_dfc, catalog_connection, connection_options={},
                             redshift_tmp_dir = "", transformation_ctx = "", catalog_id = None):
        """
        Writes a DynamicFrame or DynamicFrameCollection through a JDBC sink built from a catalog connection
        :param frame_or_dfc: DynamicFrame or DynamicFrameCollection to be written
        :param catalog_connection: connection handed to getJDBCSink
        :param connection_options: options for the sink; never modified here. For a
                DynamicFrameCollection a copy with "useFrameName" set to True is used instead
        :param redshift_tmp_dir: tmp dir
        :param transformation_ctx: transformation context
        :param catalog_id: forwarded to getJDBCSink (None by default)
        :return: whatever DataSink.write returns for the frame or collection
        :raises TypeError: if frame_or_dfc is neither a DynamicFrame nor a DynamicFrameCollection
        """
        if isinstance(frame_or_dfc, DynamicFrameCollection):
            # Copy before adding the flag: `items() + [...]` only works on Python 2
            # (dict views do not support `+`), and the caller's dict / the shared
            # default must stay untouched.
            new_options = dict(connection_options)
            new_options["useFrameName"] = True
        elif isinstance(frame_or_dfc, DynamicFrame):
            new_options = connection_options
        else:
            raise TypeError("frame_or_dfc must be DynamicFrame or "
                            "DynamicFrameCollection. Got " +
                            str(type(frame_or_dfc)))

        j_sink = self._ssql_ctx.getJDBCSink(catalog_connection, makeOptions(self._sc, new_options), redshift_tmp_dir,
                                            transformation_ctx, catalog_id)
        return DataSink(j_sink, self).write(frame_or_dfc)
예제 #4
0
    def getSink(self, connection_type, format = None, transformation_ctx = "", **options):
        """Builds a DataSink for the requested connection type.

        The returned sink can be used to write DynamicFrames to external targets.
        When ``format`` is given and its lower-cased value is listed in
        ``self.Spark_SQL_Formats``, the format is used in place of
        ``connection_type`` when asking for the underlying sink.

        Example:
        >>> data_sink = context.getSink("s3")
        >>> data_sink.setFormat("json")
        >>> data_sink.writeFrame(myFrame)
        """
        is_spark_sql_format = bool(format) and format.lower() in self.Spark_SQL_Formats
        sink_type = format if is_spark_sql_format else connection_type

        jvm_sink = self._ssql_ctx.getSink(
            sink_type,
            makeOptions(self._sc, options),
            transformation_ctx,
        )
        return DataSink(jvm_sink, self)