Example #1
0
    def create_dynamic_frame_from_catalog(self, database = None, table_name = None, redshift_tmp_dir = "",
                                          transformation_ctx = "", push_down_predicate="", additional_options = {},
                                          catalog_id = None, **kwargs):
        """
        Creates a DynamicFrame with catalog database, table name and an optional catalog id
        :param database: database in catalog
        :param table_name: table name
        :param redshift_tmp_dir: tmp dir
        :param transformation_ctx: transformation context
        :param push_down_predicate
        :param additional_options
        :param catalog_id catalog id of the DataCatalog being accessed (account id of the data catalog).
                Set to None by default (None defaults to the catalog id of the calling account in the service)
        :return: dynamic frame with potential errors
        """
        if database is not None and "name_space" in kwargs:
            raise Exception("Parameter name_space and database are both specified, choose one.")
        elif database is None and "name_space" not in kwargs:
            raise Exception("Parameter name_space or database is missing.")
        elif "name_space" in kwargs:
            db = kwargs.pop("name_space")
        else:
            db = database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")
        source = DataSource(self._ssql_ctx.getCatalogSource(db, table_name, redshift_tmp_dir, transformation_ctx,
                                                            push_down_predicate,
                                                            makeOptions(self._sc, additional_options), catalog_id),
                            self, table_name)
        return source.getFrame(**kwargs)
Example #2
0
    def create_dynamic_frame_from_catalog(self,
                                          database=None,
                                          table_name=None,
                                          redshift_tmp_dir="",
                                          transformation_ctx="",
                                          **kwargs):
        """Creates a DynamicFrame with catalog database and table name
        """
        if database is not None and "name_space" in kwargs:
            raise Exception(
                "Parameter name_space and database are both specified, choose one."
            )
        elif database is None and "name_space" not in kwargs:
            raise Exception("Parameter name_space or database is missing.")
        elif "name_space" in kwargs:
            db = kwargs.pop("name_space")
        else:
            db = database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")
        source = DataSource(
            self._ssql_ctx.getCatalogSource(db, table_name, redshift_tmp_dir,
                                            transformation_ctx), self,
            table_name)
        return source.getFrame(**kwargs)
Example #3
0
    def getSource(self, connection_type, format = None, transformation_ctx = "", push_down_predicate= "", **options):
        """Creates a DataSource object.

        This can be used to read DynamicFrames from external sources.

        Example:
        >>> data_source = context.getSource("file", paths=["/in/path"])
        >>> data_source.setFormat("json")
        >>> myFrame = data_source.getFrame()
        """
        options["callSite"] = callsite()
        if(format and format.lower() in self.Spark_SQL_Formats):
            connection_type = format

        j_source = self._ssql_ctx.getSource(connection_type,
                                            makeOptions(self._sc, options), transformation_ctx, push_down_predicate)

        prefix = None
        if 'paths' in options and options['paths'] != None:
            paths = options['paths']
            prefix = os.path.commonprefix(paths)
            if prefix != None:
                prefix = prefix.split(':')[-1]
                prefix = re.sub('[:/.]', '', prefix)

        # in case paths is not in options or no common prefix
        if prefix == None:
            prefix = str(uuid.uuid1())
            prefix = re.sub('[-]', '_', prefix)

        return DataSource(j_source, self, prefix)