Example #1
def get_engine(
    connection: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    **sqlalchemy_kwargs: Any,
) -> sqlalchemy.engine.Engine:
    """Return a SQLAlchemy Engine from a Glue Catalog Connection.

    Only Redshift, PostgreSQL and MySQL are supported.

    Parameters
    ----------
    connection : str
        Connection name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
    sqlalchemy_kwargs
        keyword arguments forwarded to sqlalchemy.create_engine().
        https://docs.sqlalchemy.org/en/13/core/engines.html

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> engine = wr.catalog.get_engine(connection='my_connection')

    """
    details: Dict[str, Any] = get_connection(
        name=connection, catalog_id=catalog_id, boto3_session=boto3_session
    )["ConnectionProperties"]
    # Parse a JDBC URL of the form "jdbc:<engine>://<host>:<port>/<database>".
    db_type: str = details["JDBC_CONNECTION_URL"].split(":")[1].lower()
    host: str = details["JDBC_CONNECTION_URL"].split(":")[2].replace("/", "")
    port, database = details["JDBC_CONNECTION_URL"].split(":")[3].split("/")
    user: str = _quote_plus(details["USERNAME"])
    password: str = _quote_plus(details["PASSWORD"])
    if db_type == "postgresql":
        _utils.ensure_postgresql_casts()
    if db_type in ("redshift", "postgresql"):
        conn_str: str = f"{db_type}+psycopg2://{user}:{password}@{host}:{port}/{database}"
        sqlalchemy_kwargs["executemany_mode"] = "values"
        sqlalchemy_kwargs["executemany_values_page_size"] = 100_000
        return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
    if db_type == "mysql":
        conn_str = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
        return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
    raise exceptions.InvalidDatabaseType(
        f"{db_type} is not a valid Database type."
        f" Only Redshift, PostgreSQL and MySQL are supported.")
Example #2
def get_redshift_temp_engine(
    cluster_identifier: str,
    user: str,
    database: Optional[str] = None,
    duration: int = 900,
    boto3_session: Optional[boto3.Session] = None,
) -> sqlalchemy.engine.Engine:
    """Get Glue connection details.

    Parameters
    ----------
    cluster_identifier : str
        The unique identifier of a cluster.
        This parameter is case sensitive.
    user : str
        The name of a database user.
    database : str, optional
        Database name. If None, the default Database is used.
    duration : int, optional
        The number of seconds until the returned temporary password expires.
        Constraint: minimum 900, maximum 3600.
        Default: 900
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> engine = wr.db.get_redshift_temp_engine('my_cluster', 'my_user')

    """
    client_redshift: boto3.client = _utils.client(service_name="redshift",
                                                  session=boto3_session)
    res: Dict[str, Any] = client_redshift.get_cluster_credentials(
        DbUser=user,
        ClusterIdentifier=cluster_identifier,
        DurationSeconds=duration,
        AutoCreate=False)
    _user: str = _quote_plus(res["DbUser"])
    password: str = _quote_plus(res["DbPassword"])
    cluster: Dict[str, Any] = client_redshift.describe_clusters(
        ClusterIdentifier=cluster_identifier)["Clusters"][0]
    host: str = cluster["Endpoint"]["Address"]
    port: int = cluster["Endpoint"]["Port"]
    if database is None:
        database = cluster["DBName"]
    conn_str: str = f"redshift+psycopg2://{_user}:{password}@{host}:{port}/{database}"
    return sqlalchemy.create_engine(conn_str,
                                    echo=False,
                                    executemany_mode="values",
                                    executemany_values_page_size=100_000)
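A usage sketch for the temporary-credentials variant above; the cluster identifier and user name are placeholders that must exist in the target AWS account, and the raw-string execution style follows SQLAlchemy 1.x (the version the executemany_mode option targets).

engine = get_redshift_temp_engine(
    cluster_identifier="my-cluster",
    user="my_user",
    duration=900,  # temporary password expires after 15 minutes
)
with engine.connect() as con:
    rows = con.execute("SELECT current_user").fetchall()  # SQLAlchemy 1.x-style raw SQL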
Example #3
def quote_plus(val, safe=''):
    cls = val.__class__
    if cls is str:
        val = val.encode('utf-8')
    elif cls is not bytes:
        val = str(val).encode('utf-8')
    return _quote_plus(val, safe=safe)
Example #4
def quote_plus(val, safe=''):
    cls = val.__class__
    if cls is str:
        val = val.encode('utf-8')
    elif cls is not bytes:
        val = str(val).encode('utf-8')
    return _quote_plus(val, safe=safe)
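The wrapper in Examples #3 and #4 only normalizes its input to UTF-8 bytes before delegating to the standard library; assuming _quote_plus is urllib.parse.quote_plus, it behaves as follows.

from urllib.parse import quote_plus as _quote_plus

_quote_plus('a b/c'.encode('utf-8'))   # 'a+b%2Fc'
_quote_plus(str(42).encode('utf-8'))   # '42'
_quote_plus('naïve'.encode('utf-8'))   # 'na%C3%AFve'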
Example #5
 def _set_done(self,value):
     """Set task as done or undone
     
     Arguments:
         value {bool} -- Done [True] or undone [False]
     """
     url = self.client.url+_endpoints.responses.format(task_id=self.id)
     data = "data="+_quote_plus(_json.dumps({
         "recipient": {
             "type": "user",
             "guid": self.student.guid
         },
         "event": {
             "type": {True:"mark-as-done",False:"mark-as-undone"}[value],
             "feedback": "",
             "sent": _datetime.now().strftime("%Y-%m-%dT%H:%M:%S.000Z"),
             "author": self.student.guid
         }
     }).replace(" ",""))
     headers = {
         **_headers(url,content=data,cookies=self.client._formated_cookies,content_type="x-www-form-urlencoded; charset=UTF-8",accept="*/*"),
         "X-Requested-With": "XMLHttpRequest"
     }
     status_code = _requests.post(url,data=data,headers=headers).status_code
     if 200 <= status_code < 300:
         self.done = value
Example #6
 def send_comment(self,message):
     """Set task as done or undone
     
     Arguments:
         message {str} -- Message to send
     """
     url = self.client.url+_endpoints.responses.format(task_id=self.id)
     data = "data="+_quote_plus(_json.dumps({
         "recipient": {
             "type": "user",
             "guid": self.student.guid
         },
         "event": {
             "type": "comment",
             "message": message,
             "feedback": "",
             "sent": _datetime.now().strftime("%Y-%m-%dT%H:%M:%S.000Z"),
             "author": self.student.guid
         }
     }).replace(" ",""))
     headers = {
         **_headers(url,content=data,cookies=self.client._formated_cookies,content_type="x-www-form-urlencoded; charset=UTF-8",accept="*/*"),
         "X-Requested-With": "XMLHttpRequest"
     }
     _requests.post(url,data=data,headers=headers)
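Examples #5 and #6 both build the same form-encoded JSON payload before POSTing it; the encoding step can be reproduced standalone as below. The GUID is a placeholder and the surrounding client/endpoint objects are omitted.

import json
from datetime import datetime
from urllib.parse import quote_plus

payload = {
    "recipient": {"type": "user", "guid": "STUDENT-GUID"},
    "event": {
        "type": "comment",
        "message": "Hello",
        "feedback": "",
        "sent": datetime.now().strftime("%Y-%m-%dT%H:%M:%S.000Z"),
        "author": "STUDENT-GUID",
    },
}
# Note: the original .replace(" ", "") also strips spaces inside string values;
# json.dumps(payload, separators=(",", ":")) removes only the separator whitespace.
data = "data=" + quote_plus(json.dumps(payload).replace(" ", ""))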
Example #7
    def _urlencode(query, params):
        """
        Internal method to combine the url and params into a single url string.

        :param str query: the base url to query
        :param dict params: the parameters to send to the url
        :returns: a *str* of the full url
        """
        return query + '?' + '&'.join(key+'='+_quote_plus(str(value))
                                      for key, value in _Auxiliary._iteritems(params))
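Assuming _Auxiliary._iteritems simply iterates key/value pairs like dict.items(), the same URL construction can be sketched with a plain dict; the URL and parameters below are made up.

from urllib.parse import quote_plus

params = {"q": "hello world", "page": 2}
url = "https://example.com/search" + "?" + "&".join(
    key + "=" + quote_plus(str(value)) for key, value in params.items()
)
# -> 'https://example.com/search?q=hello+world&page=2'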
Example #8
    def _urlencode(query, params):
        """
        Internal method to combine the url and params into a single url string.

        :param str query: the base url to query
        :param dict params: the parameters to send to the url
        :returns: a *str* of the full url
        """
        return query + '?' + '&'.join(key+'='+_quote_plus(str(value))
                                      for key, value in _Auxiliary._iteritems(params))
Example #9
    def _track(self,
               event_name,
               value=1,
               type="gauge",
               properties={},
               meta={},
               send_sys_info=False):
        """
    Internal method to actually send metrics, expected to be called from background thread only.
    """
        return
        if not self._usable:
            return
        the_properties = {}

        if send_sys_info:
            if not self._sys_info_set:
                self._set_sys_info()
            the_properties.update(self._sys_info)

        the_properties.update(properties)

        try:
            # homebrew metrics - cloudfront
            if self._metrics_url != '':
                cloudfront_props = {}
                props = _copy.deepcopy(the_properties)
                props.update(meta)

                cloudfront_props['event_name'] = event_name
                cloudfront_props['value'] = value
                cloudfront_props['distinct_id'] = self._distinct_id
                cloudfront_props['version'] = self._version
                cloudfront_props['isgpu'] = self._isgpu
                cloudfront_props['build_number'] = self._build_number
                cloudfront_props['properties'] = _quote_plus(str(props))

                # if product key is not set, then try to get it now when submitting
                if not self._product_key:
                    try:
                        # product key
                        from .. import product_key
                        self._product_key = product_key.get_product_key()
                    except Exception as e:
                        self._product_key = 'Unknown'
                        pass

                cloudfront_props['product_key'] = self._product_key

                # self.logger.debug("SENDING '%s' to %s" % (cloudfront_props, self._metrics_url))
                logging.getLogger('requests').setLevel(logging.CRITICAL)
                self._requests.get(self._metrics_url, params=cloudfront_props)
        except Exception as e:
            pass
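The "properties" field above URL-encodes the str() of a Python dict rather than JSON, so the receiving side has to parse a Python repr; json.dumps(props) would be the more conventional choice. A standalone illustration with placeholder values:

from urllib.parse import quote_plus

props = {"os": "linux", "cores": 8}
quote_plus(str(props))
# -> '%7B%27os%27%3A+%27linux%27%2C+%27cores%27%3A+8%7D'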
Example #10
    def _track(self, event_name, value=1, type="gauge", properties={}, meta={}, send_sys_info=False):
        """
    Internal method to actually send metrics, expected to be called from background thread only.
    """
        return
        if not self._usable:
            return
        the_properties = {}

        if send_sys_info:
            if not self._sys_info_set:
                self._set_sys_info()
            the_properties.update(self._sys_info)

        the_properties.update(properties)

        try:
            # homebrew metrics - cloudfront
            if self._metrics_url != "":
                cloudfront_props = {}
                props = _copy.deepcopy(the_properties)
                props.update(meta)

                cloudfront_props["event_name"] = event_name
                cloudfront_props["value"] = value
                cloudfront_props["distinct_id"] = self._distinct_id
                cloudfront_props["version"] = self._version
                cloudfront_props["isgpu"] = self._isgpu
                cloudfront_props["build_number"] = self._build_number
                cloudfront_props["properties"] = _quote_plus(str(props))

                # if product key is not set, then try to get it now when submitting
                if not self._product_key:
                    try:
                        # product key
                        from .. import product_key

                        self._product_key = product_key.get_product_key()
                    except Exception as e:
                        self._product_key = "Unknown"
                        pass

                cloudfront_props["product_key"] = self._product_key

                # self.logger.debug("SENDING '%s' to %s" % (cloudfront_props, self._metrics_url))
                logging.getLogger("requests").setLevel(logging.CRITICAL)
                self._requests.get(self._metrics_url, params=cloudfront_props)
        except Exception as e:
            pass
Example #11
def get_redshift_temp_engine(
    cluster_identifier: str,
    user: str,
    database: Optional[str] = None,
    duration: int = 900,
    auto_create: bool = True,
    db_groups: Optional[List[str]] = None,
    boto3_session: Optional[boto3.Session] = None,
    **sqlalchemy_kwargs: Any,
) -> sqlalchemy.engine.Engine:
    """Get Glue connection details.

    Parameters
    ----------
    cluster_identifier : str
        The unique identifier of a cluster.
        This parameter is case sensitive.
    user : str
        The name of a database user.
    database : str, optional
        Database name. If None, the default Database is used.
    duration : int, optional
        The number of seconds until the returned temporary password expires.
        Constraint: minimum 900, maximum 3600.
        Default: 900
    auto_create : bool
        Create a database user with the name specified in user if one does not exist.
    db_groups : List[str], optional
        A list of the names of existing database groups that the user will join for the current session,
        in addition to any group memberships for an existing user.
        If not specified, a new user is added only to PUBLIC.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
    sqlalchemy_kwargs
        keyword arguments forwarded to sqlalchemy.create_engine().
        https://docs.sqlalchemy.org/en/13/core/engines.html

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> engine = wr.db.get_redshift_temp_engine('my_cluster', 'my_user')

    """
    client_redshift: boto3.client = _utils.client(service_name="redshift",
                                                  session=boto3_session)
    args: Dict[str, Any] = {
        "DbUser": user,
        "ClusterIdentifier": cluster_identifier,
        "DurationSeconds": duration,
        "AutoCreate": auto_create,
    }
    if db_groups is not None:
        args["DbGroups"] = db_groups
    res: Dict[str, Any] = client_redshift.get_cluster_credentials(**args)
    _user: str = _quote_plus(res["DbUser"])
    password: str = _quote_plus(res["DbPassword"])
    cluster: Dict[str, Any] = client_redshift.describe_clusters(
        ClusterIdentifier=cluster_identifier)["Clusters"][0]
    host: str = cluster["Endpoint"]["Address"]
    port: int = cluster["Endpoint"]["Port"]
    if database is None:
        database = cluster["DBName"]
    conn_str: str = f"redshift+psycopg2://{_user}:{password}@{host}:{port}/{database}"
    sqlalchemy_kwargs["executemany_mode"] = "values"
    sqlalchemy_kwargs["executemany_values_page_size"] = 100_000
    return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
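A usage sketch for this extended variant, exercising the auto_create, db_groups and sqlalchemy_kwargs pass-throughs; the cluster, user, database and group names are placeholders.

engine = get_redshift_temp_engine(
    cluster_identifier="my-cluster",
    user="etl_user",
    database="analytics",
    duration=3600,            # maximum allowed by GetClusterCredentials
    auto_create=True,         # create the database user if it does not exist
    db_groups=["readonly"],   # groups joined for this session only
    pool_pre_ping=True,       # forwarded to sqlalchemy.create_engine()
)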
Example #12
def quote_plus(s):
    return _quote_plus(to_utf8(s))
Example #13
def quote_plus(s):
    """
    >>> quote_plus(b'foo bar')
    'foo+bar'
    """
    return _quote_plus(to_utf8(s))
Example #14
def quote_plus(s):
    return _quote_plus(to_utf8(s))
Example #15
def quote_plus(s):
    return _quote_plus(s).replace('.', '%2E')
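This variant additionally percent-encodes dots, which quote_plus leaves untouched by default; assuming _quote_plus is urllib.parse.quote_plus:

from urllib.parse import quote_plus as _quote_plus

_quote_plus('a.b c').replace('.', '%2E')   # 'a%2Eb+c'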
Example #16
def quote_plus(arg):
    return _quote_plus(arg)