def get_engine(
    connection: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    **sqlalchemy_kwargs: Any,
) -> sqlalchemy.engine.Engine:
    """Build a SQLAlchemy Engine out of a Glue Catalog Connection.

    Only Redshift, PostgreSQL and MySQL are supported.

    Parameters
    ----------
    connection : str
        Connection name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    sqlalchemy_kwargs
        keyword arguments forwarded to sqlalchemy.create_engine().
        https://docs.sqlalchemy.org/en/13/core/engines.html

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> res = wr.catalog.get_engine(name='my_connection')

    """
    details: Dict[str, Any] = get_connection(
        name=connection, catalog_id=catalog_id, boto3_session=boto3_session
    )["ConnectionProperties"]
    # JDBC URL shape: "jdbc:<type>://<host>:<port>/<database>" — split once and index.
    url_parts = details["JDBC_CONNECTION_URL"].split(":")
    db_type: str = url_parts[1].lower()
    host: str = url_parts[2].replace("/", "")
    port, database = url_parts[3].split("/")
    user: str = _quote_plus(details["USERNAME"])
    password: str = _quote_plus(details["PASSWORD"])
    if db_type == "postgresql":
        _utils.ensure_postgresql_casts()
    if db_type in ("redshift", "postgresql"):
        conn_str: str = f"{db_type}+psycopg2://{user}:{password}@{host}:{port}/{database}"
        # Batch INSERTs through psycopg2's "values" executemany mode.
        sqlalchemy_kwargs["executemany_mode"] = "values"
        sqlalchemy_kwargs["executemany_values_page_size"] = 100_000
        return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
    if db_type == "mysql":
        conn_str = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
        return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
    raise exceptions.InvalidDatabaseType(
        f"{db_type} is not a valid Database type."
        f" Only Redshift, PostgreSQL and MySQL are supported."
    )
def get_redshift_temp_engine(
    cluster_identifier: str,
    user: str,
    database: Optional[str] = None,
    duration: int = 900,
    boto3_session: Optional[boto3.Session] = None,
) -> sqlalchemy.engine.Engine:
    """Get a SQLAlchemy Engine for Amazon Redshift using temporary credentials.

    Parameters
    ----------
    cluster_identifier : str
        The unique identifier of a cluster. This parameter is case sensitive.
    user : str, optional
        The name of a database user.
    database : str, optional
        Database name. If None, the default Database is used.
    duration : int, optional
        The number of seconds until the returned temporary password expires.
        Constraint: minimum 900, maximum 3600. Default: 900
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> engine = wr.db.get_redshift_temp_engine('my_cluster', 'my_user')

    """
    client_redshift: boto3.client = _utils.client(service_name="redshift", session=boto3_session)
    res: Dict[str, Any] = client_redshift.get_cluster_credentials(
        DbUser=user, ClusterIdentifier=cluster_identifier, DurationSeconds=duration, AutoCreate=False
    )
    # Redshift may return a decorated user name (e.g. "IAM:<user>"); quote both
    # credentials so they are safe inside the connection URL.
    _user: str = _quote_plus(res["DbUser"])
    password: str = _quote_plus(res["DbPassword"])
    cluster: Dict[str, Any] = client_redshift.describe_clusters(
        ClusterIdentifier=cluster_identifier
    )["Clusters"][0]
    host: str = cluster["Endpoint"]["Address"]
    port: int = cluster["Endpoint"]["Port"]  # the API returns Port as an integer
    if database is None:
        database = cluster["DBName"]
    conn_str: str = f"redshift+psycopg2://{_user}:{password}@{host}:{port}/{database}"
    # psycopg2 "values" executemany mode batches INSERTs for throughput.
    return sqlalchemy.create_engine(
        conn_str, echo=False, executemany_mode="values", executemany_values_page_size=100_000
    )
def quote_plus(val, safe=''):
    """URL-quote *val* (spaces become ``+``) after normalizing it to UTF-8 bytes.

    ``str`` values are encoded to UTF-8; ``bytes`` pass through untouched; any
    other type is stringified first and then encoded.
    """
    kind = val.__class__
    if kind is str:
        payload = val.encode('utf-8')
    elif kind is bytes:
        payload = val
    else:
        payload = str(val).encode('utf-8')
    return _quote_plus(payload, safe=safe)
def _set_done(self, value):
    """Mark the task as done or undone for the current student.

    Arguments:
        value {bool} -- Done [True] or undone [False]
    """
    url = self.client.url + _endpoints.responses.format(task_id=self.id)
    # Compact separators yield the same whitespace-free JSON the API expects,
    # without the previous global str.replace(" ", "") that could corrupt
    # embedded string values.
    payload = _json.dumps(
        {
            "recipient": {"type": "user", "guid": self.student.guid},
            "event": {
                "type": "mark-as-done" if value else "mark-as-undone",
                "feedback": "",
                "sent": _datetime.now().strftime("%Y-%m-%dT%H:%M:%S.000Z"),
                "author": self.student.guid,
            },
        },
        separators=(",", ":"),
    )
    data = "data=" + _quote_plus(payload)
    headers = {
        **_headers(
            url,
            content=data,
            cookies=self.client._formated_cookies,
            content_type="x-www-form-urlencoded; charset=UTF-8",
            accept="*/*",
        ),
        "X-Requested-With": "XMLHttpRequest",
    }
    status_code = _requests.post(url, data=data, headers=headers).status_code
    # Only record the new state when the server acknowledged the change (2xx).
    if 200 <= status_code < 300:
        self.done = value
def send_comment(self, message):
    """Send a comment on the task to the current student.

    Arguments:
        message {str} -- Message to send
    """
    url = self.client.url + _endpoints.responses.format(task_id=self.id)
    # Bug fix: the old ``.replace(" ", "")`` stripped spaces out of the user's
    # *message* as well. Compact JSON separators produce the same
    # whitespace-free envelope without touching string contents.
    payload = _json.dumps(
        {
            "recipient": {"type": "user", "guid": self.student.guid},
            "event": {
                "type": "comment",
                "message": message,
                "feedback": "",
                "sent": _datetime.now().strftime("%Y-%m-%dT%H:%M:%S.000Z"),
                "author": self.student.guid,
            },
        },
        separators=(",", ":"),
    )
    data = "data=" + _quote_plus(payload)
    headers = {
        **_headers(
            url,
            content=data,
            cookies=self.client._formated_cookies,
            content_type="x-www-form-urlencoded; charset=UTF-8",
            accept="*/*",
        ),
        "X-Requested-With": "XMLHttpRequest",
    }
    _requests.post(url, data=data, headers=headers)
def _urlencode(query, params):
    """
    Internal method to combine the url and params into a single url string.

    :param str query: the base url to query
    :param dict params: the parameters to send to the url
    :returns: a *str* of the full url
    """
    # Values are percent-quoted; keys are emitted as-is.
    pairs = (
        '{}={}'.format(key, _quote_plus(str(value)))
        for key, value in _Auxiliary._iteritems(params)
    )
    return '{}?{}'.format(query, '&'.join(pairs))
def _track(self, event_name, value=1, type="gauge", properties={}, meta={}, send_sys_info=False):
    """Send one metric event to the configured collection endpoint.

    Internal method to actually send metrics; expected to be called from the
    background thread only.

    NOTE(review): the mutable default arguments (properties={}, meta={}) are
    never mutated below so they are harmless here, but remain an anti-pattern;
    the ``type`` parameter also shadows the builtin.
    """
    # Kill switch: this bare return deliberately disables metric submission;
    # everything below is intentionally unreachable.
    return
    if not self._usable:
        return
    the_properties = {}
    if send_sys_info:
        # Lazily collect system info once, then merge it into the payload.
        if not self._sys_info_set:
            self._set_sys_info()
        the_properties.update(self._sys_info)
    the_properties.update(properties)
    try:
        # homebrew metrics - cloudfront
        if self._metrics_url != '':
            cloudfront_props = {}
            # Deep-copy so meta merging cannot leak back into the_properties.
            props = _copy.deepcopy(the_properties)
            props.update(meta)
            cloudfront_props['event_name'] = event_name
            cloudfront_props['value'] = value
            cloudfront_props['distinct_id'] = self._distinct_id
            cloudfront_props['version'] = self._version
            cloudfront_props['isgpu'] = self._isgpu
            cloudfront_props['build_number'] = self._build_number
            # Properties are flattened to a quoted string query parameter.
            cloudfront_props['properties'] = _quote_plus(str(props))
            # if product key is not set, then try to get it now when submitting
            if not self._product_key:
                try:
                    # product key
                    from .. import product_key
                    self._product_key = product_key.get_product_key()
                except Exception as e:
                    self._product_key = 'Unknown'
                    pass
            cloudfront_props['product_key'] = self._product_key
            # self.logger.debug("SENDING '%s' to %s" % (cloudfront_props, self._metrics_url))
            # Silence the requests library's own logging while submitting.
            logging.getLogger('requests').setLevel(logging.CRITICAL)
            self._requests.get(self._metrics_url, params=cloudfront_props)
    except Exception as e:
        # Metrics are best-effort: never let telemetry failures propagate.
        pass
def _track(self, event_name, value=1, type="gauge", properties={}, meta={}, send_sys_info=False):
    """Send one metric event to the configured collection endpoint.

    Internal method to actually send metrics; expected to be called from the
    background thread only.

    NOTE(review): the mutable default arguments (properties={}, meta={}) are
    never mutated below so they are harmless here, but remain an anti-pattern;
    the ``type`` parameter also shadows the builtin.
    """
    # Kill switch: this bare return deliberately disables metric submission;
    # everything below is intentionally unreachable.
    return
    if not self._usable:
        return
    the_properties = {}
    if send_sys_info:
        # Lazily collect system info once, then merge it into the payload.
        if not self._sys_info_set:
            self._set_sys_info()
        the_properties.update(self._sys_info)
    the_properties.update(properties)
    try:
        # homebrew metrics - cloudfront
        if self._metrics_url != "":
            cloudfront_props = {}
            # Deep-copy so meta merging cannot leak back into the_properties.
            props = _copy.deepcopy(the_properties)
            props.update(meta)
            cloudfront_props["event_name"] = event_name
            cloudfront_props["value"] = value
            cloudfront_props["distinct_id"] = self._distinct_id
            cloudfront_props["version"] = self._version
            cloudfront_props["isgpu"] = self._isgpu
            cloudfront_props["build_number"] = self._build_number
            # Properties are flattened to a quoted string query parameter.
            cloudfront_props["properties"] = _quote_plus(str(props))
            # if product key is not set, then try to get it now when submitting
            if not self._product_key:
                try:
                    # product key
                    from .. import product_key
                    self._product_key = product_key.get_product_key()
                except Exception as e:
                    self._product_key = "Unknown"
                    pass
            cloudfront_props["product_key"] = self._product_key
            # self.logger.debug("SENDING '%s' to %s" % (cloudfront_props, self._metrics_url))
            # Silence the requests library's own logging while submitting.
            logging.getLogger("requests").setLevel(logging.CRITICAL)
            self._requests.get(self._metrics_url, params=cloudfront_props)
    except Exception as e:
        # Metrics are best-effort: never let telemetry failures propagate.
        pass
def get_redshift_temp_engine(
    cluster_identifier: str,
    user: str,
    database: Optional[str] = None,
    duration: int = 900,
    auto_create: bool = True,
    db_groups: Optional[List[str]] = None,
    boto3_session: Optional[boto3.Session] = None,
    **sqlalchemy_kwargs: Any,
) -> sqlalchemy.engine.Engine:
    """Get a SQLAlchemy Engine for Amazon Redshift using temporary credentials.

    Parameters
    ----------
    cluster_identifier : str
        The unique identifier of a cluster. This parameter is case sensitive.
    user : str, optional
        The name of a database user.
    database : str, optional
        Database name. If None, the default Database is used.
    duration : int, optional
        The number of seconds until the returned temporary password expires.
        Constraint: minimum 900, maximum 3600. Default: 900
    auto_create : bool
        Create a database user with the name specified for the user named in user if one does not exist.
    db_groups : List[str], optional
        A list of the names of existing database groups that the user named in DbUser will join for the current session,
        in addition to any group memberships for an existing user. If not specified, a new user is added only to PUBLIC.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    sqlalchemy_kwargs
        keyword arguments forwarded to sqlalchemy.create_engine().
        https://docs.sqlalchemy.org/en/13/core/engines.html

    Returns
    -------
    sqlalchemy.engine.Engine
        SQLAlchemy Engine.

    Examples
    --------
    >>> import awswrangler as wr
    >>> engine = wr.db.get_redshift_temp_engine('my_cluster', 'my_user')

    """
    client_redshift: boto3.client = _utils.client(service_name="redshift", session=boto3_session)
    args: Dict[str, Any] = {
        "DbUser": user,
        "ClusterIdentifier": cluster_identifier,
        "DurationSeconds": duration,
        "AutoCreate": auto_create,
    }
    if db_groups is not None:
        args["DbGroups"] = db_groups
    res: Dict[str, Any] = client_redshift.get_cluster_credentials(**args)
    # Quote the returned credentials so they are safe inside the connection URL.
    _user: str = _quote_plus(res["DbUser"])
    password: str = _quote_plus(res["DbPassword"])
    cluster: Dict[str, Any] = client_redshift.describe_clusters(
        ClusterIdentifier=cluster_identifier
    )["Clusters"][0]
    host: str = cluster["Endpoint"]["Address"]
    port: int = cluster["Endpoint"]["Port"]  # the API returns Port as an integer
    if database is None:
        database = cluster["DBName"]
    conn_str: str = f"redshift+psycopg2://{_user}:{password}@{host}:{port}/{database}"
    # psycopg2 "values" executemany mode batches INSERTs for throughput.
    sqlalchemy_kwargs["executemany_mode"] = "values"
    sqlalchemy_kwargs["executemany_values_page_size"] = 100_000
    return sqlalchemy.create_engine(conn_str, **sqlalchemy_kwargs)
def quote_plus(s):
    """Quote *s* for a URL query component after coercing it to UTF-8."""
    encoded = to_utf8(s)
    return _quote_plus(encoded)
def quote_plus(s):
    """Quote *s* for a URL query component after coercing it to UTF-8.

    >>> quote_plus(b'foo bar')
    'foo+bar'
    """
    encoded = to_utf8(s)
    return _quote_plus(encoded)
def quote_plus(s):
    """Quote *s* for a URL query string, additionally escaping ``.`` as ``%2E``."""
    quoted = _quote_plus(s)
    return quoted.replace('.', '%2E')
def quote_plus(arg):
    """Thin pass-through wrapper around the underlying ``_quote_plus`` quoting."""
    return _quote_plus(arg)