def read_opencypher(self, query: str, headers: Any = None) -> Any:
    """Execute the provided openCypher query.

    Parameters
    ----------
    query : str
        The query to execute
    headers : Any, optional
        Any additional headers that should be associated with the query. Defaults to None.

    Returns
    -------
    Any
        The result of the query.

    Raises
    ------
    exceptions.QueryFailed
        If the HTTP response is not OK.
    """
    # Copy so the caller's dict is never mutated when we inject the default content-type.
    headers = dict(headers) if headers else {}
    if "content-type" not in headers:
        headers["content-type"] = "application/x-www-form-urlencoded"
    url = f"{HTTP_PROTOCOL}://{self.host}:{self.port}/openCypher"
    data = {"query": query}
    req = self._prepare_request("POST", url, data=data, headers=headers)
    res = self._http_session.send(req)
    _logger.debug(res)
    if res.ok:
        return res.json()["results"]
    raise exceptions.QueryFailed(
        f"Status Code: {res.status_code} Reason: {res.reason} Message: {res.text}"
    )
def wait_query(query_execution_id: str, boto3_session: Optional[boto3.Session] = None) -> Dict[str, Any]:
    """Block until the Athena query reaches a final state.

    Parameters
    ----------
    query_execution_id : str
        Athena query execution ID.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, Any]
        Dictionary with the get_query_execution response.

    Examples
    --------
    >>> import awswrangler as wr
    >>> res = wr.athena.wait_query(query_execution_id='query-execution-id')
    """
    client_athena: boto3.client = _utils.client(service_name="athena", session=boto3_session)

    def _poll() -> Dict[str, Any]:
        # Single place that talks to the Athena API for this query.
        return client_athena.get_query_execution(QueryExecutionId=query_execution_id)

    response: Dict[str, Any] = _poll()
    state: str = response["QueryExecution"]["Status"]["State"]
    while state not in _QUERY_FINAL_STATES:
        time.sleep(_QUERY_WAIT_POLLING_DELAY)
        response = _poll()
        state = response["QueryExecution"]["Status"]["State"]
    status_payload = response["QueryExecution"]["Status"]
    _logger.debug("state: %s", state)
    _logger.debug("StateChangeReason: %s", status_payload.get("StateChangeReason"))
    if state == "FAILED":
        raise exceptions.QueryFailed(status_payload.get("StateChangeReason"))
    if state == "CANCELLED":
        raise exceptions.QueryCancelled(status_payload.get("StateChangeReason"))
    return cast(Dict[str, Any], response["QueryExecution"])
def _execute_gremlin(self, query: str, headers: Any = None) -> List[Dict[str, Any]]:
    """Run a Gremlin query over the cached connection and return parsed results.

    Parameters
    ----------
    query : str
        The Gremlin query to submit.
    headers : Any, optional
        Additional headers passed to the connection factory. Defaults to None.

    Returns
    -------
    List[Dict[str, Any]]
        The query results converted to dictionaries by ``GremlinParser``.

    Raises
    ------
    exceptions.QueryFailed
        If the query submission or result retrieval raised any exception.
    """
    try:
        c = self._get_gremlin_connection(headers)
        result = c.submit(query)
        future_results = result.all()
        results = future_results.result()
        return GremlinParser.gremlin_results_to_dict(results)
    except Exception as e:
        # Drop the (possibly broken) connection so the next call reconnects cleanly.
        if isinstance(self.gremlin_connection, client.Client):
            self.gremlin_connection.close()
        self.gremlin_connection = None
        _logger.error(e)
        # Chain the original exception so the full traceback is preserved.
        raise exceptions.QueryFailed(e) from e
def wait_query(query_id: str, boto3_session: Optional[boto3.Session] = None) -> Dict[str, Any]:
    """Wait query ends.

    https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax.html

    Parameters
    ----------
    query_id : str
        Query ID.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, Any]
        Query result payload.

    Examples
    --------
    >>> import awswrangler as wr
    >>> query_id = wr.cloudwatch.start_query(
    ...     log_group_names=["loggroup"],
    ...     query="fields @timestamp, @message | sort @timestamp desc | limit 5",
    ... )
    ... response = wr.cloudwatch.wait_query(query_id=query_id)
    """
    terminal_states: List[str] = ["Complete", "Failed", "Cancelled"]
    client_logs: boto3.client = _utils.client(service_name="logs", session=boto3_session)
    response: Dict[str, Any] = client_logs.get_query_results(queryId=query_id)
    status: str = response["status"]
    # Poll until CloudWatch Logs reports a terminal status.
    while status not in terminal_states:
        time.sleep(_QUERY_WAIT_POLLING_DELAY)
        response = client_logs.get_query_results(queryId=query_id)
        status = response["status"]
    _logger.debug("status: %s", status)
    # Map terminal failure states to the matching wrangler exception.
    failure_exceptions = {"Failed": exceptions.QueryFailed, "Cancelled": exceptions.QueryCancelled}
    exc_class = failure_exceptions.get(status)
    if exc_class is not None:
        raise exc_class(f"query ID: {query_id}")
    return response
def write_gremlin_df(client: NeptuneClient, df: pd.DataFrame, mode: WriteDFType, batch_size: int) -> bool:
    """Write the provided dataframe using Gremlin.

    Parameters
    ----------
    client : NeptuneClient
        The Neptune client to write the dataframe
    df : pd.DataFrame
        The dataframe to write
    mode : WriteDFType
        The type of dataframe to write
    batch_size : int
        The size of the batch to write

    Returns
    -------
    bool
        True if the write operation succeeded

    Raises
    ------
    exceptions.QueryFailed
        If an intermediate batch insert reports failure.
    """
    g = Graph().traversal()
    # enumerate() provides a dense 0-based counter so batching works even when
    # the DataFrame index is non-integer or non-sequential (e.g. after filtering);
    # the original index-based modulo check broke in those cases.
    for i, (_, row) in enumerate(df.iterrows()):
        # Build up a traversal for this row according to the write mode.
        if mode == WriteDFType.EDGE:
            g = _build_gremlin_edges(g, row.to_dict())
        elif mode == WriteDFType.VERTEX:
            g = _build_gremlin_vertices(g, row.to_dict())
        else:
            g = _build_gremlin_update(g, row.to_dict())
        # Flush a full batch and start a fresh traversal.
        if i > 0 and i % batch_size == 0:
            res = _run_gremlin_insert(client, g)
            if res:
                g = Graph().traversal()
            else:
                _logger.debug(res)
                raise exceptions.QueryFailed(
                    """Failed to insert part or all of the data in the DataFrame, please check the log output."""
                )
    # Flush whatever remains after the loop (possibly an empty traversal).
    return _run_gremlin_insert(client, g)
def _execute_sparql(self, query: str, headers: Any) -> Any:
    """Execute a SPARQL query or update against the endpoint.

    Parameters
    ----------
    query : str
        The SPARQL statement to execute.
    headers : Any
        Additional headers for the request; may be None.

    Returns
    -------
    Any
        The JSON-decoded response body.

    Raises
    ------
    exceptions.QueryFailed
        If the HTTP response is not OK.
    """
    # Copy so the caller's dict is never mutated when we inject the default content-type.
    headers = dict(headers) if headers else {}
    s = SPARQLWrapper("")
    s.setQuery(query)
    query_type = s.queryType.upper()
    # SELECT/CONSTRUCT/ASK/DESCRIBE are read queries; anything else is sent as an update.
    if query_type in ["SELECT", "CONSTRUCT", "ASK", "DESCRIBE"]:
        data = {"query": query}
    else:
        data = {"update": query}
    if "content-type" not in headers:
        headers["content-type"] = "application/x-www-form-urlencoded"
    uri = f"{HTTP_PROTOCOL}://{self.host}:{self.port}/sparql"
    req = self._prepare_request("POST", uri, data=data, headers=headers)
    res = self._http_session.send(req)
    _logger.debug(res)
    if res.ok:
        return res.json()
    raise exceptions.QueryFailed(
        f"Status Code: {res.status_code} Reason: {res.reason} Message: {res.text}"
    )
def _get_query_metadata(  # pylint: disable=too-many-statements
    query_execution_id: str,
    boto3_session: boto3.Session,
    categories: Optional[List[str]] = None,
    query_execution_payload: Optional[Dict[str, Any]] = None,
) -> _QueryMetadata:
    """Get query metadata.

    Builds the pandas read configuration (dtypes, date/timestamp columns,
    converters, binary columns) for a finished Athena query.

    Raises
    ------
    exceptions.QueryFailed
        If the supplied payload shows a final state other than SUCCEEDED.
    exceptions.UnsupportedType
        For array/row columns, which require ctas_approach=True.
    """
    if (query_execution_payload is not None) and (query_execution_payload["Status"]["State"] in _QUERY_FINAL_STATES):
        if query_execution_payload["Status"]["State"] != "SUCCEEDED":
            reason: str = query_execution_payload["Status"]["StateChangeReason"]
            raise exceptions.QueryFailed(f"Query error: {reason}")
        _query_execution_payload: Dict[str, Any] = query_execution_payload
    else:
        # No usable payload supplied: block until the query reaches a final state.
        _query_execution_payload = wait_query(query_execution_id=query_execution_id, boto3_session=boto3_session)
    cols_types: Dict[str, str] = get_query_columns_types(
        query_execution_id=query_execution_id, boto3_session=boto3_session
    )
    _logger.debug("cols_types: %s", cols_types)
    dtype: Dict[str, str] = {}
    parse_timestamps: List[str] = []
    parse_dates: List[str] = []
    converters: Dict[str, Any] = {}
    binaries: List[str] = []
    col_name: str
    col_type: str
    for col_name, col_type in cols_types.items():
        if col_type == "array":
            raise exceptions.UnsupportedType(
                "List data type is not support with ctas_approach=False. "
                "Please use ctas_approach=True for List columns."
            )
        if col_type == "row":
            raise exceptions.UnsupportedType(
                "Struct data type is not support with ctas_approach=False. "
                "Please use ctas_approach=True for Struct columns."
            )
        pandas_type: str = _data_types.athena2pandas(dtype=col_type)
        if (categories is not None) and (col_name in categories):
            dtype[col_name] = "category"
        elif pandas_type in ["datetime64", "date"]:
            # Both are parsed as timestamps; dates are additionally truncated later.
            parse_timestamps.append(col_name)
            if pandas_type == "date":
                parse_dates.append(col_name)
        elif pandas_type == "bytes":
            # Read as string first; decoded to bytes afterwards via `binaries`.
            dtype[col_name] = "string"
            binaries.append(col_name)
        elif pandas_type == "decimal":
            # Treat empty/placeholder cells as NULL instead of failing Decimal().
            converters[col_name] = lambda x: Decimal(str(x)) if str(x) not in ("", "none", " ", "<NA>") else None
        else:
            dtype[col_name] = pandas_type
    output_location: Optional[str] = None
    if "ResultConfiguration" in _query_execution_payload:
        output_location = _query_execution_payload["ResultConfiguration"].get("OutputLocation")
    athena_statistics: Dict[str, Union[int, str]] = _query_execution_payload.get("Statistics", {})
    # Bug fix: str(None) would previously produce the literal string "None"
    # when DataManifestLocation is absent; keep a real None instead.
    _manifest = athena_statistics.get("DataManifestLocation")
    manifest_location: Optional[str] = str(_manifest) if _manifest is not None else None
    query_metadata: _QueryMetadata = _QueryMetadata(
        execution_id=query_execution_id,
        dtype=dtype,
        parse_timestamps=parse_timestamps,
        parse_dates=parse_dates,
        converters=converters,
        binaries=binaries,
        output_location=output_location,
        manifest_location=manifest_location,
        raw_payload=_query_execution_payload,
    )
    _logger.debug("query_metadata:\n%s", query_metadata)
    return query_metadata