def emit_mce(self, mce: MetadataChangeEvent) -> None:
    """POST the proposed snapshot of ``mce`` to its DataHub GMS ingest endpoint.

    The snapshot object is passed through ``_rest_li_ify``, wrapped as
    ``{"snapshot": ...}``, serialized with ``json.dumps`` and sent through
    ``self._session``. An equivalent curl command is logged at debug level
    before the request is made.

    :param mce: the change event whose ``proposedSnapshot`` is emitted
    :raises OperationalError: if the request fails or the response carries
        an HTTP error status
    """
    url = self._get_ingest_endpoint(mce)

    raw_mce_obj = mce.proposedSnapshot.to_obj()
    mce_obj = _rest_li_ify(raw_mce_obj)
    snapshot = {"snapshot": mce_obj}
    payload = json.dumps(snapshot)

    curl_command = _make_curl_command(self._session, "POST", url, payload)
    logger.debug(
        "Attempting to emit to DataHub GMS; using curl equivalent to:\n%s",
        curl_command,
    )
    try:
        response = self._session.post(url, data=payload)
        response.raise_for_status()
    except HTTPError as e:
        try:
            info = response.json()
        except ValueError:
            # The error body is not guaranteed to be JSON. Every JSON decode
            # error raised by Response.json() derives from ValueError, so fall
            # back to the HTTP error text instead of leaking a decode error.
            info = {"message": str(e)}
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", info
        ) from e
    except RequestException as e:
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", {"message": str(e)}
        ) from e
def _emit_generic(self, url: str, payload: str) -> None:
    """POST an already-serialized ``payload`` to ``url`` via ``self._session``.

    A curl-equivalent command is logged at debug level first. The request
    uses the configured (connect, read) timeout pair.

    :param url: endpoint to post to
    :param payload: request body, sent as-is
    :raises OperationalError: if the request fails or the response carries
        an HTTP error status; the parsed JSON error body is attached when it
        can be decoded, otherwise the text of the original error
    """
    failure_message = "Unable to emit metadata to DataHub GMS"

    logger.debug(
        "Attempting to emit to DataHub GMS; using curl equivalent to:\n%s",
        _make_curl_command(self._session, "POST", url, payload),
    )

    try:
        response = self._session.post(
            url,
            data=payload,
            timeout=(self._connect_timeout_sec, self._read_timeout_sec),
        )
        response.raise_for_status()
    except HTTPError as http_err:
        try:
            details = response.json()
        except JSONDecodeError:
            # The error body was not JSON; report the original error text.
            raise OperationalError(
                failure_message, {"message": str(http_err)}
            ) from http_err
        else:
            raise OperationalError(failure_message, details) from http_err
    except RequestException as request_err:
        raise OperationalError(
            failure_message, {"message": str(request_err)}
        ) from request_err
def _get_generic(self, url: str) -> Dict:
    """GET ``url`` through ``self._session`` and return the decoded JSON body.

    :param url: endpoint to fetch
    :return: the parsed JSON response
    :raises OperationalError: if the response carries an HTTP error status;
        the parsed JSON error body is attached when it can be decoded,
        otherwise the text of the original error
    """
    failure_message = "Unable to get metadata from DataHub"
    try:
        response = self._session.get(url)
        response.raise_for_status()
        return response.json()
    except HTTPError as http_err:
        try:
            details = response.json()
        except JSONDecodeError:
            # The error body was not JSON; report the original error text.
            raise OperationalError(
                failure_message, {"message": str(http_err)}
            ) from http_err
        else:
            raise OperationalError(failure_message, details) from http_err
def get_latest_timeseries_value(
    self,
    entity_urn: str,
    aspect_name: str,
    aspect_type: Type[Aspect],
    filter_criteria_map: Dict[str, str],
) -> Optional[Aspect]:
    """Fetch the latest value of a timeseries aspect for an entity.

    Posts a ``getTimeseriesAspectValues`` query with ``latestValue`` set and
    one ``EQUAL`` condition per entry of ``filter_criteria_map`` (all
    conditions are AND-ed together).

    :param entity_urn: urn of the entity to query
    :param aspect_name: name of the timeseries aspect
    :param aspect_type: class used to deserialize the aspect via ``from_obj``
    :param filter_criteria_map: field name -> required value
    :return: the deserialized aspect, or None when the response has no values
    :raises OperationalError: if a value is returned without an aspect payload
    """
    filter_criteria = [
        {"field": field, "value": value, "condition": "EQUAL"}
        for field, value in filter_criteria_map.items()
    ]
    query_body = {
        "urn": entity_urn,
        "entity": self._guess_entity_type(entity_urn),
        "aspect": aspect_name,
        "latestValue": True,
        "filter": {"or": [{"and": filter_criteria}]},
    }
    end_point = f"{self.config.server}/aspects?action=getTimeseriesAspectValues"
    resp: Dict = self._post_generic(end_point, query_body)

    values: Optional[list] = resp.get("value", {}).get("values")
    if not values:
        return None

    # Kept as an assert so the exception type seen by callers is unchanged.
    assert len(values) == 1
    aspect_json: Optional[str] = values[0].get("aspect", {}).get("value")
    if aspect_json:
        return aspect_type.from_obj(json.loads(aspect_json), tuples=False)

    # aspect_json is always empty on this path, so report the whole response;
    # interpolating aspect_json here would tell the reader nothing.
    raise OperationalError(f"Failed to find {aspect_type} in response {resp}")
def _post_generic(self, url: str, payload_dict: Dict) -> Dict:
    """Serialize ``payload_dict`` to JSON, POST it to ``url``, return the JSON reply.

    The serialized payload is logged at debug level before the request.

    :param url: endpoint to post to
    :param payload_dict: request body, serialized with ``json.dumps``
    :return: the parsed JSON response
    :raises OperationalError: if the response carries an HTTP error status;
        the parsed JSON error body is attached when it can be decoded,
        otherwise the text of the original error
    """
    failure_message = "Unable to get metadata from DataHub"
    body = json.dumps(payload_dict)
    logger.debug(body)
    try:
        reply: Response = self._session.post(url, body)
        reply.raise_for_status()
        return reply.json()
    except HTTPError as http_err:
        try:
            details = reply.json()
        except JSONDecodeError:
            # The error body was not JSON; report the original error text.
            raise OperationalError(
                failure_message, {"message": str(http_err)}
            ) from http_err
        else:
            raise OperationalError(failure_message, details) from http_err
def _emit_generic(self, url: str, payload: str) -> None:
    """POST an already-serialized ``payload`` to ``url`` via ``self._session``.

    A curl-equivalent command is logged at debug level before the request.

    :param url: endpoint to post to
    :param payload: request body, sent as-is
    :raises OperationalError: if the request fails or the response carries
        an HTTP error status
    """
    curl_command = _make_curl_command(self._session, "POST", url, payload)
    logger.debug(
        "Attempting to emit to DataHub GMS; using curl equivalent to:\n%s",
        curl_command,
    )
    try:
        response = self._session.post(url, data=payload)
        response.raise_for_status()
    except HTTPError as e:
        try:
            info = response.json()
        except ValueError:
            # The error body is not guaranteed to be JSON. Every JSON decode
            # error raised by Response.json() derives from ValueError, so fall
            # back to the HTTP error text instead of leaking a decode error.
            info = {"message": str(e)}
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", info
        ) from e
    except RequestException as e:
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", {"message": str(e)}
        ) from e
def _write_done_callback(
    self,
    record_envelope: RecordEnvelope,
    write_callback: WriteCallback,
    future: concurrent.futures.Future,
) -> None:
    """Report the outcome of a finished write future and notify the callback.

    - Cancelled future: reported as a failure.
    - Successful future: the record is reported as written and the
      ``(start_time, end_time)`` pair returned by the future is reported as
      downstream latency.
    - ``OperationalError``: reported as a failure, or as a warning when
      ``self.treat_errors_as_warnings`` is set.
    - Any other exception: reported as a failure and wrapped in ``Exception``
      before being handed to the callback.

    :param record_envelope: envelope of the record that was written
    :param write_callback: receives ``on_success`` / ``on_failure``
    :param future: the future of the write operation
    """
    if future.cancelled():
        self.report.report_failure({"error": "future was cancelled"})
        write_callback.on_failure(
            record_envelope, OperationalError("future was cancelled"), {}
        )
        return

    if not future.done():
        return

    error = future.exception()
    # Compare against None explicitly: the truthiness of an exception object
    # is not a reliable "no error" signal.
    if error is None:
        self.report.report_record_written(record_envelope)
        start_time, end_time = future.result()
        self.report.report_downstream_latency(start_time, end_time)
        write_callback.on_success(record_envelope, {})
    elif isinstance(error, OperationalError):
        # only OperationalErrors should be ignored
        if not self.treat_errors_as_warnings:
            self.report.report_failure(
                {"error": error.message, "info": error.info}
            )
        else:
            # trim exception stacktraces when reporting warnings
            if "stackTrace" in error.info:
                try:
                    error.info["stackTrace"] = "\n".join(
                        error.info["stackTrace"].split("\n")[0:2]
                    )
                except Exception:
                    # Trimming is best-effort; keep the untrimmed trace.
                    pass

            record = record_envelope.record
            if isinstance(record, MetadataChangeProposalWrapper):
                # include information about the entity that failed
                error.info["id"] = record.entityUrn

            self.report.report_warning(
                {"warning": error.message, "info": error.info}
            )

        write_callback.on_failure(record_envelope, error, error.info)
    else:
        self.report.report_failure({"e": error})
        write_callback.on_failure(record_envelope, Exception(error), {})
def emit_mce(self, mce: MetadataChangeEvent) -> None:
    """POST the proposed snapshot of ``mce`` to its DataHub GMS ingest endpoint.

    The snapshot object is passed through ``_rest_li_ify``, wrapped as
    ``{"snapshot": ...}`` and sent as a JSON body with the
    ``X-RestLi-Protocol-Version: 2.0.0`` header.

    :param mce: the change event whose ``proposedSnapshot`` is emitted
    :raises OperationalError: if the request fails or the response carries
        an HTTP error status
    """
    url = self._get_ingest_endpoint(mce)
    headers = {"X-RestLi-Protocol-Version": "2.0.0"}

    raw_mce_obj = mce.proposedSnapshot.to_obj()
    mce_obj = _rest_li_ify(raw_mce_obj)
    snapshot = {"snapshot": mce_obj}

    try:
        response = requests.post(url, headers=headers, json=snapshot)
        response.raise_for_status()
    except HTTPError as e:
        try:
            info = response.json()
        except ValueError:
            # The error body is not guaranteed to be JSON. Every JSON decode
            # error raised by Response.json() derives from ValueError, so fall
            # back to the HTTP error text instead of leaking a decode error.
            info = {"message": str(e)}
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", info
        ) from e
    except RequestException as e:
        raise OperationalError(
            "Unable to emit metadata to DataHub GMS", {"message": str(e)}
        ) from e
def get_aspect_v2(
    self,
    entity_urn: str,
    aspect_type: Type[Aspect],
    aspect: str,
    aspect_type_name: Optional[str] = None,
) -> Optional[Aspect]:
    """
    Get an aspect for an entity.

    :param str entity_urn: The urn of the entity
    :param Type[Aspect] aspect_type: The type class of the aspect being requested (e.g. datahub.metadata.schema_classes.DatasetProperties)
    :param str aspect: The name of the aspect being requested (e.g. schemaMetadata, datasetProperties, etc.)
    :param Optional[str] aspect_type_name: The fully qualified classname of the aspect being requested. Typically not needed and extracted automatically from the class directly. (e.g. com.linkedin.common.DatasetProperties)
    :return: the aspect deserialized into ``aspect_type`` if present, None if no aspect was found (HTTP status 404)
    :rtype: Optional[Aspect]
    :raises HTTPError: if the HTTP response status is an error other than 404
    :raises OperationalError: if the response does not contain the requested aspect type
    """
    url: str = f"{self._gms_server}/aspects/{Urn.url_encode(entity_urn)}?aspect={aspect}&version=0"
    response = self._session.get(url)
    if response.status_code == 404:
        # not found
        return None
    response.raise_for_status()
    response_json = response.json()

    if not aspect_type_name:
        # getattr with a default makes the fallback below reachable: the
        # previous explicit __getattribute__ call raised AttributeError when
        # RECORD_SCHEMA was missing, and did not see inherited attributes.
        record_schema: Optional[RecordSchema] = getattr(
            aspect_type, "RECORD_SCHEMA", None
        )
        if not record_schema:
            logger.warning(
                f"Failed to infer type name of the aspect from the aspect type class {aspect_type}. Please provide an aspect_type_name. Continuing, but this will fail."
            )
        else:
            aspect_type_name = record_schema.fullname.replace(".pegasus2avro", "")

    aspect_json = response_json.get("aspect", {}).get(aspect_type_name)
    if aspect_json:
        return aspect_type.from_obj(aspect_json, tuples=True)

    raise OperationalError(
        f"Failed to find {aspect_type_name} in response {response_json}"
    )
def get_aspect(
    self,
    entity_urn: str,
    aspect: str,
    aspect_type_name: str,
    aspect_type: Type[Aspect],
) -> Optional[Aspect]:
    """Get version 0 of an aspect for an entity.

    :param entity_urn: urn of the entity
    :param aspect: name of the aspect, sent as the ``aspect`` query parameter
    :param aspect_type_name: key under ``"aspect"`` in the response that
        holds the aspect payload
    :param aspect_type: class used to deserialize the payload via ``from_obj``
    :return: the deserialized aspect, or None on HTTP 404
    :raises HTTPError: if the HTTP response status is an error other than 404
    :raises OperationalError: if the response does not contain ``aspect_type_name``
    """
    # safe="" also escapes "/": quote() leaves slashes untouched by default,
    # so a urn containing one would otherwise add extra path segments.
    encoded_urn = urllib.parse.quote(entity_urn, safe="")
    url = f"{self._gms_server}/aspects/{encoded_urn}?aspect={aspect}&version=0"
    response = self._session.get(url)
    if response.status_code == 404:
        # not found
        return None
    response.raise_for_status()
    response_json = response.json()

    aspect_json = response_json.get("aspect", {}).get(aspect_type_name)
    if aspect_json:
        return aspect_type.from_obj(aspect_json, tuples=True)

    raise OperationalError(
        f"Failed to find {aspect_type_name} in response {response_json}"
    )