def _parse_events_as_csv( self, parsable_events: List[bytes]) -> List[Dict[Any, Any]]: """Parses a list of events as CSV. Args: parsable_events: Bytes events to parse. Returns: A list of events formatted as CSV. Raises: DataInConnectorBlobParseError: When parsing the blob was unsuccessful. """ try: fields = parsable_events[0].decode('utf-8').split(',') events = [ dict(zip(fields, event.decode('utf-8').split(','))) for event in parsable_events[1:] ] except (ValueError, UnicodeDecodeError) as error: raise errors.DataInConnectorBlobParseError( error=error, msg='Failed to parse the blob as CSV') if not all(len(event) == len(fields) for event in events): raise errors.DataInConnectorBlobParseError( msg='Failed to parse CSV, not all lines have same length.') return events
def test_events_blobs_generator_with_erroneous_blobs(self):
    """Checks that a blob whose events fail to parse is yielded as an error.

    `get_blob_events` is patched to raise a parse error, and the single
    listed blob is expected to come out of the generator with no events,
    an ERROR status and the error's string form as status description.
    """
    self.mocked_list.return_value = ['blob_1']
    error = errors.DataInConnectorBlobParseError(msg='bad_blob')
    patched_get_blob_events = mock.patch.object(
        gcs_hook.GoogleCloudStorageHook,
        'get_blob_events',
        autospec=True,
        side_effect=error)

    with patched_get_blob_events:
        # Consume the generator while the patch is still active.
        actual = []
        for blb in self.gcs_hook.events_blobs_generator():
            actual.append((blb.events, blb.source, blb.location,
                           blb.status, blb.status_desc))
        expected = [
            ([], 'bucket', 'blob_1', blob.BlobStatus.ERROR, str(error)),
        ]
        self.assertListEqual(actual, expected)
def _parse_events_as_json(self, parsable_events: List[bytes] ) -> List[Dict[Any, Any]]: """Parses a list of events as JSON. Args: parsable_events: Bytes events to parse. Returns: A list of events formatted as JSON. Raises: DataInConnectorBlobParseError: When parsing the blob was unsuccessful. """ try: return [json.loads(event.decode('utf-8')) for event in parsable_events] except (json.JSONDecodeError, UnicodeDecodeError) as error: raise errors.DataInConnectorBlobParseError( error=error, msg='Failed to parse the blob as JSON.')