def dict_to_type(o, expected_type):
    """
    Try to parse the given dict (from json/etc) into the given NamedTuple.

    TODO: Doesn't handle indirectly embedded NamedTuples, such as within a
          list, or Optionals/Unions/etc (It's currently only used for simple
          cases, not complex hierarchies)

    >>> Status.ACTIVE == dict_to_type('active', expected_type=Status)
    True
    >>> dict_to_type('eating', expected_type=Status)
    Traceback (most recent call last):
    ...
    ValueError: 'eating' is not a valid Status
    >>> # More tests in test_serialise.py
    """
    if o is None:
        return None

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(datetime.datetime, _structure_as_datetime)
    c.register_structure_hook(pathlib.Path, _structure_as_pathlib)
    # Needed for cattrs 1.0.0: our properties are a raw dict, so do nothing to them.
    c.register_structure_hook(dict, _passthrough)
    return c.structure(o, expected_type)
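# The _structure_as_* helpers above are defined elsewhere in the module and
# not shown here. A minimal sketch of what they might look like, assuming
# JSON carries these types as plain strings:
import datetime
import pathlib
import uuid


def _structure_as_uuid(value, _type):
    return uuid.UUID(value)


def _structure_as_datetime(value, _type):
    return datetime.datetime.fromisoformat(value)


def _structure_as_pathlib(value, _type):
    return pathlib.Path(value)


def _passthrough(value, _type):
    # Leave raw dicts untouched instead of letting cattrs recurse into them.
    return value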
def from_dict(cls, d) -> "ExperimentV6": converter = cattr.Converter() converter.register_structure_hook( datetime.datetime, lambda num, _: datetime.datetime.fromisoformat( num.replace("Z", "+00:00")), ) return converter.structure(d, cls)
def attr_from_json_dict(attr_dict: Dict[str, Any]) -> attr.Attribute:
    """Converts a JSON dict created by |attr_to_json_dict| above into the attr
    object it was originally created from."""
    module = importlib.import_module(attr_dict.pop("__module__"))
    cls = getattr(module, attr_dict.pop("__classname__"))
    converter = with_datetime_hooks(cattr.Converter())
    return converter.structure(attr_dict, cls)
def _unstructure(self) -> typing.Any:
    conv = cattr.Converter(  # type: ignore
        unstruct_strat=cattr.UnstructureStrategy.AS_DICT)
    conv.register_unstructure_hook(Decimal, lambda d: str(d))
    conv.register_unstructure_hook(datetime, lambda d: d.isoformat())
    conv.register_unstructure_hook(UUID, lambda d: str(d))
    # might need a date to datetime conversion hook
    return conv.unstructure(self)
def from_experimenter(cls, session: requests.Session = None) -> "ExperimentCollection":
    session = session or requests.Session()
    experiments = session.get(cls.EXPERIMENTER_API_URL).json()

    converter = cattr.Converter()
    converter.register_structure_hook(
        dt.datetime,
        lambda num, _: cls._unix_millis_to_datetime(num),
    )
    return cls([converter.structure(experiment, Experiment)
                for experiment in experiments])
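# `_unix_millis_to_datetime` is not shown in this snippet; a plausible
# sketch, assuming the Experimenter API reports timestamps as Unix epoch
# milliseconds:
import datetime as dt


def _unix_millis_to_datetime(num) -> dt.datetime:
    return dt.datetime.fromtimestamp(num / 1000, tz=dt.timezone.utc)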
def from_dict(cls, d) -> "ExperimentV4": converter = cattr.Converter() converter.register_structure_hook( datetime.datetime, lambda num, _: pytz.utc.localize( datetime.datetime.strptime(num, "%Y-%m-%d")), ) return converter.structure(d, cls)
def _structure(self, json_input: typing.Iterable[typing.Any]) -> typing.Any:
    conv = cattr.Converter(  # type: ignore
        unstruct_strat=cattr.UnstructureStrategy.AS_DICT)
    conv.register_structure_hook(Decimal, lambda d, t: Decimal(d))
    conv.register_structure_hook(datetime, lambda dt, t: parser.parse(dt))
    conv.register_structure_hook(date, lambda dt, t: parser.parse(dt))
    conv.register_structure_hook(UUID, lambda d, t: UUID(d))
    return conv.structure(json_input, type(self))
def from_dict(cls, d: dict):
    """Deserialize from a nested dict `d`."""
    converter = cattr.Converter()
    disambiguators = cls._get_all_disambiguators()
    for union_type, func in disambiguators.items():
        # Bind `func` via a default argument to avoid late binding in the loop.
        converter.register_structure_hook(
            union_type,
            lambda o, t, hook=func: converter.structure(o, hook(o, t)))
    return converter.structure(d, cls)
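# A hedged sketch of the disambiguator pattern used above: each registered
# function inspects the raw dict (and the Union type) and returns the
# concrete class to structure into. The Circle/Square classes and the
# "radius" heuristic here are hypothetical, not from the source.
from typing import Union

import attr
import cattr


@attr.s(auto_attribs=True)
class Circle:
    radius: float


@attr.s(auto_attribs=True)
class Square:
    side: float


def _disambiguate_shape(o, _t):
    return Circle if "radius" in o else Square


converter = cattr.Converter()
converter.register_structure_hook(
    Union[Circle, Square],
    lambda o, t: converter.structure(o, _disambiguate_shape(o, t)),
)
assert converter.structure({"radius": 1.0}, Union[Circle, Square]) == Circle(1.0)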
def attr_to_json_dict(attr_obj: attr.Attribute) -> Dict[str, Any]:
    """Converts an attr-defined object to a JSON dict.

    The resulting dict can be converted back using |attr_from_json_dict|
    below, which uses the __module__ and __classname__ fields to reconstruct
    the object using the same Converter used to unstructure it here."""
    converter = with_datetime_hooks(cattr.Converter())
    attr_dict = converter.unstructure(attr_obj)
    attr_dict["__classname__"] = attr_obj.__class__.__name__
    attr_dict["__module__"] = attr_obj.__module__
    return attr_dict
def _get_converter(self):
    if not hasattr(self, "_converter"):
        converter = cattr.Converter()
        converter.register_unstructure_hook(
            datetime.datetime, hooks.datetime_unstructure_hook
        )
        uh = functools.partial(hooks.unstructure_hook, converter)
        converter.register_unstructure_hook(Model, uh)  # type: ignore
        self._converter = converter
    return self._converter
def to_serializable(self) -> Dict[str, Any]:
    converter = serialization.with_datetime_hooks(cattr.Converter())
    unstructured: Dict[str, Any] = converter.unstructure(self)

    # The structure of result_details depends on the actual type of check. It
    # is stored as a JSON string so that all the fields can be stored and
    # extracted in the query.
    # TODO(#7544): Make result_details responsible for constraining its fields
    # when serializing so they don't scale linearly.
    if details := unstructured["result_details"]:
        unstructured["result_details"] = json.dumps(details)
    return unstructured
def attr_from_json_dict(attr_dict: Dict[str, Any]) -> attr.Attribute:
    """Converts a JSON dict created by |attr_to_json_dict| above into the attr
    object it was originally created from."""
    module = importlib.import_module(attr_dict.pop('__module__'))
    cls = getattr(module, attr_dict.pop('__classname__'))
    converter = cattr.Converter()
    converter.register_structure_hook(
        datetime.datetime,
        lambda date_str, _: serializable_to_datetime(date_str))
    return converter.structure(attr_dict, cls)
def from_dict(cls, dict_data: dict) -> "Stylespace":
    """Construct Stylespace from unstructured dict data."""
    converter = cattr.Converter()
    converter.register_structure_hook(
        FlagList,
        lambda list_of_str_flags, cls: cls(
            [getattr(AxisValueFlag, f) for f in list_of_str_flags]),
    )
    converter.register_structure_hook(
        NameRecord, lambda data, cls: cls.structure(data))
    return converter.structure(dict_data, cls)
def attr_to_json_dict(attr_obj: attr.Attribute) -> Dict[str, Any]:
    """Converts an attr-defined object to a JSON dict.

    The resulting dict can be converted back using |attr_from_json_dict|
    below, which uses the __module__ and __classname__ fields to reconstruct
    the object using the same Converter used to unstructure it here."""
    converter = cattr.Converter()
    # Unstructure hooks receive only the object (no type argument).
    converter.register_unstructure_hook(
        datetime.datetime, lambda d: datetime_to_serializable(d))
    # Use the converter with the registered hook, not the global
    # cattr.unstructure, which would ignore the datetime hook.
    attr_dict = converter.unstructure(attr_obj)
    attr_dict['__classname__'] = attr_obj.__class__.__name__
    attr_dict['__module__'] = attr_obj.__module__
    return attr_dict
def _setup_converter() -> cattr.Converter:
    result = cattr.Converter()
    result.register_structure_hook(uuid.UUID, lambda d, _: uuid.UUID(d))
    result.register_unstructure_hook(uuid.UUID, str)
    result.register_structure_hook(datetime.datetime,
                                   lambda d, _: dateutil.parser.parse(d))
    result.register_unstructure_hook(
        datetime.datetime,
        lambda obj: obj.replace(tzinfo=datetime.timezone.utc)
        .astimezone()
        .replace(microsecond=0)
        .isoformat(),
    )
    return result
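# Possible round-trip usage of _setup_converter() with a hypothetical attrs
# class (Event is not from the source). Note the unstructure hook drops
# microseconds and converts to the local offset, so round trips are lossy.
import attr


@attr.s(auto_attribs=True)
class Event:
    id: uuid.UUID
    at: datetime.datetime


converter = _setup_converter()
event = Event(id=uuid.uuid4(),
              at=datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc))
raw = converter.unstructure(event)       # e.g. {'id': '...', 'at': '2020-01-01T...'}
again = converter.structure(raw, Event)  # strings parsed back into UUID/datetime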
def from_dict(cls, d: Mapping[str, Any]):
    converter = cattr.Converter()

    def discriminate_telemetry(d, type):
        kind = d.pop("kind")
        for klass in type.__args__:
            if kind == klass.kind:
                return klass(**d)
        raise ValueError(f"Could not discriminate telemetry kind {kind}")

    converter.register_structure_hook(FeatureTelemetryType, discriminate_telemetry)
    return converter.structure(d, cls)
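# Hedged sketch of the discriminated-union setup the hook above assumes:
# each member class carries a class-level `kind` tag, and the Union's
# __args__ are scanned for a match. All names here are hypothetical.
from typing import Union

import attr
import cattr


@attr.s(auto_attribs=True)
class EventTelemetry:
    kind = "event"  # deliberately unannotated so attrs leaves it as a class attribute
    name: str


@attr.s(auto_attribs=True)
class ScalarTelemetry:
    kind = "scalar"
    name: str


FeatureTelemetryType = Union[EventTelemetry, ScalarTelemetry]


def _discriminate(d, type_):
    d = dict(d)  # copy first; the original hook mutates the caller's dict
    kind = d.pop("kind")
    for klass in type_.__args__:
        if kind == klass.kind:
            return klass(**d)
    raise ValueError(f"Could not discriminate telemetry kind {kind}")


converter = cattr.Converter()
converter.register_structure_hook(FeatureTelemetryType, _discriminate)
assert isinstance(
    converter.structure({"kind": "event", "name": "clicks"}, FeatureTelemetryType),
    EventTelemetry,
)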
def from_bytes(cls, stylespace_content: bytes):
    stylespace_content_parsed = fontTools.misc.plistlib.loads(stylespace_content)
    converter = cattr.Converter()
    converter.register_structure_hook(
        FlagList,
        lambda list_of_str_flags, cls: cls(
            [getattr(AxisValueFlag, f) for f in list_of_str_flags]
        ),
    )
    converter.register_structure_hook(
        NameRecord, lambda data, cls: cls.structure(data)
    )
    stylespace = converter.structure(stylespace_content_parsed, cls)
    return stylespace
def of_query(cls, query_file, metadata=None, dag_collection=None):
    """
    Create a task that schedules the corresponding query in Airflow.

    Raises FileNotFoundError if no metadata file exists for the query.
    If `metadata` is set, it is used instead of the metadata.yaml file
    that might exist alongside the query file.
    """
    converter = cattr.Converter()
    if metadata is None:
        metadata = Metadata.of_sql_file(query_file)

    dag_name = metadata.scheduling.get("dag_name")
    if dag_name is None:
        raise UnscheduledTask(
            f"Metadata for {query_file} does not contain scheduling information."
        )

    task_config = {"query_file": str(query_file)}
    task_config.update(metadata.scheduling)

    if len(metadata.owners) <= 0:
        raise TaskParseException(
            f"No owner specified in metadata for {query_file}."
        )

    # Airflow only allows a single owner to be set, so we just take the first.
    task_config["owner"] = metadata.owners[0]

    # Get the default email from default_args if available.
    default_email = []
    if dag_collection is not None:
        dag = dag_collection.dag_by_name(dag_name)
        if dag is not None:
            default_email = dag.default_args.email

    email = task_config.get("email", default_email)
    # Owners get added to the email list.
    task_config["email"] = list(set(email + metadata.owners))

    # Data processed in the task should be published.
    if metadata.is_public_json():
        task_config["public_json"] = True

    try:
        return converter.structure(task_config, cls)
    except TypeError as e:
        raise TaskParseException(
            f"Invalid scheduling information format for {query_file}: {e}"
        )
def main():
    args = parser.parse_args()
    experiments = get_experiments()

    destination_table = (
        f"{args.project}.{args.destination_dataset}.{args.destination_table}")

    bq_schema = (
        bigquery.SchemaField("experimenter_slug", "STRING"),
        bigquery.SchemaField("normandy_slug", "STRING"),
        bigquery.SchemaField("type", "STRING"),
        bigquery.SchemaField("status", "STRING"),
        bigquery.SchemaField("start_date", "DATE"),
        bigquery.SchemaField("end_date", "DATE"),
        bigquery.SchemaField("proposed_enrollment", "INTEGER"),
        bigquery.SchemaField("reference_branch", "STRING"),
        bigquery.SchemaField("is_high_population", "BOOL"),
        bigquery.SchemaField(
            "branches",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("slug", "STRING"),
                bigquery.SchemaField("ratio", "INTEGER"),
            ],
        ),
        bigquery.SchemaField("app_id", "STRING"),
        bigquery.SchemaField("app_name", "STRING"),
        bigquery.SchemaField("channel", "STRING"),
    )

    job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
    job_config.schema = bq_schema

    converter = cattr.Converter()
    converter.register_unstructure_hook(
        datetime.datetime, lambda d: d.strftime("%Y-%m-%d"))
    blob = converter.unstructure(experiments)

    if args.dry_run:
        print(json.dumps(blob))
        sys.exit(0)

    client = bigquery.Client(args.project)
    client.load_table_from_json(blob, destination_table,
                                job_config=job_config).result()
    print(f"Loaded {len(experiments)} experiments")
def from_file(cls, metadata_file):
    """Parse metadata from the provided file and create a new Metadata instance."""
    friendly_name = None
    description = None
    owners = []
    labels = {}
    scheduling = {}
    bigquery = None

    with open(metadata_file, "r") as yaml_stream:
        try:
            metadata = yaml.safe_load(yaml_stream)

            friendly_name = metadata.get("friendly_name", None)
            description = metadata.get("description", None)

            if "labels" in metadata:
                labels = {}
                for key, label in metadata["labels"].items():
                    if isinstance(label, bool):
                        # Publish a key-value pair with a bool value as a tag.
                        if label:
                            labels[str(key)] = ""
                    elif isinstance(label, list):
                        labels[str(key)] = list(map(str, label))
                    else:
                        # All other pairs get published as key-value pair labels.
                        labels[str(key)] = str(label)

            if "scheduling" in metadata:
                scheduling = metadata["scheduling"]

            if "bigquery" in metadata and metadata["bigquery"]:
                converter = cattr.Converter()
                bigquery = converter.structure(metadata["bigquery"], BigQueryMetadata)

            if "owners" in metadata:
                owners = metadata["owners"]

            return cls(friendly_name, description, owners, labels, scheduling, bigquery)
        except yaml.YAMLError as e:
            raise e
def add_export_task(self, task):
    """Add a new task to the DAG for exporting data of the original query to GCS."""
    if not task.public_json:
        # logging.warn is a deprecated alias for logging.warning.
        logging.warning(f"Task {task.task_name} not marked as public JSON.")
        return

    converter = cattr.Converter()
    task_dict = converter.unstructure(task)
    del task_dict["dataset"]
    del task_dict["table"]
    del task_dict["version"]
    export_task = converter.structure(task_dict, Task)
    export_task.dag_name = self.name
    export_task.task_name = f"export_public_data_json_{export_task.task_name}"
    export_task.dependencies = [task]

    self.add_tasks([export_task])
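# The unstructure -> edit -> structure round trip above is a general way to
# clone an attrs object with some fields changed or dropped; a minimal
# stand-in (Item is hypothetical):
import attr
import cattr


@attr.s(auto_attribs=True)
class Item:
    name: str
    version: int = 1


converter = cattr.Converter()
d = converter.unstructure(Item("a", 2))
d["version"] = 1  # tweak the raw dict before rebuilding
clone = converter.structure(d, Item)
assert clone == Item("a", 1)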
def configure(cls, filename: str = "looker.ini",
              section: Optional[str] = None) -> PApiSettings:
    """Configure using a config file and/or environment variables.

    Environment variables will override config file settings. Neither is
    necessary, but some combination must supply the minimum needed to
    instantiate ApiSettings.

    ENV variables map like this:
        <package-prefix>_API_VERSION -> api_version
        <package-prefix>_BASE_URL -> base_url
        <package-prefix>_VERIFY_SSL -> verify_ssl
    """
    api_settings = cls(filename=filename, section=section)
    config_data = api_settings.read_config()
    converter = cattr.Converter()
    converter.register_structure_hook(bool, _convert_bool)
    settings = converter.structure(config_data, ApiSettings)
    return settings
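# `_convert_bool` is defined elsewhere in the SDK and not shown here. A
# plausible sketch, assuming ini/env values arrive as strings like
# "true"/"yes"/"1" (cattrs structure hooks receive the value and the type):
def _convert_bool(val, _type) -> bool:
    if isinstance(val, bool):
        return val
    return str(val).strip().lower() in ("true", "t", "yes", "y", "1")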
@attr.s(auto_attribs=True)
class StatisticResultCollection:
    """Represents a set of statistics result data."""

    data: List[StatisticResult] = attr.Factory(list)

    converter = cattr.Converter()
    converter.register_unstructure_hook(Decimal, lambda x: str(round(x, 6).normalize()))
    converter.register_unstructure_hook(float, lambda x: x if math.isfinite(x) else None)

    def to_dict(self) -> Dict[str, Any]:
        """Return statistic results as a dict."""
        return self.converter.unstructure(self)

    def set_segment(self, segment: str) -> "StatisticResultCollection":
        """Sets the `segment` field in-place on all children."""
        for result in self.data:
            result.segment = segment
        return self
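# What the two class-level hooks above do to values, shown standalone with
# the same registrations:
import math
from decimal import Decimal

import cattr

converter = cattr.Converter()
converter.register_unstructure_hook(Decimal, lambda x: str(round(x, 6).normalize()))
converter.register_unstructure_hook(float, lambda x: x if math.isfinite(x) else None)

assert converter.unstructure(Decimal("1.2300000")) == "1.23"  # rounded, trailing zeros dropped
assert converter.unstructure(float("nan")) is None            # non-finite floats become null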
def write(self, file):
    """Write dataset metadata information to the provided file."""
    metadata_dict = self.__dict__
    if metadata_dict["labels"]:
        for label_key, label_value in metadata_dict["labels"].items():
            # Handle tags.
            if label_value == "":
                metadata_dict["labels"][label_key] = True

    if "description" in metadata_dict:
        metadata_dict["description"] = Literal(metadata_dict["description"])

    converter = cattr.Converter()
    file.write_text(
        yaml.dump(
            converter.unstructure(metadata_dict),
            default_flow_style=False,
            sort_keys=False,
        ))
def configure(cls, filename: str = "looker.ini",
              section: Optional[str] = None) -> "ApiSettings":
    """Configure using a config file and/or environment variables.

    Environment variables will override config file settings. Neither is
    necessary, but some combination must supply the minimum needed to
    instantiate ApiSettings.

    ENV variables map like this:
        <package-prefix>_API_VERSION -> api_version
        <package-prefix>_BASE_URL -> base_url
        <package-prefix>_VERIFY_SSL -> verify_ssl
    """
    config_data = cls.read_ini(filename, section)

    env_api_version = cast(
        str, os.getenv(f"{constants.environment_prefix}_API_VERSION"))
    if env_api_version:
        config_data["api_version"] = env_api_version

    env_base_url = cast(
        str, os.getenv(f"{constants.environment_prefix}_BASE_URL"))
    if env_base_url:
        config_data["base_url"] = env_base_url

    env_verify_ssl = cast(
        str, os.getenv(f"{constants.environment_prefix}_VERIFY_SSL"))
    if env_verify_ssl:
        config_data["verify_ssl"] = env_verify_ssl

    if not config_data.get("base_url"):
        raise error.SDKError("Required parameter base_url not found.")

    converter = cattr.Converter()
    converter.register_structure_hook(bool, _convert_bool)
    settings: ApiSettings = converter.structure(config_data, cls)
    return settings
def make_cattr(idx_to_node: Dict[int, Node] = None) -> cattr.Converter:
    """Make a cattr.Converter() for `Skeleton`.

    Make a cattr.Converter() that registers structure/unstructure hooks for
    Skeleton objects to handle serialization of skeletons.

    Args:
        idx_to_node: A dict that maps node index to Node objects.

    Returns:
        A cattr.Converter() instance for skeleton serialization and
        deserialization.
    """
    node_to_idx = ({node: idx for idx, node in idx_to_node.items()}
                   if idx_to_node is not None else None)

    _cattr = cattr.Converter()
    _cattr.register_unstructure_hook(
        Skeleton, lambda x: Skeleton.to_dict(x, node_to_idx))
    _cattr.register_structure_hook(
        Skeleton, lambda x, cls: Skeleton.from_dict(x, idx_to_node))
    return _cattr
def test_ignore_unknown_attribs():
    standard = cattr.global_converter
    custom = cattr.Converter()

    @ignore_unknown_attribs(converter=custom)
    @attr.s(auto_attribs=True)
    class Foo:
        bar: int

    nofoo = dict()
    foo = {"bar": 1}
    fooplus = {"bar": 1, "bat": 2}

    with pytest.raises(TypeError):
        standard.structure(nofoo, Foo)
    assert Foo(1) == standard.structure(foo, Foo)
    with pytest.raises(TypeError):
        standard.structure(fooplus, Foo)

    with pytest.raises(TypeError):
        custom.structure(nofoo, Foo)
    assert Foo(1) == custom.structure(foo, Foo)
    assert Foo(1) == custom.structure(fooplus, Foo)
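# `ignore_unknown_attribs` is defined elsewhere; a plausible sketch of the
# behaviour this test exercises, assuming it registers a structure hook on
# the given converter that drops keys the class doesn't declare:
import attr
import cattr


def ignore_unknown_attribs(converter):
    def wrap(cls):
        fields = {f.name for f in attr.fields(cls)}
        converter.register_structure_hook(
            cls,
            lambda d, t: t(**{k: v for k, v in d.items() if k in fields}),
        )
        return cls

    return wrap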
def export_metadata(config: AnalysisConfiguration, bucket_name: str, project_id: str):
    """Export experiment metadata to GCS."""
    if config.experiment.normandy_slug is None:
        return

    metadata = ExperimentMetadata.from_config(config)

    storage_client = storage.Client(project_id)
    bucket = storage_client.get_bucket(bucket_name)
    target_file = f"metadata_{bq_normalize_name(config.experiment.normandy_slug)}"
    target_path = "metadata"
    blob = bucket.blob(f"{target_path}/{target_file}.json")

    logger.info(f"Uploading {target_file} to {bucket_name}/{target_path}.")

    converter = cattr.Converter()
    _datetime_to_json: Callable[[dt.datetime], str] = lambda dt: dt.strftime("%Y-%m-%d")
    converter.register_unstructure_hook(dt.datetime, _datetime_to_json)

    blob.upload_from_string(
        data=json.dumps(converter.unstructure(metadata), sort_keys=True, indent=4),
        content_type="application/json",
    )
def from_doc(doc: Dict, skip_validation=False,
             normalise_properties=False) -> DatasetDoc:
    """
    Parse a dictionary into an EO3 dataset.

    By default it will validate it against the schema, which will result in
    far more useful error messages if fields are missing.

    :param doc: A dictionary, such as is returned from yaml.load or json.load
    :param skip_validation: Optionally disable validation (it's faster, but I
        hope your doc is structured correctly)
    """
    if not skip_validation:
        DATASET_SCHEMA.validate(doc)

    # TODO: stable cattrs (<1.0) balks at the $schema variable.
    doc = doc.copy()
    del doc["$schema"]

    location = doc.pop("location", None)
    if location:
        doc["locations"] = [location]

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(BaseGeometry, _structure_as_shape)
    c.register_structure_hook(
        Eo3Dict,
        partial(_structure_as_stac_props, normalise_properties=normalise_properties),
    )
    c.register_structure_hook(Affine, _structure_as_affine)
    c.register_unstructure_hook(Eo3Dict, _unstructure_as_stac_props)
    return c.structure(doc, DatasetDoc)
def from_doc(doc: Dict, skip_validation=False) -> DatasetDoc:
    """
    Convert a document to a dataset.

    By default it will validate it against the schema, which will result in
    far more useful error messages if fields are missing.
    """
    if not skip_validation:
        DATASET_SCHEMA.validate(doc)

    # TODO: stable cattrs (<1.0) balks at the $schema variable.
    doc = doc.copy()
    del doc["$schema"]

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(BaseGeometry, _structure_as_shape)
    c.register_structure_hook(StacPropertyView, _structure_as_stac_props)
    c.register_structure_hook(Affine, _structure_as_affine)
    c.register_unstructure_hook(StacPropertyView, _unstructure_as_stac_props)
    return c.structure(doc, DatasetDoc)