Example #1
def dict_to_type(o, expected_type):
    """
    Try to parse the given dict (from json/etc) into the given NamedTuple.

    TODO: Doesn't handle indirectly embedded NamedTuples, such as within a list, or Optionals/Unions/etc
    (It's currently only used for simple cases, not complex hierarchies)

    >>> Status.ACTIVE == dict_to_type('active', expected_type=Status)
    True
    >>> dict_to_type('eating', expected_type=Status)
    Traceback (most recent call last):
    ...
    ValueError: 'eating' is not a valid Status
    >>> # More tests in test_serialise.py
    """
    if o is None:
        return None

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(datetime.datetime, _structure_as_datetime)
    c.register_structure_hook(pathlib.Path, _structure_as_pathlib)

    # Needed for cattrs 1.0.0: our properties are a raw dict, so do nothing to them.
    c.register_structure_hook(dict, _passthrough)
    return c.structure(o, expected_type)
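The four hooks registered above are project helpers not shown in the snippet; a minimal sketch of what they plausibly look like (the names come from the snippet, the bodies are assumptions). cattrs structure hooks receive the raw value and the target type and return an instance of that type:

import datetime
import pathlib
import uuid

def _structure_as_uuid(value, _type) -> uuid.UUID:
    # Accept an existing UUID or its canonical string form.
    return value if isinstance(value, uuid.UUID) else uuid.UUID(value)

def _structure_as_datetime(value, _type) -> datetime.datetime:
    # Assumes ISO 8601 strings; the real helper may accept more formats.
    if isinstance(value, datetime.datetime):
        return value
    return datetime.datetime.fromisoformat(value)

def _structure_as_pathlib(value, _type) -> pathlib.Path:
    return pathlib.Path(value)

def _passthrough(value, _type):
    # Leave raw dicts untouched instead of letting cattrs recurse into them.
    return value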
Example #2
 def from_dict(cls, d) -> "ExperimentV6":
     converter = cattr.Converter()
     converter.register_structure_hook(
         datetime.datetime,
         lambda value, _: datetime.datetime.fromisoformat(
             value.replace("Z", "+00:00")),
     )
     return converter.structure(d, cls)
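The .replace("Z", "+00:00") works around datetime.fromisoformat rejecting a trailing "Z" before Python 3.11. For context, a usage sketch, assuming the from_dict above is a @classmethod on an attrs class shaped roughly like this (the fields are invented for illustration):

import datetime
import attr

@attr.s(auto_attribs=True)
class ExperimentV6:
    slug: str
    start_date: datetime.datetime

ExperimentV6.from_dict({"slug": "my-experiment", "start_date": "2021-06-01T00:00:00Z"})
# -> ExperimentV6(slug='my-experiment',
#        start_date=datetime.datetime(2021, 6, 1, 0, 0, tzinfo=datetime.timezone.utc))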
Example #3
def attr_from_json_dict(attr_dict: Dict[str, Any]) -> attr.Attribute:
    """Converts a JSON dict created by |attr_to_json_dict| above into the attr
    object it was originally created from."""
    module = importlib.import_module(attr_dict.pop("__module__"))
    cls = getattr(module, attr_dict.pop("__classname__"))

    converter = with_datetime_hooks(cattr.Converter())
    return converter.structure(attr_dict, cls)
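with_datetime_hooks is a helper from the same codebase that is not shown here; a plausible sketch, assuming it simply round-trips datetimes through ISO 8601 strings:

import datetime
import cattr

def with_datetime_hooks(converter: cattr.Converter) -> cattr.Converter:
    # Serialize datetimes as ISO strings and parse them back (assumed format).
    converter.register_unstructure_hook(datetime.datetime, lambda d: d.isoformat())
    converter.register_structure_hook(
        datetime.datetime, lambda s, _: datetime.datetime.fromisoformat(s))
    return converter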
Example #4
 def _unstructure(self) -> typing.Any:
     conv = cattr.Converter(  # type: ignore
         unstruct_strat=cattr.UnstructureStrategy.AS_DICT)
     conv.register_unstructure_hook(Decimal, lambda d: str(d))
     conv.register_unstructure_hook(datetime, lambda d: d.isoformat())
     conv.register_unstructure_hook(UUID, lambda d: str(d))
     # might need a date to datetime conversion hook
     return conv.unstructure(self)
Example #5
 def from_experimenter(cls, session: requests.Session = None) -> "ExperimentCollection":
     session = session or requests.Session()
     experiments = session.get(cls.EXPERIMENTER_API_URL).json()
     converter = cattr.Converter()
     converter.register_structure_hook(
         dt.datetime, lambda num, _: cls._unix_millis_to_datetime(num),
     )
     return cls([converter.structure(experiment, Experiment) for experiment in experiments])
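cls._unix_millis_to_datetime is a helper on the class that is not shown; assuming the Experimenter API reports timestamps as Unix epoch milliseconds, it would look roughly like:

import datetime as dt

def _unix_millis_to_datetime(num: float) -> dt.datetime:
    # Epoch milliseconds -> timezone-aware UTC datetime (assumed semantics).
    return dt.datetime.fromtimestamp(num / 1000, tz=dt.timezone.utc)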
Example #6
 def from_dict(cls, d) -> "ExperimentV4":
     converter = cattr.Converter()
     converter.register_structure_hook(
         datetime.datetime,
         lambda value, _: pytz.utc.localize(
             datetime.datetime.strptime(value, "%Y-%m-%d")),
     )
     return converter.structure(d, cls)
Example #7
 def _structure(self,
                json_input: typing.Iterable[typing.Any]) -> typing.Any:
     conv = cattr.Converter(  # type: ignore
         unstruct_strat=cattr.UnstructureStrategy.AS_DICT)
     conv.register_structure_hook(Decimal, lambda d, t: Decimal(d))
     conv.register_structure_hook(datetime, lambda dt, t: parser.parse(dt))
     conv.register_structure_hook(date, lambda dt, t: parser.parse(dt))
     conv.register_structure_hook(UUID, lambda d, t: UUID(d))
     return conv.structure(json_input, type(self))
Example #8
    def from_dict(cls, d: dict):
        """ Deserialize from a nested dict `d` """
        converter = cattr.Converter()

        disambiguators = cls._get_all_disambiguators()
        for union_type, func in disambiguators.items():
            converter.register_structure_hook(
                union_type,
                lambda o, t, hook=func: converter.structure(o, hook(o, t)))
        return converter.structure(d, cls)
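The loop above registers one hook per Union type; each disambiguator inspects the raw dict and returns the concrete class to structure into. A self-contained sketch of the pattern (Circle and Square are invented for illustration):

import typing
import attr
import cattr

@attr.s(auto_attribs=True)
class Circle:
    radius: float

@attr.s(auto_attribs=True)
class Square:
    side: float

Shape = typing.Union[Circle, Square]

def disambiguate_shape(obj: dict, _union_type) -> type:
    # Pick the concrete class from the keys present in the raw data.
    return Circle if "radius" in obj else Square

converter = cattr.Converter()
converter.register_structure_hook(
    Shape, lambda o, t: converter.structure(o, disambiguate_shape(o, t)))
assert converter.structure({"radius": 2.0}, Shape) == Circle(radius=2.0)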
Example #9
def attr_to_json_dict(attr_obj: attr.Attribute) -> Dict[str, Any]:
    """Converts an attr-defined object to a JSON dict. The resulting dict should
    be unstructured using |attr_from_json_dict| below, which uses the __module__
    and __classname__ fields to reconstruct the object using the same Converter
    used to unstructure it here."""
    converter = with_datetime_hooks(cattr.Converter())
    attr_dict = converter.unstructure(attr_obj)
    attr_dict["__classname__"] = attr_obj.__class__.__name__
    attr_dict["__module__"] = attr_obj.__module__
    return attr_dict
Example #10
 def _get_converter(self):
     if not hasattr(self, "_converter"):
         converter = cattr.Converter()
         converter.register_unstructure_hook(
             datetime.datetime, hooks.datetime_unstructure_hook
         )
         uh = functools.partial(hooks.unstructure_hook, converter)
         converter.register_unstructure_hook(Model, uh)  # type: ignore
         self._converter = converter
     return self._converter
Example #11
 def to_serializable(self) -> Dict[str, Any]:
     converter = serialization.with_datetime_hooks(cattr.Converter())
     unstructured: Dict[str, Any] = converter.unstructure(self)
     # The structure of result_details depends on the actual type of check. It is
     # stored as a JSON string so that all the fields can be stored and extracted in
     # the query.
     # TODO(#7544): Make result_details responsible for constraining its fields when
     # serializing so they don't scale linearly.
     if details := unstructured["result_details"]:
         unstructured["result_details"] = json.dumps(details)
     return unstructured
Example #12
def attr_from_json_dict(attr_dict: Dict[str, Any]) -> attr.Attribute:
    """Converts a JSON dict created by |attr_to_json_dict| above into the attr
    object it was originally created from."""
    module = importlib.import_module(attr_dict.pop('__module__'))
    cls = getattr(module, attr_dict.pop('__classname__'))

    converter = cattr.Converter()
    converter.register_structure_hook(
        datetime.datetime,
        lambda date_str, _: serializable_to_datetime(date_str))
    return converter.structure(attr_dict, cls)
Example #13
 def from_dict(cls, dict_data: dict) -> "Stylespace":
     """Construct Stylespace from unstructured dict data."""
     converter = cattr.Converter()
     converter.register_structure_hook(
         FlagList,
         lambda list_of_str_flags, cls: cls(
             [getattr(AxisValueFlag, f) for f in list_of_str_flags]),
     )
     converter.register_structure_hook(
         NameRecord, lambda data, cls: cls.structure(data))
     return converter.structure(dict_data, cls)
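FlagList and AxisValueFlag belong to the surrounding Stylespace model and are not shown; the hook maps a list of flag names from the plist onto enum members. A rough sketch of the assumed shapes (the member values follow the OpenType STAT flag bits):

import enum
from typing import List, Optional

class AxisValueFlag(enum.Flag):
    OlderSiblingFontAttribute = 0x1
    ElidableAxisValueName = 0x2

class FlagList:
    """Wraps a list of AxisValueFlags; the hook builds it from a list of names."""
    def __init__(self, flags: Optional[List[AxisValueFlag]] = None):
        self.flags = flags or []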
Example #14
def attr_to_json_dict(attr_obj: attr.Attribute) -> Dict[str, Any]:
    """Converts an attr-defined object to a JSON dict. The resulting dict should
    be unstructured using |attr_from_json_dict| below, which uses the __module__
    and __classname__ fields to reconstruct the object using the same Converter
    used to unstructure it here."""
    converter = cattr.Converter()
    converter.register_unstructure_hook(
        datetime.datetime, lambda d: datetime_to_serializable(d))
    attr_dict = converter.unstructure(attr_obj)
    attr_dict['__classname__'] = attr_obj.__class__.__name__
    attr_dict['__module__'] = attr_obj.__module__
    return attr_dict
Example #15
def _setup_converter() -> cattr.Converter:
    result = cattr.Converter()
    result.register_structure_hook(uuid.UUID, lambda d, _: uuid.UUID(d))
    result.register_unstructure_hook(uuid.UUID, str)
    result.register_structure_hook(datetime.datetime,
                                   lambda d, _: dateutil.parser.parse(d))
    result.register_unstructure_hook(
        datetime.datetime,
        lambda obj: obj.replace(tzinfo=datetime.timezone.utc).astimezone().
        replace(microsecond=0).isoformat(),
    )
    return result
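Note the asymmetry above: structuring accepts anything dateutil can parse, while unstructuring stamps naive datetimes as UTC, converts them to the machine's local zone via astimezone(), and drops sub-second precision, so a structure/unstructure round trip is not bit-exact. A quick usage sketch:

import datetime
import uuid

converter = _setup_converter()
converter.unstructure(uuid.UUID("12345678-1234-5678-1234-567812345678"))
# -> '12345678-1234-5678-1234-567812345678'
converter.unstructure(datetime.datetime(2021, 6, 1, 12, 0, 0))
# -> e.g. '2021-06-01T14:00:00+02:00', depending on the local timezone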
Example #16
    def from_dict(cls, d: Mapping[str, Any]):
        converter = cattr.Converter()

        def discriminate_telemetry(d, union_type):
            kind = d.pop("kind")
            for klass in union_type.__args__:
                if kind == klass.kind:
                    return klass(**d)
            raise ValueError(f"Could not discriminate telemetry kind {kind}")

        converter.register_structure_hook(FeatureTelemetryType, discriminate_telemetry)
        return converter.structure(d, cls)
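discriminate_telemetry assumes every member of the FeatureTelemetryType union carries a class-level kind tag matching the "kind" field of the raw data. A sketch of that assumed data model (class names and fields invented for illustration):

import typing
import attr

@attr.s(auto_attribs=True)
class EventTelemetry:
    kind: typing.ClassVar[str] = "event"
    name: str

@attr.s(auto_attribs=True)
class ScalarTelemetry:
    kind: typing.ClassVar[str] = "scalar"
    name: str

# union_type.__args__ in the hook above iterates exactly these two classes.
FeatureTelemetryType = typing.Union[EventTelemetry, ScalarTelemetry]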
Example #17
 def from_bytes(cls, stylespace_content: bytes):
     stylespace_content_parsed = fontTools.misc.plistlib.loads(stylespace_content)
     converter = cattr.Converter()
     converter.register_structure_hook(
         FlagList,
         lambda list_of_str_flags, cls: cls(
             [getattr(AxisValueFlag, f) for f in list_of_str_flags]
         ),
     )
     converter.register_structure_hook(
         NameRecord, lambda data, cls: cls.structure(data)
     )
     stylespace = converter.structure(stylespace_content_parsed, cls)
     return stylespace
Example #18
    def of_query(cls, query_file, metadata=None, dag_collection=None):
        """
        Create task that schedules the corresponding query in Airflow.

        Raises FileNotFoundError if no metadata file exists for the query.
        If `metadata` is set, then it is used instead of the metadata.yaml
        file that might exist alongside the query file.
        """
        converter = cattr.Converter()
        if metadata is None:
            metadata = Metadata.of_sql_file(query_file)

        dag_name = metadata.scheduling.get("dag_name")
        if dag_name is None:
            raise UnscheduledTask(
                f"Metadata for {query_file} does not contain scheduling information."
            )

        task_config = {"query_file": str(query_file)}
        task_config.update(metadata.scheduling)

        if len(metadata.owners) <= 0:
            raise TaskParseException(
                f"No owner specified in metadata for {query_file}."
            )

        # Airflow only allows setting one owner, so we just take the first
        task_config["owner"] = metadata.owners[0]

        # Get default email from default_args if available
        default_email = []
        if dag_collection is not None:
            dag = dag_collection.dag_by_name(dag_name)
            if dag is not None:
                default_email = dag.default_args.email
        email = task_config.get("email", default_email)
        # owners get added to the email list
        task_config["email"] = list(set(email + metadata.owners))

        # data processed in task should be published
        if metadata.is_public_json():
            task_config["public_json"] = True

        try:
            return converter.structure(task_config, cls)
        except TypeError as e:
            raise TaskParseException(
                f"Invalid scheduling information format for {query_file}: {e}"
            )
Example #19
def main():
    args = parser.parse_args()
    experiments = get_experiments()

    destination_table = (
        f"{args.project}.{args.destination_dataset}.{args.destination_table}")

    bq_schema = (
        bigquery.SchemaField("experimenter_slug", "STRING"),
        bigquery.SchemaField("normandy_slug", "STRING"),
        bigquery.SchemaField("type", "STRING"),
        bigquery.SchemaField("status", "STRING"),
        bigquery.SchemaField("start_date", "DATE"),
        bigquery.SchemaField("end_date", "DATE"),
        bigquery.SchemaField("proposed_enrollment", "INTEGER"),
        bigquery.SchemaField("reference_branch", "STRING"),
        bigquery.SchemaField("is_high_population", "BOOL"),
        bigquery.SchemaField(
            "branches",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("slug", "STRING"),
                bigquery.SchemaField("ratio", "INTEGER"),
            ],
        ),
        bigquery.SchemaField("app_id", "STRING"),
        bigquery.SchemaField("app_name", "STRING"),
        bigquery.SchemaField("channel", "STRING"),
    )

    job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
    job_config.schema = bq_schema

    converter = cattr.Converter()
    converter.register_unstructure_hook(
        datetime.datetime,
        lambda d: d.strftime("%Y-%m-%d"))

    blob = converter.unstructure(experiments)
    if args.dry_run:
        print(json.dumps(blob))
        sys.exit(0)

    client = bigquery.Client(args.project)
    client.load_table_from_json(blob, destination_table,
                                job_config=job_config).result()
    print(f"Loaded {len(experiments)} experiments")
Example #20
    def from_file(cls, metadata_file):
        """Parse metadata from the provided file and create a new Metadata instance."""
        friendly_name = None
        description = None
        owners = []
        labels = {}
        scheduling = {}
        bigquery = None

        with open(metadata_file, "r") as yaml_stream:
            try:
                metadata = yaml.safe_load(yaml_stream)

                friendly_name = metadata.get("friendly_name", None)
                description = metadata.get("description", None)

                if "labels" in metadata:
                    labels = {}

                    for key, label in metadata["labels"].items():
                        if isinstance(label, bool):
                            # publish key-value pair with bool value as tag
                            if label:
                                labels[str(key)] = ""
                        elif isinstance(label, list):
                            labels[str(key)] = list(map(str, label))
                        else:
                            # all other pairs get published as key-value pair label
                            labels[str(key)] = str(label)

                if "scheduling" in metadata:
                    scheduling = metadata["scheduling"]

                if "bigquery" in metadata and metadata["bigquery"]:
                    converter = cattr.Converter()
                    bigquery = converter.structure(metadata["bigquery"],
                                                   BigQueryMetadata)

                if "owners" in metadata:
                    owners = metadata["owners"]

                return cls(friendly_name, description, owners, labels,
                           scheduling, bigquery)
            except yaml.YAMLError as e:
                raise e
Example #21
    def add_export_task(self, task):
        """Add a new task to the DAG for exporting data of the original query to GCS."""
        if not task.public_json:
            logging.warning(f"Task {task.task_name} not marked as public JSON.")
            return

        converter = cattr.Converter()
        task_dict = converter.unstructure(task)

        del task_dict["dataset"]
        del task_dict["table"]
        del task_dict["version"]

        export_task = converter.structure(task_dict, Task)
        export_task.dag_name = self.name
        export_task.task_name = f"export_public_data_json_{export_task.task_name}"
        export_task.dependencies = [task]

        self.add_tasks([export_task])
Example #22
    def configure(cls,
                  filename: str = "looker.ini",
                  section: Optional[str] = None) -> PApiSettings:
        """Configure using a config file and/or environment variables.

        Environment variables will override config file settings. Neither
        is necessary but some combination must supply the minimum to
        instantiate ApiSettings.

        ENV variables map like this:
            <package-prefix>_API_VERSION -> api_version
            <package-prefix>_BASE_URL -> base_url
            <package-prefix>_VERIFY_SSL -> verify_ssl
        """
        api_settings = cls(filename=filename, section=section)
        config_data = api_settings.read_config()
        converter = cattr.Converter()
        converter.register_structure_hook(bool, _convert_bool)
        settings = converter.structure(config_data, ApiSettings)
        return settings
Example #23
class StatisticResultCollection:
    """
    Represents a set of statistics result data.
    """

    data: List[StatisticResult] = attr.Factory(list)

    converter = cattr.Converter()
    converter.register_unstructure_hook(Decimal, lambda x: str(round(x, 6).normalize()))
    converter.register_unstructure_hook(float, lambda x: x if math.isfinite(x) else None)

    def to_dict(self) -> Dict[str, Any]:
        """Return statistic results as dict."""
        return self.converter.unstructure(self)

    def set_segment(self, segment: str) -> "StatisticResultCollection":
        """Sets the `segment` field in-place on all children."""
        for result in self.data:
            result.segment = segment
        return self
Example #24
    def write(self, file):
        """Write dataset metadata information to the provided file."""
        metadata_dict = self.__dict__

        if metadata_dict["labels"]:
            for label_key, label_value in metadata_dict["labels"].items():
                # handle tags
                if label_value == "":
                    metadata_dict["labels"][label_key] = True

        if "description" in metadata_dict:
            metadata_dict["description"] = Literal(
                metadata_dict["description"])

        converter = cattr.Converter()
        file.write_text(
            yaml.dump(
                converter.unstructure(metadata_dict),
                default_flow_style=False,
                sort_keys=False,
            ))
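Literal here is not typing.Literal but a small YAML helper: a str subclass registered with a representer so multi-line descriptions are dumped in block style. The usual pattern, assumed since the helper is not shown:

import yaml

class Literal(str):
    """Marker: dump this string using YAML's literal block style ('|')."""

def _literal_representer(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")

yaml.add_representer(Literal, _literal_representer)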
Example #25
    def configure(cls,
                  filename: str = "looker.ini",
                  section: Optional[str] = None) -> "ApiSettings":
        """Configure using a config file and/or environment variables.

        Environment variables will override config file settings. Neither
        is necessary but some combination must supply the minimum to
        instantiate ApiSettings.

        ENV variables map like this:
            <package-prefix>_API_VERSION -> api_version
            <package-prefix>_BASE_URL -> base_url
            <package-prefix>_VERIFY_SSL -> verify_ssl
        """

        config_data = cls.read_ini(filename, section)

        env_api_version = cast(
            str, os.getenv(f"{constants.environment_prefix}_API_VERSION"))
        if env_api_version:
            config_data["api_version"] = env_api_version

        env_base_url = cast(
            str, os.getenv(f"{constants.environment_prefix}_BASE_URL"))
        if env_base_url:
            config_data["base_url"] = env_base_url

        env_verify_ssl = cast(
            str, os.getenv(f"{constants.environment_prefix}_VERIFY_SSL"))
        if env_verify_ssl:
            config_data["verify_ssl"] = env_verify_ssl

        if not config_data.get("base_url"):
            raise error.SDKError("Required parameter base_url not found.")

        converter = cattr.Converter()
        converter.register_structure_hook(bool, _convert_bool)
        settings: ApiSettings = converter.structure(config_data, cls)
        return settings
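Both configure variants rely on _convert_bool because INI values and environment variables arrive as strings. A hedged sketch of such a hook (the accepted spellings are an assumption, not necessarily what the Looker SDK uses):

def _convert_bool(value, _type) -> bool:
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "1", "t", "y", "yes"):
        return True
    if text in ("false", "0", "f", "n", "no"):
        return False
    raise ValueError(f"Cannot convert {value!r} to bool")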
Example #26
    def make_cattr(idx_to_node: Optional[Dict[int, Node]] = None) -> cattr.Converter:
        """Make a cattr.Converter() for `Skeleton`.

        Make a cattr.Converter() that registers structure/unstructure
        hooks for Skeleton objects to handle serialization of skeletons.

        Args:
            idx_to_node: A dict that maps node index to Node objects.

        Returns:
            A cattr.Converter() instance for skeleton serialization
            and deserialization.
        """
        node_to_idx = ({node: idx
                        for idx, node in idx_to_node.items()}
                       if idx_to_node is not None else None)

        _cattr = cattr.Converter()
        _cattr.register_unstructure_hook(
            Skeleton, lambda x: Skeleton.to_dict(x, node_to_idx))
        _cattr.register_structure_hook(
            Skeleton, lambda x, cls: Skeleton.from_dict(x, idx_to_node))
        return _cattr
Example #27
def test_ignore_unknown_attribs():
    standard = cattr.global_converter
    custom = cattr.Converter()

    @ignore_unknown_attribs(converter=custom)
    @attr.s(auto_attribs=True)
    class Foo:
        bar: int

    nofoo = dict()
    foo = {"bar": 1}
    fooplus = {"bar": 1, "bat": 2}

    with pytest.raises(TypeError):
        standard.structure(nofoo, Foo)
    assert Foo(1) == standard.structure(foo, Foo)
    with pytest.raises(TypeError):
        standard.structure(fooplus, Foo)

    with pytest.raises(TypeError):
        custom.structure(nofoo, Foo)
    assert Foo(1) == custom.structure(foo, Foo)
    assert Foo(1) == custom.structure(fooplus, Foo)
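The decorator under test is not shown. One way to build it on cattrs that is consistent with the assertions above (a sketch, not the project's actual implementation) is to register a structure hook on the decorated class that drops keys with no matching attrs field:

import attr
import cattr

def ignore_unknown_attribs(converter: cattr.Converter):
    def decorator(cls):
        field_names = {f.name for f in attr.fields(cls)}
        # Note: this sketch passes field values through unconverted, which is
        # fine for the flat Foo above but not for nested attrs fields.
        converter.register_structure_hook(
            cls, lambda d, t: t(**{k: v for k, v in d.items() if k in field_names}))
        return cls
    return decorator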
Example #28
def export_metadata(config: AnalysisConfiguration, bucket_name: str, project_id: str):
    """Export experiment metadata to GCS."""
    if config.experiment.normandy_slug is None:
        return

    metadata = ExperimentMetadata.from_config(config)

    storage_client = storage.Client(project_id)
    bucket = storage_client.get_bucket(bucket_name)
    target_file = f"metadata_{bq_normalize_name(config.experiment.normandy_slug)}"
    target_path = "metadata"
    blob = bucket.blob(f"{target_path}/{target_file}.json")

    logger.info(f"Uploading {target_file} to {bucket_name}/{target_path}.")

    converter = cattr.Converter()
    _datetime_to_json: Callable[[dt.datetime], str] = lambda d: d.strftime("%Y-%m-%d")
    converter.register_unstructure_hook(dt.datetime, _datetime_to_json)

    blob.upload_from_string(
        data=json.dumps(converter.unstructure(metadata), sort_keys=True, indent=4),
        content_type="application/json",
    )
Example #29
def from_doc(doc: Dict,
             skip_validation=False,
             normalise_properties=False) -> DatasetDoc:
    """
    Parse a dictionary into an EO3 dataset.

    By default it will validate it against the schema, which will result in far more
    useful error messages if fields are missing.

    :param doc: A dictionary, such as is returned from yaml.load or json.load
    :param skip_validation: Optionally disable validation (it's faster, but I hope your
            doc is structured correctly)
    """

    if not skip_validation:
        DATASET_SCHEMA.validate(doc)

    # TODO: stable cattrs (<1.0) balks at the $schema variable.
    doc = doc.copy()
    del doc["$schema"]
    location = doc.pop("location", None)
    if location:
        doc["locations"] = [location]

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(BaseGeometry, _structure_as_shape)
    c.register_structure_hook(
        Eo3Dict,
        partial(_structure_as_stac_props,
                normalise_properties=normalise_properties),
    )

    c.register_structure_hook(Affine, _structure_as_affine)

    c.register_unstructure_hook(Eo3Dict, _unstructure_as_stac_props)
    return c.structure(doc, DatasetDoc)
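For reference, hedged sketches of two of the helper hooks used here, assuming GeoJSON-style geometry mappings and six-coefficient affine transforms (the real eo-datasets helpers may differ):

from affine import Affine
from shapely.geometry import shape
from shapely.geometry.base import BaseGeometry

def _structure_as_shape(value, _type) -> BaseGeometry:
    # GeoJSON-style mapping -> shapely geometry.
    return shape(value)

def _structure_as_affine(value, _type) -> Affine:
    # Stored as a sequence of coefficients; the first six define the transform.
    return Affine(*value[:6])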
Example #30
def from_doc(doc: Dict, skip_validation=False) -> DatasetDoc:
    """
    Convert a document to a dataset.

    By default it will validate it against the schema, which will result in far more
    useful error messages if fields are missing.
    """

    if not skip_validation:
        DATASET_SCHEMA.validate(doc)

    # TODO: stable cattrs (<1.0) balks at the $schema variable.
    doc = doc.copy()
    del doc["$schema"]

    c = cattr.Converter()
    c.register_structure_hook(uuid.UUID, _structure_as_uuid)
    c.register_structure_hook(BaseGeometry, _structure_as_shape)
    c.register_structure_hook(StacPropertyView, _structure_as_stac_props)

    c.register_structure_hook(Affine, _structure_as_affine)

    c.register_unstructure_hook(StacPropertyView, _unstructure_as_stac_props)
    return c.structure(doc, DatasetDoc)