Example #1
0
def is_strict_schema_match(schema1: Schema, schema2: Schema) -> bool:
    """
    Two schemas match strictly when they have identical sets of field
    names and every pair of same-named fields is a strict field match.
    """
    if set(schema1.field_names()) != set(schema2.field_names()):
        return False
    # Name sets are equal, so every field of schema1 has a counterpart.
    return all(
        is_strict_field_match(field, schema2.get_field(field.name))
        for field in schema1.fields
    )
Example #2
0
def update_matching_field_definitions(env: Environment, schema: Schema,
                                      update_with_schema: Schema) -> Schema:
    """
    Return a copy of ``schema`` in which every field that also exists (by
    name) in ``update_with_schema`` is replaced by that schema's definition.

    If no field name overlaps, ``schema`` is returned unchanged. Otherwise a
    new generated schema (named "<schema>_with_<update_with_schema>") is
    built, registered with ``env``, and returned.
    """
    merged_fields = []
    any_replaced = False
    for field in schema.fields:
        try:
            replacement = update_with_schema.get_field(field.name)
        except NameError:
            # No same-named field in the update schema; keep the original.
            merged_fields.append(field)
        else:
            merged_fields.append(replacement)
            any_replaced = True
    if not any_replaced:
        return schema
    spec = asdict(schema)
    spec["name"] = f"{schema.name}_with_{update_with_schema.name}"
    spec["fields"] = merged_fields
    new_schema = Schema.from_dict(spec)
    env.add_new_generated_schema(new_schema)
    return new_schema
Example #3
0
def check_casts(
    from_schema: Schema,
    to_schema: Schema,
    cast_level: CastToSchemaLevel = CastToSchemaLevel.SOFT,
    warn_on_downcast=True,
    fail_on_downcast=False,
):
    """
    Inspect field-by-field casts from ``from_schema`` to ``to_schema``.

    For each field of ``from_schema`` that also exists in ``to_schema`` but is
    not a strict match: emit a warning when ``warn_on_downcast`` is true, and
    raise ``SchemaTypeError`` when ``fail_on_downcast`` is true. Fields absent
    from ``to_schema`` are skipped silently.

    NOTE(review): ``cast_level`` is currently unused; earlier (removed)
    logic raised on non-castable types when cast_level was HARD. Kept in
    the signature for backward compatibility.

    TODO: This is ok, but not really matching the reality of casting.
    Casting is really a separate ETL step: it's runtime / storage
    specific, has many different plausible approaches, and is too
    complex to be managed at one level with no context (like we don't
    even know what runtime we are on here)
    Best option for now is to give the user information on what is happening
    (via warns and logging) but not actually block anything --
    let errors arise naturally as they will, otherwise "play on".
    """
    for f in from_schema.fields:
        # Keep the try body minimal: only get_field() is expected to raise
        # NameError for a missing field. Previously the whole loop body was
        # inside the try, so a NameError raised by is_strict_field_match()
        # or the warn/raise machinery would silently skip the field.
        try:
            new_f = to_schema.get_field(f.name)
        except NameError:
            # Field does not exist in the target schema -- nothing to cast.
            continue
        if not is_strict_field_match(f, new_f):
            if warn_on_downcast:
                warnings.warn(
                    f"Downcasting field '{f.name}': {f.field_type} to {new_f.field_type}"
                )
            if fail_on_downcast:
                raise SchemaTypeError(
                    f"Cannot cast (FAIL_ON_DOWNCAST=True) '{f.name}': {f.field_type} to {new_f.field_type} "
                )
Example #4
0
 def create_empty(self, name, storage, schema: Schema):
     """Create an 'empty' object at *name*: a single header line of the
     schema's field names, comma-separated, via the storage API's put()."""
     header_line = ",".join(schema.field_names()) + "\n"
     storage.get_api().put(name, (line for line in (header_line,)))
Example #5
0
def has_subset_fields(sub: Schema, supr: Schema) -> bool:
    """Return True when every field name of ``sub`` also appears in ``supr``."""
    sub_names = set(sub.field_names())
    supr_names = set(supr.field_names())
    return sub_names.issubset(supr_names)
Example #6
0
 def create_empty(self, name, storage, schema: Schema):
     """Write just a header row (comma-joined field names + newline) to
     *name* on *storage*, via the storage API's file interface."""
     # Not sure you'd really ever want to do this?
     header = ",".join(schema.field_names()) + "\n"
     api = storage.get_api()
     with api.open(name, "w") as out:
         out.write(header)