예제 #1
0
def test_load():
    """
    Check that a JournaledString rebuilt with 'from_json' exposes both its initial and its
    current text, and that offsets in the current text map back to offsets in the initial text.
    """
    # First segment is untouched; second segment replaces "string" with "changed".
    unchanged_segment = {
        "initial": "starter ",
        "current": "starter ",
        "changed": False,
    }
    edited_segment = {
        "initial": "string",
        "current": "changed",
        "changed": True,
    }
    serialized = {
        "value": "starter changed",
        "segments": [unchanged_segment, edited_segment],
    }

    journaled = JournaledString.from_json(serialized)

    # The initial text is the concatenation of the segments' initial values, while the object
    # itself compares equal to the current text.
    assert journaled.initial == "starter string"
    assert journaled == "starter changed"

    # Offsets within the unchanged segment are expected to map to themselves.
    assert journaled.initial_offsets(0, 1) == (0, 1)
    # Offsets within the edited segment are expected to map to (8, 14), which is the span
    # occupied by "string" in the initial text.
    assert journaled.initial_offsets(9, 9) == (8, 14)
예제 #2
0
def load_from_csv(
    csv_path: Path, D: Type[Dataclass], encoding: str = "utf-8",
) -> Iterator[Dataclass]:
    """
    Load data from CSV file at 'csv_path', returning an iterator over objects of type 'D'.
    This method assumes that the CSV file was written by 'append_to_csv'. Key to this assumption is
    that each row of the CSV file has all of the data needed to populate an object of type 'D'. The
    headers in the CSV file must exactly match the property names of 'D'. There can, however,
    be extra columns in the CSV file that don't correspond to the dataclass.

    Each column is cast to the type declared on the matching field of 'D'. Supported field types
    are bool, int, float, str, their Optional variants (where the cell text '<!NULL!>' is read
    as None), JournaledString (parsed from JSON), and List[str] (parsed as a Python literal).
    A field of any other type is reported with a warning and left out of the constructor
    arguments.

    If a cell cannot be parsed as its expected type, a warning is logged and the entire row is
    skipped; loading continues with the next row.
    """
    # Optional primitive types paired with the primitive each one wraps. These are deliberately
    # matched with '==' rather than by identity or hashing; see the note for List[str] below for
    # cautions about using dynamic type-checks like this for mypy types like Optional types.
    optional_primitives = (
        (Optional[bool], bool),
        (Optional[int], int),
        (Optional[float], float),
        (Optional[str], str),
    )

    with open(csv_path, encoding=encoding, newline="") as csv_file:
        reader = csv.DictReader(csv_file, quoting=csv.QUOTE_MINIMAL)
        # The fields of D do not change from row to row, so look them up only once.
        fields = dataclasses.fields(D)

        for row in reader:
            # Transfer data from the row into a dictionary of arguments. By only including the
            # fields for D, we skip over columns that can't be used to initialize D. At the
            # same time, cast each column to the intended data type.
            data: Dict[str, Any] = {}
            invalid = False

            for field in fields:
                try:
                    type_ = field.type
                    is_optional = False

                    # If the field is optional, determine which primitive type a non-null value
                    # should be cast to.
                    for optional_type, primitive in optional_primitives:
                        if type_ == optional_type:
                            is_optional = True
                            type_ = primitive
                            break

                    # Optional fields use a special marker for null values.
                    if is_optional and row[field.name] == "<!NULL!>":
                        data[field.name] = None
                    # Journaled strings should be loaded from JSON.
                    elif type_ == JournaledString:
                        data[field.name] = JournaledString.from_json(
                            json.loads(row[field.name])
                        )
                    # Rules for reading Booleans. Support casting of '0' and '1' or the strings
                    # 'True' and 'False'. 'True' and 'False' are the default output of CSV writer.
                    elif type_ == bool:
                        data[field.name] = bool(ast.literal_eval(row[field.name]))
                    # Handle other primitive values.
                    elif type_ in [int, float, str]:
                        data[field.name] = type_(row[field.name])
                    # XXX(andrewhead): It's not guaranteed that type-checks like this one will work
                    # as the 'typing' library evolves. At the time of writing, it looked like calls
                    # to the '__eq__' method of classes that extend GenericMeta (like List, Tuple)
                    # should work (i.e., comparing a type with '=='). See:
                    # https://github.com/python/typing/blob/c85016137eab6d0784b76252460235638087f468/src/typing.py#L1093-L1098
                    # See also this test for equality in the Tuple class.
                    # https://github.com/python/typing/blob/c85016137eab6d0784b76252460235638087f468/src/test_typing.py#L400
                    # If at some point this comparison stops working, perhaps we can define a custom
                    # type for types of interest (like StrList) and compare the ID of the newly defined type.
                    elif field.type == List[str]:
                        data[field.name] = ast.literal_eval(row[field.name])
                    else:
                        logging.warning(
                            "Could not decode data for field %s of type %s . "
                            "This may mean that the rules for reading CSV files need to "
                            "be extended to support this data type.",
                            field.name,
                            field.type,
                        )
                # 'ast.literal_eval' raises SyntaxError (not ValueError) for text that is not a
                # parseable Python expression, such as an empty cell. It must be caught here too,
                # or a single malformed cell would abort the whole load instead of skipping the row.
                except (ValueError, SyntaxError, json.JSONDecodeError) as e:
                    logging.warning(
                        "Could not read value '%s' for field '%s' of expected type %s from CSV. "
                        "Error: %s. This row will be skipped. This value probably had an "
                        "invalid type when the data for the row was created.",
                        row[field.name],
                        field.name,
                        field.type,
                        e,
                    )
                    # Keep checking the remaining fields so every bad value in the row is reported.
                    invalid = True

            if not invalid:
                yield D(**data)  # type: ignore