def test_load():
    """Round-trip check: a JournaledString restored from its JSON form
    preserves the initial value, the current value, and offset mapping."""
    loaded = JournaledString.from_json(
        {
            "value": "starter changed",
            "segments": [
                {"initial": "starter ", "current": "starter ", "changed": False},
                {"initial": "string", "current": "changed", "changed": True},
            ],
        }
    )
    # The pristine text is reconstructed from the segments' 'initial' parts...
    assert loaded.initial == "starter string"
    # ...while equality compares against the mutated ('current') text.
    assert loaded == "starter changed"
    # Offsets inside an unchanged segment map one-to-one.
    assert loaded.initial_offsets(0, 1) == (0, 1)
    # Offsets inside a changed segment map to the full initial span.
    assert loaded.initial_offsets(9, 9) == (8, 14)
# Maps supported Optional primitive annotations to the concrete type their
# non-null values should be cast to. See the note in load_from_csv about the
# fragility of comparing 'typing' constructs with '=='.
_OPTIONAL_PRIMITIVES = {
    Optional[bool]: bool,
    Optional[int]: int,
    Optional[float]: float,
    Optional[str]: str,
}


def load_from_csv(
    csv_path: Path,
    D: Type[Dataclass],
    encoding: str = "utf-8",
) -> Iterator[Dataclass]:
    """
    Load data from CSV file at 'csv_path', returning an iterator over objects
    of type 'D'. This method assumes that the CSV file was written by
    'append_to_csv'. Key to this assumption is that each row of the CSV file
    has all of the data needed to populate an object of type 'D'. The headers
    in the CSV file must exactly match the property names of 'D'. There can,
    however, be extra columns in the CSV file that don't correspond to the
    dataclass.

    Rows containing a value that cannot be parsed for its declared field type
    are logged and skipped; they do not abort the rest of the load.
    """
    with open(csv_path, encoding=encoding, newline="") as csv_file:
        reader = csv.DictReader(csv_file, quoting=csv.QUOTE_MINIMAL)
        for row in reader:
            data: Dict[str, Any] = {}
            invalid = False
            # Transfer data from the row into a dictionary of arguments. By
            # only including the fields for D, we skip over columns that can't
            # be used to initialize D. At the same time, cast each column to
            # the intended data type.
            for field in dataclasses.fields(D):
                try:
                    type_ = field.type
                    # If the field is an Optional primitive, narrow it to the
                    # underlying concrete type so the cast logic below applies.
                    is_optional = type_ in _OPTIONAL_PRIMITIVES
                    if is_optional:
                        type_ = _OPTIONAL_PRIMITIVES[field.type]
                    if is_optional and row[field.name] == "<!NULL!>":
                        # The writer uses this sentinel for None values.
                        data[field.name] = None
                    elif type_ == JournaledString:
                        # Journaled strings should be loaded from JSON.
                        data[field.name] = JournaledString.from_json(
                            json.loads(row[field.name])
                        )
                    elif type_ == bool:
                        # Rules for reading Booleans. Support casting of '0'
                        # and '1' or the strings 'True' and 'False'. 'True'
                        # and 'False' are the default output of CSV writer.
                        data[field.name] = bool(ast.literal_eval(row[field.name]))
                    elif type_ in [int, float, str]:
                        # Handle other primitive values.
                        data[field.name] = type_(row[field.name])
                    # XXX(andrewhead): It's not guaranteed that type-checks like this one will work
                    # as the 'typing' library evolves. At the time of writing, it looked like calls
                    # to the '__eq__' method of classes that extend GenericMeta (like List, Tuple)
                    # should work (i.e., comparing a type with '=='). See:
                    # https://github.com/python/typing/blob/c85016137eab6d0784b76252460235638087f468/src/typing.py#L1093-L1098
                    # See also this test for equality in the Tuple class.
                    # https://github.com/python/typing/blob/c85016137eab6d0784b76252460235638087f468/src/test_typing.py#L400
                    # If at some point this comparison stops working, perhaps we can define a custom
                    # type for types of interest (like StrList) and compare the ID of the newly defined type.
                    elif field.type == List[str]:
                        data[field.name] = ast.literal_eval(row[field.name])
                    else:
                        logging.warning(  # pylint: disable=logging-not-lazy
                            "Could not decode data for field %s of type %s . "
                            + "This may mean that the rules for reading CSV files need to "
                            + "be extended to support this data type.",
                            field.name,
                            field.type,
                        )
                # SyntaxError is included because 'ast.literal_eval' raises it
                # (not ValueError) for malformed input such as an empty cell;
                # without it a single bad value would abort the entire load
                # instead of skipping just this row.
                except (ValueError, SyntaxError, json.JSONDecodeError) as e:
                    logging.warning(  # pylint: disable=logging-not-lazy
                        "Could not read value '%s' for field '%s' of expected type %s from CSV. "
                        + "Error: %s. This row will be skipped. This value probably had an "
                        + "invalid type when the data for the row was created.",
                        row[field.name],
                        field.name,
                        field.type,
                        e,
                    )
                    invalid = True
            if not invalid:
                yield D(**data)  # type: ignore