def _constructor_from_field_type(cls, field_type): if hasattr(field_type, 'from_dict'): return field_type.from_dict elif is_dataclass(field_type): return lambda data: field_type(**data) else: return field_type
if field_converter: return field_converter(value_from_dict) if type_is_list_with_item_type(field_type, dc_field): return [_convert_value_for_dataclass(item, list_item_type=field_type.__args__[0]) for item in value_from_dict] elif field_type is list: raise UnspecificListFieldError(dataclass_field=dc_field) if isinstance(value_from_dict, field_type): return value_from_dict if (default_converter_value := _use_default_converter(dc_field, field_type, value_from_dict)) is not None: return default_converter_value if is_dataclass(field_type): return dataclass_from_dict(field_type, value_from_dict) raise DictValueConversionError(dc_field, value_from_dict) def _to_camel_case(snake_str: str): """Converts the given snake_case string to camelCase""" components = snake_str.split("_") return components[0] + "".join(x.title() for x in components[1:]) def _get_value_from_dict(dc_field: Field, origin_dict: dict): # Use the dict_key in the field metadata if one was provided if (dict_key := dc_field.metadata.get("dict_key")) is not None: try:
def check_widget_dataclass(self, obj):
    """Verify ``obj`` is a dataclass exposing exactly one field named 'name'."""
    assert dataclasses.is_dataclass(obj)
    # Tuple-unpacking also enforces that there is exactly one field.
    (only_field,) = dataclasses.fields(obj)
    eq_(only_field.name, "name")
def json_schema(cls, embeddable: bool = False, schema_type: SchemaType = DEFAULT_SCHEMA_TYPE, validate_enums: bool = True, **kwargs) -> JsonDict:
    """Returns the JSON schema for the dataclass, along with the schema of any nested
    dataclasses within the 'definitions' field.

    Enable the embeddable flag to generate the schema in a format for embedding into
    other schemas or documents supporting JSON schema such as Swagger specs.

    If embedding the schema into a swagger api, specify 'swagger_version' to generate
    a spec compatible with that version.
    """
    # Legacy entry point: a non-None 'swagger_version' kwarg overrides schema_type.
    if 'swagger_version' in kwargs and kwargs['swagger_version'] is not None:
        schema_type = kwargs['swagger_version']

    schema_options = SchemaOptions(schema_type, validate_enums)
    # Swagger flavours are only supported when embedding; otherwise fall back to draft-06.
    if schema_options.schema_type in (SchemaType.SWAGGER_V3, SchemaType.SWAGGER_V2) and not embeddable:
        schema_options = SchemaOptions(SchemaType.DRAFT_06, validate_enums)
        warnings.warn("'Swagger schema types unsupported when 'embeddable=False', using 'SchemaType.DRAFT_06'")

    if cls is JsonSchemaMixin:
        warnings.warn("Calling 'JsonSchemaMixin.json_schema' is deprecated. Use 'JsonSchemaMixin.all_json_schemas' instead", DeprecationWarning)
        return cls.all_json_schemas(schema_options.schema_type, validate_enums)

    # Nested-dataclass definitions are cached per SchemaOptions.
    definitions: JsonDict = {}
    if schema_options not in cls.__definitions:
        cls.__definitions[schema_options] = definitions
    else:
        definitions = cls.__definitions[schema_options]

    if cls.__schema is not None and schema_options in cls.__schema:
        schema = cls.__schema[schema_options]
    else:
        properties = {}
        required = []
        for f in cls._get_fields(base_fields=False):
            properties[f.mapped_name], is_required = cls._get_field_schema(f.field, schema_options)
            if f.is_property:
                properties[f.mapped_name]["readOnly"] = True
            cls._get_field_definitions(f.field.type, definitions, schema_options)
            # Only add 'readOnly' properties to required for OpenAPI 3
            if is_required and (not f.is_property or schema_options.schema_type == SchemaType.OPENAPI_3):
                required.append(f.mapped_name)
        schema = {'type': 'object', 'required': required, 'properties': properties}
        if not cls.__allow_additional_props:
            schema["additionalProperties"] = False

        if cls.__discriminator_name is not None and \
                schema_options.schema_type == SchemaType.OPENAPI_3 and \
                not cls.__discriminator_inherited:
            schema['discriminator'] = {'propertyName': cls.__discriminator_name}
            properties[cls.__discriminator_name] = {"type": "string"}
            required.append(cls.__discriminator_name)

        # Needed for Draft 04 backwards compatibility
        if len(required) == 0:
            del schema["required"]

        # Dataclass bases are referenced through 'allOf' rather than inlined.
        dataclass_bases = [
            klass for klass in cls.__bases__
            if is_dataclass(klass) and issubclass(klass, JsonSchemaMixin)
        ]
        if len(dataclass_bases) > 0:
            schema = {
                "allOf": [
                    schema_reference(schema_options.schema_type, base.__name__)
                    for base in dataclass_bases
                ] + [schema]
            }
            for base in dataclass_bases:
                definitions.update(
                    base.json_schema(
                        embeddable=True,
                        schema_type=schema_options.schema_type,
                        validate_enums=schema_options.validate_enums))

        if cls.__doc__:
            schema['description'] = cls.__doc__
        cls.__schema[schema_options] = schema

    if embeddable:
        return {**definitions, cls.__name__: schema}
    else:
        schema_uri = 'http://json-schema.org/draft-06/schema#'
        if schema_options.schema_type == SchemaType.DRAFT_04:
            # FIX: previously read 'json-shema.org' (typo), emitting an invalid
            # '$schema' URI for draft-04 output.
            schema_uri = 'http://json-schema.org/draft-04/schema#'
        full_schema = {**schema, **{'$schema': schema_uri}}
        if len(definitions) > 0:
            full_schema['definitions'] = definitions
        return full_schema
def _is_dataclass_instance(obj): """check whether a class is an instance of a dataclass (and not a dataclass itself)""" return is_dataclass(obj) and not isinstance(obj, type)
def normalize_dict(ordered_dict):
    """
    Main conversion function for the output of xmltodict to the OpenLEADR
    representation of OpenADR contents.

    :param ordered_dict dict: The OrderedDict, dict or dataclass that you wish to convert.
    """
    if is_dataclass(ordered_dict):
        ordered_dict = asdict(ordered_dict)

    def normalize_key(key):
        # Strip OpenADR prefixes, then convert camelCase / dashes to snake_case.
        if key.startswith('oadr'):
            key = key[4:]
        elif key.startswith('ei'):
            key = key[2:]
        # Don't normalize the measurement descriptions
        if key in enums._MEASUREMENT_NAMESPACES:
            return key
        key = re.sub(r'([a-z])([A-Z])', r'\1_\2', key)
        if '-' in key:
            key = key.replace('-', '_')
        return key.lower()

    d = {}
    for key, value in ordered_dict.items():
        # Interpret values from the dict
        if key.startswith("@"):
            continue
        key = normalize_key(key)

        if isinstance(value, (OrderedDict, dict)):
            d[key] = normalize_dict(value)
        elif isinstance(value, list):
            d[key] = []
            for item in value:
                if isinstance(item, (OrderedDict, dict)):
                    dict_item = normalize_dict(item)
                    # NOTE(review): dict_item has already been normalized once;
                    # normalizing it a second time looks redundant and possibly
                    # unintended — verify before changing.
                    d[key].append(normalize_dict(dict_item))
                else:
                    d[key].append(item)
        elif key in ("duration", "startafter", "max_period", "min_period"):
            d[key] = parse_duration(value)
        elif ("date_time" in key or key == "dtstart") and isinstance(value, str):
            d[key] = parse_datetime(value)
        elif value in ('true', 'false'):
            d[key] = parse_boolean(value)
        elif isinstance(value, str):
            # Best-effort numeric coercion of scalar strings.
            if re.match(r'^-?\d+$', value):
                d[key] = int(value)
            elif re.match(r'^-?[\d.]+$', value):
                d[key] = float(value)
            else:
                d[key] = value
        else:
            d[key] = value

        # Do our best to make the dictionary structure as pythonic as possible
        if key.startswith("x_ei_"):
            d[key[5:]] = d.pop(key)
            key = key[5:]

        # Group all targets as a list of dicts under the key "target"
        if key == 'target':
            targets = d.pop(key)
            new_targets = []
            if targets:
                for ikey in targets:
                    if isinstance(targets[ikey], list):
                        new_targets.extend([{ikey: value} for value in targets[ikey]])
                    else:
                        new_targets.append({ikey: targets[ikey]})
            d[key + "s"] = new_targets
            key = key + "s"

            # Also add a targets_by_type element to this dict
            # to access the targets in a more convenient way.
            d['targets_by_type'] = group_targets_by_type(new_targets)

        # Group all reports as a list of dicts under the key "pending_reports"
        if key == "pending_reports":
            if isinstance(d[key], dict) and 'report_request_id' in d[key] \
                    and isinstance(d[key]['report_request_id'], list):
                d['pending_reports'] = [{'report_request_id': rrid}
                                        for rrid in d['pending_reports']['report_request_id']]

        # Group all events as a list of dicts under the key "events"
        elif key == "event" and isinstance(d[key], list):
            events = d.pop("event")
            new_events = []
            for event in events:
                new_event = event['event']
                new_event['response_required'] = event['response_required']
                new_events.append(new_event)
            d["events"] = new_events

        # If there's only one event, also put it into a list
        elif key == "event" and isinstance(d[key], dict) and "event" in d[key]:
            oadr_event = d.pop('event')
            ei_event = oadr_event['event']
            ei_event['response_required'] = oadr_event['response_required']
            d['events'] = [ei_event]

        elif key in ("request_event", "created_event") and isinstance(d[key], dict):
            d = d[key]

        # Pluralize some lists
        elif key in ('report_request', 'report', 'specifier_payload'):
            if isinstance(d[key], list):
                d[key + 's'] = d.pop(key)
            else:
                d[key + 's'] = [d.pop(key)]

        elif key in ('report_description', 'event_signal'):
            descriptions = d.pop(key)
            if not isinstance(descriptions, list):
                descriptions = [descriptions]
            for description in descriptions:
                # We want to make the identification of the measurement universal
                for measurement in enums._MEASUREMENT_NAMESPACES:
                    if measurement in description:
                        name, item = measurement, description.pop(measurement)
                        break
                else:
                    # No known measurement in this description: stop processing.
                    break
                item['description'] = item.pop('item_description', None)
                item['unit'] = item.pop('item_units', None)
                if 'si_scale_code' in item:
                    item['scale'] = item.pop('si_scale_code')
                if 'pulse_factor' in item:
                    item['pulse_factor'] = item.pop('pulse_factor')
                description['measurement'] = {'name': name, **item}
            d[key + 's'] = descriptions

        # Promote the contents of the Qualified Event ID
        elif key == "qualified_event_id" and isinstance(d['qualified_event_id'], dict):
            qeid = d.pop('qualified_event_id')
            d['event_id'] = qeid['event_id']
            d['modification_number'] = qeid['modification_number']

        # Durations are encapsulated in their own object, remove this nesting
        elif isinstance(d[key], dict) and "duration" in d[key] and len(d[key]) == 1:
            d[key] = d[key]["duration"]

        # In general, remove all double nesting
        elif isinstance(d[key], dict) and key in d[key] and len(d[key]) == 1:
            d[key] = d[key][key]

        # In general, remove the double nesting of lists of items
        elif isinstance(d[key], dict) and key[:-1] in d[key] and len(d[key]) == 1:
            if isinstance(d[key][key[:-1]], list):
                d[key] = d[key][key[:-1]]
            else:
                d[key] = [d[key][key[:-1]]]

        # Payload values are wrapped in an object according to their type. We don't need that.
        elif key in ("signal_payload", "current_value"):
            value = d[key]
            if isinstance(d[key], dict):
                if 'payload_float' in d[key] and 'value' in d[key]['payload_float'] \
                        and d[key]['payload_float']['value'] is not None:
                    d[key] = float(d[key]['payload_float']['value'])
                # NOTE(review): the int branch checks d[key]['payload_int'] is not
                # None while the float branch checks ...['value'] — likely meant to
                # be symmetric; confirm before changing.
                elif 'payload_int' in d[key] and 'value' in d[key]['payload_int'] \
                        and d[key]['payload_int'] is not None:
                    d[key] = int(d[key]['payload_int']['value'])

        # Report payloads contain an r_id and a type-wrapped payload_float
        elif key == 'report_payload':
            if 'payload_float' in d[key] and 'value' in d[key]['payload_float']:
                v = d[key].pop('payload_float')
                d[key]['value'] = float(v['value'])
            elif 'payload_int' in d[key] and 'value' in d[key]['payload_int']:
                # FIX: previously popped 'payload_float' here even though this
                # branch checks for 'payload_int', raising KeyError for integer
                # report payloads.
                v = d[key].pop('payload_int')
                d[key]['value'] = int(v['value'])

        # All values other than 'false' must be interpreted as True for testEvent (rule 006)
        elif key == 'test_event' and not isinstance(d[key], bool):
            d[key] = True

        # Promote the 'text' item
        elif isinstance(d[key], dict) and "text" in d[key] and len(d[key]) == 1:
            if key == 'uid':
                d[key] = int(d[key]["text"])
            else:
                d[key] = d[key]["text"]

        # Promote a 'date-time' item
        elif isinstance(d[key], dict) and "date_time" in d[key] and len(d[key]) == 1:
            d[key] = d[key]["date_time"]

        # Promote 'properties' item, discard the unused? 'components' item
        elif isinstance(d[key], dict) and "properties" in d[key] and len(d[key]) <= 2:
            d[key] = d[key]["properties"]

        # Remove all empty dicts
        elif isinstance(d[key], dict) and len(d[key]) == 0:
            d.pop(key)
    return d
def _ValueFromText(key, old_val, val):
    """Returns the new param value from its text representation.

    Args:
      key: Name of the param being parsed (used for type overrides and error
        messages).
      old_val: The param's current value; its type guides how ``val`` is parsed.
      val: String representation of the new value.

    NOTE(review): relies on ``type_overrides`` from an enclosing scope, and on
    the helpers ``_IsNamedTuple`` / ``_UnquoteString`` defined elsewhere in
    this file — confirm their contracts there.
    """
    val_type = type(old_val).__name__
    # Any str subclass is treated as a plain string.
    if isinstance(old_val, str):
        val_type = 'str'
    if key in type_overrides:
        val_type = type_overrides[key]
    # Converts val (a string) to a best-guessed typed value.
    if val_type == 'bool':
        # Any text other than 'False'/'false' (or empty) is truthy.
        return val and (val != 'False') and (val != 'false')
    elif val_type == 'int':
        return int(val)
    elif val_type == 'float':
        return float(val)
    elif val_type == 'DType':
        return tf.as_dtype(val)
    elif dataclasses.is_dataclass(old_val) or _IsNamedTuple(old_val):
        # Maps field name to new value (or its string repr, if non-POD).
        name_to_new_value = ast.literal_eval(val)
        contents = {}
        items = old_val.__dict__.items() if dataclasses.is_dataclass(old_val) else old_val._asdict().items()
        for k, old_field_value in items:
            new_field_value = name_to_new_value[k]
            # Recurse to parse any non-POD contents not converted by
            # literal_eval().
            if isinstance(new_field_value, str):
                contents[k] = _ValueFromText(k, old_field_value, new_field_value)
            else:
                contents[k] = new_field_value
        # Rebuild the dataclass/namedtuple from the merged field values.
        return type(old_val)(**contents)
    elif val_type in ['list', 'tuple']:
        return ast.literal_eval(val)
    elif val_type == 'dict':
        return ast.literal_eval(val) if val != 'dict' else {}
    elif val_type == 'str':
        val = _UnquoteString(val)
        if val.startswith('[') and val.endswith(']'):
            # We may have stored a list as a string, try converting to a list.
            # In case of ValueError - use the string as is.
            try:
                return ast.literal_eval(val)
            except ValueError:
                pass
        return val
    elif isinstance(old_val, enum.Enum):
        # Enum values are serialized as 'ClassName.MEMBER'.
        cls, _, name = val.rpartition('.')
        if val_type != cls:
            raise ValueError('Expected enum of class %s but got %s' % (val_type, cls))
        return type(old_val)[name]
    elif (isinstance(old_val, type) or isinstance(old_val, message.Message) or old_val is None):
        if val == 'NoneType':
            return None
        elif old_val is None and val in ('False', 'false'):
            return False
        elif old_val is None and val in ('True', 'true'):
            return True
        else:
            # Serialized as 'type/<module>/<class>' or
            # 'proto/<module>/<class>/<text proto>'.
            try:
                val_type, pkg, cls = val.split('/', 2)
                if val_type == 'type':
                    return getattr(sys.modules[pkg], cls)
                elif val_type == 'proto':
                    cls, proto_str = cls.split('/', 1)
                    proto_cls = getattr(sys.modules[pkg], cls)
                    if not issubclass(proto_cls, message.Message):
                        raise ValueError('%s is not a proto class.' % proto_cls)
                    return text_format.Parse(proto_str, proto_cls())
            except ValueError as e:
                raise ValueError('Error processing %r : %r with %r' % (key, val, e))
    else:
        raise ValueError('Failed to read a parameter: %r : %r' % (key, val))
def append_to_csv(csv_path: Path, data_obj: Dataclass, encoding: str = "utf-8") -> None:
    """
    Append a data object to a CSV file. This function makes the following assumptions:
    * 'obj' is a dataclass. This code will check dynamically to make sure the object is a
      dataclass before writing it to file, and will do nothing if it is not.
    * 'obj''s dataclass is flat---that is, it consists of only primitive data types.
    * each 'obj' passed to this function for the same csv_path must be of the same dataclass
      type. It would not make sense to write objects with different sets of fields to the same
      CSV file. This function does not check to make sure that all the objects you pass in have
      the same format.
    """
    if not dataclasses.is_dataclass(data_obj):
        logging.error(  # pylint: disable=logging-not-lazy
            ("Object of type %s is not a dataclass. The code calling this append_to_csv function"
             + "must be rewritten to only attempt to write objects that are of a dataclass type"),
            type(data_obj),
        )
        # FIX: the docstring promises to "do nothing" for non-dataclasses, but the
        # code previously fell through and crashed in dataclasses.asdict() below.
        return

    # Check to see whether the file is empty
    try:
        file_empty = os.stat(csv_path).st_size == 0
    except FileNotFoundError:
        file_empty = True

    with open(csv_path, "a", encoding=encoding) as csv_file:
        try:
            data_dict = dataclasses.asdict(data_obj)
        except RecursionError:
            logging.warning(  # pylint: disable=logging-not-lazy
                "Couldn't serialize data %s due to recursion error. "
                + "Make sure that there are no cyclic references in data. ",
                data_obj,
            )
        else:
            # Because the CSV writer outputs null values as empty strings, all 'None's need to be
            # replaced with a unique string indicating the value is null, so that the string can
            # be replaced with 'None' again when the data is loaded back in.
            for k, v in data_dict.items():
                if v is None:
                    data_dict[k] = "<!NULL!>"
                if isinstance(v, JournaledString):
                    data_dict[k] = json.dumps(v.to_json())
            writer = csv.DictWriter(
                # QUOTE_NONNUMERIC is used in both the writer and the reader to ensure that numbers
                # (e.g., indexes, hues, positions) are decoded as numbers.
                # NOTE(review): the comment above says QUOTE_NONNUMERIC but the code uses
                # QUOTE_MINIMAL — one of the two is wrong; check against the reader before
                # changing the quoting mode, since it alters the on-disk format.
                csv_file,
                fieldnames=data_dict.keys(),
                quoting=csv.QUOTE_MINIMAL,
            )
            # Only write the header the first time a record is added to the file
            try:
                if file_empty:
                    writer.writeheader()
                writer.writerow(data_dict)
            except Exception as exception:  # pylint: disable=broad-except
                logging.warning(
                    "Couldn't write row containing data %s to CSV file. Reason: %s.",
                    data_obj,
                    exception,
                )
def default(self, o): if dataclasses.is_dataclass(o): return dataclasses.asdict(o) if isinstance(o, modeling.BertConfig): return o.to_dict() return super().default(o)
def test_has_all_subclasses_are_dataclasses(self):
    """Every DataTransferObject subclass must be declared as a dataclass."""
    # Import every module first so that all subclasses are registered.
    _import_all(pcs.__path__)
    subclasses = _all_subclasses(DataTransferObject)
    for cls in subclasses:
        self.assertTrue(is_dataclass(cls), f"{cls} is not a dataclass")
def default(self, o):
    """Encode dataclasses as plain dicts; defer everything else to the base encoder."""
    if dataclasses.is_dataclass(o):
        return dataclasses.asdict(o)
    return super().default(o)
def check_a(cls, v):
    """Validator sanity check: confirm the owner class, then pass the value through."""
    assert is_dataclass(MyDataclass) and cls is MyDataclass
    return v
def _default(o: Any) -> Any: if is_dataclass(o): return asdict(o) elif isinstance(o, datetime): return str(o) raise TypeError(f"no way to serialize {o} {type(o)}")
def create_cloned_field(field: ModelField) -> ModelField:
    """Recursively clone a pydantic ``ModelField``.

    BaseModel field types are re-created as fresh subclasses via
    ``create_model`` with each of their fields cloned in turn; all other
    field attributes are copied over one by one. Supports both the
    Pydantic 1.x and pre-1.0 attribute layouts via the ``PYDANTIC_1`` flag.
    """
    original_type = field.type_
    # Pydantic dataclasses expose their generated model as __pydantic_model__.
    if is_dataclass(original_type) and hasattr(original_type, "__pydantic_model__"):
        original_type = original_type.__pydantic_model__  # type: ignore
    use_type = original_type
    if lenient_issubclass(original_type, BaseModel):
        original_type = cast(Type[BaseModel], original_type)
        # Subclass the model and clone each of its fields recursively.
        use_type = create_model(original_type.__name__, __base__=original_type)
        for f in original_type.__fields__.values():
            use_type.__fields__[f.name] = create_cloned_field(f)
    if PYDANTIC_1:
        new_field = ModelField(
            name=field.name,
            type_=use_type,
            class_validators={},
            default=None,
            required=False,
            model_config=BaseConfig,
            field_info=FieldInfo(None),
        )
    else:  # pragma: nocover
        # Pre-1.0 pydantic used the 'schema' keyword instead of 'field_info'.
        new_field = ModelField(  # type: ignore
            name=field.name,
            type_=use_type,
            class_validators={},
            default=None,
            required=False,
            model_config=BaseConfig,
            schema=FieldInfo(None),
        )
    # Copy the remaining attributes from the original field.
    new_field.has_alias = field.has_alias
    new_field.alias = field.alias
    new_field.class_validators = field.class_validators
    new_field.default = field.default
    new_field.required = field.required
    new_field.model_config = field.model_config
    if PYDANTIC_1:
        new_field.field_info = field.field_info
    else:  # pragma: nocover
        new_field.schema = field.schema  # type: ignore
    new_field.allow_none = field.allow_none
    new_field.validate_always = field.validate_always
    # Sub-fields (e.g. items of List[...]) and mapping key fields are cloned too.
    if field.sub_fields:
        new_field.sub_fields = [
            create_cloned_field(sub_field) for sub_field in field.sub_fields
        ]
    if field.key_field:
        new_field.key_field = create_cloned_field(field.key_field)
    new_field.validators = field.validators
    if PYDANTIC_1:
        new_field.pre_validators = field.pre_validators
        new_field.post_validators = field.post_validators
    else:  # pragma: nocover
        new_field.whole_pre_validators = field.whole_pre_validators  # type: ignore
        new_field.whole_post_validators = field.whole_post_validators  # type: ignore
    new_field.parse_json = field.parse_json
    new_field.shape = field.shape
    try:
        new_field.populate_validators()
    except AttributeError:  # pragma: nocover
        # TODO: remove when removing support for Pydantic < 1.0.0
        new_field._populate_validators()  # type: ignore
    return new_field
def is_template_like(obj: Any) -> bool:
    """Check whether the given object is template-like.

    Currently this includes templates and dataclasses.
    """
    if is_template(obj):
        return True
    return dataclasses.is_dataclass(obj)
def should_handle(self, cls: type, origin, args) -> bool:
    """This handler applies to dataclass types; origin and args are ignored."""
    handled = is_dataclass(cls)
    return handled
def has_proto(target):
    """Whether *target* qualifies; currently implemented as a dataclass check."""
    result = dataclasses.is_dataclass(target)
    return result
def _is_dataclass(obj: Any) -> bool: try: import dataclasses except ImportError: return False return dataclasses.is_dataclass(obj)
def default(self, o):
    """Encode dataclasses as dicts and Decimals as strings; defer the rest."""
    if dataclasses.is_dataclass(o):
        return dataclasses.asdict(o)
    elif isinstance(o, Decimal):
        return str(o)
    else:
        return super().default(o)
def prepare_lr_scheduler(
    optimizer: optim.Optimizer,
    scheduler_config: Union[Dict[str, Any], DictConfig],
    train_dataloader: Optional[dataloader.DataLoader] = None,
) -> Optional[Dict[str, Any]]:
    """
    Constructs an LR Scheduler (optionally) for a given optimizer, based on a config
    with the following schema

    optim:
      name: <name of optimizer>
      lr: <maximal learning rate>

      # <additional optimizer arguments>
      args:
        name: auto  # special keyword, resolves to correct optimizer config for given optimizer name
        # cls: nemo.core.config.optimizers.NovogradParams  # explicit instantiation by class path
        params:  # optional override parameters for the optimizer config
          betas: [0.8, 0.5]
          weight_decay: 0.001

      # scheduler setup
      sched:
        name: <name of scheduler>
        iters_per_batch: null # computed at runtime; mandatory to have
        max_steps: null # computed at runtime or explicitly set here; mandatory to have

        # pytorch lightning args <mandatory>
        monitor: val_loss
        reduce_on_plateau: false

        # <scheduler config override>
        args:
          name: auto  # special keyword, resolves to correct optimizer config for given optimizer name
          # cls: nemo.core.config.schedulers.CosineAnnealingParams  # explicit instantiation by class path
          params:  # optional override parameters for the optimizer config
            warmup_steps: null
            warmup_ratio: null
            min_lr: 0.0
            last_epoch: -1

    Args:
        optimizer: An instantiated Optimizer.
        scheduler_config: A dictionary / config dict which follows the above schema.
        train_dataloader: Optional requirement, must be passed if "iters_per_batch" is defined
            instead of "max_steps". Used to compute effective "max_steps".

    Returns:
        A dictionary containing the LR Scheduler implementation if the config was successfully
        parsed along with other parameters required by Pytorch Lightning, otherwise None.
    """
    if scheduler_config is not None:
        scheduler_config = maybe_update_config_version(scheduler_config)

    # Build nested dictionary for convenience out of structured objects
    if isinstance(scheduler_config, DictConfig):
        scheduler_config = OmegaConf.to_container(scheduler_config, resolve=True)
    elif dataclasses.is_dataclass(scheduler_config):
        # Recursively transform data classes to basic dictionaries
        scheduler_config = OmegaConf.create(scheduler_config)
        scheduler_config = OmegaConf.to_container(scheduler_config, resolve=True)

    # Test to see if config follows above schema
    add_max_args_flag = True
    interval = 'step'
    if scheduler_config is not None:
        if 'args' in scheduler_config:
            scheduler_args = scheduler_config.pop('args')
        else:
            scheduler_args = copy.deepcopy(scheduler_config)

            # Remove extra parameters from scheduler_args nest
            # Assume all other parameters are to be passed into scheduler constructor
            if 'name' in scheduler_args and scheduler_args['name'] == 'ReduceLROnPlateau':
                # ReduceLROnPlateau steps per epoch and takes no max_steps argument.
                add_max_args_flag = False
                interval = 'epoch'

            scheduler_args.pop('name', None)
            scheduler_args.pop('t_max_epochs', None)
            scheduler_args.pop('t_accumulate_grad_batches', None)
            scheduler_args.pop('t_limit_train_batches', None)
            scheduler_args.pop('t_num_workers', None)
            scheduler_args.pop('monitor', None)
            scheduler_args.pop('reduce_on_plateau', None)
    else:
        # Return gracefully in case `sched` was not supplied; inform user
        logging.info('Scheduler not initialized as no `sched` config supplied to setup_optimizer()')
        return None

    # Try instantiation of scheduler params from config class path
    if '_target_' in scheduler_args:
        scheduler_args_cfg = OmegaConf.create(scheduler_args)
        scheduler_conf = hydra.utils.instantiate(scheduler_args_cfg)
        scheduler_args = vars(scheduler_conf)

        # Get name of the scheduler
        scheduler_name = scheduler_conf.__class__.__name__

        if 'Params' in scheduler_name:
            scheduler_name = scheduler_name.replace('Params', '')
    else:
        # Class path instantiation failed; try resolving "name" component

        # Get name of the scheduler
        if 'name' in scheduler_config:
            scheduler_name = scheduler_config['name']
        else:
            logging.warning(
                "Could not resolve classpath for Scheduler Config, and `name` "
                "was not provided either. \n"
                "Scheduler cannot be instantiated !"
            )
            return None

        # If class path was not provided, perhaps `name` is provided for resolution
        if 'name' in scheduler_args:
            # If `auto` is passed as name for resolution of optimizer name,
            # then lookup optimizer name and resolve its parameter config
            if scheduler_args['name'] == 'auto':
                scheduler_params_name = "{}Params".format(scheduler_name)
            else:
                scheduler_params_name = scheduler_args['name']

            # Get override arguments provided in the config yaml file / Dict Config
            scheduler_params_override = scheduler_args.get('params', {})

            # If params is itself a dict config object provided explicitly in Dict Config
            # Resolve to dictionary for convenience
            if isinstance(scheduler_params_override, DictConfig):
                scheduler_params_override = OmegaConf.to_container(scheduler_params_override, resolve=True)

            # Get and instantiate the Config dataclass for this scheduler
            scheduler_params_cls = get_scheduler_config(scheduler_params_name, **scheduler_params_override)
            scheduler_params = scheduler_params_cls()  # instantiate the parameters object
            scheduler_args = vars(scheduler_params)  # extract just the dictionary from the Config object
        else:
            # assume the input dictionary is schedular args (from dataclasses / omegaconf)
            pass

    # Extract value to monitor in losses, if provided.
    if 'monitor' in scheduler_config:
        monitor = scheduler_config.get('monitor')
    else:
        # Default to train loss
        monitor = 'loss'

    # Store exact max_steps if it is provided
    if 'max_steps' in scheduler_config and scheduler_config['max_steps'] is not None:
        max_steps = scheduler_config['max_steps']
    elif 't_max_epochs' in scheduler_config:
        # Compute effective max_steps if t_max_epochs is provided
        if train_dataloader is None:
            logging.warning(
                'As `t_max_epochs` is provided/computed, it is required to pass the train dataloader in order\n'
                'to compute effective maximum number of steps.\n'
                'Scheduler will not be instantiated !'
            )
            return None

        # Raise exception if neither `max_steps` nor `t_max_epochs` is provided
        if scheduler_config.get('t_max_epochs', None) is None:
            logging.warning(
                "`t_max_epochs` cannot be None when `max_steps` is not not provided.\n"
                "This can occur when `train dataloader` is not available to correctly "
                "prepare the scheduler.\n"
                "Scheduler will not be instantiated !"
            )
            return None

        # Get iters_per_batch
        max_epochs = scheduler_config.get('t_max_epochs')
        accumulate_grad_batches = scheduler_config.get('t_accumulate_grad_batches')
        limit_train_batches = scheduler_config.get('t_limit_train_batches')
        num_workers = scheduler_config.get('t_num_workers')

        # Compute effective num max_steps
        num_samples = len(train_dataloader.dataset)

        # TODO: not sure if this will be the correct LR schedule for Megatron
        # we may need to override ModelPT setup_optimization
        if train_dataloader.batch_size is not None:
            batch_size = train_dataloader.batch_size
        elif hasattr(train_dataloader, 'batch_sampler') and train_dataloader.batch_sampler is not None:
            if train_dataloader.batch_sampler.micro_batch_size is not None:
                batch_size = train_dataloader.batch_sampler.micro_batch_size
            else:
                raise ValueError(f'Could not find batch_size from batch_sampler: {train_dataloader.batch_sampler}')
        else:
            raise ValueError(f'Could not find batch_size from train_dataloader: {train_dataloader}')
        drop_last = train_dataloader.drop_last

        max_steps = compute_max_steps(
            max_epochs=max_epochs,
            accumulate_grad_batches=accumulate_grad_batches,
            limit_train_batches=limit_train_batches,
            num_workers=num_workers,
            num_samples=num_samples,
            batch_size=batch_size,
            drop_last=drop_last,
        )
    else:
        logging.warning(
            "Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, "
            "cannot compute effective `max_steps` !\n"
            "Scheduler will not be instantiated !"
        )
        return None

    # Inject max_steps (effective or provided) into the scheduler config
    if add_max_args_flag:
        scheduler_args['max_steps'] = max_steps

    # Get the scheduler class from the config
    scheduler_cls = get_scheduler(scheduler_name, **scheduler_args)

    # Instantiate the LR schedule
    schedule = scheduler_cls(optimizer, **scheduler_args)

    logging.info(
        'Scheduler "%s" \nwill be used during training (effective maximum steps = %d) - \nParameters : \n(%s)',
        str(schedule),
        max_steps,
        OmegaConf.to_yaml(OmegaConf.create(scheduler_args)),
    )

    # Wrap the schedule in PTL arguments to perform stepwise computation
    # Rather than epoch level computation
    if isinstance(schedule, optim.lr_scheduler.ReduceLROnPlateau):
        reduce_lr_on_plateau = True
    else:
        reduce_lr_on_plateau = False

    schedule_dict = {
        'scheduler': schedule,
        'interval': interval,
        'frequency': 1,
        'monitor': monitor,
        'reduce_on_plateau': reduce_lr_on_plateau,
    }
    return schedule_dict
def default(self, o):
    """Encode dataclasses as plain dicts; delegate everything else upward."""
    return dataclasses.asdict(o) if dataclasses.is_dataclass(o) else super().default(o)
from prettyprinter import pretty_call, register_pretty
from dataclasses import is_dataclass
from mltoscm import ast


def reg_dataclass(T):
    """Register a prettyprinter formatter for the dataclass type ``T``.

    Wrapping the registration in a function binds ``T``'s name and field list
    per call, so the closure below does not suffer the loop late-binding
    pitfall.
    """
    # Field names are taken from the class annotations, in declaration order.
    fields = list(T.__annotations__)
    t_name = T.__name__

    @register_pretty(T)
    def f(x, ctx):
        # Render instances as a constructor call: TypeName(field=value, ...).
        return pretty_call(ctx, t_name, **{field: getattr(x, field) for field in fields})


# Register a pretty-printer for every dataclass AST node exposed by mltoscm.ast.
for _, each in ast.__dict__.items():
    if isinstance(each, type) and is_dataclass(each) and issubclass(each, ast.AST):
        reg_dataclass(each)
def default(self, o):
    """Encode *o*, which must be a dataclass instance, as a plain dict."""
    assert dataclasses.is_dataclass(o)
    as_mapping = dataclasses.asdict(o)
    return as_mapping
def _is_property_name(t: Type[T], property_name: str) -> bool: return (is_dataclass(t) and property_name in (f.name for f in fields(t)) or property_name in dir(t))
def _serialize_all(obj): if is_dataclass(obj): return asdict(obj) else: return str(obj)
def default(self: "DataclassJsonEncoder", obj: Any) -> Any:
    """Serialize dataclass instances via their __dict__; otherwise defer to
    the stock json.JSONEncoder behaviour."""
    if not is_dataclass(obj):
        return json.JSONEncoder.default(self, obj)
    return obj.__dict__
def check_account_dataclass(self, obj):
    """Verify the account dataclass declares account_id, widgets, widget_count in order."""
    assert dataclasses.is_dataclass(obj)
    # Unpacking enforces exactly three fields, in declaration order.
    f_account, f_widgets, f_count = dataclasses.fields(obj)
    eq_(f_account.name, "account_id")
    eq_(f_count.name, "widget_count")
    eq_(f_widgets.name, "widgets")
def default(self, o):
    """Serialize dataclasses through their own asdict() method; defer the rest."""
    # NOTE: relies on the dataclass providing an asdict() instance method.
    return o.asdict() if is_dataclass(o) else super().default(o)
def check_special_widget_dataclass(self, obj):
    """Verify the special-widget dataclass declares widget_id, name, magic in order."""
    assert dataclasses.is_dataclass(obj)
    # Unpacking enforces exactly three fields, in declaration order.
    first, second, third = dataclasses.fields(obj)
    eq_(first.name, "widget_id")
    eq_(second.name, "name")
    eq_(third.name, "magic")
def gen_paths(self, obj: object, path):
    # Depth-first traversal: yields one tuple of field names per leaf value
    # reachable from `obj` through nested dataclass fields.
    #
    # NOTE(review): the recursive call is to a bare `gen_paths` with two
    # arguments (not `self.gen_paths`), so it targets a module-level function
    # of the same name defined elsewhere — verify that function exists.
    # NOTE(review): `dataclass_fields` appears to be a project helper returning
    # a name -> value mapping; it cannot be stdlib `dataclasses.fields`, which
    # returns a tuple of Field objects — confirm against its definition.
    if is_dataclass(obj):
        for name, value in dataclass_fields(obj).items():
            yield from gen_paths(value, path + (name, ))
    else:
        yield path
def _custom_default(o): if is_dataclass(o): return asdict(o) raise TypeError(f"{o!r} is not JSON serializable")
def shallow_asdict(x: Any) -> Dict[str, Any]:
    """One-level field-name -> value dict; unlike dataclasses.asdict, nested
    dataclass values are kept as-is rather than recursively converted."""
    assert dataclasses.is_dataclass(x)
    result: Dict[str, Any] = {}
    for f in dataclasses.fields(x):
        result[f.name] = getattr(x, f.name)
    return result