def __init__(self, param_groups=None, mpu=None, zero_reduce_scatter=False, deepspeed=None):
    """Store the configuration and flatten ``param_groups`` into one list.

    Args:
        param_groups: Optional iterable of parameter groups, each itself an
            iterable of parameters. When falsy (``None`` or empty),
            ``self.params`` is left as ``None``.
        mpu: Stored unchanged on ``self.mpu``.
        zero_reduce_scatter: Stored unchanged on ``self.zero_reduce_scatter``.
        deepspeed: Stored unchanged on ``self.deepspeed``.
    """
    self.mpu = mpu
    self.zero_reduce_scatter = zero_reduce_scatter
    self.deepspeed = deepspeed
    self.has_moe_params = False

    if not param_groups:
        # No groups supplied: there is nothing to collect.
        self.params = None
        return

    flattened = [param for group in param_groups for param in group]
    self.params = flattened

    # Evaluate the predicate for every parameter (no short-circuit) and
    # remember whether any of them matched.
    moe_flags = [is_moe_param(param) for param in flattened]
    self.has_moe_params = any(moe_flags)
def check(self, param_groups=None):
    """Run the overflow check on the stored parameters or on ``param_groups``.

    Args:
        param_groups: Optional iterable of parameter groups, each itself an
            iterable of parameters. When ``None``, the parameters stored on
            the instance (``self.params`` / ``self.has_moe_params``) are
            used instead.

    Returns:
        Whatever ``self.has_overflow`` returns for the selected parameters.

    Raises:
        AssertionError: If ``param_groups`` is ``None`` and ``self.params``
            is ``None`` as well, i.e. there is nothing to check.
    """
    if param_groups is None:
        # Nothing to fall back on when the instance holds no parameters
        # either: fail here with a clear message instead of handing None
        # to has_overflow.
        assert self.params is not None, \
            "self.params and param_groups both cannot be none"
        params = self.params
        has_moe_params = self.has_moe_params
    else:
        params = []
        has_moe_params = False
        for group in param_groups:
            for param in group:
                params.append(param)
                if is_moe_param(param):
                    has_moe_params = True

    return self.has_overflow(params, has_moe_params=has_moe_params)