def model_init(self, trial=None):
    if self.sparse_args.final_finetune:
        # Final fine-tune: start from the compiled pruned checkpoint,
        # optimize it in "dense" mode, and un-zero parameters so they
        # can receive gradient updates again.
        model = self.compile_model(self.model_args.model_name_or_path)
        model = optimize_model(model, "dense")
        model = self.unzero_parameters(model)
    else:
        model = super().model_init(trial)

    self.patch_coordinator.patch_model(model, trial)
    return model
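# A minimal standalone sketch of what an unzero_parameters helper could do,
# under the assumption that it re-seeds exactly-zero weight entries with small
# random values so the final dense fine-tune does not start from hard zeros.
# This function and its epsilon parameter are illustrative; the class method
# used in model_init above may differ.
import torch

def unzero_parameters_sketch(model, epsilon=0.01):
    for parameter in model.parameters():
        if parameter.dim() <= 1:
            continue  # only touch weight matrices, not biases / norm scales
        zero_mask = parameter == 0
        with torch.no_grad():
            parameter[zero_mask] = epsilon * torch.randn_like(parameter)[zero_mask]
    return model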
def copy_model_files(self):
    modified = False
    src_path = self.checkpoint_path
    d = None
    try:
        if not (self.git_path / "tf_model.h5").exists() or not (self.git_path / "pytorch_model.bin").exists():
            if self.task.startswith("squad"):
                # Compile and head-prune the checkpoint into a temporary
                # directory, then export from there.
                d = TemporaryDirectory()
                model = QASparseXP.compile_model(src_path, dest_path=d.name)
                model = optimize_model(model, "heads")
                model.save_pretrained(d.name)
                src_path = d.name
            else:
                raise Exception(f"Unknown task {self.task}")

        if not (self.git_path / "tf_model.h5").exists():
            with TemporaryDirectory() as d2:
                if self.task.startswith("squad"):
                    QASparseXP.final_fine_tune_bertarize(src_path, d2, remove_head_pruning=True)
                else:
                    raise Exception(f"Unknown task {self.task}")

                tf_model = TFBertForQuestionAnswering.from_pretrained(d2, from_pt=True)
                tf_model.save_pretrained(self.git_path)
            modified = True

        if not (self.git_path / "pytorch_model.bin").exists():
            model = BertForQuestionAnswering.from_pretrained(src_path)
            model.save_pretrained(self.git_path)
            modified = True

        FILES = ("special_tokens_map.json", "tokenizer_config.json", "vocab.txt")
        for file in FILES:
            if not (self.git_path / file).exists():
                shutil.copyfile(str(Path(src_path) / file), str(self.git_path / file))
                modified = True
    finally:
        if d is not None:
            d.cleanup()

    # Reload the config: it may have been changed by compilation / optimization
    # (pruned_heads, gelu_patch, layer_norm_patch).
    with (self.git_path / "config.json").open() as f:
        self.checkpoint_info["config"] = json.load(f)

    return modified
def run_(self, model):
    self.attention_size = model.config.hidden_size

    for name, parameter in model.named_parameters():
        if ".encoder." in name:
            is_linear_layer_weight = name.endswith(".weight") and "LayerNorm" not in name
            is_attention = "attention" in name
        else:
            is_linear_layer_weight = False
            is_attention = False
        self.add_parameter(name, parameter, is_linear_layer_weight, is_attention)

    total_sparsity = (1.0 - self.stats["nnz"] / self.stats["total"]) * 100
    self.stats["total_sparsity"] = total_sparsity

    sparsity = (1.0 - self.stats["linear_nnz"] / self.stats["linear_total"]) * 100
    self.stats["linear_sparsity"] = sparsity

    model = optimize_model(model, "heads")
    self.stats["pruned_heads"] = model.config.pruned_heads

    return self.stats
def run_(self, model):
    # Architecture-agnostic variant: a model-structure helper classifies
    # parameters instead of hard-coded BERT name matching.
    model_structure = struct_from_config(model.config_class)
    self.attention_size = getattr(model.config, model_structure.NAME_CONFIG["hidden_size"])

    for name, parameter in model.named_parameters():
        is_attention = model_structure.is_attention(name)
        is_ffn = model_structure.is_ffn(name)
        is_layernorm = model_structure.is_layernorm(name)
        is_linear_layer_weight = (is_attention or is_ffn) and name.endswith(".weight") and not is_layernorm
        self.add_parameter(name, parameter, is_linear_layer_weight, is_attention)

    total_sparsity = (1.0 - self.stats["nnz"] / self.stats["total"]) * 100
    self.stats["total_sparsity"] = total_sparsity

    sparsity = (1.0 - self.stats["linear_nnz"] / self.stats["linear_total"]) * 100
    self.stats["linear_sparsity"] = sparsity

    model = optimize_model(model, "heads")
    self.stats["pruned_heads"] = model.config.pruned_heads

    return self.stats
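# Illustrative sketch of the bookkeeping that add_parameter is assumed to do
# for the stats read above: accumulate total and non-zero element counts,
# both overall and for linear-layer weights. Standalone and hypothetical; the
# real method also receives the is_attention flag and records more detail.
def count_sparsity_sketch(named_parameters):
    stats = {"total": 0, "nnz": 0, "linear_total": 0, "linear_nnz": 0}
    for name, parameter in named_parameters:
        numel = parameter.numel()
        nnz = int((parameter != 0).sum())
        stats["total"] += numel
        stats["nnz"] += nnz
        # Crude stand-in for the is_linear_layer_weight flag above.
        if name.endswith(".weight") and parameter.dim() == 2:
            stats["linear_total"] += numel
            stats["linear_nnz"] += nnz
    stats["total_sparsity"] = (1.0 - stats["nnz"] / stats["total"]) * 100
    stats["linear_sparsity"] = (1.0 - stats["linear_nnz"] / stats["linear_total"]) * 100
    return stats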
def instrument_model(self, model):
    if self.args.optimize_model_before_eval != "disabled":
        # Optimize the model passed in, not self.model: the parameter would
        # otherwise be silently ignored.
        model = optimize_model(model, self.args.optimize_model_before_eval)
    return TimingModule(model)
def copy_model_files(self, force=False):
    modified = False
    src_path = self.checkpoint_path
    d = None
    try:
        if force or not (self.git_path / "tf_model.h5").exists() or not (self.git_path / "pytorch_model.bin").exists():
            d = TemporaryDirectory()
            if self.task in self.QA_TASKS:
                model = QASparseXP.compile_model(src_path, dest_path=d.name)
            elif self.task in self.GLUE_TASKS:
                model = GlueSparseXP.compile_model(src_path, dest_path=d.name)
            elif self.task in self.SUMMARIZATION_TASKS:
                model = SummarizationSparseXP.compile_model(src_path, dest_path=d.name)
            else:
                raise Exception(f"Unknown task {self.task}")

            model = optimize_model(model, "heads")
            model.save_pretrained(d.name)
            src_path = d.name

        if force or not (self.git_path / "tf_model.h5").exists():
            with TemporaryDirectory() as d2:
                if self.task in self.QA_TASKS:
                    QASparseXP.final_fine_tune_bertarize(src_path, d2, remove_head_pruning=True)
                    tf_model = TFAutoModelForQuestionAnswering.from_pretrained(d2, from_pt=True)
                elif self.task in self.GLUE_TASKS:
                    GlueSparseXP.final_fine_tune_bertarize(src_path, d2, remove_head_pruning=True)
                    tf_model = TFAutoModelForSequenceClassification.from_pretrained(d2, from_pt=True)
                elif self.task in self.SUMMARIZATION_TASKS:
                    SummarizationSparseXP.final_fine_tune_bertarize(src_path, d2, remove_head_pruning=True)
                    tf_model = TFAutoModelForSeq2SeqLM.from_pretrained(d2, from_pt=True)
                else:
                    raise Exception(f"Unknown task {self.task}")

                tf_model.save_pretrained(self.git_path)
            modified = True

        if force or not (self.git_path / "pytorch_model.bin").exists():
            if self.task in self.QA_TASKS:
                model = AutoModelForQuestionAnswering.from_pretrained(src_path)
            elif self.task in self.GLUE_TASKS:
                model = AutoModelForSequenceClassification.from_pretrained(src_path)
            elif self.task in self.SUMMARIZATION_TASKS:
                model = AutoModelForSeq2SeqLM.from_pretrained(src_path)
            else:
                raise Exception(f"Unknown task {self.task}")

            model.save_pretrained(self.git_path)
            modified = True

        src_path = Path(src_path)
        to_copy = self.get_copy_list()
        for files, dest in to_copy:
            dest.mkdir(exist_ok=True)
            for file in files:
                if force or not (dest / file).exists():
                    shutil.copyfile(str(src_path / file), str(dest / file))
                    modified = True
    finally:
        if d is not None:
            d.cleanup()

    # Reload the config: it may have been changed by compilation / optimization
    # (pruned_heads, gelu_patch, layer_norm_patch).
    with (self.git_path / "config.json").open() as f:
        self.checkpoint_info["config"] = json.load(f)

    return modified
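# Design note: the three task-family if/elif chains above repeat the same
# dispatch. A table-driven sketch (hypothetical, not in the codebase) could
# collapse them into one lookup; the *SparseXP classes come from this
# repository, the Auto classes from transformers.
from transformers import (
    AutoModelForQuestionAnswering, TFAutoModelForQuestionAnswering,
    AutoModelForSequenceClassification, TFAutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM, TFAutoModelForSeq2SeqLM,
)

TASK_DISPATCH_SKETCH = {
    "qa": (QASparseXP, AutoModelForQuestionAnswering, TFAutoModelForQuestionAnswering),
    "glue": (GlueSparseXP, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification),
    "summarization": (SummarizationSparseXP, AutoModelForSeq2SeqLM, TFAutoModelForSeq2SeqLM),
}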
def instrument_model(self, model):
    if self.args.optimize_model_before_eval != "disabled":
        # Same fix as above: use the passed-in model rather than self.model.
        model = optimize_model(model, self.args.optimize_model_before_eval)
    return TimingModule(model, method_list=["generate", "config"])
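# Hypothetical sketch of a TimingModule-style wrapper, to illustrate what
# instrument_model returns: forward calls are proxied to the wrapped model
# while accumulating wall-clock time. The real TimingModule (and its
# method_list handling) may differ.
import time
import torch.nn as nn

class TimingModuleSketch(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.elapsed = 0.0
        self.calls = 0

    def forward(self, *args, **kwargs):
        start = time.perf_counter()
        output = self.model(*args, **kwargs)
        self.elapsed += time.perf_counter() - start
        self.calls += 1
        return output

    def get_results(self):
        # Total time and average latency per forward call, in seconds.
        return self.elapsed, self.elapsed / max(self.calls, 1)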