def load(cls, load_dir, device):
    """
    Loads an AdaptiveModel from a directory. The directory must contain:

    * language_model.bin
    * language_model_config.json
    * prediction_head_X.bin  multiple PH possible
    * prediction_head_X_config.json
    * processor_config.json config for transforming input
    * vocab.txt vocab file for language model, turning text to Wordpiece Tokens

    :param load_dir: location where adaptive model is stored
    :type load_dir: str
    :param device: to which device we want to send the model, either cpu or cuda
    :type device: torch.device
    """
    # Language Model
    language_model = LanguageModel.load(load_dir)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        head = PredictionHead.load(config_file)
        # # set shared weights between LM and PH
        # if type(head) == BertLMHead:
        #     head.set_shared_weights(language_model)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    return cls(language_model, prediction_heads, 0.1, ph_output_type, device)

def load(cls, load_dir, device, **kwargs):
    import onnxruntime
    sess_options = onnxruntime.SessionOptions()
    # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    # Use OpenMP optimizations. Only useful for CPU, has little impact for GPUs.
    sess_options.intra_op_num_threads = multiprocessing.cpu_count()
    onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir, strict=False)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        # The ONNX model doesn't need to have a separate neural network for the PredictionHead. It only
        # uses the instance methods of the PredictionHead class, so we load with load_weights=False.
        head = PredictionHead.load(config_file, load_weights=False)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    with open(load_dir / "model_config.json") as f:
        model_config = json.load(f)
        language = model_config["language"]

    return cls(onnx_session, prediction_heads, language, device)

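# --- Usage sketch (not part of the source) ---
# A minimal, hypothetical example of calling the ONNX load() above. It assumes the
# directory was produced by an ONNX export of an AdaptiveModel, so that model.onnx,
# the prediction head configs and model_config.json all exist in it; the class name
# ONNXAdaptiveModel and the path are placeholder assumptions.
from pathlib import Path
import torch

onnx_dir = Path("saved_models/my_model_onnx")  # placeholder path
device = torch.device("cpu")                   # the session options above are CPU-oriented
onnx_model = ONNXAdaptiveModel.load(onnx_dir, device=device)
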
def get_adaptive_model(
    lm_output_type,
    prediction_heads,
    layer_dims,
    model,
    device,
    embeds_dropout_prob,
    class_weights=None,
):
    parsed_lm_output_types = lm_output_type.split(",")
    language_model = LanguageModel.load(model)

    initialized_heads = []
    for head_name in prediction_heads.split(","):
        initialized_heads.append(
            PredictionHead.create(
                prediction_head_name=head_name,
                layer_dims=layer_dims,
                class_weights=class_weights,
            )
        )

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=initialized_heads,
        embeds_dropout_prob=embeds_dropout_prob,
        lm_output_types=parsed_lm_output_types,
        device=device,
    )
    return model

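# --- Usage sketch (not part of the source) ---
# A minimal, hypothetical call of the get_adaptive_model() helper above. The
# comma-separated strings mirror how the function splits lm_output_type and
# prediction_heads; the model name, head name and layer dimensions are placeholders.
import torch

adaptive_model = get_adaptive_model(
    lm_output_type="per_sequence",
    prediction_heads="TextClassificationHead",
    layer_dims=[768, 2],
    model="bert-base-cased",
    device=torch.device("cpu"),
    embeds_dropout_prob=0.1,
)
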
def load(cls, load_dir, device, **kwargs):
    sess_options = onnxruntime.SessionOptions()
    # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    # Enable model serialization and store the optimized graph at the desired location.
    sess_options.optimized_model_filepath = os.path.join(load_dir, "optimized_model.onnx")
    onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir, strict=False)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        # The ONNX model doesn't need to have a separate neural network for the PredictionHead. It only
        # uses the instance methods of the PredictionHead class, so we load with load_weights=False.
        head = PredictionHead.load(config_file, load_weights=False)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    with open(load_dir / "model_config.json") as f:
        model_config = json.load(f)
        language = model_config["language"]

    return cls(onnx_session, prediction_heads, language, device)

def load(cls, load_dir, device):
    """
    Loads an AdaptiveModel from a directory. The directory must contain:

    - language_model.bin
    - language_model_config.json
    - prediction_head_X.bin  multiple PH possible
    - prediction_head_X_config.json
    - processor_config.json config for transforming input
    - vocab.txt vocab file for language model, turning text to Wordpiece Tokens

    :param load_dir: location where adaptive model is stored
    :type load_dir: str
    :param device: to which device we want to send the model, either cpu or cuda
    :type device: torch.device
    :return: AdaptiveModel
    :rtype: AdaptiveModel
    """
    # Prediction heads
    ph_model_files, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for model_file, config_file in zip(ph_model_files, ph_config_files):
        head = PredictionHead.load(
            model_file=model_file,
            config_file=config_file,
            device=device,
        )
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    # Language Model
    language_model = LanguageModel.load(load_dir)

    return cls(language_model, prediction_heads, 0.1, ph_output_type, device)

def load(cls, config_file):
    """
    Loads a Prediction Head. Directly calls PredictionHead.load() as we don't want to use
    the superclass's load method.

    :param config_file: location where corresponding config is stored
    :type config_file: str
    :return: PredictionHead
    :rtype: PredictionHead[T]
    """
    return PredictionHead.load(config_file)

def load(cls, load_dir, device, strict=False, lm1_name="lm1", lm2_name="lm2", processor=None):
    """
    Loads a BiAdaptiveModel from a directory. The directory must contain:

    * directory "lm1_name" with the following files:
        -> language_model.bin
        -> language_model_config.json
    * directory "lm2_name" with the following files:
        -> language_model.bin
        -> language_model_config.json
    * prediction_head_X.bin  multiple PH possible
    * prediction_head_X_config.json
    * processor_config.json config for transforming input
    * vocab.txt vocab file for language model, turning text to Wordpiece Tokens
    * special_tokens_map.json

    :param load_dir: location where adaptive model is stored
    :type load_dir: Path
    :param device: to which device we want to send the model, either cpu or cuda
    :type device: torch.device
    :param lm1_name: the name to assign to the first loaded language model (for encoding queries)
    :type lm1_name: str
    :param lm2_name: the name to assign to the second loaded language model (for encoding context/passages)
    :type lm2_name: str
    :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    :param processor: populates prediction head with information coming from tasks
    :type processor: Processor
    """
    # Language Models
    if lm1_name:
        language_model1 = LanguageModel.load(os.path.join(load_dir, lm1_name))
    else:
        language_model1 = LanguageModel.load(load_dir)
    if lm2_name:
        language_model2 = LanguageModel.load(os.path.join(load_dir, lm2_name))
    else:
        language_model2 = LanguageModel.load(load_dir)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        head = PredictionHead.load(config_file, strict=False, load_weights=False)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    model = cls(language_model1, language_model2, prediction_heads, 0.1, device)
    if processor:
        model.connect_heads_with_processor(processor.tasks)

    return model

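# --- Usage sketch (not part of the source) ---
# A minimal, hypothetical example of calling BiAdaptiveModel.load() above, e.g. for a
# DPR-style retriever with separate query and passage encoders. The save directory and
# the sub-directory names passed as lm1_name/lm2_name are placeholder assumptions.
from pathlib import Path
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bi_model = BiAdaptiveModel.load(
    load_dir=Path("saved_models/dpr"),
    device=device,
    lm1_name="query_encoder",
    lm2_name="passage_encoder",
)
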
def load(cls, load_dir, device, strict=True, lm_name=None, processor=None):
    """
    Loads an AdaptiveModel from a directory. The directory must contain:

    * language_model.bin
    * language_model_config.json
    * prediction_head_X.bin  multiple PH possible
    * prediction_head_X_config.json
    * processor_config.json config for transforming input
    * vocab.txt vocab file for language model, turning text to Wordpiece Tokens

    :param load_dir: location where adaptive model is stored
    :type load_dir: Path
    :param device: to which device we want to send the model, either cpu or cuda
    :type device: torch.device
    :param lm_name: the name to assign to the loaded language model
    :type lm_name: str
    :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    :param processor: populates prediction head with information coming from tasks
    :type processor: Processor
    """
    # Language Model
    if lm_name:
        language_model = LanguageModel.load(load_dir, farm_lm_name=lm_name)
    else:
        language_model = LanguageModel.load(load_dir)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        head = PredictionHead.load(config_file, strict=strict)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    model = cls(language_model, prediction_heads, 0.1, ph_output_type, device)
    if processor:
        model.connect_heads_with_processor(processor.tasks)

    return model

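# --- Usage sketch (not part of the source) ---
# A minimal, hypothetical example of calling the AdaptiveModel.load() classmethod above.
# The save directory is a placeholder; passing a processor is optional and only needed to
# re-connect the prediction heads with the processor's tasks.
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
adaptive_model = AdaptiveModel.load(
    load_dir="saved_models/my_model",
    device=device,
    strict=False,  # tolerate heads saved with older FARM versions
)
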
def load_prediction_heads(args, silo):
    if args.recycle_heads:
        args.logger.info("Recycling heads of the loaded model")
        # Model name should be a directory in this case
        _, ph_configs = CustomAdaptiveModel._get_prediction_head_files(args.model_name)
        prediction_heads = [
            PredictionHead.load(config_file) for config_file in ph_configs
        ]
        # Ensure that label_columns order is the same as respective prediction heads (ascending),
        # else this will misalign heads with tasks.
        for idx in range(len(prediction_heads)):
            args.logger.info(
                f"Renaming head task {prediction_heads[idx].task_name} to {args.label_columns[idx]}"
            )
            prediction_heads[idx].task_name = args.label_columns[idx]
        out_types = [head.ph_output_type for head in prediction_heads]
    elif args.train_mode == "classification":
        prediction_heads = [
            TextClassificationHead(
                layer_dims=[args.heads_dim, len(get_labels(args.data_dir, task))],
                task_name=task,
            )
            for task in args.label_columns
        ]
        out_types = ["per_sequence" for _ in args.label_columns]
    else:  # Regression from raw heads
        if args.do_feat_embeds:
            args.logger.info(f"feat_size: {args.feat_size}")
            prediction_heads = [
                FeaturesRegressionHead(
                    layer_dims=[args.heads_dim + args.feat_size, 1], task_name=task
                )
                for task in args.label_columns
            ]
        else:
            prediction_heads = [
                RegressionHead(layer_dims=[args.heads_dim, 1], task_name=task)
                for task in args.label_columns
            ]
        out_types = ["per_sequence_continuous" for _ in args.label_columns]
    return prediction_heads, out_types

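# --- Usage sketch (not part of the source) ---
# A minimal, hypothetical call of load_prediction_heads() above. `args` is a stand-in for
# the namespace the function expects; only the attributes read on the classification path
# are sketched, all values are placeholders, and that path also relies on the external
# get_labels() helper being importable.
from types import SimpleNamespace
import logging

args = SimpleNamespace(
    recycle_heads=False,
    train_mode="classification",
    label_columns=["sentiment"],
    heads_dim=768,
    data_dir="data/",
    logger=logging.getLogger(__name__),
)
prediction_heads, out_types = load_prediction_heads(args, silo=None)
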
def get_adaptive_model(
    lm_output_type,
    prediction_heads,
    layer_dims,
    model,
    device,
    embeds_dropout_prob,
    local_rank,
    n_gpu,
    fp16=False,
    class_weights=None,
):
    parsed_lm_output_types = lm_output_type.split(",")

    initialized_heads = []
    for head_name in prediction_heads.split(","):
        initialized_heads.append(
            PredictionHead.create(
                prediction_head_name=head_name,
                layer_dims=layer_dims,
                class_weights=class_weights,
            )
        )

    language_model = LanguageModel.load(model)

    # TODO where are balanced class weights?
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=initialized_heads,
        embeds_dropout_prob=embeds_dropout_prob,
        lm_output_types=parsed_lm_output_types,
        device=device,
    )

    if fp16:
        model.half()

    if local_rank > -1:
        model = WrappedDDP(model)
    elif n_gpu > 1:
        model = WrappedDataParallel(model)

    return model

def load(cls, load_dir, device, strict=True, lm_name=None, processor=None, **kwargs):
    """
    Allows passing custom kwargs to the language model and a custom loss_aggregation_fn,
    so the same training setup can be kept after reloading.
    """
    # Language Model
    if lm_name:
        language_model = CustomLanguageModel.load(load_dir, farm_lm_name=lm_name, **kwargs)
    else:
        language_model = CustomLanguageModel.load(load_dir, **kwargs)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        head = PredictionHead.load(config_file, strict=strict)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    loss_aggregation_fn = kwargs.get("loss_aggregation_fn", None)
    model = cls(
        language_model,
        prediction_heads,
        0.1,
        ph_output_type,
        device,
        loss_aggregation_fn=loss_aggregation_fn,
        custom_pooling_strategy=language_model.pooling_strategy,
    )
    if processor:
        model.connect_heads_with_processor(processor.tasks)

    return model

def load(cls, load_dir, device, strict=True):
    """
    Loads an AdaptiveModel from a directory. The directory must contain:

    * language_model.bin
    * language_model_config.json
    * prediction_head_X.bin  multiple PH possible
    * prediction_head_X_config.json
    * processor_config.json config for transforming input
    * vocab.txt vocab file for language model, turning text to Wordpiece Tokens

    :param load_dir: location where adaptive model is stored
    :type load_dir: str
    :param device: to which device we want to send the model, either cpu or cuda
    :type device: torch.device
    :param strict: whether to strictly enforce that the keys loaded from saved model match the ones in
                   the PredictionHead (see torch.nn.module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older version of FARM.
    :type strict: bool
    """
    # Language Model
    language_model = LanguageModel.load(load_dir)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        head = PredictionHead.load(config_file, strict=strict)
        # # set shared weights between LM and PH
        # if type(head) == BertLMHead:
        #     head.set_shared_weights(language_model)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    return cls(language_model, prediction_heads, 0.1, ph_output_type, device)