def __init__(
    self,
    vocab_path=None,
    model_paths=None,
    weigths=None,
    max_len=50,
    min_len=3,
    lowercase_tokens=False,
    log=False,
    iterations=3,
    model_name='roberta',
    special_tokens_fix=1,
    is_ensemble=True,
    min_error_probability=0.0,
    confidence=0,
    del_confidence=0,
    resolve_cycles=False,
):
    """Load one Seq2Labels model per checkpoint and store inference settings.

    Args:
        vocab_path: Passed to ``Vocabulary.from_files`` to build ``self.vocab``.
        model_paths: Checkpoint paths; one indexer and one model are created
            for each entry, in order.
        weigths: Optional per-model weights, converted to ``float``. When
            falsy, every model gets weight ``1``. (The spelling is part of
            the public signature and is therefore kept.)
        max_len, min_len, log, iterations, min_error_probability,
        resolve_cycles: Stored on the instance unchanged.
        lowercase_tokens: Stored on the instance and forwarded to
            ``get_weights_name``.
        model_name, special_tokens_fix: Used as given when ``is_ensemble`` is
            false; otherwise replaced per checkpoint by the result of
            ``self._get_model_data``.
        is_ensemble: Whether to derive the model name and special-token
            setting from each checkpoint path.
        confidence, del_confidence: Stored on the instance and forwarded to
            ``Seq2Labels``.
    """
    if weigths:
        self.model_weights = [float(weight) for weight in weigths]
    else:
        self.model_weights = [1] * len(model_paths)

    has_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if has_cuda else "cpu")

    self.max_len = max_len
    self.min_len = min_len
    self.lowercase_tokens = lowercase_tokens
    self.min_error_probability = min_error_probability
    self.vocab = Vocabulary.from_files(vocab_path)
    self.log = log
    self.iterations = iterations
    self.confidence = confidence
    self.del_conf = del_confidence
    self.resolve_cycles = resolve_cycles

    # Without CUDA the checkpoint tensors have to be remapped onto the CPU;
    # with CUDA the checkpoint is loaded with torch.load's defaults.
    load_options = {} if has_cuda else {'map_location': torch.device('cpu')}

    self.indexers = []
    self.models = []
    for checkpoint_path in model_paths:
        if is_ensemble:
            model_name, special_tokens_fix = self._get_model_data(
                checkpoint_path)
        weights_name = get_weights_name(model_name, lowercase_tokens)

        indexer = self._get_indexer(weights_name, special_tokens_fix)
        self.indexers.append(indexer)

        embedder = self._get_embbeder(weights_name, special_tokens_fix)
        network = Seq2Labels(
            vocab=self.vocab,
            text_field_embedder=embedder,
            confidence=self.confidence,
            del_confidence=self.del_conf,
        ).to(self.device)

        state = torch.load(checkpoint_path, **load_options)
        # strict=False: key mismatches between checkpoint and model are
        # tolerated rather than raised.
        network.load_state_dict(state, strict=False)

        network.eval()
        self.models.append(network)
def get_model(model_name, vocab, tune_bert=False,
              predictor_dropout=0,
              label_smoothing=0.0,
              confidence=0,
              special_tokens_fix=0):
    """Build a ``Seq2Labels`` model on top of the embedders for ``model_name``.

    Args:
        model_name: Forwarded to ``get_token_embedders``.
        vocab: Vocabulary handed to ``Seq2Labels``.
        tune_bert: Forwarded to ``get_token_embedders``.
        predictor_dropout: Forwarded to ``Seq2Labels``.
        label_smoothing: Forwarded to ``Seq2Labels``.
        confidence: Forwarded to ``Seq2Labels``.
        special_tokens_fix: Forwarded to ``get_token_embedders``.

    Returns:
        The newly constructed ``Seq2Labels`` instance.
    """
    embedder = get_token_embedders(
        model_name,
        tune_bert=tune_bert,
        special_tokens_fix=special_tokens_fix,
    )
    # Seq2Labels is the project's own network that follows the embedder and
    # produces the labels.
    return Seq2Labels(
        vocab=vocab,
        text_field_embedder=embedder,
        predictor_dropout=predictor_dropout,
        label_smoothing=label_smoothing,
        confidence=confidence,
    )
def get_model(model_name, vocab, tune_bert=False,
              predictor_dropout=0,
              label_smoothing=0.0,
              confidence=0,
              special_tokens_fix=0):
    """Build a ``Seq2Labels`` model on top of the embedders for ``model_name``.

    Args:
        model_name: Forwarded to ``get_token_embedders``.
        vocab: Vocabulary handed to ``Seq2Labels``.
        tune_bert: Forwarded to ``get_token_embedders``.
        predictor_dropout: Forwarded to ``Seq2Labels``.
        label_smoothing: Forwarded to ``Seq2Labels``.
        confidence: Forwarded to ``Seq2Labels``.
        special_tokens_fix: Forwarded to ``get_token_embedders``.

    Returns:
        The newly constructed ``Seq2Labels`` instance.
    """
    # Author's note: strictly speaking the old xlnet weights would not have
    # to be loaded here, but this is left unchanged for now to avoid
    # introducing bugs.
    embedder = get_token_embedders(
        model_name,
        tune_bert=tune_bert,
        special_tokens_fix=special_tokens_fix,
    )
    head_options = {
        'predictor_dropout': predictor_dropout,
        'label_smoothing': label_smoothing,
        'confidence': confidence,
    }
    # Seq2Labels is the project's own network that follows the embedder and
    # produces the labels.
    # Author's note: its parameter shapes need to be changed while keeping
    # the previously learned parameters.
    return Seq2Labels(vocab=vocab, text_field_embedder=embedder, **head_options)
def __init__(
    self,
    vocab_path=None,
    model_paths=None,
    weigths=None,
    max_len=50,
    min_len=3,
    lowercase_tokens=False,
    log=False,
    iterations=3,
    min_probability=0.0,
    model_name='roberta',
    special_tokens_fix=1,
    is_ensemble=True,
    min_error_probability=0.0,
    confidence=0,
    resolve_cycles=False,
    prune_amount=0.,
    num_layers_to_keep=12,
):
    """Load, truncate and prune one Seq2Labels model per checkpoint.

    For every checkpoint the full model is built and its weights loaded,
    then only the first ``num_layers_to_keep`` encoder layers are kept, and
    finally the ``weight`` of every ``torch.nn.Linear`` module is pruned
    with L1 unstructured pruning. Diagnostic information (parameter count,
    model dumps, serialized size) is printed along the way.

    Args:
        vocab_path: Passed to ``Vocabulary.from_files`` to build ``self.vocab``.
        model_paths: Checkpoint paths; one indexer and one model are created
            for each entry, in order.
        weigths: Optional per-model weights, converted to ``float``. When
            falsy, every model gets weight ``1``. (The spelling is part of
            the public signature and is therefore kept.)
        max_len, min_len, log, iterations, min_probability,
        min_error_probability, resolve_cycles: Stored on the instance
            unchanged.
        lowercase_tokens: Stored on the instance and forwarded to
            ``get_weights_name``.
        model_name, special_tokens_fix: Used as given when ``is_ensemble`` is
            false; otherwise replaced per checkpoint by the result of
            ``self._get_model_data``.
        is_ensemble: Whether to derive the model name and special-token
            setting from each checkpoint path.
        confidence: Stored on the instance and forwarded to ``Seq2Labels``.
        prune_amount: ``amount`` argument given to ``prune.l1_unstructured``
            for every linear layer.
        num_layers_to_keep: Number of leading encoder layers to retain.

    Raises:
        IndexError: If ``num_layers_to_keep`` exceeds the number of encoder
            layers in the loaded model.
    """
    # Function-scope import: only the size report below needs it.
    import tempfile

    def _report_model_size(network):
        """Print the size in MB of ``network``'s serialized state dict."""
        # A private scratch directory avoids clobbering a user's "temp.p",
        # avoids collisions between concurrent processes, and is removed
        # even when saving fails.
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_path = os.path.join(scratch_dir, "temp.p")
            torch.save(network.state_dict(), scratch_path)
            print('Size (MB):', os.path.getsize(scratch_path) / 1e6)

    def _keep_first_encoder_layers(network, keep):
        """Return a deep copy of ``network`` with only its first ``keep`` encoder layers."""
        truncated = copy.deepcopy(network)
        encoder = truncated.text_field_embedder.token_embedder_bert.bert_model.encoder
        # Take the layers from the copy itself so the result shares no
        # modules with ``network``; later in-place pruning must not leak
        # back into the model it was copied from.
        encoder.layer = nn.ModuleList(
            [encoder.layer[index] for index in range(keep)])
        return truncated

    self.model_weights = list(map(
        float, weigths)) if weigths else [1] * len(model_paths)
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    self.max_len = max_len
    self.min_len = min_len
    self.lowercase_tokens = lowercase_tokens
    self.min_probability = min_probability
    self.min_error_probability = min_error_probability
    self.vocab = Vocabulary.from_files(vocab_path)
    self.log = log
    self.iterations = iterations
    self.confidence = confidence
    self.resolve_cycles = resolve_cycles

    self.indexers = []
    self.models = []
    for model_path in model_paths:
        if is_ensemble:
            model_name, special_tokens_fix = self._get_model_data(
                model_path)
        weights_name = get_weights_name(model_name, lowercase_tokens)
        self.indexers.append(
            self._get_indexer(weights_name, special_tokens_fix))
        model = Seq2Labels(
            vocab=self.vocab,
            text_field_embedder=self._get_embbeder(weights_name,
                                                   special_tokens_fix),
            confidence=self.confidence).to(self.device)

        # Diagnostics: parameter count and model type.
        pytorch_total_params = sum(p.numel() for p in model.parameters())
        print('total params:', pytorch_total_params)
        print('type:', type(model))

        # Load the full checkpoint before any layers are removed.
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(
                torch.load(model_path, map_location=torch.device('cpu')))

        _report_model_size(model)
        print('type:', type(model))

        # Drop the top encoder layers.
        print('before model 12:', model)
        model = _keep_first_encoder_layers(model, num_layers_to_keep)
        print('after', num_layers_to_keep, ' :', model)
        _report_model_size(model)

        # Prune every linear layer, then make the pruning permanent so the
        # module is left with a plain ``weight`` parameter again.
        _report_model_size(model)
        for module in model.modules():
            if isinstance(module, torch.nn.Linear):
                prune.l1_unstructured(module, name='weight',
                                      amount=prune_amount)
                prune.remove(module, name='weight')
        print('About to return')
        _report_model_size(model)

        model.eval()
        self.models.append(model)
def __init__(
    self,
    vocab_path=None,
    model_paths=None,
    weigths=None,
    max_len=50,
    min_len=3,
    lowercase_tokens=False,
    log=False,
    iterations=3,
    min_probability=0.0,
    model_name='roberta',
    special_tokens_fix=1,
    is_ensemble=True,
    min_error_probability=0.0,
    confidence=0,
    resolve_cycles=False,
    prune_amount=0.,
    num_layers_to_keep=12,
):
    """Build one truncated Seq2Labels model per checkpoint and load its weights.

    For every checkpoint the model is built, reduced to its first
    ``num_layers_to_keep`` encoder layers, and only then populated from the
    checkpoint with the default strict ``load_state_dict``.
    NOTE(review): this ordering presumably expects checkpoints that were
    saved from an already truncated model - confirm against the training
    code. Diagnostic information (parameter count, model dump, serialized
    size) is printed along the way.

    Args:
        vocab_path: Passed to ``Vocabulary.from_files`` to build ``self.vocab``.
        model_paths: Checkpoint paths; one indexer and one model are created
            for each entry, in order.
        weigths: Optional per-model weights, converted to ``float``. When
            falsy, every model gets weight ``1``. (The spelling is part of
            the public signature and is therefore kept.)
        max_len, min_len, log, iterations, min_probability,
        min_error_probability, resolve_cycles: Stored on the instance
            unchanged.
        lowercase_tokens: Stored on the instance and forwarded to
            ``get_weights_name``.
        model_name, special_tokens_fix: Used as given when ``is_ensemble`` is
            false; otherwise replaced per checkpoint by the result of
            ``self._get_model_data``.
        is_ensemble: Whether to derive the model name and special-token
            setting from each checkpoint path.
        confidence: Stored on the instance and forwarded to ``Seq2Labels``.
        prune_amount: Accepted for interface compatibility; not used by
            this constructor.
        num_layers_to_keep: Number of leading encoder layers to retain.

    Raises:
        IndexError: If ``num_layers_to_keep`` exceeds the number of encoder
            layers in the freshly built model.
    """
    # Function-scope import: only the size report below needs it.
    import tempfile

    def _report_model_size(network):
        """Print the size in MB of ``network``'s serialized state dict."""
        # A private scratch directory avoids clobbering a user's "temp.p",
        # avoids collisions between concurrent processes, and is removed
        # even when saving fails.
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_path = os.path.join(scratch_dir, "temp.p")
            torch.save(network.state_dict(), scratch_path)
            print('Size (MB):', os.path.getsize(scratch_path) / 1e6)

    def _keep_first_encoder_layers(network, keep):
        """Return a deep copy of ``network`` with only its first ``keep`` encoder layers."""
        truncated = copy.deepcopy(network)
        encoder = truncated.text_field_embedder.token_embedder_bert.bert_model.encoder
        # Take the layers from the copy itself so the result shares no
        # modules with ``network``; loading a checkpoint into the copy must
        # not overwrite weights of the model it was copied from.
        encoder.layer = nn.ModuleList(
            [encoder.layer[index] for index in range(keep)])
        return truncated

    self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.max_len = max_len
    self.min_len = min_len
    self.lowercase_tokens = lowercase_tokens
    self.min_probability = min_probability
    self.min_error_probability = min_error_probability
    self.vocab = Vocabulary.from_files(vocab_path)
    self.log = log
    self.iterations = iterations
    self.confidence = confidence
    self.resolve_cycles = resolve_cycles

    self.indexers = []
    self.models = []
    for model_path in model_paths:
        if is_ensemble:
            model_name, special_tokens_fix = self._get_model_data(model_path)
        weights_name = get_weights_name(model_name, lowercase_tokens)
        self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))
        model = Seq2Labels(vocab=self.vocab,
                           text_field_embedder=self._get_embbeder(weights_name, special_tokens_fix),
                           confidence=self.confidence
                           ).to(self.device)

        # Diagnostics: parameter count and model type.
        pytorch_total_params = sum(p.numel() for p in model.parameters())
        print('total params:', pytorch_total_params)
        print('type:', type(model))

        # Drop the top encoder layers before the checkpoint is loaded.
        print('before model 12:')
        _report_model_size(model)
        model = _keep_first_encoder_layers(model, num_layers_to_keep)
        print('after', num_layers_to_keep, ':', model)
        _report_model_size(model)

        # Strict loading: the checkpoint keys must match the truncated model.
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

        _report_model_size(model)
        print('type:', type(model))

        model.eval()
        self.models.append(model)