def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    import transformers
    import torch

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    import torch
    import transformers
    import benchmark_helper

    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    cfg = None
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model, backend="turbo")
    elif model_name == "albert":
        cfg = transformers.AlbertConfig(hidden_size=768,
                                        num_attention_heads=12,
                                        intermediate_size=3072)
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.DistilBertModel.from_torch(model)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)

    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    import transformers
    import torch

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        # Use a real pretrained model to check the correctness of the
        # onnxruntime result; `checkonnxrest` is expected to be a module-level flag.
        if checkonnxrest:
            model = transformers.BertModel.from_pretrained("bert-base-uncased")
        else:
            cfg = transformers.BertConfig()
            model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            # `dynamic_axes` makes onnxruntime accept variable batch size and
            # sequence length. If that is not needed, unset the parameter.
            # For some models you may have to pass `opset_version=12`.
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        outf.flush()
    return cfg.vocab_size, cfg
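# A minimal sketch (an assumption, not part of the original benchmark) of how a
# model exported by generate_onnx_model(..., use_dynamic_axes=True) could be run
# with onnxruntime at a sequence length other than the one used for export.
# The input name 'input' matches the export call above; vocab_size is the first
# value returned by generate_onnx_model.
def run_onnx_example(filename: str, vocab_size: int, batch_size: int = 1,
                     seq_len: int = 64):
    import numpy as np
    import onnxruntime

    session = onnxruntime.InferenceSession(filename)
    # Random token ids with the same dtype (int64) the model was traced with.
    input_ids = np.random.randint(low=0,
                                  high=vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=np.int64)
    outputs = session.run(None, {'input': input_ids})
    # First output: last hidden states, shape (batch_size, seq_len, hidden_size).
    return outputs[0]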
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")

    cfg = None
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)

    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
def __init__(self,
             dataPath: str = None,
             pipeLine=None,
             loadFunction=None,
             modelName: str = "roberta"):
    self.configuration = transformers.RobertaConfig()
    if pipeLine is None:
        self.pipeLine = getDefaultTokenizer(loadFunction=loadFunction)
    elif isinstance(pipeLine, dict):
        self.pipeLine = getTransformersTokenizer(pipeLine['modelName'],
                                                 loadFunction)
    else:
        self.pipeLine = pipeLine
    self._registeredMetrics = []
    self._modelName = modelName
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int, n: int,
                        enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    import json
    import transformers
    import contexttimer
    import torch
    import torch.jit

    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        # Warm up once so the traced graph is optimized before timing.
        model(input_ids)
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return

    test_device = torch.device('cuda:0')

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    cfg = None
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
    else:
        raise ValueError(f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
def trainModelForBatches(tweets, scores, batchStart, batchEnd, task):
    # Extract [CLS] features for one batch of tweets with a RoBERTa encoder and
    # write them, together with their labels, to a CSV for later classifier
    # training. Relies on module-level imports/objects of the surrounding
    # script: pandas as pd, numpy as np, torch, gc, transformers as ppb, and
    # `device`.
    print('len of tweets to consider at the moment:', len(tweets),
          'len of scores:', len(scores))
    df = pd.DataFrame(list(zip(tweets, scores)))

    # Rough memory notes from earlier runs (batch size -> score, RAM status):
    #   2000 -> 0.27 ok; 20000-80000 -> ~0.65-0.67 ok; 90000/100000 -> out of RAM.
    print(df.head())
    print(df.shape)
    print('********************************************************')

    # (Earlier versions label-encoded the subtask_a/subtask_b columns here;
    # the labels now arrive pre-encoded in `scores`.)

    # DistilBertConfig defaults (vocab_size=30522, max_position_embeddings=512,
    # n_layers=6, n_heads=12, dim=768, hidden_dim=3072, dropout=0.1, ...) are
    # documented in the transformers library; a RoBERTa config is used instead.
    config = ppb.RobertaConfig()

    model_class, tokenizer_class, pretrained_weights = (ppb.RobertaModel,
                                                        ppb.RobertaTokenizer,
                                                        'roberta-base')
    # Want BERT instead of RoBERTa? Uncomment the following line:
    # model_class, tokenizer_class, pretrained_weights = (ppb.BertModel, ppb.BertTokenizer, 'bert-base-uncased')

    # Load pretrained tokenizer and encoder.
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights)
    # A finetune-library classifier was also tried here:
    # model = Classifier(base_model=RoBERTa)
    # model.fit(trainX, trainY); predictions = model.predict(testX)
    # model = torch.nn.parallel.DistributedDataParallel(model)
    # model.to(device)

    # Tokenize every tweet and pad all sequences to the longest one.
    tokenized = df[0].apply(
        (lambda x: tokenizer.encode(x, add_special_tokens=True)))
    print(tokenized)

    max_len = 0
    for i in tokenized.values:
        if len(i) > max_len:
            max_len = len(i)
    padded = np.array([i + [0] * (max_len - len(i)) for i in tokenized.values])
    print(padded)

    attention_mask = np.where(padded != 0, 1, 0)
    input_ids = torch.tensor(padded)
    attention_mask = torch.tensor(attention_mask)
    # input_ids = input_ids.to(device)
    # attention_mask = attention_mask.to(device)

    print(attention_mask.shape)
    print('att Mask', attention_mask)
    print("\n * input_ids_tensor \n ")
    print(input_ids)
    print(input_ids.device)
    print("\n * input_mask_tensor \n ")
    print(attention_mask)
    print(attention_mask.device)
    print("\n * self.device \n ")
    print(device)

    with torch.no_grad():
        last_hidden_states = model(input_ids, attention_mask=attention_mask)

    # Slice out the first position ([CLS] token) of every sequence for
    # classification purposes; move to CPU before converting to numpy.
    features = last_hidden_states[0][:, 0, :].cpu().numpy()

    dfFeaturesSave = pd.DataFrame(zip(features, df[1]))
    dfFeaturesSave.to_csv(
        f'./FeaturesCompilation/FeaturesAndLabelsForTask{task}BatchStart{batchStart}BatchEnd{batchEnd}.csv'
    )

    # Free memory before the next batch.
    del df
    del dfFeaturesSave
    # del model
    del tokenizer
    del tokenized
    del padded
    del attention_mask
    del input_ids
    del last_hidden_states
    gc.collect()
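# A minimal follow-up sketch (an assumption, not part of the original script):
# once [CLS] features and labels are available, e.g. the `features` array and
# `df[1]` built inside trainModelForBatches, a simple downstream classifier can
# be fit on them with scikit-learn.
def fit_feature_classifier(features, labels):
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    # Hold out 20% of the batch for a quick validation score.
    train_x, test_x, train_y, test_y = train_test_split(features,
                                                        labels,
                                                        test_size=0.2,
                                                        random_state=42)
    clf = LogisticRegression(max_iter=1000)
    clf.fit(train_x, train_y)
    print('validation accuracy:', clf.score(test_x, test_y))
    return clf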