Example #1
def generate_onnx_model(model_name: str, filename: str, seq_len: int,
                        batch_size: int, backend: str):
    import transformers
    import torch
    import os

    test_device = torch.device('cuda:0') if backend == "GPU" else torch.device(
        'cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)
    with open(filename, 'wb') as outf:
        torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        outf.flush()
    return cfg.vocab_size
Example #2
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    num_threads: int):
    import torch
    import transformers
    import contexttimer
    import benchmark_helper
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)
    benchmark_helper.run_model(lambda: model(input_ids), False, n, batch_size,
                               seq_len, "torch", num_threads)
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool,
                                 enable_mem_opt: bool):
    import torch
    import transformers
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model, backend="turbo")
    elif model_name == "albert":
        cfg = transformers.AlbertConfig(hidden_size=768,
                                        num_attention_heads=12,
                                        intermediate_size=3072)
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.DistilBertModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("model-aware")
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg, enable_mem_opt, model_name)
        if enable_mem_opt:
            turbo_transformers.reset_allocator_schema("naive")
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads,
                                   enable_mem_opt, model_name)
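benchmark_helper.run_variable_model is likewise not shown. Based on its arguments, it presumably times the model over randomly drawn sequence lengths between min_seq_len and max_seq_len; the sketch below is an assumption of that behavior, not the real implementation.

# Hypothetical sketch of benchmark_helper.run_variable_model; the signature and
# behavior are assumptions based on the call sites, not the real helper.
import json
import random
import torch
import contexttimer

def run_variable_model(model, use_gpu, n, max_seq_len, min_seq_len, framework,
                       num_threads, cfg, enable_mem_opt=False, model_name="bert"):
    device = torch.device('cuda:0') if use_gpu else torch.device('cpu')
    with contexttimer.Timer() as t:
        for _ in range(n):
            seq_len = random.randint(min_seq_len, max_seq_len)
            input_ids = torch.randint(low=0,
                                      high=cfg.vocab_size - 1,
                                      size=(1, seq_len),
                                      dtype=torch.long,
                                      device=device)
            model(input_ids)
        if use_gpu:
            torch.cuda.synchronize()
    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "min_seq_len": min_seq_len,
            "max_seq_len": max_seq_len,
            "framework": framework,
            "n_threads": num_threads,
            "model_name": model_name
        }))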
def generate_onnx_model(model_name: str,
                        use_gpu: bool,
                        filename: str,
                        seq_len: int,
                        batch_size: int,
                        backend: str,
                        use_dynamic_axes: bool = False):
    import transformers
    import torch
    import os

    test_device = torch.device(
        'cuda:0') if backend == "GPU" and use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)

    if model_name == "bert":
        # `checkonnxrest` is assumed to be a module-level flag; when it is set,
        # a real pretrained model is loaded so the ONNX output can be checked for correctness.
        if checkonnxrest:
            model = transformers.BertModel.from_pretrained("bert-base-uncased")
        else:
            cfg = transformers.BertConfig()
            model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")

    model.eval()
    model.to(test_device)

    cfg = model.config  # type: transformers.BertConfig
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    with open(filename, 'wb') as outf:
        if not use_dynamic_axes:
            torch.onnx.export(model=model, args=(input_ids, ), f=outf)
        else:
            torch.onnx.export(model=model,
                              args=(input_ids, ),
                              f=outf,
                              input_names=['input'],
                              output_names=['output'],
                              dynamic_axes={
                                  'input': [0, 1],
                                  'output': [0, 1]
                              })
        # If you do not need onnxruntime to support variable batch sizes and
        # sequence lengths, you can leave the `dynamic_axes` parameter unset.
        # For some models you may have to pass `opset_version=12`.
        outf.flush()
    return cfg.vocab_size, cfg
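A minimal sketch of how the exported file might be consumed: with use_dynamic_axes=True the graph input is named 'input', so a single onnxruntime session can be fed varying batch sizes and sequence lengths. The file name and shapes below are illustrative assumptions.

# Illustrative only; "bert.onnx" and the shapes are assumptions.
import numpy as np
import onnxruntime

vocab_size, cfg = generate_onnx_model("bert", use_gpu=False, filename="bert.onnx",
                                      seq_len=128, batch_size=4, backend="CPU",
                                      use_dynamic_axes=True)
session = onnxruntime.InferenceSession("bert.onnx",
                                       providers=["CPUExecutionProvider"])
for batch_size, seq_len in [(1, 32), (2, 64), (4, 128)]:
    input_ids = np.random.randint(0, vocab_size - 1, size=(batch_size, seq_len),
                                  dtype=np.int64)
    outputs = session.run(None, {"input": input_ids})
    print(batch_size, seq_len, [o.shape for o in outputs])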
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int, enable_random: bool,
                                 max_seq_len: int, min_seq_len: int,
                                 num_threads: int, use_gpu: bool):
    import torch
    import transformers
    import contexttimer
    import turbo_transformers
    import benchmark_helper
    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    if use_gpu:
        print("using GPU")
    else:
        print("using CPU")
    cfg = None
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    turbo_transformers.set_num_threads(num_threads)
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "turbo", num_threads,
                                            cfg)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)

        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "turbo", num_threads)
Example #6
    def __init__(self,
                 dataPath: str = None,
                 pipeLine=None,
                 loadFunction=None,
                 modelName: str = "roberta"):
        self.configuration = transformers.RobertaConfig()
        if pipeLine is None:
            self.pipeLine = getDefaultTokenizer(loadFunction=loadFunction)
        elif isinstance(pipeLine, dict):
            self.pipeLine = getTransformersTokenizer(pipeLine['modelName'],
                                                     loadFunction)
        else:
            self.pipeLine = pipeLine
        self._registeredMetrics = []
        self._modelName = modelName
Example #7
def benchmark_torch_jit(model_name: str, seq_len: int, batch_size: int, n: int,
                        enable_random: bool, max_seq_len: int,
                        min_seq_len: int, num_threads: int, use_gpu: bool,
                        enable_mem_opt: bool):
    import json
    import torch
    import torch.jit
    import transformers
    import contexttimer
    torch.set_num_threads(num_threads)
    torch.set_grad_enabled(False)
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long)

    model = torch.jit.trace(model, (input_ids, ))

    with torch.jit.optimized_execution(True):
        model(input_ids)
        with contexttimer.Timer() as t:
            for _ in range(n):
                model(input_ids)

    print(
        json.dumps({
            "QPS": n / t.elapsed,
            "elapsed": t.elapsed,
            "n": n,
            "batch_size": batch_size,
            "seq_len": seq_len,
            "framework": "torch_jit",
            "n_threads": num_threads,
            "model_name": model_name
        }))
def benchmark_turbo_transformers(model_name: str, seq_len: int,
                                 batch_size: int, n: int):
    import torch
    import transformers
    import contexttimer
    import turbo_transformers
    import benchmark_helper

    if not torch.cuda.is_available():
        print("cuda is not available for torch")
        return
    test_device = torch.device('cuda:0')

    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.BertModel.from_torch(model)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.AlbertModel.from_torch(model)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
        model.to(test_device)
        model.eval()
        model = turbo_transformers.RobertaModel.from_torch(model)
    else:
        raise (f"benchmark does not support {model_name}")

    # cfg was already set from the transformers config above.
    input_ids = torch.randint(low=0,
                              high=cfg.vocab_size - 1,
                              size=(batch_size, seq_len),
                              dtype=torch.long,
                              device=test_device)

    benchmark_helper.run_model(lambda: model(input_ids), True, n, batch_size,
                               seq_len, "turbo")
def benchmark_torch(model_name: str, seq_len: int, batch_size: int, n: int,
                    enable_random: bool, max_seq_len: int, min_seq_len: int,
                    num_threads: int, use_gpu: bool, enable_mem_opt: bool):
    import torch
    import transformers
    import benchmark_helper

    test_device = torch.device('cuda:0') if use_gpu else torch.device('cpu:0')
    torch.set_grad_enabled(False)
    torch.set_num_threads(num_threads)

    cfg = None
    if model_name == "bert":
        cfg = transformers.BertConfig()
        model = transformers.BertModel(cfg)
    elif model_name == "albert":
        cfg = transformers.AlbertConfig()
        model = transformers.AlbertModel(cfg)
    elif model_name == "roberta":
        cfg = transformers.RobertaConfig()
        model = transformers.RobertaModel(cfg)
    elif model_name == "distilbert":
        cfg = transformers.DistilBertConfig()
        model = transformers.DistilBertModel(cfg)
    else:
        raise (f"benchmark does not support {model_name}")
    model.eval()
    model.to(test_device)

    # cfg = model.config  # type: transformers.BertConfig
    if enable_random:
        benchmark_helper.run_variable_model(model, use_gpu, n, max_seq_len,
                                            min_seq_len, "torch", num_threads,
                                            cfg, enable_mem_opt, model_name)
    else:
        input_ids = torch.randint(low=0,
                                  high=cfg.vocab_size - 1,
                                  size=(batch_size, seq_len),
                                  dtype=torch.long,
                                  device=test_device)
        benchmark_helper.run_model(lambda: model(input_ids), use_gpu, n,
                                   batch_size, seq_len, "torch", num_threads,
                                   enable_mem_opt, model_name)
Example #10
def trainModelForBatches(tweets, scores, batchStart, batchEnd, task):
    # Assumes module-level imports: pandas as pd, numpy as np, torch, gc,
    # and `ppb` as an alias for the transformers package.
    print('len of tweets to consider at the moment:', len(tweets),
          'len of scores', len(scores))
    df = pd.DataFrame(list(zip(tweets, scores)))
    # RAM notes from earlier runs: batches of up to ~80,000 tweets fit in memory
    # (r roughly 0.65-0.67); 90,000 and 100,000 did not fit.
    print(df.head())
    print('********************************************************')
    # Label-encoding the subtask columns with LabelEncoder.fit_transform via
    # DataFrame.apply did not work here, hence the manual encoding noted below.
    print(df.shape)

    # Manual label encoding (left commented out in the original):
    #   subtask_a: 'OFF' -> 1, otherwise 0  -> df['LabelEncodedTaskA']
    #   subtask_b: 'TIN' -> 1, otherwise 0  -> df['LabelEncodedTaskB']
    # For a DistilBERT baseline one would initialize transformers.DistilBertConfig()
    # and DistilBertModel(configuration); see the transformers documentation for the
    # configuration parameters (vocab_size, max_position_embeddings, n_layers, n_heads,
    # dim, hidden_dim, dropout, attention_dropout, activation, initializer_range, ...).
    config = ppb.RobertaConfig()
    # Initializing RoBERTa
    model_class, tokenizer_class, pretrained_weights = (ppb.RobertaModel,
                                                        ppb.RobertaTokenizer,
                                                        'roberta-base')

    ## Want BERT instead of RoBERTa? Uncomment the following line:
    # model_class, tokenizer_class, pretrained_weights = (ppb.BertModel, ppb.BertTokenizer, 'bert-base-uncased')

    # Load pretrained model/tokenizer
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights)
    # An alternative sketched in the original used the finetune library's Classifier API:
    #   model = Classifier(base_model=RoBERTa)
    #   model.fit(trainX, trainY); predictions = model.predict(testX)
    #   probs = model.predict_proba(testX); model.save(path)
    # model = torch.nn.parallel.DistributedDataParallel(model)
    # model.to(device)

    tokenized = df[0].apply(
        (lambda x: tokenizer.encode(x, add_special_tokens=True)))

    print(tokenized)

    # Pad every sequence to the length of the longest one and build the attention mask.
    max_len = 0
    for i in tokenized.values:
        if len(i) > max_len:
            max_len = len(i)

    padded = np.array([i + [0] * (max_len - len(i)) for i in tokenized.values])
    print(padded)

    attention_mask = np.where(padded != 0, 1, 0)

    input_ids = torch.tensor(padded)
    attention_mask = torch.tensor(attention_mask)

    # input_ids = input_ids.to(device)
    # attention_mask = attention_mask.to(device)

    print(attention_mask.shape)
    print('att Mask', attention_mask)

    print("\n * input_ids_tensor \n ")
    print(input_ids)
    print(input_ids.device)

    print("\n * input_mask_tensor \n ")
    print(attention_mask)
    print(attention_mask.device)

    with torch.no_grad():
        last_hidden_states = model(input_ids, attention_mask=attention_mask)

    # Take the output at the first position ([CLS] token) of every sequence, keeping
    # all hidden units, as the classification features. The tensor must be moved to
    # the CPU with .cpu() before .numpy() when it lives on the GPU.
    features = last_hidden_states[0][:, 0, :].cpu().numpy()
    dfFeaturesSave = pd.DataFrame(zip(features, df[1]))
    dfFeaturesSave.to_csv(
        f'./FeaturesCompilation/FeaturesAndLabelsForTask{task}BatchStart{batchStart}BatchEnd{batchEnd}.csv'
    )
    # Free memory before the next batch.
    del dfFeaturesSave
    del tokenizer
    del tokenized
    del padded
    del attention_mask
    del input_ids
    del last_hidden_states
    gc.collect()
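A hypothetical driver loop for the function above, suggested by the chunking comments at the top of its body; allTweets, allScores, and the batch size are assumptions chosen to stay within the RAM limits noted in the function.

# Hypothetical usage; allTweets/allScores are assumed to be lists of equal length.
batch_size = 20000
for start in range(0, len(allTweets), batch_size):
    end = min(start + batch_size, len(allTweets))
    trainModelForBatches(allTweets[start:end], allScores[start:end],
                         batchStart=start, batchEnd=end, task='A')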