nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   local_rank=args.local_rank,
                                   optimization_level=args.amp_opt_level,
                                   log_dir=args.work_dir,
                                   create_tb_writer=True,
                                   files_to_copy=[__file__],
                                   add_time_to_log_dir=True)

special_tokens = ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]']
data_desc = BERTPretrainingDataDesc(args.dataset_name,
                                    args.data_dir,
                                    args.vocab_size,
                                    args.sample_size,
                                    special_tokens,
                                    'train.txt')

if args.tokenizer == "sentence-piece":
    nf.logger.info("Using SentencePieceTokenizer.")
    tokenizer = nemo_nlp.SentencePieceTokenizer(
        model_path=data_desc.tokenizer_model)
    tokenizer.add_special_tokens(special_tokens)
elif args.tokenizer == "nemo-bert":
    nf.logger.info("Using NemoBertTokenizer.")
    vocab_file = os.path.join(args.data_dir, 'vocab.txt')
    # To train on a Chinese dataset, use NemoBertTokenizer
    tokenizer = nemo_nlp.NemoBertTokenizer(vocab_file=vocab_file)
else:
    raise ValueError("Please add your tokenizer"
                     " or use sentence-piece or nemo-bert.")

bert_model = nemo_nlp.huggingface.BERT(vocab_size=tokenizer.vocab_size,
                                       num_layers=args.num_layers,
                                       d_model=args.d_model,
                                       num_heads=args.num_heads,
                                       d_inner=args.d_inner,
                                       max_seq_length=args.max_sequence_length,
                                       hidden_act="gelu")
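# The snippets above and below read their hyperparameters from an args namespace
# that is not shown in this excerpt. A minimal, illustrative sketch of how such a
# namespace could be built with argparse follows; the flag names mirror the
# attribute accesses above, while the defaults are assumptions, not the values
# used in the original script.
import argparse

parser = argparse.ArgumentParser(description="BERT pretraining (illustrative argument sketch)")
parser.add_argument("--local_rank", type=int, default=None)
parser.add_argument("--amp_opt_level", default="O0", choices=["O0", "O1", "O2", "O3"])
parser.add_argument("--work_dir", default="outputs/bert_pretraining")
parser.add_argument("--dataset_name", default="wikitext-2")
parser.add_argument("--data_dir", default="data/lm/wikitext-2")
parser.add_argument("--vocab_size", type=int, default=3200)
parser.add_argument("--sample_size", type=int, default=10_000_000)
parser.add_argument("--tokenizer", default="sentence-piece",
                    choices=["sentence-piece", "nemo-bert"])
parser.add_argument("--num_layers", type=int, default=12)
parser.add_argument("--d_model", type=int, default=768)
parser.add_argument("--num_heads", type=int, default=12)
parser.add_argument("--d_inner", type=int, default=3072)
parser.add_argument("--max_sequence_length", type=int, default=128)
args = parser.parse_args()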
if args.fp16 == 1:
    optimization_level = nemo.core.Optimization.mxprO1
elif args.fp16 == 2:
    optimization_level = nemo.core.Optimization.mxprO2
elif args.fp16 == 3:
    optimization_level = nemo.core.Optimization.mxprO3
else:
    optimization_level = nemo.core.Optimization.mxprO0

neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=optimization_level,
    placement=device)

tokenizer = nemo_nlp.SentencePieceTokenizer(model_path=args.tokenizer_model)
tokenizer.add_special_tokens(["[MASK]", "[CLS]", "[SEP]"])

# Pad the vocabulary size up to the next multiple of 8 so the embedding and
# softmax dimensions stay Tensor Core friendly under mixed precision.
vocab_size = 8 * math.ceil(tokenizer.vocab_size / 8)

bert_model = nemo_nlp.huggingface.BERT(vocab_size=vocab_size,
                                       num_layers=args.num_layers,
                                       d_model=args.d_model,
                                       num_heads=args.num_heads,
                                       d_inner=args.d_inner,
                                       max_seq_length=args.max_sequence_length,
                                       hidden_act="gelu",
                                       factory=neural_factory)

# Instantiate the remaining modules of the pretraining pipeline, namely
# data layers, the BERT encoder, and the MLM and NSP loss functions.
mlm_log_softmax = nemo_nlp.TransformerLogSoftmaxNM(vocab_size=vocab_size,
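# Quick sanity check of the multiple-of-8 padding applied to the vocabulary size
# above. This is plain arithmetic with illustrative values: a 32000-token
# SentencePiece vocabulary is already a multiple of 8 and stays unchanged, while
# BERT-base's 30522-token vocabulary would be padded up to 30528.
import math

for raw_vocab in (32000, 30522):
    padded = 8 * math.ceil(raw_vocab / 8)
    print(f"{raw_vocab} -> {padded}")   # 32000 -> 32000, 30522 -> 30528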