# Imports assumed for this excerpt; convert_example, gen_id2corpus,
# SemanticIndexBase and args come from the example's own modules.
import os
from functools import partial

import paddle
import paddlenlp as ppnlp
from paddlenlp.data import Pad, Tuple
from paddlenlp.utils.log import logger

tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')

trans_func = partial(
    convert_example, tokenizer=tokenizer, max_seq_length=args.max_seq_length)

batchify_fn = lambda samples, fn=Tuple(
    Pad(axis=0, pad_val=tokenizer.pad_token_id),       # text_input
    Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # text_segment
): [data for data in fn(samples)]

pretrained_model = ppnlp.transformers.ErnieModel.from_pretrained("ernie-1.0")

model = SemanticIndexBase(
    pretrained_model, output_emb_size=args.output_emb_size)
model = paddle.DataParallel(model)

# Load the pretrained semantic model
if args.params_path and os.path.isfile(args.params_path):
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    logger.info("Loaded parameters from %s" % args.params_path)
else:
    raise ValueError(
        "Please set --params_path with correct pretrained model file")

id2corpus = gen_id2corpus(args.corpus_file)

# convert_example function's input must be a dict
corpus_list = [{idx: text} for idx, text in id2corpus.items()]
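As a small aside, here is an illustration of what batchify_fn produces: Tuple applies one Pad to each field across the batch, so ragged token-id lists come back as two padded arrays. The sample token ids below are made up for illustration and are not part of the excerpt.

# Illustrative only: two tokenized samples of different lengths.
samples = [
    [[1, 5, 7, 2], [0, 0, 0, 0]],  # input_ids, token_type_ids
    [[1, 9, 2], [0, 0, 0]],
]
input_ids, token_type_ids = batchify_fn(samples)
# Each field is padded to the longest sequence in the batch (here length 4),
# input_ids with tokenizer.pad_token_id and token_type_ids with
# tokenizer.pad_token_type_id, and returned as stacked arrays.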
tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')

trans_func = partial(
    convert_example, tokenizer=tokenizer, max_seq_length=args.max_seq_length)

batchify_fn = lambda samples, fn=Tuple(
    Pad(axis=0, pad_val=tokenizer.pad_token_id),       # text_input
    Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # text_segment
): [data for data in fn(samples)]

pretrained_model = ppnlp.transformers.ErnieModel.from_pretrained("ernie-1.0")

model = SemanticIndexBase(
    pretrained_model, output_emb_size=args.output_emb_size)

# Load the pretrained semantic model
if args.params_path and os.path.isfile(args.params_path):
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    logger.info("Loaded parameters from %s" % args.params_path)
else:
    raise ValueError(
        "Please set --params_path with correct pretrained model file")

id2corpus = gen_id2corpus(args.corpus_file)

# convert_example function's input must be a dict
corpus_list = [{idx: text} for idx, text in id2corpus.items()]
corpus_ds = MapDataset(corpus_list)
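For context, a minimal sketch of how these pieces are typically wired together: the corpus dataset is tokenized with trans_func, batched with batchify_fn, and run through the model to produce corpus embeddings. The DataLoader setup and the get_pooled_embedding call below are assumptions based on the usual PaddleNLP semantic-indexing example, not part of this excerpt.

# A sketch (assumed, not from this excerpt) of extracting corpus embeddings.
corpus_data_loader = paddle.io.DataLoader(
    dataset=corpus_ds.map(trans_func),  # tokenize each {id: text} dict
    batch_size=args.batch_size,
    collate_fn=batchify_fn,             # pad input ids / segment ids
    shuffle=False,
    return_list=True)

all_embeddings = []
model.eval()
with paddle.no_grad():
    for input_ids, token_type_ids in corpus_data_loader:
        # SemanticIndexBase in the example exposes get_pooled_embedding(),
        # which returns normalized sentence vectors of size output_emb_size.
        text_embeddings = model.get_pooled_embedding(input_ids, token_type_ids)
        all_embeddings.append(text_embeddings.numpy())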