additional_params.update(net.predict_end.collect_params()) elif args.bidaf_style_out: additional_params.update(net.modeling_layer.collect_params()) additional_params.update(net.output_layer.collect_params()) if args.apply_self_attention: net.multi_head_attention.collect_params().initialize(ctx=ctx) additional_params.update(net.multi_head_attention.collect_params()) if args.apply_transformer: net.transformer.collect_params().initialize(ctx=ctx) additional_params.update(net.transformer.collect_params()) net.hybridize(static_alloc=True) loss_function = net.loss(customize_loss=args.customize_loss) loss_function.hybridize(static_alloc=True) if version_2 and VERIFIER_ID is not None: if VERIFIER_ID == 0: verifier = AnswerVerifyThreshold( tokenizer=nlp.data.BERTBasicTokenizer(lower=lower), max_answer_length=max_answer_length, n_best_size=n_best_size, max_len=max_seq_length, version_2=version_2, ctx=verify_ctx) elif VERIFIER_ID == 1: verifier = AnswerVerify( tokenizer=nlp.data.BERTBasicTokenizer(lower=lower), max_answer_length=max_answer_length,
elif pretrained: # only load BertModel parameters net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) else: # no checkpoint is loaded net.initialize(init=mx.init.Normal(0.02), ctx=ctx) if args.apply_coattention: net.co_attention.collect_params().initialize(ctx=ctx) if args.apply_self_attention: net.multi_head_attention.collect_params().initialize(ctx=ctx) net.hybridize(static_alloc=True) loss_function = net.loss() loss_function.hybridize(static_alloc=True) if args.verify: if VERIFIER_ID == 1: verifier = AnswerVerify( tokenizer=nlp.data.BERTBasicTokenizer(lower=lower), max_answer_length=max_answer_length, null_score_diff_threshold=null_score_diff_threshold, n_best_size=n_best_size, max_len=max_seq_length, version_2=version_2, ctx=verify_ctx ) # debug: to be moved onto another GPU later if a space issue happens elif VERIFIER_ID == 2: verifier = AnswerVerify2(version_2=version_2,