def __init__(self, pc: BK.ParamCollection, sconf: FpScorerConf):
    super().__init__(pc, None, None)
    # options
    input_dim = sconf._input_dim
    arc_space = sconf.arc_space
    lab_space = sconf.lab_space
    transform_act = sconf.transform_act
    #
    self.input_dim = input_dim
    self.num_label = sconf._num_label
    self.mask_value = Constants.REAL_PRAC_MIN
    # attach/arc
    if arc_space > 0:
        self.arc_f = self.add_sub_node("af", Affine(pc, input_dim, arc_space, act=transform_act))
    else:
        self.arc_f = None
        arc_space = input_dim
    self.arc_scorer = self.add_sub_node("as", Affine(pc, arc_space, 1, init_rop=NoDropRop()))
    # labeling
    if lab_space > 0:
        self.lab_f = self.add_sub_node("lf", Affine(pc, input_dim, lab_space, act=transform_act))
    else:
        self.lab_f = None
        lab_space = input_dim
    self.lab_scorer = self.add_sub_node("ls", Affine(pc, lab_space, self.num_label, init_rop=NoDropRop()))
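
# --- Illustrative sketch (not part of the repo): the transform-then-score pattern built above,
# rewritten in plain PyTorch so the tensor shapes are explicit. `Affine`, `NoDropRop` and the
# node-registration calls are replaced by hypothetical stand-ins; ELU stands in for the
# configurable `transform_act`.
import torch
import torch.nn as nn

class TransformThenScore(nn.Module):
    def __init__(self, input_dim: int, space: int, out_dim: int):
        super().__init__()
        # optional transform (space > 0) mirrors self.arc_f / self.lab_f
        self.transform = nn.Sequential(nn.Linear(input_dim, space), nn.ELU()) if space > 0 else nn.Identity()
        self.scorer = nn.Linear(space if space > 0 else input_dim, out_dim)  # mirrors arc/lab scorer

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # [batch, seq, input_dim]
        return self.scorer(self.transform(x))             # [batch, seq, out_dim]

# e.g. arc scores are one value per position, label scores are num_label values:
#   arc = TransformThenScore(512, 128, 1); lab = TransformThenScore(512, 128, num_label)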
def __init__(self, pc, conf: RealisTypePredictorConf, vocab: HLabelVocab, input_enc_dims):
    super().__init__(pc, None, None)
    self.conf = conf
    self.vocab = vocab
    assert vocab.nil_as_zero
    VOCAB_LAYER = -1  # todo(note): simply use final largest layer
    self.lidx2hlidx = vocab.layered_hlidx[VOCAB_LAYER]  # int-idx -> HLabelIdx
    # scorer
    self.adp = self.add_sub_node('adp', TaskSpecAdp(pc, input_enc_dims, [], conf.hidden_dim))
    adp_hidden_size = self.adp.get_output_dims()[0]
    # fixed types for realis
    self.realis_predictor = self.add_sub_node(
        'pr', Affine(pc, adp_hidden_size, len(EVENT_REALIS_LIST), init_rop=NoDropRop()))
    # type predictor as a possible aux task
    self.type_predictor = self.add_sub_node(
        'pt', Affine(pc, adp_hidden_size, len(self.lidx2hlidx), init_rop=NoDropRop()))
def __init__(self, pc, input_dims: List[int], use_affs: List[bool], out_dim: int,
             out_act='linear', out_drop=0., param_init_scale=1.):
    super().__init__(pc, None, None)
    # -----
    self.input_dims = input_dims
    self.use_affs = use_affs
    self.out_dim = out_dim
    # =====
    assert len(input_dims) == len(use_affs)
    self.aff_nodes = []
    for d, use in zip(input_dims, use_affs):
        if use:
            one_aff = self.add_sub_node(
                "aff", Affine(pc, d, out_dim, init_scale=param_init_scale, init_rop=NoDropRop()))
        else:
            assert d == out_dim, f"Dimension mismatch for skipping affine: {d} vs {out_dim}!"
            one_aff = None
        self.aff_nodes.append(one_aff)
    self.out_act_f = ActivationHelper.get_act(out_act)
    self.out_drop = self.add_sub_node("drop", Dropout(pc, (out_dim,), fix_rate=out_drop))
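
# --- Illustrative sketch (not part of the repo): a plain-PyTorch stand-in for the combiner
# above. Each input is projected to out_dim (or passed through unchanged when use_affs[i] is
# False and the dimension already matches). Combining the projections by elementwise sum is an
# assumption inferred from the single shared out_dim; the __init__ alone does not show the
# forward. The linear default activation is omitted for brevity.
import torch
import torch.nn as nn

class SumCombiner(nn.Module):
    def __init__(self, input_dims, use_affs, out_dim, out_drop=0.1):
        super().__init__()
        assert len(input_dims) == len(use_affs)
        for d, use in zip(input_dims, use_affs):
            assert use or d == out_dim, "skipping the affine requires a matching dim"
        self.affs = nn.ModuleList(
            [nn.Linear(d, out_dim) if use else nn.Identity() for d, use in zip(input_dims, use_affs)])
        self.drop = nn.Dropout(out_drop)

    def forward(self, xs):  # xs[i]: [..., input_dims[i]]
        summed = sum(aff(x) for aff, x in zip(self.affs, xs))  # elementwise sum of projections
        return self.drop(summed)                               # [..., out_dim]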
def __init__(self, pc: BK.ParamCollection, pname: str, input_dim: int, conf: SeqLabNodeConf, inputter: Inputter):
    super().__init__(pc, conf, name="SLB")
    self.conf = conf
    self.inputter = inputter
    self.input_dim = input_dim
    # this step is performed at the embedder, thus still does not influence the inputter
    self.add_root_token = self.inputter.embedder.add_root_token
    # --
    self.pname = pname
    self.attr_name = pname + "_seq"  # attribute name in Instance
    self.vocab = inputter.vpack.get_voc(pname)
    # todo(note): we must make sure that 0 means NAN
    assert self.vocab.non == 0
    # models
    if conf.hid_dim <= 0:  # no hidden layer
        self.hid_layer = None
        self.pred_input_dim = input_dim
    else:
        self.hid_layer = self.add_sub_node("hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
        self.pred_input_dim = conf.hid_dim
    self.pred_out_dim = self.vocab.unk  # todo(note): UNK is the prediction boundary
    self.pred_layer = self.add_sub_node(
        "pr", Affine(pc, self.pred_input_dim, self.pred_out_dim, init_rop=NoDropRop()))
def __init__(self, pc: BK.ParamCollection, conf: MaskLMNodeConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.conf = conf
    # vocab and padder
    self.word_vocab = vpack.get_voc("word")
    self.padder = DataPadder(2, pad_vals=self.word_vocab.pad, mask_range=2)  # todo(note): <pad>-id is very large
    # models
    self.hid_layer = self.add_sub_node("hid", Affine(pc, conf._input_dim, conf.hid_dim, act=conf.hid_act))
    self.pred_layer = self.add_sub_node(
        "pred", Affine(pc, conf.hid_dim, conf.max_pred_rank + 1, init_rop=NoDropRop()))
    if conf.init_pred_from_pretrain:
        npvec = vpack.get_emb("word")
        if npvec is None:
            zwarn("Pretrained vector not provided, skip init pred embeddings!!")
        else:
            with BK.no_grad_env():
                self.pred_layer.ws[0].copy_(BK.input_real(npvec[:conf.max_pred_rank + 1].T))
            zlog(f"Init pred embeddings from pretrained vectors (size={conf.max_pred_rank+1}).")
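
# --- Illustrative sketch (not part of the repo): initializing an output prediction layer from
# pretrained word vectors, as above. A [V, D] embedding matrix is truncated to the top-K
# prediction vocabulary and copied into the prediction weights under no-grad. The `.T` above
# suggests the repo's Affine stores weights as [in, out]; PyTorch's nn.Linear stores [out, in],
# so no transpose is needed here. Names are hypothetical.
import numpy as np
import torch
import torch.nn as nn

def init_pred_from_pretrain(pred_layer: nn.Linear, npvec: np.ndarray, max_pred: int) -> None:
    with torch.no_grad():
        pred_layer.weight.copy_(torch.from_numpy(npvec[:max_pred]).float())  # [K, D] into [out, in]

# usage: pred = nn.Linear(300, 10000); init_pred_from_pretrain(pred, emb_matrix, 10000)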
def __init__(self, pc: BK.ParamCollection, jconf: JPosConf, pos_vocab):
    super().__init__(pc, None, None)
    self.jpos_stacking = jconf.jpos_stacking
    self.jpos_multitask = jconf.jpos_multitask
    self.jpos_lambda = jconf.jpos_lambda
    self.jpos_decode = jconf.jpos_decode
    # encoder0
    jconf.jpos_enc._input_dim = jconf._input_dim
    self.enc = self.add_sub_node("enc0", MyEncoder(self.pc, jconf.jpos_enc))
    self.enc_output_dim = self.enc.get_output_dims()[0]
    # output
    # todo(warn): here, include some other things for convenience
    num_labels = len(pos_vocab)
    self.pred = self.add_sub_node(
        "pred", Affine(self.pc, self.enc_output_dim, num_labels, init_rop=NoDropRop()))
    # further stacking (if not, then simply multi-task learning)
    if jconf.jpos_stacking:
        self.pos_weights = self.add_param("w", (num_labels, self.enc_output_dim))  # [n, dim] to be added
    else:
        self.pos_weights = None
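
# --- Illustrative sketch (not part of the repo): one plausible reading of the stacking branch
# above ("[n, dim] to be added"): the POS softmax distribution is multiplied into the learned
# [num_labels, enc_dim] weight table and the result is added back to the encoder output, so
# later layers see a soft POS embedding. This forward is an assumption, not the repo's code.
import torch

def jpos_stack(enc_out: torch.Tensor, pos_logits: torch.Tensor, pos_weights: torch.Tensor) -> torch.Tensor:
    # enc_out: [batch, seq, dim]; pos_logits: [batch, seq, n_labels]; pos_weights: [n_labels, dim]
    soft_pos = torch.softmax(pos_logits, dim=-1) @ pos_weights  # [batch, seq, dim]
    return enc_out + soft_pos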
def __init__(self, pc, input_dim, hid_dim: int, hid_act='linear', hid_drop=0., hid_piece4init=1,
             out_dim=0, out_fbias=0., out_fact="linear", out_piece4init=1, init_scale=1.):
    super().__init__(pc, None, None)
    # -----
    # hidden layer
    self.hid_aff = self.add_sub_node(
        "hidden", Affine(pc, input_dim, hid_dim, n_piece4init=hid_piece4init,
                         init_scale=init_scale, init_rop=NoDropRop()))
    self.hid_act_f = ActivationHelper.get_act(hid_act)
    self.hid_drop = self.add_sub_node("drop", Dropout(pc, (hid_dim,), fix_rate=hid_drop))
    # -----
    # output layer (optional)
    self.final_output_dim = hid_dim
    # todo(+N): how about split hidden layers for each specific output
    self.out_fbias = out_fbias  # fixed extra bias
    self.out_act_f = ActivationHelper.get_act(out_fact)
    # no output dropouts
    if out_dim > 0:
        assert hid_act != "linear", "Please use non-linear activation for mlp!"
        assert hid_piece4init == 1, "Strange hid_piece4init for hidden layer with out_dim>0"
        self.final_aff = self.add_sub_node(
            "final", Affine(pc, hid_dim, out_dim, n_piece4init=out_piece4init,
                            init_scale=init_scale, init_rop=NoDropRop()))
        self.final_output_dim = out_dim
    else:
        self.final_aff = None
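
# --- Illustrative sketch (not part of the repo): the hidden -> activation -> dropout ->
# (optional output) pipeline assembled above, with the fixed, non-learned `out_fbias` added
# after the final affine. Plain PyTorch; ELU stands in for the configurable hidden activation.
import torch
import torch.nn as nn

class HiddenThenOutput(nn.Module):
    def __init__(self, input_dim, hid_dim, out_dim=0, hid_drop=0.1, out_fbias=0.0):
        super().__init__()
        self.hid = nn.Sequential(nn.Linear(input_dim, hid_dim), nn.ELU(), nn.Dropout(hid_drop))
        self.final = nn.Linear(hid_dim, out_dim) if out_dim > 0 else None
        self.out_fbias = out_fbias  # fixed extra bias added to the output scores

    def forward(self, x):
        h = self.hid(x)
        return h if self.final is None else self.final(h) + self.out_fbias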
def __init__(self, pc, conf: ArgSpanExpanderConf, input_enc_dims):
    super().__init__(pc, None, None)
    self.conf = conf
    # assert not conf.use_binary_scorer, "this mode seems problematic!!"
    # todo(note): only using bert's ones, simply flatten
    bert_input_dims, _ = input_enc_dims
    bert_dim, bert_fold = bert_input_dims
    flatten_bert_dim = bert_dim * bert_fold
    self.flatten_bert_dim = flatten_bert_dim
    # scoring params
    self.use_lstm_scorer = conf.use_lstm_scorer
    if self.use_lstm_scorer:
        self.llstm = self.add_sub_node(
            "llstm", RnnNode.get_rnn_node("lstm2", pc, flatten_bert_dim, conf.hid_dim))
        self.rlstm = self.add_sub_node(
            "rlstm", RnnNode.get_rnn_node("lstm2", pc, flatten_bert_dim, conf.hid_dim))
        self.lscorer = self.add_sub_node("ls", Affine(pc, conf.hid_dim, 1, init_rop=NoDropRop()))
        self.rscorer = self.add_sub_node("rs", Affine(pc, conf.hid_dim, 1, init_rop=NoDropRop()))
    else:
        self.lscorer = self.add_sub_node(
            "ls", get_mlp(pc, [flatten_bert_dim, flatten_bert_dim], 1, hidden_which_affine=3,
                          n_hidden=conf.hid_dim, n_hidden_layer=1, hidden_act='elu',
                          final_act="linear", final_bias=False, final_init_rop=NoDropRop()))
        self.rscorer = self.add_sub_node(
            "rs", get_mlp(pc, [flatten_bert_dim, flatten_bert_dim], 1, hidden_which_affine=3,
                          n_hidden=conf.hid_dim, n_hidden_layer=1, hidden_act='elu',
                          final_act="linear", final_bias=False, final_init_rop=NoDropRop()))
def __init__(self, pc, conf: NodeExtractorConfGene0, vocab: HLabelVocab, extract_type: str):
    super().__init__(pc, conf, vocab, extract_type)
    # decoding
    # -----
    # the two parts: actually in biaffine attention forms
    # transform embeddings for attention match (token evidence)
    self.T_tok = self.add_sub_node(
        "at", Affine(pc, conf.lab_conf.n_dim, conf._input_dim, init_rop=NoDropRop()))
    # transform embeddings for global match (sent evidence)
    self.T_sent = self.add_sub_node(
        "as", Affine(pc, conf.lab_conf.n_dim, conf._input_dim, init_rop=NoDropRop()))
    # to be refreshed
    self.query_tok = None  # [L, D]
    self.query_sent = None  # [L, D]
    # -----
    # how to combine the two parts: fix lambda or dynamic gated (with the input features)
    self.lambda_score_tok = conf.lambda_score_tok
    if self.lambda_score_tok < 0.:  # auto mode: using an MLP (make hidden size equal to input//4)
        self.score_gate = self.add_sub_node(
            "mix", get_mlp(pc, [conf._input_dim] * 4, 1, conf._input_dim,
                           hidden_act="elu", final_act="sigmoid",
                           final_init_rop=NoDropRop(), hidden_which_affine=3))
    else:
        self.score_gate = None
def __init__(self, pc, conf: CandidateExtractorConf, input_enc_dims):
    super().__init__(pc, None, None)
    self.conf = conf
    # scorer
    self.adp = self.add_sub_node('adp', TaskSpecAdp(pc, input_enc_dims, [], conf.hidden_dim))
    adp_hidden_size = self.adp.get_output_dims()[0]
    self.predictor = self.add_sub_node(
        'pred', Affine(pc, adp_hidden_size, 2, init_rop=NoDropRop()))  # 0 as nil
    # others
    self.id_counter = defaultdict(int)  # docid->ef-count (make sure unique ef-id)
    self.valid_hlidx = HLabelIdx(["unk"], [1])
def __init__(self, pc, conf: NodeSelectorConf):
    super().__init__(pc, None, None)
    self.conf = conf
    self.input_dim = conf._input_dim
    self.scorer = self.add_sub_node(
        "sc", get_mlp(pc, self.input_dim, 1, conf.mlp_hidden_dim,
                      n_hidden_layer=conf.mlp_hidden_layer, hidden_act=conf.mlp_hidden_act,
                      final_init_rop=NoDropRop()))
    # loss function
    self.loss_prob, self.loss_hinge = [conf.ns_loss == z for z in ["prob", "hinge"]]
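
# --- Illustrative sketch (not part of the repo): the two loss modes flagged above for a binary
# keep/drop score. "prob" is sigmoid cross-entropy on the raw score; "hinge" is a margin loss on
# the +/-1 encoded gold. The repo's exact formulation may differ; this only shows the standard
# versions of the two options.
import torch
import torch.nn.functional as F

def selection_loss(scores: torch.Tensor, gold: torch.Tensor, mode: str) -> torch.Tensor:
    # scores: [N] raw scores from the MLP; gold: [N] in {0, 1}
    if mode == "prob":
        return F.binary_cross_entropy_with_logits(scores, gold.float())
    elif mode == "hinge":
        signed = gold.float() * 2.0 - 1.0                       # {0,1} -> {-1,+1}
        return torch.clamp(1.0 - signed * scores, min=0.0).mean()
    raise ValueError(mode)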
def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
             conf: EmbedderNodeConf, vpack: VocabPackage):
    super().__init__(pc, comp_name, ec_conf, conf, vpack)
    # -----
    # get embeddings
    npvec = None
    if self.ec_conf.comp_init_from_pretrain:
        npvec = vpack.get_emb(comp_name)
        zlog(f"Try to init InputEmbedNode {comp_name} with npvec.shape={npvec.shape if (npvec is not None) else None}")
        if npvec is None:
            zwarn("Warn: cannot get pre-trained embeddings to init!!")
    # get rare unk range
    # - get freq vals, make sure special ones will not be pruned; todo(note): directly use that field
    voc_rare_mask = [float(z is not None and z <= ec_conf.comp_rare_thr) for z in self.voc.final_vals]
    self.rare_mask = BK.input_real(voc_rare_mask)
    self.use_rare_unk = (ec_conf.comp_rare_unk > 0. and ec_conf.comp_rare_thr > 0)
    # --
    # dropout outside explicitly
    self.E = self.add_sub_node(
        f"E{self.comp_name}", Embedding(pc, len(self.voc), self.comp_dim, fix_row0=conf.embed_fix_row0,
                                        npvec=npvec, name=comp_name, init_rop=NoDropRop(),
                                        init_scale=self.comp_init_scale))
    self.create_dropout_node()
def __init__(self, pc: BK.ParamCollection, pname: str, input_dim: int, conf: SeqCrfNodeConf, inputter: Inputter):
    super().__init__(pc, conf, name="CRF")
    self.conf = conf
    self.inputter = inputter
    self.input_dim = input_dim
    # this step is performed at the embedder, thus still does not influence the inputter
    self.add_root_token = self.inputter.embedder.add_root_token
    # --
    self.pname = pname
    self.attr_name = pname + "_seq"  # attribute name in Instance
    self.vocab = inputter.vpack.get_voc(pname)
    # todo(note): we must make sure that 0 means NAN
    assert self.vocab.non == 0
    # models
    if conf.hid_dim <= 0:  # no hidden layer
        self.hid_layer = None
        self.pred_input_dim = input_dim
    else:
        self.hid_layer = self.add_sub_node("hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
        self.pred_input_dim = conf.hid_dim
    self.tagset_size = self.vocab.unk  # todo(note): UNK is the prediction boundary
    self.pred_layer = self.add_sub_node(
        "pr", Affine(pc, self.pred_input_dim, self.tagset_size + 2, init_rop=NoDropRop()))
    # transition matrix
    init_transitions = np.zeros([self.tagset_size + 2, self.tagset_size + 2])
    init_transitions[:, START_TAG] = -10000.0
    init_transitions[STOP_TAG, :] = -10000.0
    init_transitions[:, 0] = -10000.0
    init_transitions[0, :] = -10000.0
    self.transitions = self.add_param(
        "T", (self.tagset_size + 2, self.tagset_size + 2), init=init_transitions)
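
# --- Illustrative sketch (not part of the repo): what the -10000.0 rows/columns above encode.
# With tags [0 = NON/pad, 1..K-1 = real tags, START_TAG, STOP_TAG], any path that transitions
# INTO START_TAG, OUT OF STOP_TAG, or into/out of the padding tag 0 gets a ~-inf score, so
# Viterbi and the forward algorithm effectively never use those moves. Placing START_TAG and
# STOP_TAG as the last two indices is an assumption for this demo.
import numpy as np

K = 5                                # tagset_size (real tags occupy 1..K-1, 0 is the NON/pad tag)
START_TAG, STOP_TAG = K, K + 1
T = np.zeros((K + 2, K + 2))
T[:, START_TAG] = -10000.0           # nothing may transition into START
T[STOP_TAG, :] = -10000.0            # nothing may leave STOP
T[:, 0] = -10000.0                   # nothing may transition into the pad/NON tag
T[0, :] = -10000.0                   # ... nor out of it
assert T[1, 2] == 0.0 and T[START_TAG, 1] == 0.0 and T[2, STOP_TAG] == 0.0  # allowed moves stay 0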
def __init__(self, pc, conf: HLabelNodeConf, hl_vocab: HLabelVocab, eff_max_layer=None):
    super().__init__(pc, None, None)
    self.conf = conf
    self.hl_vocab = hl_vocab
    assert self.hl_vocab.nil_as_zero  # for each layer, the idx=0 is the full-NIL
    # basic pool embeddings
    npvec = hl_vocab.pool_init_vec
    if not conf.pool_init_hint:
        npvec = None
    else:
        assert npvec is not None, "pool-init not provided by the Vocab!"
    n_dim, n_pool = conf.n_dim, len(hl_vocab.pools_k)
    self.pool_pred = self.add_sub_node(
        "pp", Embedding(pc, n_pool, n_dim, fix_row0=conf.zero_nil, npvec=npvec,
                        init_rop=(NoDropRop() if conf.nodrop_pred_embeds else None)))
    if conf.tie_embeds:
        self.pool_lookup = self.pool_pred
    else:
        self.pool_lookup = self.add_sub_node(
            "pl", Embedding(pc, n_pool, n_dim, fix_row0=conf.zero_nil, npvec=npvec,
                            init_rop=(NoDropRop() if conf.nodrop_lookup_embeds else None)))
    # layered labels embeddings (to be refreshed)
    self.max_layer = hl_vocab.max_layer
    self.layered_embeds_pred = [None] * self.max_layer
    self.layered_embeds_lookup = [None] * self.max_layer
    self.layered_prei = [None] * self.max_layer  # previous layer i, for score combining
    self.layered_isnil = [None] * self.max_layer  # whether is nil(None)
    self.zero_nil = conf.zero_nil
    # lookup summer
    assert conf.strategy_predict == "sum"
    self.lookup_is_sum, self.lookup_is_ff = [conf.strategy_lookup == z for z in ["sum", "ff"]]
    if self.lookup_is_ff:
        self.lookup_summer = self.add_sub_node(
            "summer", Affine(pc, [n_dim] * self.max_layer, n_dim, act="tanh"))
    elif self.lookup_is_sum:
        self.sum_dropout = self.add_sub_node("sdrop", Dropout(pc, (n_dim,)))
        self.lookup_summer = lambda embeds: self.sum_dropout(BK.stack(embeds, 0).sum(0))
    else:
        raise NotImplementedError(f"UNK strategy_lookup: {conf.strategy_lookup}")
    # bias for prediction
    self.prediction_sizes = [len(hl_vocab.layered_pool_links_padded[i]) for i in range(self.max_layer)]
    if conf.bias_predict:
        self.biases_pred = [self.add_param(name="B", shape=(x,)) for x in self.prediction_sizes]
    else:
        self.biases_pred = [None] * self.max_layer
    # =====
    # training
    self.is_hinge_loss, self.is_prob_loss = [conf.loss_function == z for z in ["hinge", "prob"]]
    self.loss_lambdas = conf.loss_lambdas + [1.] * (self.max_layer - len(conf.loss_lambdas))  # loss scale
    self.margin_lambdas = conf.margin_lambdas + [0.] * (self.max_layer - len(conf.margin_lambdas))  # margin scale
    self.lookup_soft_alphas = conf.lookup_soft_alphas + [1.] * (self.max_layer - len(conf.lookup_soft_alphas))
    self.loss_fullnil_weight = conf.loss_fullnil_weight
    # ======
    # set current effective max_layer
    self.eff_max_layer = self.max_layer
    if eff_max_layer is not None:
        self.set_eff_max_layer(eff_max_layer)
def __init__(self, pc, dim: int, conf: VRecConf):
    super().__init__(pc, None, None)
    self.conf = conf
    self.dim = dim
    # =====
    # Feat
    if conf.feat_mod == "matt":
        self.feat_node = self.add_sub_node("feat", MAttNode(pc, dim, dim, dim, conf.matt_conf))
        self.attn_count = conf.matt_conf.head_count
    elif conf.feat_mod == "fcomb":
        self.feat_node = self.add_sub_node("feat", FCombNode(pc, dim, dim, dim, conf.fc_conf))
        self.attn_count = conf.fc_conf.fc_count
    else:
        raise NotImplementedError()
    feat_out_dim = self.feat_node.get_output_dims()[0]
    # =====
    # Combiner
    if conf.comb_mode == "affine":
        self.comb_aff = self.add_sub_node(
            "aff", AffineCombiner(pc, [dim, feat_out_dim], [conf.comb_affine_q, conf.comb_affine_v], dim,
                                  out_act=conf.comb_affine_act, out_drop=conf.comb_affine_drop))
        self.comb_f = lambda q, v, c: (self.comb_aff([q, v]), None)
    elif conf.comb_mode == "lstm":
        self.comb_lstm = self.add_sub_node("lstm", LstmNode2(pc, feat_out_dim, dim))
        self.comb_f = self._call_lstm
    else:
        raise NotImplementedError()
    # =====
    # ff
    if conf.ff_dim > 0:
        self.has_ff = True
        self.linear1 = self.add_sub_node(
            "l1", Affine(pc, dim, conf.ff_dim, act=conf.ff_act, init_rop=NoDropRop()))
        self.dropout1 = self.add_sub_node("d1", Dropout(pc, (conf.ff_dim,), fix_rate=conf.ff_drop))
        self.linear2 = self.add_sub_node(
            "l2", Affine(pc, conf.ff_dim, dim, act="linear", init_rop=NoDropRop()))
        self.dropout2 = self.add_sub_node("d2", Dropout(pc, (dim,), fix_rate=conf.ff_drop))
    else:
        self.has_ff = False
    # layer norms
    if conf.use_pre_norm:
        self.att_pre_norm = self.add_sub_node("aln1", LayerNorm(pc, dim))
        self.ff_pre_norm = self.add_sub_node("fln1", LayerNorm(pc, dim))
    else:
        self.att_pre_norm = self.ff_pre_norm = None
    if conf.use_post_norm:
        self.att_post_norm = self.add_sub_node("aln2", LayerNorm(pc, dim))
        self.ff_post_norm = self.add_sub_node("fln2", LayerNorm(pc, dim))
    else:
        self.att_post_norm = self.ff_post_norm = None
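
# --- Illustrative sketch (not part of the repo): how the optional pre-/post-norm nodes above are
# typically wired around a sublayer (the attention/feat step or the feed-forward step). Whether
# the repo adds the residual exactly like this is an assumption; the sketch only shows the
# standard pre-norm vs post-norm placement that the four LayerNorm flags select between.
import torch
import torch.nn as nn

def run_sublayer(x, sublayer, pre_norm=None, post_norm=None):
    h = sublayer(pre_norm(x) if pre_norm is not None else x)
    out = x + h                                        # residual connection
    return post_norm(out) if post_norm is not None else out

ln = nn.LayerNorm(16)
y = run_sublayer(torch.randn(4, 16), nn.Linear(16, 16), pre_norm=ln)   # pre-norm style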
def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
             conf: EmbedderNodeConf, vpack: VocabPackage):
    super().__init__(pc, comp_name, ec_conf, conf, vpack)
    # -----
    per_cnn_size = conf.char_cnn_hidden // len(conf.char_cnn_windows)
    self.char_cnns = [self.add_sub_node(
        "char_cnn", CnnLayer(self.pc, self.comp_dim, per_cnn_size, z, pooling="max",
                             act="tanh", init_rop=NoDropRop()))
        for z in conf.char_cnn_windows]
    self.output_dim = conf.char_cnn_hidden
    self.create_dropout_node()
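
# --- Illustrative sketch (not part of the repo): the multi-window char CNN above. Each window
# size gets char_cnn_hidden // len(windows) filters; outputs are max-pooled over the character
# axis and concatenated back to char_cnn_hidden. Plain-PyTorch stand-in; tanh and max pooling
# come from the call above, the padding scheme and window sizes are assumptions.
import torch
import torch.nn as nn

class CharCNN(nn.Module):
    def __init__(self, char_dim=50, hidden=200, windows=(3, 5)):
        super().__init__()
        per = hidden // len(windows)
        self.convs = nn.ModuleList([nn.Conv1d(char_dim, per, w, padding=w // 2) for w in windows])

    def forward(self, chars):  # [batch*words, max_chars, char_dim]
        x = chars.transpose(1, 2)                                          # Conv1d wants [N, C, L]
        outs = [torch.tanh(c(x)).max(dim=-1).values for c in self.convs]   # max-pool over chars
        return torch.cat(outs, dim=-1)                                     # [batch*words, hidden]

out = CharCNN()(torch.randn(32, 12, 50))
assert out.shape == (32, 200)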
def __init__(self, pc: BK.ParamCollection, input_dim: int, conf: MaskLMNodeConf, inputter: Inputter):
    super().__init__(pc, conf, name="MLM")
    self.conf = conf
    self.inputter = inputter
    self.input_dim = input_dim
    # this step is performed at the embedder, thus still does not influence the inputter
    self.add_root_token = self.inputter.embedder.add_root_token
    # vocab and padder
    vpack = inputter.vpack
    vocab_word, vocab_pos = vpack.get_voc("word"), vpack.get_voc("pos")
    # no mask fields
    self.nomask_names_set = set(conf.nomask_names)
    # models
    if conf.hid_dim <= 0:  # no hidden layer
        self.hid_layer = None
        self.pred_input_dim = input_dim
    else:
        self.hid_layer = self.add_sub_node("hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
        self.pred_input_dim = conf.hid_dim
    # todo(note): unk is the first one above real words
    self.pred_word_size = min(conf.max_pred_rank + 1, vocab_word.unk)
    self.pred_pos_size = vocab_pos.unk
    if conf.tie_input_embeddings:
        zwarn("Tie all preds in mlm with input embeddings!!")
        self.pred_word_layer = self.pred_pos_layer = None
        self.inputter_word_node = self.inputter.embedder.get_node("word")
        self.inputter_pos_node = self.inputter.embedder.get_node("pos")
    else:
        self.inputter_word_node, self.inputter_pos_node = None, None
        self.pred_word_layer = self.add_sub_node(
            "pw", Affine(pc, self.pred_input_dim, self.pred_word_size, init_rop=NoDropRop()))
        self.pred_pos_layer = self.add_sub_node(
            "pp", Affine(pc, self.pred_input_dim, self.pred_pos_size, init_rop=NoDropRop()))
        if conf.init_pred_from_pretrain:
            npvec = vpack.get_emb("word")
            if npvec is None:
                zwarn("Pretrained vector not provided, skip init pred embeddings!!")
            else:
                with BK.no_grad_env():
                    self.pred_word_layer.ws[0].copy_(BK.input_real(npvec[:self.pred_word_size].T))
                zlog(f"Init pred embeddings from pretrained vectors (size={self.pred_word_size}).")
    # =====
    COMBINE_METHOD_FS = {
        "sum": lambda xs: BK.stack(xs, -1).sum(-1),
        "avg": lambda xs: BK.stack(xs, -1).mean(-1),
        "min": lambda xs: BK.stack(xs, -1).min(-1)[0],
        "max": lambda xs: BK.stack(xs, -1).max(-1)[0],
    }
    self.loss_comb_f = COMBINE_METHOD_FS[conf.loss_comb_method]
    self.score_comb_f = COMBINE_METHOD_FS[conf.score_comb_method]
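
# --- Illustrative sketch (not part of the repo): the COMBINE_METHOD_FS table above, shown with
# torch tensors instead of the BK backend. Several per-field loss/score tensors of the same
# shape are stacked on a new trailing axis and reduced by sum/avg/min/max; "min"/"max" index
# [0] because torch.min/torch.max over a dim return (values, indices).
import torch

xs = [torch.tensor([1.0, 4.0]), torch.tensor([3.0, 2.0])]
combine = {
    "sum": lambda xs: torch.stack(xs, -1).sum(-1),
    "avg": lambda xs: torch.stack(xs, -1).mean(-1),
    "min": lambda xs: torch.stack(xs, -1).min(-1)[0],
    "max": lambda xs: torch.stack(xs, -1).max(-1)[0],
}
assert torch.equal(combine["sum"](xs), torch.tensor([4.0, 6.0]))
assert torch.equal(combine["max"](xs), torch.tensor([3.0, 4.0]))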
def __init__(self, pc: BK.ParamCollection, input_dim: int, conf: PlainLMNodeConf, inputter: Inputter):
    super().__init__(pc, conf, name="PLM")
    self.conf = conf
    self.inputter = inputter
    self.input_dim = input_dim
    self.split_input_blm = conf.split_input_blm
    # this step is performed at the embedder, thus still does not influence the inputter
    self.add_root_token = self.inputter.embedder.add_root_token
    # vocab and padder
    vpack = inputter.vpack
    vocab_word = vpack.get_voc("word")
    # models
    real_input_dim = input_dim // 2 if self.split_input_blm else input_dim
    if conf.hid_dim <= 0:  # no hidden layer
        self.l2r_hid_layer = self.r2l_hid_layer = None
        self.pred_input_dim = real_input_dim
    else:
        self.l2r_hid_layer = self.add_sub_node(
            "l2r_h", Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
        self.r2l_hid_layer = self.add_sub_node(
            "r2l_h", Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
        self.pred_input_dim = conf.hid_dim
    # todo(note): unk is the first one above real words
    self.pred_size = min(conf.max_pred_rank + 1, vocab_word.unk)
    if conf.tie_input_embeddings:
        zwarn("Tie all preds in plm with input embeddings!!")
        self.l2r_pred = self.r2l_pred = None
        self.inputter_embed_node = self.inputter.embedder.get_node("word")
    else:
        self.l2r_pred = self.add_sub_node(
            "l2r_p", Affine(pc, self.pred_input_dim, self.pred_size, init_rop=NoDropRop()))
        if conf.tie_bidirect_pred:
            self.r2l_pred = self.l2r_pred
        else:
            self.r2l_pred = self.add_sub_node(
                "r2l_p", Affine(pc, self.pred_input_dim, self.pred_size, init_rop=NoDropRop()))
        self.inputter_embed_node = None
        if conf.init_pred_from_pretrain:
            npvec = vpack.get_emb("word")
            if npvec is None:
                zwarn("Pretrained vector not provided, skip init pred embeddings!!")
            else:
                with BK.no_grad_env():
                    self.l2r_pred.ws[0].copy_(BK.input_real(npvec[:self.pred_size].T))
                    self.r2l_pred.ws[0].copy_(BK.input_real(npvec[:self.pred_size].T))
                zlog(f"Init pred embeddings from pretrained vectors (size={self.pred_size}).")
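
# --- Illustrative sketch (not part of the repo): the split_input_blm path above. A BiLSTM-style
# encoder output of width D is assumed to be the concatenation [forward ; backward]; each half is
# routed to its own hidden layer so the l2r predictor never sees right-context features and the
# r2l predictor never sees left-context features. The exact splitting rule is an assumption
# implied by real_input_dim = input_dim // 2.
import torch

def split_blm_input(enc_out: torch.Tensor):
    # enc_out: [batch, seq, D] with D even; returns ([batch, seq, D/2], [batch, seq, D/2])
    half = enc_out.shape[-1] // 2
    return enc_out[..., :half], enc_out[..., half:]

fwd_half, bwd_half = split_blm_input(torch.zeros(2, 7, 512))
assert fwd_half.shape == (2, 7, 256) and bwd_half.shape == (2, 7, 256)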
def __init__(self, pc, conf: NodeExtractorConfGene1, vocab: HLabelVocab, extract_type: str):
    super().__init__(pc, conf, vocab, extract_type)
    # -----
    # decoding
    # 1. attention for selecting token
    self.affine_k = self.add_sub_node(
        "ak", Affine(pc, [conf._input_dim, 1], conf.hid_att, bias=False, which_affine=3, init_rop=NoDropRop()))
    self.affine_q = self.add_sub_node(
        "aq", Affine(pc, [conf.hid_repo, conf.hid_state], conf.hid_att, bias=False, which_affine=3, init_rop=NoDropRop()))
    self.repos = self.add_param("r", [conf.num_repo, conf.hid_repo], lookup=True)
    # input is (last_hid_layer + lab_embed)
    self.rnn_unit = self.add_sub_node(
        "rnn", RnnNode.get_rnn_node("lstm2", pc, 2 * conf.lab_conf.n_dim, conf.hid_state))
    # 2. labeling
    self.lab_f = self.add_sub_node(
        "lab", Affine(pc, [conf._input_dim, 1, conf.hid_repo, conf.hid_state], conf.lab_conf.n_dim,
                      which_affine=3, act="elu"))