class B2H(nn.Module):
    """Bottom-to-High (B2H) dual sequence-labeling network.

    A bottom-layer BiLSTM-CRF first tags the shared word representation;
    its outputs are fused back with that representation by an interaction
    unit (``B2HInterUnit``), and the fused representation feeds a
    high-layer BiLSTM-CRF for the second tagging task.
    """

    def __init__(self, args, data):
        """Build the network.

        Args:
            args: parsed configuration namespace (extractor choices,
                hyperparameters, optimizer settings).
            data: dataset descriptor providing ``hlabelset_size`` and
                ``llabelset_size``.
        """
        super().__init__()
        # --- Network configuration ---
        self.word_feature_extractor = args.word_extractor
        self.use_char = args.use_char
        self.char_feature_extractor = args.char_extractor
        self.use_crf = args.use_crf
        # --- Training configuration ---
        self.average_batch_loss = args.average_batch_loss
        self.status = args.status
        # --- Hyperparameters ---
        self.cnn_layer = args.cnn_layer
        self.iteration = args.iteration
        self.batch_size = args.batch_size
        self.char_hidden_dim = args.char_hidden_dim
        self.hidden_dim = args.hidden_dim
        self.dropout = args.dropout
        self.lstm_layer = args.lstm_layer
        self.bilstm = args.bilstm
        self.gpu = args.gpu
        self.optimizer = args.optimizer
        self.lr = args.lr
        self.lr_decay = args.lr_decay
        self.clip = args.clip
        self.momentum = args.momentum
        self.l2 = args.l2
        # --- Dual network modules ---
        # Shared word/char representation layer.
        self.wordrep = WordRep(args, data)
        # Information interaction unit: fuses bottom-layer outputs with
        # the shared word representation before high-level tagging.
        self.inter_unit = B2HInterUnit(
            args.word_emb_dim + args.char_hidden_dim,
            data.llabelset_size, 1, F.relu)
        # High- and bottom-layer taggers of the dual model.
        self.H_ner = BiLSTMCRF(
            args,
            input_size=args.char_hidden_dim + args.word_emb_dim,
            tagset_size=data.hlabelset_size,
            name='High layer model')
        self.B_ner = BiLSTMCRF(
            args,
            input_size=args.char_hidden_dim + args.word_emb_dim,
            tagset_size=data.llabelset_size,
            name='bottom layer model')
        print("build B2H sequence labeling network...")

    def calculate_loss(self, word_inputs, word_seq_lengths, char_inputs,
                       char_seq_lengths, char_seq_recover, batch_hlabel,
                       batch_llabel, mask):
        """Compute training losses and decoded tags for both layers.

        Returns:
            (bottom_loss, high_loss, bottom_tag_seqs, high_tag_seqs).
        """
        word_represent = self.wordrep(word_inputs, word_seq_lengths,
                                      char_inputs, char_seq_lengths,
                                      char_seq_recover)
        # Bottom (low-level) tagging on the shared representation.
        B2HB_outs, B2HB_loss, B2HB_tag_seqs = self.B_ner.calculate_loss(
            word_represent, word_seq_lengths, batch_llabel, mask)
        # Fuse bottom-layer outputs with the shared representation.
        low_rep = self.inter_unit(word_represent, B2HB_outs)
        # High-level tagging on the fused representation.
        B2HH_outs, B2HH_loss, B2HH_tag_seqs = self.H_ner.calculate_loss(
            low_rep, word_seq_lengths, batch_hlabel, mask)
        return B2HB_loss, B2HH_loss, B2HB_tag_seqs, B2HH_tag_seqs

    # TODO: revisit for a while...
    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask):
        """Inference pass: decode bottom-layer then high-layer tags.

        Returns:
            (bottom_tag_seqs, high_tag_seqs).
        """
        word_represent = self.wordrep(word_inputs, word_seq_lengths,
                                      char_inputs, char_seq_lengths,
                                      char_seq_recover)
        B2HB_outs, B2HB_tag_seqs = self.B_ner(word_represent,
                                              word_seq_lengths, mask)
        # Fuse bottom-layer outputs before high-level tagging.
        low_rep = self.inter_unit(word_represent, B2HB_outs)
        _, B2HH_tag_seqs = self.H_ner(low_rep, word_seq_lengths, mask)
        return B2HB_tag_seqs, B2HH_tag_seqs

    def show_model_summary(self, logger):
        """Log the network, training, and hyperparameter configuration."""
        logger.info(" " + "++" * 20)
        logger.info(" Model Network:")
        logger.info("     Model        use_crf: %s" % (self.use_crf))
        logger.info("     Model word extractor: %s" % (self.word_feature_extractor))
        logger.info("     Model       use_char: %s" % (self.use_char))
        if self.use_char:
            logger.info("     Model char extractor: %s" % (self.char_feature_extractor))
            logger.info("     Model char_hidden_dim: %s" % (self.char_hidden_dim))
        logger.info(" " + "++" * 20)
        logger.info(" Training:")
        logger.info("     Optimizer: %s" % (self.optimizer))
        logger.info("     Iteration: %s" % (self.iteration))
        logger.info("     BatchSize: %s" % (self.batch_size))
        logger.info("     Average  batch   loss: %s" % (self.average_batch_loss))
        logger.info(" " + "++" * 20)
        logger.info(" Hyperparameters:")
        logger.info("     Hyper              lr: %s" % (self.lr))
        logger.info("     Hyper        lr_decay: %s" % (self.lr_decay))
        logger.info("     Hyper         HP_clip: %s" % (self.clip))
        logger.info("     Hyper        momentum: %s" % (self.momentum))
        logger.info("     Hyper              l2: %s" % (self.l2))
        logger.info("     Hyper      hidden_dim: %s" % (self.hidden_dim))
        logger.info("     Hyper         dropout: %s" % (self.dropout))
        logger.info("     Hyper      lstm_layer: %s" % (self.lstm_layer))
        logger.info("     Hyper          bilstm: %s" % (self.bilstm))
        logger.info("     Hyper             GPU: %s" % (self.gpu))
        logger.info("Model SUMMARY END.")
        logger.info("++" * 50)
        sys.stdout.flush()