# Shared imports assumed by the layer constructors below; Linear, MLP,
# BiaffineMatAttention, GraphNNUnit, IniConfigurator, Vocabulary, leaky_relu,
# init_wrap, and OrthogonalInitializer are repo-local and defined elsewhere.
import math
from typing import List

import dynet as dy
import numpy as np


# Constructor of a small two-layer feed-forward block (in -> hid -> in);
# the class header is not shown in this excerpt. The annotation on `p` is
# corrected from `int` to `float` to match the 0.1 default.
def __init__(self, model: dy.ParameterCollection, in_dim: int,
             hid_dim: int, p: float = 0.1):
    pc = model.add_subcollection()
    self.W1 = Linear(pc, in_dim, hid_dim)
    self.W2 = Linear(pc, hid_dim, in_dim)
    self.p = p  # dropout probability
    self.pc = pc
    self.spec = (in_dim, hid_dim, p)
def __init__(self, model: dy.ParameterCollection, in_dim: int, out_dim: int,
             init: dy.PyInitializer = None, bias: bool = True):
    pc = model.add_subcollection()
    if not init:
        init = dy.UniformInitializer(math.sqrt(in_dim))
    self.W = pc.add_parameters((out_dim, in_dim), init=init)
    if bias:
        self.b = pc.add_parameters((out_dim,), init=init)
    self.pc = pc
    self.bias = bias
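# Note on the default above: dy.UniformInitializer(s) samples uniformly from
# [-s, s], so the default range here grows with in_dim; a 1/sqrt(in_dim)
# scale is the more common convention, but the code is reproduced as written.
# Hypothetical construction sketch, assuming this constructor belongs to the
# Linear class referenced elsewhere in this file (commented out so the
# module stays importable):
#     m = dy.ParameterCollection()
#     lin = Linear(m, in_dim=100, out_dim=50)   # default uniform init
#     lin = Linear(m, 100, 50, init=dy.GlorotInitializer(), bias=False)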
def __init__(self, model: dy.ParameterCollection, cfg: IniConfigurator,
             vocabulary: Vocabulary):
    pc = model.add_subcollection()

    # MLP layer
    orth_init = OrthogonalInitializer
    self.head_arc_MLP = MLP(pc, cfg.ARC_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                            cfg.MLP_BIAS, orth_init)
    self.head_rel_MLP = MLP(pc, cfg.REL_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                            cfg.MLP_BIAS, orth_init)
    self.dept_arc_MLP = MLP(pc, cfg.ARC_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                            cfg.MLP_BIAS, orth_init)
    self.dept_rel_MLP = MLP(pc, cfg.REL_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                            cfg.MLP_BIAS, orth_init)

    # Biaffine Attention Layer (Arc)
    arc_size = cfg.ARC_MLP_SIZE[-1]
    zero_init = dy.ConstInitializer(0)
    self.arc_attn_mat = [
        BiaffineMatAttention(pc, arc_size, arc_size, 1, True, False,
                             zero_init)
        for _ in range(cfg.GRAPH_LAYERS + 1)
    ]

    # Biaffine Attention Layer (Rel)
    rel_num = vocabulary.get_vocab_size('rel')
    rel_size = cfg.REL_MLP_SIZE[-1]
    self.rel_mask = np.array([1] + [0] * (rel_num - 1))  # mask root relation
    self.rel_attn = BiaffineMatAttention(pc, rel_size, rel_size, rel_num,
                                         True, True, zero_init)

    # Graph Network Layer
    self.head_gnn = GraphNNUnit(pc, arc_size, arc_size, leaky_relu, orth_init)
    self.dept_gnn = GraphNNUnit(pc, arc_size, arc_size, leaky_relu, orth_init)
    self.head_rel_gnn = GraphNNUnit(pc, rel_size, rel_size, leaky_relu,
                                    orth_init)
    self.dept_rel_gnn = GraphNNUnit(pc, rel_size, rel_size, leaky_relu,
                                    orth_init)

    # Graph Layer WarmUp
    self.warm_list = [-i * cfg.WARM for i in range(cfg.GRAPH_LAYERS, -1, -1)]

    # Save Variable
    self.arc_size, self.rel_size, self.rel_num = arc_size, rel_size, rel_num
    self.pc, self.cfg = pc, cfg
    self.spec = (cfg, vocabulary)
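# Worked example of the warm-up schedule above: with hypothetical values
# cfg.GRAPH_LAYERS = 2 and cfg.WARM = 1000, the comprehension iterates
# i = 2, 1, 0 and yields warm_list = [-2000, -1000, 0], so each successive
# graph layer is scheduled to start contributing cfg.WARM steps later than
# the previous one.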
def __init__(self, model: dy.ParameterCollection, in_dim: int, out_dim: int,
             bias: bool = True,
             init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    init = init_wrap(init, (out_dim, in_dim))
    self.W = pc.add_parameters((out_dim, in_dim), init=init)
    if bias:
        self.b = pc.add_parameters((out_dim,), init=0)
    self.pc = pc
    self.bias = bias
    self.spec = (in_dim, out_dim, bias, init)
def __init__(self, model: dy.ParameterCollection, h_dim: int, d_dim: int,
             f=dy.tanh, init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    init_W = init_wrap(init, (h_dim, d_dim))
    self.W = pc.add_parameters((h_dim, d_dim), init=init_W)
    init_B = init_wrap(init, (h_dim, h_dim))
    self.B = pc.add_parameters((h_dim, h_dim), init=init_B)
    self.pc, self.f = pc, f
    self.spec = (h_dim, d_dim, f, init)
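# Judging only from the parameter shapes above (W: h_dim x d_dim,
# B: h_dim x h_dim), a plausible forward pass for this unit is
#     H' = f(W * D + B * H)
# i.e. a graph-NN update that mixes a dependent representation D into the
# head representation H. The actual __call__ is defined outside this excerpt;
# this is an inference from the shapes, not the confirmed implementation.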
def __init__(self, model: dy.ParameterCollection, sizes: List[int],
             f: 'nonlinear' = dy.tanh, p: float = 0.0, bias: bool = True,
             init: dy.PyInitializer = dy.GlorotInitializer()):
    pc = model.add_subcollection()
    self.W = [
        pc.add_parameters((x, y), init=init_wrap(init, (x, y)))
        for x, y in zip(sizes[1:], sizes[:-1])
    ]
    if bias:
        self.b = [pc.add_parameters((y,), init=0) for y in sizes[1:]]
    self.pc, self.f, self.p, self.bias = pc, f, p, bias
    self.spec = (sizes, f, p, bias, init)
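# Hypothetical construction sketch for the layer modules above, assuming the
# class names MLP and GraphNNUnit used elsewhere in this file; the forward
# passes (__call__) are defined outside this excerpt:
#     model = dy.ParameterCollection()
#     mlp = MLP(model, sizes=[400, 200, 100], f=leaky_relu, p=0.33)
#     gnn = GraphNNUnit(model, h_dim=100, d_dim=100, f=leaky_relu)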
# The parser below additionally relies on (assumed, matching the names used):
#     import random, time
#     from operator import itemgetter
#     from dynet import (ParameterCollection, AdamTrainer, LSTMBuilder,
#                        SimpleRNNBuilder, tanh, logistic, rectify, cmult,
#                        concatenate, renew_cg, esum)
# plus the repo-local decoder, read_conll, ConllEntry, and _dict_to_obj.
class MSTParserLSTM(object):
    """Underlying LSTM model for MSTParser used by BIST parser."""

    def __init__(self, vocab, w2i, pos, rels, options):
        if isinstance(options, dict):
            options = _dict_to_obj(options, 'Values')

        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)

        self.activations = {'tanh': tanh, 'sigmoid': logistic,
                            'relu': rectify,
                            'tanh3': (lambda x: tanh(cmult(cmult(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstm_flag = options.blstmFlag
        self.labels_flag = options.labelsFlag
        self.costaug_flag = options.costaugFlag
        self.bibi_flag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.words_count = vocab
        self.vocab = {word: ind + 3 for word, ind in list(w2i.items())}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        if self.bibi_flag:
            self.builders = [LSTMBuilder(1, self.wdims + self.pdims,
                                         self.ldims, self.model),
                             LSTMBuilder(1, self.wdims + self.pdims,
                                         self.ldims, self.model)]
            self.bbuilders = [LSTMBuilder(1, self.ldims * 2, self.ldims,
                                          self.model),
                              LSTMBuilder(1, self.ldims * 2, self.ldims,
                                          self.model)]
        elif self.layers > 0:
            self.builders = [LSTMBuilder(self.layers,
                                         self.wdims + self.pdims,
                                         self.ldims, self.model),
                             LSTMBuilder(self.layers,
                                         self.wdims + self.pdims,
                                         self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.pdims,
                                              self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.pdims,
                                              self.ldims, self.model)]

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3,
                                                         self.wdims))
        self.plookup = self.model.add_lookup_parameters((len(pos) + 3,
                                                         self.pdims))
        self.rlookup = self.model.add_lookup_parameters((len(rels),
                                                         self.rdims))

        self.hid_layer_foh = self.model.add_parameters((self.hidden_units,
                                                        self.ldims * 2))
        self.hid_layer_fom = self.model.add_parameters((self.hidden_units,
                                                        self.ldims * 2))
        self.hid_bias = self.model.add_parameters((self.hidden_units))
        self.hid2_layer = self.model.add_parameters((self.hidden2_units,
                                                     self.hidden_units))
        self.hid2_bias = self.model.add_parameters((self.hidden2_units))
        self.out_layer = self.model.add_parameters(
            (1, self.hidden2_units if self.hidden2_units > 0
             else self.hidden_units))

        if self.labels_flag:
            self.rhid_layer_foh = self.model.add_parameters(
                (self.hidden_units, 2 * self.ldims))
            self.rhid_layer_fom = self.model.add_parameters(
                (self.hidden_units, 2 * self.ldims))
            self.rhid_bias = self.model.add_parameters((self.hidden_units))
            self.rhid2_layer = self.model.add_parameters(
                (self.hidden2_units, self.hidden_units))
            self.rhid2_bias = self.model.add_parameters((self.hidden2_units))
            self.rout_layer = self.model.add_parameters(
                (len(self.irels),
                 self.hidden2_units if self.hidden2_units > 0
                 else self.hidden_units))
            self.rout_bias = self.model.add_parameters((len(self.irels)))

    def _get_expr(self, sentence, i, j):  # pylint: disable=missing-docstring
        if sentence[i].headfov is None:
            sentence[i].headfov = self.hid_layer_foh.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = self.hid_layer_fom.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.out_layer.expr() * self.activation(
                self.hid2_bias.expr() + self.hid2_layer.expr() *
                self.activation(sentence[i].headfov + sentence[j].modfov +
                                self.hid_bias.expr()))  # + self.outBias
        else:
            output = self.out_layer.expr() * self.activation(
                sentence[i].headfov + sentence[j].modfov +
                self.hid_bias.expr())  # + self.outBias
        return output

    def _evaluate(self, sentence):  # pylint: disable=missing-docstring
        exprs = [[self._get_expr(sentence, i, j)
                  for j in range(len(sentence))]
                 for i in range(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow]
                           for exprsRow in exprs])
        return scores, exprs

    def _evaluate_label(self, sentence, i, j):  # pylint: disable=missing-docstring
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = self.rhid_layer_foh.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = self.rhid_layer_fom.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.rout_layer.expr() * self.activation(
                self.rhid2_bias.expr() + self.rhid2_layer.expr() *
                self.activation(sentence[i].rheadfov + sentence[j].rmodfov +
                                self.rhid_bias.expr())) + self.rout_bias.expr()
        else:
            output = self.rout_layer.expr() * self.activation(
                sentence[i].rheadfov + sentence[j].rmodfov +
                self.rhid_bias.expr()) + self.rout_bias.expr()
        return output.value(), output

    def predict(self, conll_path=None, conll=None):  # pylint: disable=missing-docstring
        if conll is None:
            conll = read_conll(conll_path)
        for sentence in conll:
            conll_sentence = [entry for entry in sentence
                              if isinstance(entry, ConllEntry)]

            for entry in conll_sentence:
                wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] \
                    if self.wdims > 0 else None
                posvec = self.plookup[int(self.pos[entry.pos])] \
                    if self.pdims > 0 else None
                entry.vec = concatenate(
                    [_f for _f in [wordvec, posvec, None] if _f])
                entry.lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None
                entry.rheadfov = None
                entry.rmodfov = None

            if self.blstm_flag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                if self.bibi_flag:
                    for entry in conll_sentence:
                        entry.vec = concatenate(entry.lstms)
                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()
                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            scores, _ = self._evaluate(conll_sentence)
            heads = decoder.parse_proj(scores)

            for entry, head in zip(conll_sentence, heads):
                entry.pred_parent_id = head
                entry.pred_relation = '_'

            dump = False

            if self.labels_flag:
                for modifier, head in enumerate(heads[1:]):
                    scores, _ = self._evaluate_label(conll_sentence, head,
                                                     modifier + 1)
                    conll_sentence[modifier + 1].pred_relation = \
                        self.irels[max(enumerate(scores),
                                       key=itemgetter(1))[0]]

            renew_cg()
            if not dump:
                yield sentence

    def train(self, conll_path):  # pylint: disable=invalid-name
        # pylint: disable=missing-docstring
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        shuffled_data = list(read_conll(conll_path))
        random.shuffle(shuffled_data)

        errs = []
        lerrs = []

        i_sentence = 0
        for sentence in shuffled_data:
            if i_sentence % 100 == 0 and i_sentence != 0:
                print('Processing sentence number:', i_sentence,
                      'Loss:', eloss / etotal,
                      'Errors:', (float(eerrors)) / etotal,
                      'Time', time.time() - start)
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0

            conll_sentence = [entry for entry in sentence
                              if isinstance(entry, ConllEntry)]

            for entry in conll_sentence:
                c = float(self.words_count.get(entry.norm, 0))
                # Word dropout: keep a word with probability c / (0.25 + c),
                # so rare words fall back to the UNK index (0) more often.
                drop_flag = (random.random() < (c / (0.25 + c)))
                wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))
                                       if drop_flag else 0] \
                    if self.wdims > 0 else None
                posvec = self.plookup[int(self.pos[entry.pos])] \
                    if self.pdims > 0 else None
                entry.vec = concatenate(
                    [_f for _f in [wordvec, posvec, None] if _f])
                entry.lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None
                entry.rheadfov = None
                entry.rmodfov = None

            if self.blstm_flag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()
                for entry, rentry in zip(conll_sentence,
                                         reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)
                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                if self.bibi_flag:
                    for entry in conll_sentence:
                        entry.vec = concatenate(entry.lstms)
                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()
                    for entry, rentry in zip(conll_sentence,
                                             reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)
                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            scores, exprs = self._evaluate(conll_sentence)
            gold = [entry.parent_id for entry in conll_sentence]
            heads = decoder.parse_proj(scores,
                                       gold if self.costaug_flag else None)

            if self.labels_flag:
                for modifier, head in enumerate(gold[1:]):
                    rscores, rexprs = self._evaluate_label(
                        conll_sentence, head, modifier + 1)
                    gold_label_ind = self.rels[
                        conll_sentence[modifier + 1].relation]
                    wrong_label_ind = max(
                        ((label, scr) for label, scr in enumerate(rscores)
                         if label != gold_label_ind),
                        key=itemgetter(1))[0]
                    if rscores[gold_label_ind] < rscores[wrong_label_ind] + 1:
                        lerrs.append(rexprs[wrong_label_ind] -
                                     rexprs[gold_label_ind])

            e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
            eerrors += e
            if e > 0:
                loss = [(exprs[h][i] - exprs[g][i])
                        for i, (h, g) in enumerate(zip(heads, gold))
                        if h != g]  # * (1.0/float(e))
                eloss += e
                mloss += e
                errs.extend(loss)

            etotal += len(conll_sentence)

            # i_sentence % 1 == 0 is always true, so an update is attempted
            # after every sentence. (The original `errs > 0` compared a list
            # to an int, a TypeError in Python 3; fixed to len(errs) > 0.)
            if i_sentence % 1 == 0 or len(errs) > 0 or lerrs:
                if errs or lerrs:
                    eerrs = esum(errs + lerrs)  # * (1.0/(float(len(errs))))
                    eerrs.scalar_value()
                    eerrs.backward()
                    self.trainer.update()
                    errs = []
                    lerrs = []
                renew_cg()

            i_sentence += 1

        if errs:
            eerrs = esum(errs + lerrs)  # * (1.0/(float(len(errs))))
            eerrs.scalar_value()
            eerrs.backward()
            self.trainer.update()
            renew_cg()

        self.trainer.update()
        print("Loss: ", mloss / i_sentence)

    def save(self, filename):
        self.model.save(filename)

    def load(self, filename):
        self.model.populate(filename)
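# A hedged usage sketch for MSTParserLSTM. The option names below are exactly
# the ones read in __init__; the concrete values are hypothetical, and
# vocab/w2i/pos/rels are assumed to come from the repo's corpus utilities:
#     options = {'activation': 'tanh', 'blstmFlag': True, 'labelsFlag': True,
#                'costaugFlag': True, 'bibiFlag': True, 'lstm_dims': 125,
#                'wembedding_dims': 100, 'pembedding_dims': 25,
#                'rembedding_dims': 25, 'lstm_layers': 2,
#                'hidden_units': 100, 'hidden2_units': 0}
#     parser = MSTParserLSTM(vocab, w2i, pos, rels, options)
#     parser.train('train.conll')                  # one pass over the data
#     parsed = list(parser.predict(conll_path='dev.conll'))
#     parser.save('bist.model')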