Example #1
    def __init__(self,
                 model: dy.ParameterCollection,
                 in_dim: int,
                 hid_dim: int,
                 p: float = 0.1):

        # Keep this block's parameters in their own subcollection of the
        # caller's model, so the module can be saved and loaded as a unit.
        pc = model.add_subcollection()
        self.W1 = Linear(pc, in_dim, hid_dim)
        self.W2 = Linear(pc, hid_dim, in_dim)
        self.p = p  # dropout probability
        self.pc = pc
        self.spec = (in_dim, hid_dim, p)  # constructor args for DyNet's save/load protocol
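
The pc/spec pair above follows DyNet's save/load protocol for composite
objects. A minimal sketch of the two companion methods such a class normally
defines (the class name FeedForward is an assumption; the listing only shows
__init__):

    def param_collection(self):
        return self.pc

    @staticmethod
    def from_spec(spec, model):
        in_dim, hid_dim, p = spec
        return FeedForward(model, in_dim, hid_dim, p)

With these two methods in place, dy.save('basename', [obj]) serializes the
block and dy.load('basename', model) rebuilds it from the stored spec.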
Example #2
    def __init__(self,
                 model: dy.ParameterCollection,
                 in_dim: int,
                 out_dim: int,
                 init: dy.PyInitializer = None,
                 bias: bool = True):

        pc = model.add_subcollection()
        if not init:
            # Default initializer. Note that the scale sqrt(in_dim) grows with
            # fan-in; sqrt(1 / in_dim) is the more common uniform scaling.
            init = dy.UniformInitializer(math.sqrt(in_dim))
        self.W = pc.add_parameters((out_dim, in_dim), init=init)
        if bias:
            self.b = pc.add_parameters((out_dim,), init=init)
        self.pc = pc
        self.bias = bias
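
Only the constructor appears in the listing; a plausible __call__ for this
Linear layer (an assumption, not part of the source) applies W and the
optional bias:

    def __call__(self, x):
        # x: column-vector expression of size in_dim. With recent DyNet,
        # Parameters are used directly in expressions; older versions need
        # self.W.expr() and self.b.expr().
        out = self.W * x
        if self.bias:
            out = out + self.b
        return out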
Example #3
    def __init__(self, model: dy.ParameterCollection, cfg: IniConfigurator,
                 vocabulary: Vocabulary):

        pc = model.add_subcollection()
        # MLP layer
        orth_init = OrthogonalInitializer
        self.head_arc_MLP = MLP(pc, cfg.ARC_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                                cfg.MLP_BIAS, orth_init)
        self.head_rel_MLP = MLP(pc, cfg.REL_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                                cfg.MLP_BIAS, orth_init)
        self.dept_arc_MLP = MLP(pc, cfg.ARC_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                                cfg.MLP_BIAS, orth_init)
        self.dept_rel_MLP = MLP(pc, cfg.REL_MLP_SIZE, leaky_relu, cfg.MLP_DROP,
                                cfg.MLP_BIAS, orth_init)

        # Biaffine Attention Layer (Arc)
        arc_size = cfg.ARC_MLP_SIZE[-1]
        zero_init = dy.ConstInitializer(0)
        self.arc_attn_mat = [
            BiaffineMatAttention(pc, arc_size, arc_size, 1, True, False,
                                 zero_init)
            for _ in range(cfg.GRAPH_LAYERS + 1)
        ]

        # Biaffine Attention Layer (Rel)
        rel_num = vocabulary.get_vocab_size('rel')
        rel_size = cfg.REL_MLP_SIZE[-1]
        self.rel_mask = np.array([1] + [0] * (rel_num - 1))  # mask the root relation
        self.rel_attn = BiaffineMatAttention(pc, rel_size, rel_size, rel_num,
                                             True, True, zero_init)

        # Graph Network Layer
        self.head_gnn = GraphNNUnit(pc, arc_size, arc_size, leaky_relu,
                                    orth_init)
        self.dept_gnn = GraphNNUnit(pc, arc_size, arc_size, leaky_relu,
                                    orth_init)
        self.head_rel_gnn = GraphNNUnit(pc, rel_size, rel_size, leaky_relu,
                                        orth_init)
        self.dept_rel_gnn = GraphNNUnit(pc, rel_size, rel_size, leaky_relu,
                                        orth_init)

        # Graph Layer WarmUp
        self.warm_list = [
            -i * cfg.WARM for i in range(cfg.GRAPH_LAYERS, -1, -1)
        ]

        # Save Variable
        self.arc_size, self.rel_size, self.rel_num = arc_size, rel_size, rel_num
        self.pc, self.cfg = pc, cfg
        self.spec = (cfg, vocabulary)
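
For reference, a BiaffineMatAttention layer of this kind scores head/dependent
pairs with the Dozat-and-Manning biaffine form; the two booleans in its
constructor plausibly toggle the linear (bias) terms. A shape-only numpy
sketch of the score for a single pair (names and decomposition assumed):

    import numpy as np

    def biaffine_score(head, dep, U, w, b):
        # head: (h,), dep: (d,), U: (h, d) bilinear term,
        # w: (h + d,) linear term, b: scalar bias
        return head @ U @ dep + w @ np.concatenate([head, dep]) + b

The real layer evaluates this for every head/dependent pair of a sentence at
once as a single matrix product.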
Example #4
    def __init__(self,
                 model: dy.ParameterCollection,
                 in_dim: int,
                 out_dim: int,
                 bias: bool = True,
                 init: dy.PyInitializer = dy.GlorotInitializer()):

        pc = model.add_subcollection()
        # Wrap the initializer for this specific shape, but keep the original
        # `init` around so `spec` records the caller's initializer rather than
        # the shape-wrapped one.
        init_W = init_wrap(init, (out_dim, in_dim))
        self.W = pc.add_parameters((out_dim, in_dim), init=init_W)
        if bias:
            self.b = pc.add_parameters((out_dim,), init=0)
        self.pc = pc
        self.bias = bias
        self.spec = (in_dim, out_dim, bias, init)
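
init_wrap is a repo-local helper that the listing never shows. Given that
Example #3 passes the OrthogonalInitializer class itself down to its layers,
while this constructor defaults to a ready-made dy.GlorotInitializer()
instance, a plausible (assumed) implementation instantiates class-style
initializers with the parameter shape and passes instances through unchanged:

    def init_wrap(init, shape):
        if isinstance(init, type):  # an initializer class, e.g. OrthogonalInitializer
            return init(shape)      # shape-aware instantiation (assumed signature)
        return init                 # already a dy.PyInitializer instance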
Example #5
    def __init__(
            self,
            model: dy.ParameterCollection,
            h_dim: int,
            d_dim: int,
            f=dy.tanh,
            init: dy.PyInitializer = dy.GlorotInitializer()):

        pc = model.add_subcollection()
        # Two weight matrices with shape-matched initializers: W (h_dim x d_dim)
        # consumes the d-side input, B (h_dim x h_dim) the h-side state.
        init_W = init_wrap(init, (h_dim, d_dim))
        self.W = pc.add_parameters((h_dim, d_dim), init=init_W)
        init_B = init_wrap(init, (h_dim, h_dim))
        self.B = pc.add_parameters((h_dim, h_dim), init=init_B)

        self.pc, self.f = pc, f
        self.spec = (h_dim, d_dim, f, init)
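
Example #3 instantiates units like this one as GraphNNUnit(pc, arc_size,
arc_size, leaky_relu, orth_init). A plausible __call__ (assumed; only the
constructor is shown) combines a transformed neighbour summary with the node's
own state and squashes the result through f:

    def __call__(self, d, h):
        # d: aggregated neighbour vector of size d_dim
        # h: current node state of size h_dim
        return self.f(self.W * d + self.B * h)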
Example #6
    def __init__(self,
                 model: dy.ParameterCollection,
                 sizes: List[int],
                 f: 'nonlinear' = dy.tanh,
                 p: float = 0.0,
                 bias: bool = True,
                 init: dy.PyInitializer = dy.GlorotInitializer()):

        pc = model.add_subcollection()
        # One weight matrix per consecutive size pair: sizes=[100, 50, 10]
        # yields W shapes (50, 100) and (10, 50), plus per-layer zero biases.
        self.W = [
            pc.add_parameters((x, y), init=init_wrap(init, (x, y)))
            for x, y in zip(sizes[1:], sizes[:-1])
        ]
        if bias:
            self.b = [pc.add_parameters((y,), init=0) for y in sizes[1:]]

        self.pc, self.f, self.p, self.bias = pc, f, p, bias
        self.spec = (sizes, f, p, bias, init)
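
A matching forward pass for this MLP (assumed; only the constructor appears in
the listing) chains each layer through the nonlinearity, with dropout applied
during training:

    def __call__(self, x, train=False):
        h = x
        for i, W in enumerate(self.W):
            h = W * h
            if self.bias:
                h = h + self.b[i]
            h = self.f(h)
            if train and self.p > 0:
                h = dy.dropout(h, self.p)
        return h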
Example #7
    def __init__(self, vocab, w2i, pos, rels, options):
        if isinstance(options, dict):
            options = _dict_to_obj(options, 'Values')

        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)

        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cmult(cmult(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstm_flag = options.blstmFlag
        self.labels_flag = options.labelsFlag
        self.costaug_flag = options.costaugFlag
        self.bibi_flag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.words_count = vocab
        self.vocab = {word: ind + 3 for word, ind in list(w2i.items())}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        if self.bibi_flag:
            self.builders = [LSTMBuilder(1, self.wdims + self.pdims, self.ldims, self.model),
                             LSTMBuilder(1, self.wdims + self.pdims, self.ldims, self.model)]
            self.bbuilders = [LSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              LSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = \
                [LSTMBuilder(self.layers, self.wdims + self.pdims, self.ldims, self.model),
                 LSTMBuilder(self.layers, self.wdims + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.pdims, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.pdims, self.ldims, self.model)]

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))

        self.hid_layer_foh = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hid_layer_fom = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hid_bias = self.model.add_parameters((self.hidden_units,))

        self.hid2_layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
        self.hid2_bias = self.model.add_parameters((self.hidden2_units,))

        self.out_layer = self.model.add_parameters(
            (1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))

        if self.labels_flag:
            self.rhid_layer_foh = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhid_layer_fom = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhid_bias = self.model.add_parameters((self.hidden_units,))
            self.rhid2_layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
            self.rhid2_bias = self.model.add_parameters((self.hidden2_units,))
            self.rout_layer = self.model.add_parameters(
                (len(self.irels),
                 self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
            self.rout_bias = self.model.add_parameters((len(self.irels),))
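
_dict_to_obj converts a plain options dict into an attribute-style object so
the constructor can read options.activation, options.lstm_dims, and so on. A
minimal sketch of such a helper (assumed; the real one is not shown):

    from collections import namedtuple

    def _dict_to_obj(d, name):
        return namedtuple(name, d.keys())(*d.values())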
Example #8
class MSTParserLSTM(object):
    """Underlying LSTM model for MSTParser used by BIST parser."""

    def __init__(self, vocab, w2i, pos, rels, options):
        if isinstance(options, dict):
            options = _dict_to_obj(options, 'Values')

        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)

        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cmult(cmult(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstm_flag = options.blstmFlag
        self.labels_flag = options.labelsFlag
        self.costaug_flag = options.costaugFlag
        self.bibi_flag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.words_count = vocab
        self.vocab = {word: ind + 3 for word, ind in list(w2i.items())}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        if self.bibi_flag:
            self.builders = [LSTMBuilder(1, self.wdims + self.pdims, self.ldims, self.model),
                             LSTMBuilder(1, self.wdims + self.pdims, self.ldims, self.model)]
            self.bbuilders = [LSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              LSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = \
                [LSTMBuilder(self.layers, self.wdims + self.pdims, self.ldims, self.model),
                 LSTMBuilder(self.layers, self.wdims + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.pdims, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.pdims, self.ldims, self.model)]

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))

        self.hid_layer_foh = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hid_layer_fom = self.model.add_parameters((self.hidden_units, self.ldims * 2))
        self.hid_bias = self.model.add_parameters((self.hidden_units,))

        self.hid2_layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
        self.hid2_bias = self.model.add_parameters((self.hidden2_units,))

        self.out_layer = self.model.add_parameters(
            (1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))

        if self.labels_flag:
            self.rhid_layer_foh = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhid_layer_fom = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
            self.rhid_bias = self.model.add_parameters((self.hidden_units,))
            self.rhid2_layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
            self.rhid2_bias = self.model.add_parameters((self.hidden2_units,))
            self.rout_layer = self.model.add_parameters(
                (len(self.irels),
                 self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
            self.rout_bias = self.model.add_parameters((len(self.irels),))

    def _get_expr(self, sentence, i, j):
        # pylint: disable=missing-docstring
        if sentence[i].headfov is None:
            sentence[i].headfov = self.hid_layer_foh.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].modfov is None:
            sentence[j].modfov = self.hid_layer_fom.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = \
                self.out_layer.expr() * self.activation(
                    self.hid2_bias.expr() + self.hid2_layer.expr() * self.activation(
                        sentence[i].headfov + sentence[j].modfov
                        + self.hid_bias.expr()))  # + self.outBias
        else:
            output = self.out_layer.expr() * self.activation(
                sentence[i].headfov + sentence[j].modfov + self.hid_bias.expr())  # + self.outBias
        return output

    def _evaluate(self, sentence):
        # pylint: disable=missing-docstring
        exprs = [[self._get_expr(sentence, i, j) for j in range(len(sentence))]
                 for i in range(len(sentence))]
        scores = np.array([[output.scalar_value() for output in exprsRow] for exprsRow in exprs])
        return scores, exprs

    def _evaluate_label(self, sentence, i, j):
        # pylint: disable=missing-docstring
        if sentence[i].rheadfov is None:
            sentence[i].rheadfov = self.rhid_layer_foh.expr() * concatenate(
                [sentence[i].lstms[0], sentence[i].lstms[1]])
        if sentence[j].rmodfov is None:
            sentence[j].rmodfov = self.rhid_layer_fom.expr() * concatenate(
                [sentence[j].lstms[0], sentence[j].lstms[1]])

        if self.hidden2_units > 0:
            output = self.rout_layer.expr() * self.activation(
                self.rhid2_bias.expr() + self.rhid2_layer.expr()
                * self.activation(sentence[i].rheadfov + sentence[j].rmodfov
                                  + self.rhid_bias.expr())) + self.rout_bias.expr()
        else:
            output = self.rout_layer.expr() * self.activation(
                sentence[i].rheadfov + sentence[j].rmodfov
                + self.rhid_bias.expr()) + self.rout_bias.expr()
        return output.value(), output

    def predict(self, conll_path=None, conll=None):
        # pylint: disable=missing-docstring
        if conll is None:
            conll = read_conll(conll_path)

        for sentence in conll:
            conll_sentence = [entry for entry in sentence if
                              isinstance(entry, ConllEntry)]

            for entry in conll_sentence:
                wordvec = self.wlookup[int(
                    self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None
                posvec = self.plookup[
                    int(self.pos[entry.pos])] if self.pdims > 0 else None
                entry.vec = concatenate(
                    [_f for _f in [wordvec, posvec] if _f is not None])

                entry.lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None

                entry.rheadfov = None
                entry.rmodfov = None

            if self.blstm_flag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                if self.bibi_flag:
                    for entry in conll_sentence:
                        entry.vec = concatenate(entry.lstms)

                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            scores, _ = self._evaluate(conll_sentence)
            heads = decoder.parse_proj(scores)

            for entry, head in zip(conll_sentence, heads):
                entry.pred_parent_id = head
                entry.pred_relation = '_'

            dump = False

            if self.labels_flag:
                for modifier, head in enumerate(heads[1:]):
                    scores, _ = self._evaluate_label(conll_sentence, head, modifier + 1)
                    conll_sentence[modifier + 1].pred_relation = \
                        self.irels[max(enumerate(scores), key=itemgetter(1))[0]]

            renew_cg()
            if not dump:
                yield sentence

    def train(self, conll_path):
        # pylint: disable=invalid-name
        # pylint: disable=missing-docstring
        eloss = 0.0
        mloss = 0.0
        eerrors = 0
        etotal = 0
        start = time.time()

        shuffled_data = list(read_conll(conll_path))
        random.shuffle(shuffled_data)
        errs = []
        lerrs = []
        i_sentence = 0

        for sentence in shuffled_data:
            if i_sentence % 100 == 0 and i_sentence != 0:
                print('Processing sentence number:', i_sentence, 'Loss:',
                      eloss / etotal, 'Errors:',
                      (float(eerrors)) / etotal, 'Time', time.time() - start)
                start = time.time()
                eerrors = 0
                eloss = 0.0
                etotal = 0

            conll_sentence = [entry for entry in sentence if isinstance(entry, ConllEntry)]

            for entry in conll_sentence:
                c = float(self.words_count.get(entry.norm, 0))
                # frequency-based word dropout: keep a word's embedding with
                # probability c / (0.25 + c), else use the unknown-word vector
                drop_flag = (random.random() < (c / (0.25 + c)))
                wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0)) if drop_flag else 0] \
                    if self.wdims > 0 else None
                posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None

                entry.vec = concatenate([_f for _f in [wordvec, posvec] if _f is not None])

                entry.lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None

                entry.rheadfov = None
                entry.rmodfov = None

            if self.blstm_flag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                if self.bibi_flag:
                    for entry in conll_sentence:
                        entry.vec = concatenate(entry.lstms)

                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            scores, exprs = self._evaluate(conll_sentence)
            gold = [entry.parent_id for entry in conll_sentence]
            heads = decoder.parse_proj(scores, gold if self.costaug_flag else None)

            if self.labels_flag:
                for modifier, head in enumerate(gold[1:]):
                    rscores, rexprs = self._evaluate_label(conll_sentence, head, modifier + 1)
                    gold_label_ind = self.rels[conll_sentence[modifier + 1].relation]
                    wrong_label_ind = max(((label, scr) for label, scr in enumerate(rscores)
                                           if label != gold_label_ind), key=itemgetter(1))[0]
                    if rscores[gold_label_ind] < rscores[wrong_label_ind] + 1:
                        lerrs.append(rexprs[wrong_label_ind] - rexprs[gold_label_ind])

            e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g])
            eerrors += e
            if e > 0:
                loss = [(exprs[h][i] - exprs[g][i]) for i, (h, g) in
                        enumerate(zip(heads, gold)) if h != g]  # * (1.0/float(e))
                eloss += e
                mloss += e
                errs.extend(loss)

            etotal += len(conll_sentence)

            if i_sentence % 1 == 0 or errs or lerrs:  # batch size 1: update after every sentence
                if errs or lerrs:
                    eerrs = (esum(errs + lerrs))  # * (1.0/(float(len(errs))))
                    eerrs.scalar_value()
                    eerrs.backward()
                    self.trainer.update()
                    errs = []
                    lerrs = []

                renew_cg()

            i_sentence += 1

        if errs:
            eerrs = (esum(errs + lerrs))  # * (1.0/(float(len(errs))))
            eerrs.scalar_value()
            eerrs.backward()
            self.trainer.update()

            renew_cg()

        self.trainer.update()
        print("Loss: ", mloss / i_sentence)

    def save(self, filename):
        self.model.save(filename)

    def load(self, filename):
        self.model.populate(filename)
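
A minimal end-to-end usage sketch for this class. The option values are
illustrative, and vocab (word-to-count dict), w2i (word-to-index dict), pos
and rels (tag and relation lists) are assumed to have been built from the
training corpus beforehand:

    options = {
        'activation': 'tanh', 'blstmFlag': True, 'labelsFlag': True,
        'costaugFlag': True, 'bibiFlag': True, 'lstm_dims': 125,
        'wembedding_dims': 100, 'pembedding_dims': 25, 'rembedding_dims': 25,
        'lstm_layers': 2, 'hidden_units': 100, 'hidden2_units': 0,
    }
    parser = MSTParserLSTM(vocab, w2i, pos, rels, options)
    parser.train('train.conll')                            # one epoch over the shuffled data
    parsed = list(parser.predict(conll_path='dev.conll'))  # generator of parsed sentences
    parser.save('model.dynet')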