def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
    """Build the transition-based parser model: a FeatureExtractor plus
    unlabeled/labeled MLP scorers over stack/buffer features.

    :param words/pos/rels/cpos/langs/w2i/ch: vocabulary structures forwarded
        to the FeatureExtractor (schemas defined by the caller).
    :param options: parsed command-line options (learning_rate, activation,
        oracle, headFlag, rlMostFlag, rlFlag, k, lstm_output_size,
        mlp_hidden_dims, mlp_hidden2_dims, ...).
    """
    # Fix: declare the global BEFORE binding it. `import dynet as dy`
    # followed by `global dy` is a SyntaxError on Python 3 ("name 'dy' is
    # assigned to before global declaration").
    global dy
    # import here so we don't load Dynet if just running parser.py --help for example
    import dynet as dy

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features: one vector for the head,
    # two more when right/left-most child features are enabled
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, words, rels, langs, w2i, ch, self.nnvecs)
    self.irels = self.feature_extractor.irels

    # k+1 configuration items, each nnvecs BiLSTM states of size 2*lstm_output_size
    mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    # 4 outputs = transition classes (LA/RA/SH/SW per the sibling variants);
    # labeled scorer has a (left, right) pair per relation plus two extras.
    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
def __init__(self, vocab, options):
    """Build the graph-based parser model: BiLSTM feature extractor plus a
    biMLP arc scorer and, when labelsFlag is set, a biMLP label scorer.

    :param vocab: vocabulary structure forwarded to the FeatureExtractor.
    :param options: parsed command-line options (learning_rate, activation,
        costaugFlag, labelsFlag, lstm_output_size, mlp_hidden_dims,
        mlp_hidden2_dims, proj, ...).
    """
    # Fix: declare the global BEFORE binding it. `import dynet as dy`
    # followed by `global dy` is a SyntaxError on Python 3.
    global dy
    import dynet as dy
    from feature_extractor import FeatureExtractor

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.costaugFlag = options.costaugFlag
    self.feature_extractor = FeatureExtractor(self.model, options, vocab)
    self.labelsFlag = options.labelsFlag

    # Each token is represented by concatenated forward/backward LSTM states.
    mlp_in_dims = options.lstm_output_size * 2
    # Arc scorer: a single score per candidate (head, dependent) pair.
    self.unlabeled_MLP = biMLP(self.model, mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 1, self.activation)
    if self.labelsFlag:
        # Label scorer: one score per relation type.
        self.labeled_MLP = biMLP(self.model, mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, len(self.feature_extractor.irels), self.activation)
    self.proj = options.proj
def __init__(self, vocab, options):
    """Build the arc-hybrid transition parser model (optionally with
    recursive subtree composition) and its MLP scorers.

    :param vocab: vocabulary structure forwarded to the FeatureExtractor.
    :param options: parsed command-line options (learning_rate, activation,
        oracle, headFlag, rlMostFlag, rlFlag, k, use_recursive_composition,
        no_bilstms, lstm_output_size, lstm_input_size, mlp_hidden_dims,
        mlp_hidden2_dims, ...).
    """
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    # Fix: declare the global BEFORE binding it. Importing `dy` first and
    # declaring `global dy` afterwards is a SyntaxError on Python 3.
    global dy
    import dynet as dy

    # Transition codes shared module-wide.
    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k
    self.recursive_composition = options.use_recursive_composition  # ugly hack

    # dimensions depending on extended features: +1 vector when composed
    # subtree representations are tracked
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) + (1 if self.recursive_composition else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:
        # BiLSTM output: forward + backward states per item.
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        # No BiLSTM layers: features are the raw embedding-sized inputs.
        mlp_in_dims = options.lstm_input_size * self.nnvecs * (self.k + 1)

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
def add_input(self, input_vec):
    """Run one step of the LSTM cell on `input_vec`, updating the cell and
    hidden state and recording the new hidden state in `self.outputs`.

    Returns self so that calls can be chained.
    """
    # Gate input: current vector concatenated with the previous hidden state.
    gate_in = dynet.concatenate([input_vec, self.h])
    in_gate = dynet.logistic(self.W_i * gate_in + self.b_i)
    forget_gate = dynet.logistic(self.W_f * gate_in + self.b_f)
    out_gate = dynet.logistic(self.W_o * gate_in + self.b_o)
    candidate = dynet.tanh(self.W_c * gate_in + self.b_c)
    # New memory: keep the gated old cell state, mix in the gated candidate.
    self.c = dynet.cwise_multiply(forget_gate, self.c) + dynet.cwise_multiply(in_gate, candidate)
    self.h = dynet.cwise_multiply(out_gate, dynet.tanh(self.c))
    self.outputs.append(self.h)
    return self
def __init__(self, vocab, options):
    """Build the transition parser model with optional nucleus composition:
    selected dependent representations are merged into their head via a
    learned combiner, controlled by options.nucleus_composition.

    :param vocab: vocabulary structure; vocab[5] is expected to hold the
        full relation inventory (presumably — confirm against the caller).
    :param options: parsed command-line options.
    """
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    # Fix: declare the global BEFORE binding it. Importing `dy` first and
    # declaring `global dy` afterwards is a SyntaxError on Python 3.
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3
    # NOTE: assignment order (HARD before SOFT) intentionally differs from
    # the declaration order; the values below are the ones that matter.
    global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP
    NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = 0, 1, 2, 3

    self.composition = options.nucleus_composition

    all_rels = vocab[5]
    # UD function-word relations treated as part of the nucleus.
    functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
    if self.composition in [HARD_COMP, SOFT_COMP]:
        self.compositional_relations = functional_rels
    elif self.composition in [GEN_COMP]:
        self.compositional_relations = all_rels
    else:
        self.compositional_relations = []
    self.compositional_relations_dict = {
        rel: idx
        for idx, rel in enumerate(self.compositional_relations)
    }

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)
    print("The size of the MLP input layer is {0}".format(mlp_in_dims))

    if self.composition in [SOFT_COMP, GEN_COMP]:
        # Combiner: [head ; dependent ; relation embedding] -> head-sized vector.
        rel_emb_sz = 10
        self.cmp_rel_lookup = self.model.add_lookup_parameters(
            (len(self.compositional_relations), rel_emb_sz))
        cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
        out_sz = 2 * options.lstm_output_size
        self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz), name='cmbW1')
        self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 4, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims, options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
    """Build the multilingual transition parser model.

    Transition codes: 0 = LA, 1 = RA, 2 = SH, 3 = SW.

    When options.multiling is set and the MLPs are not shared, one
    labeled/unlabeled MLP pair is created per language.
    """
    # Fix: declare the global BEFORE binding it. `import dynet as dy`
    # followed by `global dy` is a SyntaxError on Python 3.
    global dy
    # import here so we don't load Dynet if just running parser.py --help for example
    import dynet as dy

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.shareMLP = options.shareMLP
    self.config_lembed = options.lembed_config

    # vectors used
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, words, rels, langs, w2i, ch, self.nnvecs, options)
    self.irels = self.feature_extractor.irels

    # mlps
    mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    if self.config_lembed:
        # Language embedding is appended to the configuration features.
        mlp_in_dims += options.lang_emb_size
    h1 = options.mlp_hidden_dims
    h2 = options.mlp_hidden2_dims
    if not options.multiling or self.shareMLP:
        self.unlabeled_MLP = MLP(self.model, mlp_in_dims, h1, h2, 4, self.activation)
        self.labeled_MLP = MLP(self.model, mlp_in_dims, h1, h2, 2 * len(rels) + 2, self.activation)
    else:
        # One MLP pair per language when not sharing across languages.
        self.labeled_mlpdict = {
            lang: MLP(self.model, mlp_in_dims, h1, h2, 2 * len(rels) + 2, self.activation)
            for lang in self.feature_extractor.langs
        }
        self.unlabeled_mlpdict = {
            lang: MLP(self.model, mlp_in_dims, h1, h2, 4, self.activation)
            for lang in self.feature_extractor.langs
        }
def __init__(self, words, pos, rels, w2i, options):
    """Build the arc-hybrid parser model: BiLSTM encoder plus dense
    action/relation classifiers over configuration features.

    NOTE(review): `w2i.iteritems()` is Python 2 only, while `print(...)`
    below is call-style — the target interpreter version should be
    confirmed before porting.
    """
    self.model = dn.Model()
    self.trainer = dn.AdamTrainer(self.model)
    random.seed(1)
    # noinspection PyUnresolvedReferences
    self.activations = {
        'tanh': dn.tanh,
        'sigmoid': dn.logistic,
        'relu': dn.rectify,
        'tanh3': (lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.options = options
    self.oracle = options.oracle
    # BiLSTM state size: forward + backward halves.
    self.ldims = options.lstm_dims * 2
    self.wdims = options.wembedding_dims
    self.pdims = options.pembedding_dims
    self.rdims = options.rembedding_dims
    self.wordsCount = words
    # ids 1 and 2 are reserved for *PAD* / *INITIAL* (set below); 0 is unknown.
    self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
    self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
    self.relation = options.relation
    if self.relation:
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
    else:
        # Unlabeled mode: single dummy relation.
        self.rels = {"X": 0}
        self.irels = ["X"]
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.window
    # Bool flags used as ints: head vector plus two vectors per child-feature flag.
    self.nnvecs = self.headFlag + self.rlFlag * 2 + self.rlMostFlag * 2
    self.actions = transition_utils.ArcHybridActions(self.irels, options.action_file)
    if options.external_embedding is not None:
        self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
            self.model, options.external_embedding)
        print('Load external embedding. Vector dimensions', self.edim)
    else:
        self.extrnd, self.elookup, self.edim = None, None, 0
    dims = self.wdims + self.pdims + self.edim
    self.rnn = nn.BiLSTM(self.model, [dims] + [self.ldims] * options.lstm_layers)
    self.hidden_units = options.hidden_units
    self.hidden2_units = options.hidden2_units
    self.vocab['*PAD*'] = 1
    self.pos['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2
    self.pos['*INITIAL*'] = 2
    self.wlookup = self.model.add_lookup_parameters((len(words) + 3, self.wdims))
    self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims))
    self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))
    # Used to build the padding vector for empty stack/buffer slots.
    self.word2lstm = self.model.add_parameters(
        (self.ldims, self.wdims + self.pdims + self.edim))
    self.word2lstmbias = self.model.add_parameters((self.ldims))
    # self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims))
    # self.lstm2lstmbias = self.model.add_parameters((self.ldims))
    input_dims = self.ldims * self.nnvecs * (self.k + 1)
    # Zero-sized hidden layers are dropped so DenseLayers only sees real dims.
    action_dims = [input_dims, self.hidden_units, self.hidden2_units, len(self.actions)]
    action_dims = [i for i in action_dims if i != 0]
    self.action_classifier = nn.DenseLayers(self.model, action_dims, self.activation)
    relation_dims = [input_dims, self.hidden_units, self.hidden2_units, len(self.actions.decoded_with_relation)]
    relation_dims = [i for i in relation_dims if i != 0]
    self.relation_classifier = nn.DenseLayers(self.model, relation_dims, self.activation)
    # A beam of size 0 means beam search is effectively disabled.
    if self.options.beam_size == 0:
        self.options.beam_search = False
def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
    """Build the character-aware (optionally multilingual) transition parser
    model with stacked surface BiLSTMs and hand-rolled MLP layers.

    NOTE(review): this block is Python 2 code (`iteritems`, `xrange`,
    `print` statement); it will not run unmodified on Python 3.
    """
    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    random.seed(1)
    # 'runk' = fallback relation for unknown labels; mutates the caller's list.
    rels.append('runk')
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.multiling = options.multiling
    self.ldims = options.lstm_dims
    self.cldims = options.chlstm_dims
    self.wdims = options.wembedding_dims
    self.rdims = options.rembedding_dims
    self.cdims = options.cembedding_dims
    self.langdims = options.lembedding_dims
    self.wordsCount = words
    # ids 1 and 2 are reserved for *PAD* / *INITIAL*; 0 is unknown.
    self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
    # NOTE(review): the loop variables are swapped relative to their names
    # (`word` is the enumerate index, `ind` the character), but the resulting
    # mapping char -> index + 3 is the intended one.
    self.chars = {ind: word + 3 for word, ind in enumerate(ch)}
    self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
    self.cpos = {word: ind + 3 for ind, word in enumerate(cpos)}
    self.rels = {word: ind for ind, word in enumerate(rels)}
    if langs:
        self.langs = {word: ind for ind, word in enumerate(langs)}
    else:
        self.langs = None
    self.irels = rels
    self.debug = options.debug
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.window

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.external_embedding = None
    if options.external_embedding is not None:
        # File format: header line, then "<word> <v1> <v2> ..." per line.
        external_embedding_fp = open(options.external_embedding, 'r')
        external_embedding_fp.readline()
        self.external_embedding = {}
        for line in external_embedding_fp:
            line = line.strip().split()
            self.external_embedding[line[0]] = [float(f) for f in line[1:]]
        external_embedding_fp.close()
        self.edim = len(self.external_embedding.values()[0])
        # Zero vector used for words without an external embedding
        # (presumably — confirm against the lookup code).
        self.noextrn = [0.0 for _ in xrange(self.edim)]
        self.extrnd = {
            word: i + 3
            for i, word in enumerate(self.external_embedding)
        }
        self.elookup = self.model.add_lookup_parameters(
            (len(self.external_embedding) + 3, self.edim))
        for word, i in self.extrnd.iteritems():
            self.elookup.init_row(i, self.external_embedding[word])
        self.extrnd['*PAD*'] = 1
        self.extrnd['*INITIAL*'] = 2
        print 'Load external embedding. Vector dimensions', self.edim

    # Per-token input: word emb + optional external emb + optional language
    # emb + bidirectional character-LSTM summary.
    dims = self.wdims + (self.edim if self.external_embedding is
                         not None else 0) + (self.langdims if self.multiling else 0) + 2 * self.cldims
    # First surface BiLSTM layer (forward, backward).
    self.surfaceBuilders = [
        dy.VanillaLSTMBuilder(1, dims, self.ldims, self.model),
        dy.VanillaLSTMBuilder(1, dims, self.ldims, self.model)
    ]
    # Second BiLSTM layer over the first layer's concatenated states.
    self.bsurfaceBuilders = [
        dy.VanillaLSTMBuilder(1, 2 * self.ldims, self.ldims, self.model),
        dy.VanillaLSTMBuilder(1, 2 * self.ldims, self.ldims, self.model)
    ]
    # Character-level BiLSTM.
    self.charBuilders = [
        dy.VanillaLSTMBuilder(1, self.cdims, self.cldims, self.model),
        dy.VanillaLSTMBuilder(1, self.cdims, self.cldims, self.model)
    ]
    self.hidden_units = options.hidden_units
    self.hidden2_units = options.hidden2_units
    self.vocab['*PAD*'] = 1
    if self.langs:
        self.langs['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2
    if self.langs:
        self.langs['*INITIAL*'] = 2
    self.clookup = self.model.add_lookup_parameters((len(ch) + 3, self.cdims))
    self.wlookup = self.model.add_lookup_parameters((len(words) + 3, self.wdims))
    self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))
    if self.multiling:
        self.langslookup = self.model.add_lookup_parameters((len(langs) + 3, self.langdims))

    # used in the PaddingVec
    self.word2lstm = self.model.add_parameters((self.ldims * 2, dims))
    self.word2lstmbias = self.model.add_parameters((self.ldims * 2))
    self.chPadding = self.model.add_parameters((self.cldims * 2))

    # Action MLP: hidden -> hidden2 -> 4 transition scores.
    self.hidLayer = self.model.add_parameters(
        (self.hidden_units, self.ldims * 2 * self.nnvecs * (self.k + 1)))
    self.hidBias = self.model.add_parameters((self.hidden_units))
    self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
    self.hid2Bias = self.model.add_parameters((self.hidden2_units))
    self.outLayer = self.model.add_parameters(
        (4, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
    self.outBias = self.model.add_parameters((4))

    # r stands for relation: parallel MLP scoring labeled transitions.
    self.rhidLayer = self.model.add_parameters(
        (self.hidden_units, self.ldims * 2 * self.nnvecs * (self.k + 1)))
    self.rhidBias = self.model.add_parameters((self.hidden_units))
    self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units))
    self.rhid2Bias = self.model.add_parameters((self.hidden2_units))
    self.routLayer = self.model.add_parameters(
        (2 * len(self.irels) + 2,
         self.hidden2_units if self.hidden2_units > 0 else self.hidden_units))
    self.routBias = self.model.add_parameters((2 * len(self.irels) + 2))
def __init__(self, vocab, options):
    """Build the transition parser model with optional BERT attention
    features and an optional distance probe feeding extra inputs to the
    MLP scorers.

    :param vocab: vocabulary structure forwarded to the FeatureExtractor.
    :param options: parsed command-line options (bert, attention,
        distance_probe_conf, no_bilstms, ...).
    """
    # import here so we don't load Dynet if just running parser.py --help for example
    from multilayer_perceptron import MLP
    from feature_extractor import FeatureExtractor
    # Fix: declare the global BEFORE binding it. Importing `dy` first and
    # declaring `global dy` afterwards is a SyntaxError on Python 3.
    global dy
    import dynet as dy

    global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
    LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
    self.activations = {
        'tanh': dy.tanh,
        'sigmoid': dy.logistic,
        'relu': dy.rectify,
        'tanh3': (lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]
    self.oracle = options.oracle
    self.headFlag = options.headFlag
    self.rlMostFlag = options.rlMostFlag
    self.rlFlag = options.rlFlag
    self.k = options.k
    self.distances = 4
    # probe looks at distances between tokens ahead, considering distances:
    # normalized by the smallest, among:
    # s0 - b0
    # s0 - b1
    # b0 - closest bi: if < s0-b0, do a Shift
    # closest si - b0 : if ~= s0-b0, do a reduce

    # dimensions depending on extended features
    self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)

    self.feature_extractor = FeatureExtractor(self.model, options, vocab, self.nnvecs)
    self.irels = self.feature_extractor.irels

    if options.no_bilstms > 0:  # number of bilstms
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
    else:
        mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)

    # use attention
    if options.bert and options.attention:
        # add attention vectors for stack to top buf and viceversa
        attention_size = self.k * 2
        # all layers
        # layers = self.feature_extractor.bert.model.config.num_hidden_layers
        # attention_size = layers * layers * self.k  # * 2
        mlp_in_dims += attention_size

    # Sartiano
    if options.distance_probe_conf:
        print('Distance Probe enabled', file=sys.stderr)
        from distance_probe import DistanceProbe
        self.distance_probe = DistanceProbe(options.distance_probe_conf,
                                            options.dynet_seed)
        mlp_in_dims += self.distances
    else:
        self.distance_probe = None

    self.attention_indices = [
        int(x) for x in options.attention.split(',')
    ] if options.attention else []

    # SWAP + 1 == 4 transition classes for the unlabeled scorer.
    self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims,
                             options.mlp_hidden2_dims, SWAP + 1, self.activation)
    self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims,
                           options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
    print('MLP size: (%d, %d)' % (mlp_in_dims, options.mlp_hidden_dims), file=sys.stderr)
def leaky_relu(x):
    """Leaky ReLU: x for x > 0, otherwise 0.01 * x.

    :type x: dn.Expression
    :rtype: dn.Expression
    """
    positive = dn.rectify(x)
    # rectify(-x) isolates the negative part (sign-flipped); multiplying by
    # -0.01 restores the sign and applies the leak slope.
    negative = dn.rectify(-x) * -0.01
    ret = positive + negative
    return ret


# Name -> DyNet activation callable, selected by name from the options.
activations = {
    'tanh': dn.tanh,
    'sigmoid': dn.logistic,
    'relu': dn.rectify,
    'tanh3': (lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x))),
    'selu': selu,
    "leaky-relu": leaky_relu
}

# Name -> DyNet optimizer class.
trainers = {
    "adam": dn.AdamTrainer,
    "sgd": dn.SimpleSGDTrainer,
    "momentum": dn.MomentumSGDTrainer,
    "rmsprop": dn.RMSPropTrainer
}

# Name -> DyNet recurrent-network builder class.
recurrent_builders = {"lstm": dn.VanillaLSTMBuilder, "gru": dn.GRUBuilder}


def recurrent_factory_factory(builder):  # use closure to hold "builder"
class MaxSubGraphLSTM(object):
    """BiLSTM graph parser scored with a biaffine fusion layer; the decoder
    extracts a maximum subgraph from the dense arc-score matrix.

    NOTE(review): this class contains Python 2 idioms (`w2i.iteritems()`,
    `range(...)` passed to `random.shuffle`, `filter(None, ...)` used as a
    list) — confirm the target interpreter version before porting.
    """

    # Name -> DyNet activation callable.
    activations = {
        'tanh': dn.tanh,
        'sigmoid': dn.logistic,
        'relu': dn.rectify,
        'tanh3': (lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)))
    }
    # Name -> decoding algorithm (arc-factored / 1-endpoint-crossing variants).
    decoders = {
        "arcfactor": decoder.arcfactor,
        "1ec2p": decoder.oneec2p,
        "1ec2p-vine": decoder.oneec2p_vine
    }

    def __init__(self, vocab, pos, rels, w2i, options):
        """Build lookups, the BiLSTM encoder, and the arc/label scorers."""
        self.model = dn.Model()
        random.seed(1)
        self.trainer = dn.AdamTrainer(self.model)
        self.activation = self.activations[options.activation]
        # Training-time decoder; a separate test-time decoder may be chosen.
        self.decoder = self.decoders[options.decoder](options)
        self.test_decoder = self.decoders[options.test_decoder](options) \
            if options.test_decoder is not None \
            else self.decoder
        self.cost_augment = cost_augments[options.cost_augment]
        self.labelsFlag = options.labelsFlag
        self.options = options
        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        # ids 1 and 2 are reserved for *PAD* / *INITIAL*; 0 is unknown.
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}  # type: dict[str, int]
        self.irels = rels
        if options.external_embedding is not None:
            self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
                self.model, options.external_embedding)
            logger.info('Load external embedding. Vector dimensions %d', self.edim)
        else:
            self.extrnd, self.elookup, self.edim = None, None, 0
        dims = self.wdims + self.pdims + self.edim
        self.rnn = nn.BiLSTM(self.model, [dims] + [self.ldims * 2] * options.lstm_layers)
        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units
        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims))
        if self.hidden2_units > 0:
            dense_dims = [self.hidden_units, self.hidden2_units, 1]
            use_bias = [True, False]
        else:
            dense_dims = [self.hidden_units, 1]
            # use_bias = [dn.NormalInitializer(0, 0)]
            use_bias = [False]
        # Separate projections for a token acting as head vs. as dependent.
        self.head_dense_layer = DenseLayers(self.model,
                                            [self.ldims * 2, self.hidden_units],
                                            self.activation)
        self.dep_dense_layer = DenseLayers(self.model,
                                           [self.ldims * 2, self.hidden_units],
                                           self.activation)
        self.fusion_layer = nn.Biaffine(self.model, self.hidden_units, self.activation)
        if self.labelsFlag:
            self.relation_binear_layer = BiLinear(self.model, self.ldims * 2,
                                                  self.hidden_units)
            relation_dense_dims = list(dense_dims)
            relation_dense_dims[-1] = len(self.irels)
            self.relation_dense_layer = DenseLayers(self.model, relation_dense_dims,
                                                    self.activation)

    def get_vecs(self, node):
        """Return the concatenated word/POS/external embedding for a token."""
        wordvec = self.wlookup[int(self.vocab.get(node.norm, 0))] if self.wdims > 0 else None
        posvec = self.plookup[int(self.pos.get(node.postag, 0))] if self.pdims > 0 else None
        # External lookup tries the surface form first, then the normalized form.
        evec = self.elookup[int(self.extrnd.get(node.form, self.extrnd.get(node.norm, 0)))]\
            if self.edim > 0 else None
        return dn.concatenate(filter(None, [wordvec, posvec, evec]))

    def __evaluate(self, lstm_output):
        """Score every (head, modifier) pair in one batched biaffine pass.

        Returns (scores, exprs): a numpy length x length matrix and the
        matching DyNet expressions for backprop.
        """
        length = len(lstm_output)
        # Batch layout: (i, j) -> (i * length + j,)
        # i = k / length, j = k % length
        # head indices repeat as 1 1 2 2 3 3 4 4 ..
        heads = [
            dn.transpose(self.activation(self.head_dense_layer(lstm_output[i])))
            for i in range(length)
        ]
        mods = [
            self.activation(self.dep_dense_layer(lstm_output[i]))
            for i in range(length)
        ]
        head_part = dn.concatenate_to_batch(
            [heads[i // len(lstm_output)] for i in range(length * length)])
        # modifier indices repeat as 1 2 3 4 .. 1 2 3 4 ...
        mod_part = dn.concatenate_to_batch([mods[i] for i in range(length)] * length)
        output = self.fusion_layer(head_part, mod_part)
        exprs = [[
            dn.pick_batch_elem(output, i * length + j) for j in range(length)
        ] for i in range(length)]
        scores = output.npvalue()
        scores = scores.reshape((len(lstm_output), len(lstm_output)))
        return scores, exprs

    def __evaluate_labels(self, lstm_output, edges):
        """Yield one relation-score expression per edge, caching the
        head/modifier projections per token index.

        :type lstm_output: list[dn.Expression]
        :type edges: Edge
        :return:
        """
        rheadfov = [None] * len(lstm_output)
        rmodfov = [None] * len(lstm_output)
        for source, label, target in edges:
            if rheadfov[source] is None:
                rheadfov[source] = self.relation_binear_layer.w1.expr() * lstm_output[source]
            if rmodfov[target] is None:
                rmodfov[target] = self.relation_binear_layer.w2.expr() * lstm_output[target]
            hidden = self.activation(rheadfov[source] + rmodfov[target] +
                                     self.relation_binear_layer.bias.expr())
            output = self.relation_dense_layer(hidden)
            yield output

    def Save(self, filename):
        """Serialize model parameters to `filename`."""
        self.model.save(filename)

    def Load(self, filename):
        """Load model parameters from `filename`."""
        self.model.load(filename)

    def Predict(self, graphs):
        """Yield one decoded (optionally labeled) graph per input sentence."""
        for iSentence, sentence in enumerate(graphs):
            vecs = [self.get_vecs(i) for i in sentence]
            lstm_output = self.rnn([vecs[i] for i in range(len(sentence))])
            scores, exprs = self.__evaluate(lstm_output)
            output_graph = self.test_decoder(scores)
            # Collect predicted arcs with a placeholder "X" label.
            edges = []
            for source_id in range(len(sentence)):
                for target_id in range(len(sentence)):
                    if output_graph[source_id][target_id]:
                        edges.append(graph_utils.Edge(source_id, "X", target_id))
            if self.labelsFlag:
                # Replace each placeholder label with the argmax relation.
                labeled_edges = []
                for edge, r_scores_expr in \
                        zip(edges, self.__evaluate_labels(lstm_output, edges)):
                    r_scores = r_scores_expr.value()
                    label_index = max(((l, scr) for l, scr in enumerate(r_scores)),
                                      key=itemgetter(1))[0]
                    label = self.irels[label_index]
                    labeled_edges.append(
                        graph_utils.Edge(edge.source, label, edge.target))
                edges = labeled_edges
            dn.renew_cg()
            yield sentence.replaced_edges(edges)

    def Train(self, graphs):
        """Run one training epoch with margin-based hinge losses on arcs
        and labels.

        :type graphs: [Graph]
        :return:
        """
        eloss = 0.0
        # NOTE(review): mloss is never incremented, so the final log line
        # always reports 0 — presumably it was meant to accumulate `loss`.
        mloss = 0.0
        total_gold_edge = 0
        total_predict_edge = 0
        recalled_gold_edge = 0.0
        correct_predict_edge = 0.0
        start = time.time()
        shuffled_index = range(len(graphs))
        random.shuffle(shuffled_index)
        iSentence = -1
        for g_idx in shuffled_index:
            sentence = graphs[g_idx]  # type: graph_utils.Graph
            dn.renew_cg()
            iSentence += 1
            # Periodic progress log; counters reset every 100 sentences.
            if iSentence % 100 == 0 and iSentence != 0:
                logger.info(
                    'Processing sentence number: %d, Loss: %.2f, '
                    'Accuracy: %.2f, Recall: %.2f, Time: %.2f', iSentence,
                    eloss, correct_predict_edge / total_predict_edge * 100,
                    recalled_gold_edge / total_gold_edge * 100,
                    time.time() - start)
                start = time.time()
                eloss = 0.0
                total_gold_edge = 0
                total_predict_edge = 0
                recalled_gold_edge = 0.0
                correct_predict_edge = 0.0
            vecs = [self.get_vecs(i) for i in sentence]
            lstm_output = self.rnn([vecs[i] for i in range(len(sentence))])
            scores, exprs = self.__evaluate(lstm_output)
            # Cost augmentation perturbs scores toward margin violations.
            self.cost_augment(scores, sentence, self.options)
            output_graph = self.decoder(scores)
            gold_graph = sentence.to_matrix()
            lerrs = []
            if self.labelsFlag:
                # Hinge loss on labels: penalize when the best wrong label
                # scores within 1 of the gold label.
                edges = list(sentence.generate_edges())
                for edge, r_scores_expr \
                        in zip(edges, self.__evaluate_labels(lstm_output, edges)):
                    head, label, modifier = edge
                    r_scores = r_scores_expr.value()
                    gold_label_index = self.rels[label]
                    wrong_label_index = max(
                        ((l, scr) for l, scr in enumerate(r_scores)
                         if l != gold_label_index),
                        key=itemgetter(1))[0]
                    if r_scores[gold_label_index] < r_scores[wrong_label_index] + 1:
                        lerrs.append(r_scores_expr[wrong_label_index] -
                                     r_scores_expr[gold_label_index])
            # Structured hinge on arcs: push up missed gold edges (+1 margin),
            # push down spurious predicted edges.
            errs = []
            for source_id in range(len(sentence)):
                for target_id in range(len(sentence)):
                    gold_exist = gold_graph[source_id][target_id]
                    output_exist = output_graph[source_id][target_id]
                    if gold_exist and output_exist:
                        total_gold_edge += 1
                        total_predict_edge += 1
                        correct_predict_edge += 1
                        recalled_gold_edge += 1
                    elif not gold_exist and not output_exist:
                        pass
                    elif gold_exist and not output_exist:
                        total_gold_edge += 1
                        errs.append(-exprs[source_id][target_id] + 1)
                    elif not gold_exist and output_exist:
                        total_predict_edge += 1
                        errs.append(exprs[source_id][target_id])
                    else:
                        raise SystemError()
            if len(errs) > 0 or len(lerrs) > 0:
                loss = dn.scalarInput(0.0)
                if len(lerrs):
                    loss += dn.esum(lerrs)
                if len(errs):
                    loss += dn.esum(errs)
                eloss += loss.scalar_value()
                loss.backward()
                self.trainer.update()
        self.trainer.update_epoch()
        logger.info("Loss: %.2f", mloss / iSentence)