def __call__(self, query, options, gold, lengths, query_no):
    """Score every candidate in ``options`` and return ``(loss, prediction)``.

    Each option is an ``(otext, features)`` pair.  Depending on the global
    ``args`` flags, the network input for an option is the feature tensor,
    the max/mean pooled embeddings of ``query`` and ``otext``, or both
    concatenated.  The input goes through every ``(hidden, bias)`` layer pair
    with the non-linearity named by ``args.nonlin`` and is then summed over
    dimension 0 to give one score per option.

    Args:
        query: Items looked up in ``self.pEmbedding`` (presumably word ids --
            confirm against the caller).
        options: Sequence of ``(otext, features)`` pairs.
        gold: Container of gold option indices; the target distribution puts
            ``1 / len(gold)`` on each of them.
        lengths: Accepted but not used in this body.
        query_no: Accepted but not used in this body.

    Returns:
        ``(None, 0)`` when there is exactly one option.  Otherwise a pair of
        the loss expression (target distribution times the negative
        log-softmax of the scores) and the argmax index of the scores.
    """
    if len(options) == 1:
        return None, 0

    # Name of the ``dy`` function for each simple non-linearity.  The
    # attribute is resolved only when that non-linearity is selected.
    activation_attr = {
        "tanh": "tanh",
        "cube": "cube",
        "logistic": "logistic",
        "relu": "rectify",
        "elu": "elu",
        "selu": "selu",
        "softsign": "softsign",
    }

    def activate(vec):
        # "linear" and unrecognised names leave the vector unchanged.
        kind = args.nonlin
        if kind == "swish":
            return dy.cmult(vec, dy.logistic(vec))
        attr = activation_attr.get(kind)
        return vec if attr is None else getattr(dy, attr)(vec)

    if args.word_vectors:
        qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
        qvec_max = dy.emax(qvecs)
        qvec_mean = dy.average(qvecs)

    scores = []
    for otext, features in options:
        if not args.no_features:
            inputs = dy.inputTensor(features)

        if args.word_vectors:
            ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
            ovec_max = dy.emax(ovecs)
            ovec_mean = dy.average(ovecs)
            word_parts = [qvec_max, qvec_mean, ovec_max, ovec_mean]
            if args.no_features:
                inputs = dy.concatenate(word_parts)
            else:
                inputs = dy.concatenate([inputs] + word_parts)

        if args.drop > 0:
            inputs = dy.dropout(inputs, args.drop)

        h = inputs
        for pH, pB in zip(self.hidden, self.bias):
            h = activate(dy.affine_transform([pB, pH, h]))
        scores.append(dy.sum_dim(h, [0]))

    final = dy.concatenate(scores)
    nll = -dy.log_softmax(final)

    # Uniform target distribution over the gold option indices.
    dense_gold = [
        1.0 / len(gold) if idx in gold else 0.0
        for idx in range(len(options))
    ]
    answer = dy.inputTensor(dense_gold)
    loss = dy.transpose(answer) * nll

    predicted_link = np.argmax(final.npvalue())
    return loss, predicted_link
def parse(self, t, oracle_actions=None):
    """Run the shift-reduce parser over ``t`` and return the oracle loss.

    A fresh computation graph is started, every token of ``t`` is wrapped in
    a ``Head`` and placed on the buffer, and SHIFT / REDUCE_L / REDUCE_R
    actions are applied until one item remains on the stack and the buffer is
    empty.

    Args:
        t: Sequence of tokens; each one indexes ``self.WORDS_LOOKUP`` and
            ``self.vocab.i2w``.
        oracle_actions: Optional sequence of gold actions, one per parser
            step.  When given, the actions are followed and scored.  When
            ``None``, the best-scoring valid action is taken at each step and
            every attachment (plus the root) is printed.

    Returns:
        The negated sum of the log-probabilities of the oracle actions taken
        at steps where more than one action was valid, or ``None`` when no
        such term was collected (decoding without an oracle, or an empty
        ``t``).
    """
    dy.renew_cg()
    self.NULL_REP = self.WORDS_LOOKUP[self.nwords - 1]

    if oracle_actions is not None:
        # Reversed so that pop() yields the actions in their given order.
        oracle_actions = list(oracle_actions)
        oracle_actions.reverse()

    toks = list(t)
    if not toks:
        # Nothing to parse: the loop below would otherwise index an empty
        # list of valid actions.
        return None
    # Reversed so that buffer.pop() yields tokens in sentence order.
    toks.reverse()

    stack = []
    buffer = []

    W1 = dy.parameter(self.pW1)
    b1 = dy.parameter(self.pb1)
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    losses = []

    for tok in toks:
        tok_embedding = self.WORDS_LOOKUP[tok]
        buffer.append(Head(self.vocab.i2w[tok], tok_embedding))

    while not (len(stack) == 1 and len(buffer) == 0):
        # based on parser state, get valid actions
        valid_actions = []
        if len(buffer) > 0:  # can only shift if elements remain in buffer
            valid_actions += [SHIFT]
        if len(stack) >= 2:  # can only reduce if 2 elements on stack
            valid_actions += [REDUCE_L, REDUCE_R]

        # compute probability of each of the actions and choose an action
        # either from the oracle or if there is no oracle, based on the model
        action = valid_actions[0]
        log_probs = None
        if len(valid_actions) > 1:
            representations = self.extract_features(stack, buffer)
            h = dy.cube(W1 * dy.concatenate(representations) + b1)
            logits = W_act * h + b_act
            # The softmax is restricted to the currently valid actions.
            log_probs = dy.log_softmax(logits, valid_actions)
            if oracle_actions is None:
                action = max(enumerate(log_probs.vec_value()),
                             key=itemgetter(1))[0]
        if oracle_actions is not None:
            action = oracle_actions.pop()
            if log_probs is not None:
                # append the action-specific loss
                losses.append(dy.pick(log_probs, action))

        # execute the action to update the parser state
        if action == SHIFT:
            stack.append(buffer.pop())
        else:  # one of the reduce actions
            right = stack.pop()
            left = stack.pop()
            if action == REDUCE_R:
                head, modifier = left, right
                side = 'right'
            else:
                head, modifier = right, left
                side = 'left'
            # add the modifier (and its embedding) to the head's children
            head.add_child(modifier, side)
            stack.append(head)
            if oracle_actions is None:
                print('{0} --> {1}'.format(head.word, modifier.word))

    # the head of the tree that remains at the top of the stack is now the
    # root
    if oracle_actions is None:
        head = stack.pop().word
        print('ROOT --> {0}'.format(head))
    return -dy.esum(losses) if losses else None
def parse(self, t, oracle_actions=None):
    """Run the shift-reduce parser over ``t`` and return the oracle loss.

    A fresh computation graph is started, every token of ``t`` is wrapped in
    a ``Head`` and placed on the buffer, and SHIFT / REDUCE_L / REDUCE_R
    actions are applied until one item remains on the stack and the buffer is
    empty.

    Args:
        t: Sequence of tokens; each one indexes ``self.WORDS_LOOKUP`` and
            ``self.vocab.i2w``.
        oracle_actions: Optional sequence of gold actions, one per parser
            step.  When given, the actions are followed and scored.  When
            ``None``, the best-scoring valid action is taken at each step and
            every attachment (plus the root) is printed.

    Returns:
        The negated sum of the log-probabilities of the oracle actions taken
        at steps where more than one action was valid, or ``None`` when no
        such term was collected (decoding without an oracle, or an empty
        ``t``).
    """
    dy.renew_cg()
    self.NULL_REP = self.WORDS_LOOKUP[self.nwords-1]

    if oracle_actions is not None:
        # Reversed so that pop() yields the actions in their given order.
        oracle_actions = list(oracle_actions)
        oracle_actions.reverse()

    toks = list(t)
    if not toks:
        # Nothing to parse: the loop below would otherwise index an empty
        # list of valid actions.
        return None
    # Reversed so that buffer.pop() yields tokens in sentence order.
    toks.reverse()

    stack = []
    buffer = []

    W1 = dy.parameter(self.pW1)
    b1 = dy.parameter(self.pb1)
    W_act = dy.parameter(self.pW_act)
    b_act = dy.parameter(self.pb_act)

    losses = []

    for tok in toks:
        tok_embedding = self.WORDS_LOOKUP[tok]
        buffer.append(Head(self.vocab.i2w[tok], tok_embedding))

    while not (len(stack) == 1 and len(buffer) == 0):
        # based on parser state, get valid actions
        valid_actions = []
        if len(buffer) > 0:  # can only shift if elements remain in buffer
            valid_actions += [SHIFT]
        if len(stack) >= 2:  # can only reduce if 2 elements on stack
            valid_actions += [REDUCE_L, REDUCE_R]

        # compute probability of each of the actions and choose an action
        # either from the oracle or if there is no oracle, based on the model
        action = valid_actions[0]
        log_probs = None
        if len(valid_actions) > 1:
            representations = self.extract_features(stack, buffer)
            h = dy.cube(W1*dy.concatenate(representations) + b1)
            logits = W_act * h + b_act
            # The softmax is restricted to the currently valid actions.
            log_probs = dy.log_softmax(logits, valid_actions)
            if oracle_actions is None:
                action = max(enumerate(log_probs.vec_value()),
                             key=itemgetter(1))[0]
        if oracle_actions is not None:
            action = oracle_actions.pop()
            if log_probs is not None:
                # append the action-specific loss
                losses.append(dy.pick(log_probs, action))

        # execute the action to update the parser state
        if action == SHIFT:
            stack.append(buffer.pop())
        else:  # one of the reduce actions
            right = stack.pop()
            left = stack.pop()
            if action == REDUCE_R:
                head, modifier = left, right
                side = 'right'
            else:
                head, modifier = right, left
                side = 'left'
            # add the modifier (and its embedding) to the head's children
            head.add_child(modifier, side)
            stack.append(head)
            if oracle_actions is None:
                print('{0} --> {1}'.format(head.word, modifier.word))

    # the head of the tree that remains at the top of the stack is now the
    # root
    if oracle_actions is None:
        head = stack.pop().word
        print('ROOT --> {0}'.format(head))
    return -dy.esum(losses) if losses else None
def cube(x):
    """Return ``dy.cube(x)``; a thin wrapper around the DyNet operation."""
    cubed = dy.cube(x)
    return cubed