Example 1
    def learn(self, characters, target_mgc, guided_att=True):
        num_mgc = target_mgc.shape[0]
        # print num_mgc
        dy.renew_cg()
        output_mgc, output_stop, output_attention = self._predict(
            characters, target_mgc)
        losses = []
        index = 0
        for mgc, real_mgc in zip(output_mgc, target_mgc):
            t_mgc = dy.inputVector(real_mgc)
            # losses.append(self._compute_binary_divergence(mgc, t_mgc) )
            losses.append(dy.l1_distance(mgc, t_mgc))

            if index % 3 == 0:
                # attention loss
                if guided_att:
                    att = output_attention[index // 3]
                    losses.append(
                        self._compute_guided_attention(att, index // 3,
                                                       len(characters) + 2,
                                                       num_mgc // 3))
                # EOS loss
                stop = output_stop[index // 3]
                if index >= num_mgc - 6:
                    losses.append(dy.l1_distance(stop, dy.scalarInput(-0.8)))
                else:
                    losses.append(dy.l1_distance(stop, dy.scalarInput(0.8)))
            index += 1
        loss = dy.esum(losses)
        loss_val = loss.value() / num_mgc
        loss.backward()
        self.trainer.update()
        return loss_val
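
# Note: every third MGC frame produces one attention / stop output, so the
# frame index is mapped to a decoder step with integer division (index // 3).
# A tiny standalone illustration of that mapping (made-up frame count, plain
# Python, no DyNet required):
num_mgc = 9
for index in range(num_mgc):
    if index % 3 == 0:
        print(index, "-> decoder step", index // 3)  # 0 -> 0, 3 -> 1, 6 -> 2
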
Example 2
e = dy.noise(e1, stddev)  # add noise to each element, drawn from a Gaussian with standard deviation stddev
e = dy.dropout(e1, p)  # apply dropout with probability p

# functions over lists of expressions
e = dy.esum([e1, e2, ...])  # sum
e = dy.average([e1, e2, ...])  # average
e = dy.concatenate_cols([e1, e2, ...])  # e1, e2, ... are column vectors; returns a matrix (similar to np.hstack([e1, e2, ...]))
e = dy.concatenate([e1, e2, ...])  # concatenate

e = dy.affine_transform([e0, e1, e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...)
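
# A quick, self-contained numeric check of the list operations above
# (made-up values, for illustration only):
import dynet as dy
dy.renew_cg()
b = dy.inputVector([1.0, 1.0])                   # e0: bias vector
W = dy.inputTensor([[1.0, 0.0], [0.0, 2.0]])     # e1: 2x2 matrix
x = dy.inputVector([3.0, 4.0])                   # e2: column vector
print(dy.esum([x, x]).npvalue())                 # [6. 8.]
print(dy.average([x, x]).npvalue())              # [3. 4.]
print(dy.concatenate_cols([x, x]).dim())         # ((2, 2), 1): a 2x2 matrix
print(dy.affine_transform([b, W, x]).npvalue())  # b + W*x = [4. 9.]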

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar whose value is between 0 and 1
# e2 (ty) must be a scalar whose value is between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = dy.binary_log_loss(e1, e2)

# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = dy.pairwise_rank_loss(e1, e2, m=1.0)
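
# A small self-contained check of the distance / loss expressions above
# (made-up values, for illustration only):
import dynet as dy
dy.renew_cg()
v1 = dy.inputVector([1.0, 2.0, 3.0])
v2 = dy.inputVector([1.5, 1.0, 3.0])
print(dy.squared_distance(v1, v2).value())  # sum of squared differences = 1.25
print(dy.l1_distance(v1, v2).value())       # sum of absolute differences = 1.5
p = dy.scalarInput(0.9)  # predicted probability in (0, 1)
t = dy.scalarInput(1.0)  # gold label in [0, 1]
print(dy.binary_log_loss(p, t).value())     # -(t*log(p) + (1-t)*log(1-p)) ~ 0.105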

# Convolutions
# e1 \in R^{d x s} (input)
Example 3
# define trainable projection layer from word dim to phrase dim
# this simplifies concatenation and allows us to treat the recursive base case as a phrase of its own
word_to_phrase_projection = model.add_parameters((config.sent_dim, word_dim))


# define graph building operation
def generate_graph(parse):
    parse_graph = parse.to_tree()
    return graph_gen_helper(parse_graph)


def graph_gen_helper(node):
    node_value = word_to_phrase_projection * embeddings[node.data.form]

    for child in node:
        child_subtree = graph_gen_helper(child)

        # concatenate the node so far with the subtree, select layer according to dep reln
        node_value = dep_layers[child.data.deprel] * dynet.concatenate(
            [node_value, child_subtree])

    return node_value
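
# A self-contained toy run of the same recursive composition on a hand-built
# two-node tree. Everything below (the Node class, the toy vocabulary, the
# single "nsubj" layer, the dimensions 4 and 3) is an illustrative assumption,
# not part of the original example.
import dynet

toy_model = dynet.ParameterCollection()
word_dim, sent_dim = 4, 3
toy_embeddings = {w: toy_model.add_parameters(word_dim) for w in ("dogs", "bark")}
toy_projection = toy_model.add_parameters((sent_dim, word_dim))
toy_dep_layers = {"nsubj": toy_model.add_parameters((sent_dim, 2 * sent_dim))}


class Node:
    def __init__(self, form, deprel=None, children=()):
        self.form, self.deprel, self.children = form, deprel, list(children)


def toy_compose(node):
    value = dynet.parameter(toy_projection) * dynet.parameter(toy_embeddings[node.form])
    for child in node.children:
        value = dynet.parameter(toy_dep_layers[child.deprel]) * dynet.concatenate(
            [value, toy_compose(child)])
    return value


dynet.renew_cg()
root = Node("bark", children=[Node("dogs", deprel="nsubj")])
print(toy_compose(root).npvalue())  # a sent_dim-sized phrase vector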


# run training
for parse, y in zip(parse_train, y_preds):  # y: target vector paired with this parse
    dynet.renew_cg()  # fresh computation graph per training example
    y_pred = generate_graph(parse)
    loss = dynet.l1_distance(dynet.l2_norm(y_pred), dynet.l2_norm(y))

# run eval