def backprop_pytt_pooler_output(d_outputs, sgd=None):
    for doc, dY in zip(docs, d_outputs):
        # Lazily allocate the gradient buffer for this doc, then
        # accumulate the incoming gradient into it.
        if doc._.pytt_d_pooler_output.size == 0:
            xp = get_array_module(doc._.pytt_pooler_output)
            grads = xp.zeros(doc._.pytt_pooler_output.shape, dtype="f")
            doc._.pytt_d_pooler_output = grads
        doc._.pytt_d_pooler_output += dY
    return None
 @classmethod
 def from_truncated(cls, square: Array,
                    lengths: List[int]) -> "RaggedArray":
     if len(lengths) != square.shape[0]:
         raise ValueError(
             "Truncated array must have shape[0] == len(lengths)")
     width = square.shape[1]
     max_len = max(lengths, default=0)
     extra_dims = square.shape[2:]
     if width == max_len:
         return RaggedArray(square, lengths)
     elif width > max_len:
          raise ValueError(
              f"Expected width <= max_len. Got {width} > {max_len}")
     xp = get_array_module(square)
     expanded = xp.zeros((sum(lengths), ) + extra_dims, dtype=square.dtype)
      # TODO: I know there's a way to do this without the loop :(. Escapes
      # me currently. (One loop-free sketch is shown after this function.)
     start = 0
     for i, length in enumerate(lengths):
         # We could have a row that's actually shorter than the width,
         # if the array was padded. Make sure we don't get junk values.
         row_width = min(width, length)
         expanded[start:start + row_width] = square[i, :row_width]
         start += length
     return cls(expanded, lengths)
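The TODO above asks for a loop-free version of the copy. One possible numpy sketch (the name expand_truncated is hypothetical; it scatters each row's real columns into the flat output with a single fancy-indexing assignment, and trailing dimensions come along for free):

import numpy

def expand_truncated(square, lengths):
    # Which columns of each row hold real data (rows may be shorter
    # than the truncation width).
    width = square.shape[1]
    lengths = numpy.asarray(lengths)
    row_widths = numpy.minimum(width, lengths)
    # Flat start position of each row in the expanded output.
    starts = numpy.concatenate(([0], numpy.cumsum(lengths)[:-1]))
    offsets = numpy.arange(width)[None, :]
    mask = offsets < row_widths[:, None]        # (n_rows, width) bool
    target = (starts[:, None] + offsets)[mask]  # flat output positions
    expanded = numpy.zeros((int(lengths.sum()),) + square.shape[2:],
                           dtype=square.dtype)
    expanded[target] = square[mask]
    return expanded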
Example 3
def get_pytt_class_tokens(docs, drop=0.0):
    """Output a List[array], where the array is the class vector
    for each sentence in the document. To backprop, we increment the values
    in the doc._.pytt_d_last_hidden_state array.
    """
    xp = get_array_module(docs[0]._.pytt_last_hidden_state)
    outputs = []
    for doc in docs:
        wp_tensor = doc._.pytt_last_hidden_state
        class_vectors = []
        for sent in doc.sents:
            if sent._.pytt_start is not None:
                class_vectors.append(wp_tensor[sent._.pytt_start])
            else:
                class_vectors.append(
                    xp.zeros((wp_tensor.shape[-1], ), dtype="f"))
        Y = xp.vstack(class_vectors)
        outputs.append(Y)

    def backprop_pytt_class_tokens(d_outputs, sgd=None):
        for doc, dY in zip(docs, d_outputs):
            if doc._.pytt_d_last_hidden_state.size == 0:
                xp = get_array_module(doc._.pytt_last_hidden_state)
                grads = xp.zeros(doc._.pytt_last_hidden_state.shape, dtype="f")
                doc._.pytt_d_last_hidden_state = grads
            for i, sent in enumerate(doc.sents):
                if sent._.pytt_start is not None:
                    doc._.pytt_d_last_hidden_state[sent._.pytt_start] += dY[i]
        return None

    return outputs, backprop_pytt_class_tokens
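Like other thinc-style layers, the function returns its outputs together with a backprop callback. A minimal sketch of how the pair is used in a training step (the targets list and the squared-error gradient here are hypothetical):

outputs, backprop = get_pytt_class_tokens(docs)
# One gradient array per doc, same shapes as the outputs.
d_outputs = [output - target for output, target in zip(outputs, targets)]
# Accumulates into doc._.pytt_d_last_hidden_state for each doc.
backprop(d_outputs)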
Example 4
def get_class_tokens(docs, drop=0.0):
    """Output a List[array], where the array is the class vector
    for each sentence in the document. To backprop, we increment the values
    in the Doc's d_last_hidden_state array.
    """
    xp = get_array_module(docs[0]._.get(ATTRS.last_hidden_state))
    outputs = []
    doc_class_tokens = []
    for doc in docs:
        class_tokens = []
        for i, wp in enumerate(doc._.get(ATTRS.word_pieces_)):
            if is_class_token(wp):
                class_tokens.append(i)
        doc_class_tokens.append(xp.array(class_tokens, dtype="i"))
        wp_tensor = doc._.get(ATTRS.last_hidden_state)
        outputs.append(wp_tensor[doc_class_tokens[-1]])

    def backprop_class_tokens(d_outputs, sgd=None):
        for doc, class_tokens, dY in zip(docs, doc_class_tokens, d_outputs):
            if doc._.get(ATTRS.d_last_hidden_state).size == 0:
                xp = get_array_module(doc._.get(ATTRS.last_hidden_state))
                grads = xp.zeros(doc._.get(ATTRS.last_hidden_state).shape,
                                 dtype="f")
                doc._.set(ATTRS.d_last_hidden_state, grads)
            doc._.get(ATTRS.d_last_hidden_state)[class_tokens] += dY
        return None

    return outputs, backprop_class_tokens
Example 5
def cosine(vec1, vec2):
    xp = get_array_module(vec1)
    norm1 = xp.linalg.norm(vec1)
    norm2 = xp.linalg.norm(vec2)
    if norm1 == 0.0 or norm2 == 0.0:
        return 0.0
    else:
        return vec1.dot(vec2) / (norm1 * norm2)
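A quick check with plain numpy inputs (get_array_module dispatches to numpy or cupy depending on where the array lives):

import numpy

a = numpy.asarray([1.0, 0.0], dtype="f")
b = numpy.asarray([0.0, 1.0], dtype="f")
print(cosine(a, a))                          # 1.0: parallel vectors
print(cosine(a, b))                          # 0.0: orthogonal vectors
print(cosine(a, numpy.zeros(2, dtype="f")))  # 0.0: zero-vector guard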
Example 7
 def backprop_pooler_output(d_outputs, sgd=None):
     for doc, dY in zip(docs, d_outputs):
         if doc._.get(ATTRS.d_pooler_output).size == 0:
             xp = get_array_module(doc._.get(ATTRS.pooler_output))
             grads = xp.zeros(doc._.get(ATTRS.pooler_output).shape, dtype="f")
             doc._.set(ATTRS.d_pooler_output, grads)
         doc._.set(ATTRS.d_pooler_output, doc._.get(ATTRS.d_pooler_output) + dY)
     return None
Example 8
 def backprop_class_tokens(d_outputs, sgd=None):
     for doc, class_tokens, dY in zip(docs, doc_class_tokens, d_outputs):
         if doc._.get(ATTRS.d_last_hidden_state).size == 0:
             xp = get_array_module(doc._.get(ATTRS.last_hidden_state))
             grads = xp.zeros(doc._.get(ATTRS.last_hidden_state).shape, dtype="f")
             doc._.set(ATTRS.d_last_hidden_state, grads)
         doc._.get(ATTRS.d_last_hidden_state)[class_tokens] += dY
     return None
 def backprop_pytt_pooler_output(d_outputs, sgd=None):
     for doc, dY in zip(docs, d_outputs):
         if doc._.pytt_d_pooler_output.size == 0:
             xp = get_array_module(doc._.pytt_pooler_output)
             grads = xp.zeros(doc._.pytt_pooler_output.shape, dtype="f")
             doc._.pytt_d_pooler_output = grads
         doc._.pytt_d_pooler_output += dY
     return None
 def backprop_pytt_class_tokens(d_outputs, sgd=None):
     for doc, class_tokens, dY in zip(docs, doc_class_tokens, d_outputs):
         if doc._.pytt_d_last_hidden_state.size == 0:
             xp = get_array_module(doc._.pytt_last_hidden_state)
             grads = xp.zeros(doc._.pytt_last_hidden_state.shape, dtype="f")
             doc._.pytt_d_last_hidden_state = grads
         doc._.pytt_d_last_hidden_state[class_tokens] += dY
     return None
 def backprop_pytt_last_hidden(d_outputs, sgd=None):
     for doc, d_lh in zip(docs, d_outputs):
         xp = get_array_module(d_lh)
         shape = d_lh.shape
         dtype = d_lh.dtype
         if doc._.pytt_d_last_hidden_state.size == 0:
             doc._.pytt_d_last_hidden_state = xp.zeros(shape, dtype=dtype)
         doc._.pytt_d_last_hidden_state += d_lh
     return None
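All of these callbacks repeat the same lazy-init-then-accumulate pattern. A hypothetical helper that factors it out (not part of the original code):

def accumulate_grad(buffer, d):
    """Allocate the gradient buffer on first use, then add `d` into it."""
    if buffer.size == 0:
        xp = get_array_module(d)
        buffer = xp.zeros(d.shape, dtype=d.dtype)
    buffer += d
    return buffer

# Usage: doc._.pytt_d_pooler_output = accumulate_grad(
#     doc._.pytt_d_pooler_output, dY)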
Example 12
def cosine_similarity(vec1, vec2) -> float:
    """Compute the cosine similarity of two vectors."""
    xp = get_array_module(vec1)
    norm1 = xp.linalg.norm(vec1)
    norm2 = xp.linalg.norm(vec2)
    # Guard against zero vectors: their similarity is undefined, so
    # return 0.0 rather than dividing by zero.
    if norm1 == 0.0 or norm2 == 0.0:
        return 0.0
    return xp.dot(vec1, vec2) / (norm1 * norm2)
Example 13
 @classmethod
 def from_padded(cls, padded: Array, lengths: List[int]) -> "RaggedArray":
     if max(lengths, default=0) > padded.shape[1]:
         return cls.from_truncated(padded, lengths)
     mask = lengths2mask(lengths)
     assert sum(mask) == sum(lengths)
     all_rows = padded.reshape((-1,) + padded.shape[2:])
     xp = get_array_module(all_rows)
     data = xp.ascontiguousarray(all_rows[mask])
     assert data.shape[0] == sum(lengths)
     return cls(data, lengths)
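from_padded relies on a lengths2mask helper that is not shown in this snippet. A minimal sketch of the presumed behaviour, assuming the padded width equals max(lengths):

import numpy

def lengths2mask(lengths):
    # Presumed behaviour: a flat boolean mask over the rows of the
    # reshaped (batch * width) array -- True for real positions, False
    # for the padding at the end of each sequence.
    width = max(lengths, default=0)
    mask = numpy.zeros((len(lengths), width), dtype=bool)
    for i, length in enumerate(lengths):
        mask[i, :length] = True
    return mask.ravel()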
Example 14
def tanh(X, drop=0.0):
    xp = get_array_module(X)
    Y = xp.tanh(X)

    def backprop_tanh(dY, sgd=None):
        one = Y.dtype.type(1)
        dX = dY * (one - Y * Y)
        return dX

    return Y, backprop_tanh
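The callback applies the identity d tanh(x)/dx = 1 - tanh(x)^2. A quick sanity check with numpy inputs:

import numpy

X = numpy.asarray([[-2.0, 0.0, 2.0]], dtype="f")
Y, backprop_tanh = tanh(X)
# An upstream gradient of ones recovers the local derivative.
dX = backprop_tanh(numpy.ones_like(Y))
print(numpy.allclose(dX, 1 - numpy.tanh(X) ** 2))  # True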
Example 15
 def backprop_pytt_class_tokens(d_outputs, sgd=None):
     for doc, dY in zip(docs, d_outputs):
         if doc._.pytt_d_last_hidden_state.size == 0:
             xp = get_array_module(doc._.pytt_last_hidden_state)
             grads = xp.zeros(doc._.pytt_last_hidden_state.shape, dtype="f")
             doc._.pytt_d_last_hidden_state = grads
         for i, sent in enumerate(doc.sents):
             if sent._.pytt_start is not None:
                 doc._.pytt_d_last_hidden_state[sent._.pytt_start] += dY[i]
     return None
Example 16
 def backprop_last_hidden(d_outputs, sgd=None):
     for doc, d_lh in zip(docs, d_outputs):
         xp = get_array_module(d_lh)
         shape = d_lh.shape
         dtype = d_lh.dtype
         if doc._.get(ATTRS.d_last_hidden_state).size == 0:
             doc._.set(ATTRS.d_last_hidden_state,
                       xp.zeros(shape, dtype=dtype))
         doc._.set(ATTRS.d_last_hidden_state,
                   doc._.get(ATTRS.d_last_hidden_state) + d_lh)
     return None
 def backprop_tensors(d_tensors, sgd=None):
     for doc, d_tensor in zip(docs, d_tensors):
         # Count how often each word-piece token is represented. This allows
         # a weighted sum, so that we can make sure doc.tensor.sum()
         # equals wp_tensor.sum(). Do this with sensitivity to boundary
         # tokens.
         wp_rows, align_sizes = _get_boundary_sensitive_alignment(doc)
         d_lh = _get_or_set_d_last_hidden_state(doc)
         for i, word_piece_slice in enumerate(wp_rows):
             for j in word_piece_slice:
                 d_lh[j] += d_tensor[i]
         xp = get_array_module(d_lh)
         d_lh /= xp.array(align_sizes, dtype="f").reshape(-1, 1)
     return None
Example 18
def get_cossim_loss(yh, y):
    # Add a small constant to avoid 0 vectors
    yh = yh + 1e-8
    y = y + 1e-8
    # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
    xp = get_array_module(yh)
    norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
    loss = xp.abs(cosine - 1).sum()
    return loss, -d_yh
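The function returns -d_yh because d_yh as computed is the derivative of the cosine itself; the loss is |cosine - 1| = 1 - cosine whenever the cosine is below one, so the loss gradient is the negation. A finite-difference sanity check, assuming numpy inputs:

import numpy

yh = numpy.asarray([[1.0, 2.0]], dtype="f")
y = numpy.asarray([[2.0, 1.0]], dtype="f")
loss, d_loss = get_cossim_loss(yh, y)
eps = 1e-3
yh_eps = yh.copy()
yh_eps[0, 0] += eps
loss_eps, _ = get_cossim_loss(yh_eps, y)
# Analytic gradient vs. finite-difference estimate.
print(numpy.allclose((loss_eps - loss) / eps, d_loss[0, 0], atol=1e-2))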
Example 20
def logistic(X, drop=0.0):
    xp = get_array_module(X)
    if not isinstance(X, xp.ndarray):
        X = xp.asarray(X)
    # Clip to the range (-10, 10) in place (the third argument to
    # xp.minimum/xp.maximum is the output array) so exp() stays
    # numerically safe.
    X = xp.minimum(X, 10.0, X)
    X = xp.maximum(X, -10.0, X)
    Y = 1.0 / (1.0 + xp.exp(-X))

    def logistic_bwd(dY, sgd=None):
        dX = dY * (Y * (1 - Y))
        return dX

    return Y, logistic_bwd
Example 22
 def s2v_doc_similarity(self, obj1, other):
     """Make a semantic similarity estimate. The default estimate is cosine
     similarity using an average of word vectors.
     obj1 (object): The object to compare from. By default, accepts `Doc`,
         `Span`, `Token` and `Lexeme` objects.
     other (object): The object to compare with. By default, accepts `Doc`,
         `Span`, `Token` and `Lexeme` objects.
     RETURNS (float): A scalar similarity score. Higher is more similar.
     DOCS: https://spacy.io/api/doc#similarity
     """
     vector1 = self.get_s2v_doc_vector(obj1)
     vector2 = self.get_s2v_doc_vector(other)
     if len(vector1) == 0 or len(vector2) == 0:
         return -1.0
     xp = get_array_module(vector1)
     return xp.dot(vector1, vector2) / (self.vector_norm(vector1) *
                                        self.vector_norm(vector2))
Example 23
def tanh(X, drop=0.):
    xp = get_array_module(X)
    if not isinstance(X, xp.ndarray):
        X = xp.asarray(X)
    # Clip to range (-10, 10)
    X = xp.minimum(X, 10., X)
    X = xp.maximum(X, -10., X)
    e = xp.exp(2*X)
    Y = (e - 1.) / (e + 1.)

    def tanh_bwd(dY, sgd=None):
        dX = dY * (1 - Y * Y)
        return dX

    return Y, tanh_bwd
Example 24
def get_cossim_loss(yh, y, ignore_zeros=False):
    xp = get_array_module(yh)
    # Find the zero vectors
    if ignore_zeros:
        zero_indices = xp.abs(y).sum(axis=1) == 0
    # Add a small constant to avoid 0 vectors
    yh = yh + 1e-8
    y = y + 1e-8
    # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
    norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
    norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
    mul_norms = norm_yh * norm_y
    cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
    losses = xp.abs(cosine - 1)
    if ignore_zeros:
        # If the target was a zero vector, don't count it in the loss.
        d_yh[zero_indices] = 0
        losses[zero_indices] = 0
    loss = losses.sum()
    return loss, -d_yh
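A short example of the ignore_zeros flag, with a hypothetical batch whose second target row is all zeros:

import numpy

yh = numpy.asarray([[1.0, 2.0], [3.0, 4.0]], dtype="f")
y = numpy.asarray([[2.0, 1.0], [0.0, 0.0]], dtype="f")
loss, d_yh = get_cossim_loss(yh, y, ignore_zeros=True)
# The zero-target row contributes neither loss nor gradient.
print(float(d_yh[1].sum()) == 0.0)  # True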
Example 25
 @property
 def xp(self):
     return get_array_module(self.data)