def disambiguate_samples(data, distance):
    """Collapse exactly-duplicated feature rows into a single sample.

    Rows of ``data`` whose feature part (all columns but the last) is
    identical are considered ambiguous: every copy is removed and one
    representative is re-added with its label (last column) reassigned
    to the nearest class centroid under ``distance``.

    Parameters
    ----------
    data : np.ndarray
        2-D array; columns ``[:-1]`` are features, column ``[-1]`` is
        the class label.
    distance : callable
        ``distance(features, centroid) -> float`` dissimilarity.

    Returns
    -------
    np.ndarray
        ``data`` with each duplicate group collapsed to one relabeled row.
    """
    n_before = data.shape[0]
    # NOTE(review): centroids are computed before deduplication, so
    # duplicate rows are over-weighted (kept as-is, per original TODO).
    centroids = utils.get_centroids(data[:, :-1], data[:, -1])
    to_remove = set()
    to_add = []
    for i in range(n_before):
        if i in to_remove:
            continue
        ambiguity = False
        for j in range(i + 1, n_before):
            # Exact feature-wise duplicate of row i?
            if np.array_equal(data[i, :-1], data[j, :-1]):
                ambiguity = True
                to_remove.add(i)
                to_remove.add(j)
        if ambiguity:
            # Relabel the representative with its nearest centroid's class.
            dist = np.array([[distance(data[i, :-1], centroids[c]), c]
                             for c in centroids.keys()])
            data[i, -1] = dist[np.argmin(dist[:, 0]), 1]
            to_add.append(data[i, :])
    data = np.delete(data, list(to_remove), axis=0)
    # BUG FIX: the original unconditionally vstacked ``to_add``; with no
    # duplicates found, np.vstack([data, np.array([])]) raises ValueError.
    if to_add:
        data = np.vstack([data, np.array(to_add)])
    if n_before - data.shape[0] > 0:
        print('\tRemoved {} ambiguous samples.'.format(n_before -
                                                       data.shape[0]))
    return data
 def forward(self, embeddings):
     """GE2E loss: scale the embedding/centroid cosine-similarity matrix
     by the learned ``w``/``b`` and reduce it with ``calc_loss``.
     """
     # BUG FIX: ``torch.clamp`` is out-of-place; the original call
     # discarded its result, so ``self.w`` was never constrained.
     # Clamp the parameter data in place to keep the scale >= 1e-6.
     self.w.data.clamp_(min=1e-6)
     centroids = get_centroids(embeddings)
     cossim = get_cossim(embeddings, centroids)
     sim_matrix = self.w * cossim.to(self.device) + self.b
     loss, _ = calc_loss(sim_matrix)
     return loss
# Example #3
# 0
 def forward(self, embeddings, y=None):
     """GE2E loss on ``embeddings``.

     ``y`` is accepted but unused — kept for caller API compatibility.
     """
     # BUG FIX: ``torch.clamp`` is out-of-place; the original call
     # discarded its result, so ``self.w`` was never constrained.
     # Clamp the parameter data in place to keep the scale >= 1e-6.
     self.w.data.clamp_(min=1e-6)
     centroids = get_centroids(embeddings)
     cossim = get_cossim(embeddings, centroids)
     sim_matrix = self.w*cossim + self.b
     loss, _ = calc_loss(sim_matrix)
     return loss
# Example #4
# 0
    def forward(self, embeddings):
        """GE2E loss computed with the project's ``utils`` helpers.

        ``hp.re_num`` is the lower bound for the learned scale ``self.w``
        (presumably a small positive epsilon — confirm in hp config).
        """
        # BUG FIX: ``torch.clamp`` is out-of-place; the original call
        # discarded its result, so ``self.w`` was never constrained.
        self.w.data.clamp_(min=hp.re_num)

        centroids = utils.get_centroids(embeddings)
        cossim = utils.get_cossim(embeddings, centroids)

        sim_matrix = self.w * cossim + self.b
        loss, _ = utils.cal_loss(sim_matrix)

        return loss
# Example #5
# 0
 def forward(self, embeddings, embedder_net, lamb):
     """GE2E loss plus an L2 weight-norm penalty on the embedder's LSTM.

     Parameters
     ----------
     embeddings : per-speaker utterance embeddings.
     embedder_net : model whose ``LSTM_stack`` weights are regularized.
     lamb : regularization coefficient.

     Returns
     -------
     (loss, per_loss, norm_loss) : total, GE2E term, penalty term.
     """
     # BUG FIX: ``torch.clamp`` is out-of-place; the original call
     # discarded its result, so ``self.w`` was never constrained.
     self.w.data.clamp_(min=1e-6)
     centroids = get_centroids(embeddings)
     cossim = get_cossim(embeddings, centroids)
     sim_matrix = self.w * cossim.to(self.device) + self.b
     per_loss, _ = calc_loss(sim_matrix)
     # L2 norm over every LSTM weight tensor (4 weight groups per layer).
     weights = embedder_net.LSTM_stack.all_weights
     # NOTE(review): building a torch.Tensor from ``.data`` norms detaches
     # this term from autograd, so norm_loss carries no gradient back to
     # the embedder — confirm whether that is intended before changing.
     norm_loss = lamb * torch.sum(
         torch.Tensor([
             torch.norm(weights[i][j].data.to(self.device), 2)
             for i in range(hp.model.num_layer) for j in range(4)
         ]))
     loss = per_loss + norm_loss
     return loss, per_loss, norm_loss