def calculate_similarity(sentences, cache=None):
    """Return the cosine similarity between the final-token embeddings of two sentences,
    along with the raw embedding tensor and the sentence lengths."""
    tensor, len_sent = generate_tensor(sentences)
    cm = CosineSimilarity(dim=0)
    # Index the last token of each sentence: [position, sentence_index]
    return cm(tensor[len_sent[0] - 1, 0], tensor[len_sent[1] - 1, 1]), tensor, len_sent
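# Usage sketch (hedged): generate_tensor is not shown here, so this assumes it
# returns a [seq_len, batch, dim] tensor plus per-sentence token counts, which is
# what the indexing tensor[len_sent[k] - 1, k] implies. Shapes below are toy stand-ins.
import torch
from torch.nn import CosineSimilarity

seq_len, batch, dim = 5, 2, 4
tensor = torch.randn(seq_len, batch, dim)  # stand-in for generate_tensor output
len_sent = [5, 3]                          # hypothetical token counts for two sentences
cm = CosineSimilarity(dim=0)               # reduces along the only axis of a 1-D vector
score = cm(tensor[len_sent[0] - 1, 0], tensor[len_sent[1] - 1, 1])
print(score.item())                        # a scalar in [-1, 1]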
def calculate_similarity(sentences, model, params, dico):
    """Preprocess (tokenize, lowercase, apply BPE) two sentences, embed them,
    and return the cosine similarity of their final-token embeddings."""
    sent_to_bpe = [sent for sent, lan in sentences]
    lan_sent = [lan for sent, lan in sentences]
    with open("input_file", "w", encoding="utf-8") as file:
        for sent in sent_to_bpe:
            file.write(sent + '\n')

    print('tokenizing and lowercasing data')
    # NB: the tokenizer language is hard-coded to French here
    process = subprocess.Popen(
        "cat input_file | tools/tokenize.sh fr | python tools/lowercase_and_remove_accent.py > prep_input",
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # communicate() waits for the command to finish and captures its output
    (output, err) = process.communicate()
    p_status = process.wait()
    print("Command tokenize output: ", output)

    print('executing bpe')
    # `local` is assumed to be a module-level flag selecting a locally built fastBPE
    process = subprocess.Popen(
        "./tools/fastBPE" + ("_local" if local else "") + "/fast applybpe output_file prep_input codes_xnli_15",
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (output, err) = process.communicate()
    p_status = process.wait()
    print("Command fast output: ", output)

    print("reading file")
    with open("output_file", "r") as f:
        sentences = [x.rstrip('\n') for x in f]
    print(sentences)
    # sentences = bpe.apply(sent_to_bpe)
    # len_sent = [len(sent) for sent in sentences]

    sentences = zip(sentences, lan_sent)
    tensor, len_sent = generate_embedding_tensors(sentences, model, params, dico)
    cm = CosineSimilarity(dim=0)
    return cm(tensor[len_sent[0] - 1, 0], tensor[len_sent[1] - 1, 1])
def cosine(x_dialogues):
    cos = CosineSimilarity(dim=-1)
    x = x_dialogues
    # Shift the utterance matrix up by one row so row i is paired with row i + 1,
    # padding the end with a ones vector to keep the shapes aligned.
    y = torch.narrow(x, dim=0, start=1, length=x.size(0) - 1)
    y = torch.cat([y, torch.ones(1, y.size(1))], dim=0)
    # Drop the last score: it only compares the final row to the ones padding.
    scores = cos(x, y)[:-1].mean(dim=-1)
    return scores
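# Worked toy example for cosine() above: each row is compared with the next row,
# and the padded comparison against the ones vector is discarded.
import torch

x = torch.tensor([[1., 0.],
                  [2., 0.],
                  [0., 1.]])
# Adjacent pairs: cos([1,0],[2,0]) = 1.0 and cos([2,0],[0,1]) = 0.0, so the mean is 0.5.
print(cosine(x))  # tensor(0.5000)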
def __init__(self, args, vocab):
    super(ParaModel, self).__init__()
    self.args = args
    self.vocab = vocab
    self.gpu = args.gpu
    self.cosine = CosineSimilarity()
def similarity(model, dataset, i, j):
    sim = CosineSimilarity()
    a_img = dataset[i][0]
    b_img = dataset[j][0]
    imshow(a_img)
    imshow(b_img)
    # unsqueeze(0) adds the batch dimension the model expects
    a = model.get_embedding(a_img.unsqueeze(0).to(device))
    b = model.get_embedding(b_img.unsqueeze(0).to(device))
    print(f"sim(a,b) = {sim(a, b).item()}")
def try_untrained():
    # dataset = TripletImageFolder(FOOD101_IMG_PATH)
    dataset = TripletImageFolder(OWN_IMG_PATH)
    embed = EmbeddingResnet().to(device)
    sim = CosineSimilarity()
    a, b, c = map(lambda x: x.unsqueeze(0).to(device), dataset[2])
    print(f"sim(a,b) = {sim(embed(a), embed(b)).item()}")
    print(f"sim(a,c) = {sim(embed(a), embed(c)).item()}")
    print(f"sim(b,c) = {sim(embed(b), embed(c)).item()}")
def __init__(self, args, vocab, sp_file='simile-mrt/sim/sim.sp.30k.model'):
    super(ParaModel, self).__init__()
    self.args = args
    self.vocab = vocab
    self.gpu = args.gpu
    self.cosine = CosineSimilarity()
    self.sp = spm.SentencePieceProcessor()
    self.sp.Load(sp_file)
def test_query_support_distances(self):
    # Create some dummy data with easily verifiable distances
    q = 1  # 1 query per class
    k = 3  # 3 way classification
    d = 2  # embedding dimension of two

    query = torch.zeros([q * k, d], dtype=torch.double)
    query[0] = torch.Tensor([0, 0])
    query[1] = torch.Tensor([0, 1])
    query[2] = torch.Tensor([1, 0])

    support = torch.zeros([k, d], dtype=torch.double)
    support[0] = torch.Tensor([1, 1])
    support[1] = torch.Tensor([1, 2])
    support[2] = torch.Tensor([2, 2])

    distances = pairwise_distances(query, support, 'l2')

    self.assertEqual(distances.shape, (q * k, k), 'Output should have shape (q * k, k).')

    # Calculate squared distances by iterating through all query-support pairs
    for i, q_ in enumerate(query):
        for j, s_ in enumerate(support):
            self.assertEqual(
                (q_ - s_).pow(2).sum(), distances[i, j].item(),
                'The jth column of the ith row should be the squared distance between the '
                'ith query sample and the jth support sample')

    # Create some dummy data with easily verifiable distances
    query = torch.zeros([q * k, d], dtype=torch.double)
    query[0] = torch.Tensor([1, 0])
    query[1] = torch.Tensor([0, 1])
    query[2] = torch.Tensor([1, 1])

    support = torch.zeros([k, d], dtype=torch.double)
    support[0] = torch.Tensor([1, 1])
    support[1] = torch.Tensor([-1, -1])
    support[2] = torch.Tensor([0, 2])

    distances = pairwise_distances(query, support, 'cosine')

    # Calculate distances by iterating through all query-support pairs
    for i, q_ in enumerate(query):
        for j, s_ in enumerate(support):
            self.assertTrue(
                torch.isclose(1 - CosineSimilarity(dim=0)(q_, s_), distances[i, j], atol=2e-8),
                'The jth column of the ith row should be the cosine distance between the '
                'ith query sample and the jth support sample')
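# pairwise_distances itself is not shown in this snippet; below is a minimal sketch
# consistent with the assertions above ('l2' yields *squared* Euclidean distances,
# 'cosine' yields 1 - cosine similarity), not necessarily the tested implementation.
import torch

def pairwise_distances(x: torch.Tensor, y: torch.Tensor, matching_fn: str) -> torch.Tensor:
    if matching_fn == 'l2':
        # Broadcast to (n, m, d), then sum squared differences over the embedding axis.
        return (x.unsqueeze(1) - y.unsqueeze(0)).pow(2).sum(dim=2)
    if matching_fn == 'cosine':
        # Row-normalise, then one matmul gives all pairwise cosine similarities.
        x_norm = x / (x.norm(dim=1, keepdim=True) + 1e-8)
        y_norm = y / (y.norm(dim=1, keepdim=True) + 1e-8)
        return 1 - x_norm @ y_norm.t()
    raise ValueError(f'Unsupported matching function: {matching_fn}')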
def compute_cosine_matrix(self, A, B):
    similarity = CosineSimilarity()
    # similarity = PairwiseDistance()  # this line overwrote the cosine metric;
    # disabled so the function computes what its name promises
    print(A.size(), B.size())
    num_A, _ = A.size()
    num_B, _ = B.size()
    _matrix = np.zeros((num_A, num_B))
    for i in range(num_A):
        vA = A[i, :].unsqueeze(0).cuda()
        for j in range(num_B):
            vB = B[j, :].unsqueeze(0).cuda()
            _matrix[i, j] = similarity(vA, vB).item()
    return _matrix
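# A vectorized alternative (a sketch; same result up to float error): row-normalise
# once and use a single matmul instead of num_A * num_B separate forward calls.
import torch
import torch.nn.functional as F

def compute_cosine_matrix_vectorized(A: torch.Tensor, B: torch.Tensor):
    A = F.normalize(A.cuda(), dim=1)  # scale each row to unit length
    B = F.normalize(B.cuda(), dim=1)
    return (A @ B.t()).cpu().numpy()  # (num_A, num_B) cosine similarity matrix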
def try_trained():
    # dataset = TripletImageFolder(FOOD101_IMG_PATH)
    dataset = ImageFolder(FOOD101_IMG_PATH,
                          transform=transforms.Compose([
                              transforms.Resize((512, 512)),
                              transforms.ToTensor(),
                              transforms.Normalize((0.486, 0.459, 0.408),
                                                   (0.229, 0.224, 0.225))
                          ]))
    model = torch.load(MODEL_PATH, pickle_module=dill)
    sim = CosineSimilarity()
    a, b, c = map(lambda x: x.unsqueeze(0).to(device),
                  [dataset[95000][0], dataset[93003][0], dataset[93004][0]])
    a, b, c = map(model.get_embedding, (a, b, c))
    print(f"sim(a,b) = {sim(a, b).item()}")
    print(f"sim(a,c) = {sim(a, c).item()}")
    print(f"sim(b,c) = {sim(b, c).item()}")
def __init__(self, data, args, vocab, vocab_fr):
    super(ParaModel, self).__init__()
    self.data = data
    self.args = args
    self.gpu = args.gpu

    self.save_interval = args.save_interval
    if "report_interval" in args:
        self.report_interval = args.report_interval
    else:
        self.report_interval = args.save_interval

    self.vocab = vocab
    self.rev_vocab = {v: k for k, v in vocab.items()}
    self.vocab_fr = vocab_fr

    self.ngrams = args.ngrams
    self.delta = args.delta
    self.pool = args.pool
    self.dropout = args.dropout
    self.share_encoder = args.share_encoder
    self.share_vocab = args.share_vocab
    self.scramble_rate = args.scramble_rate
    self.zero_unk = args.zero_unk

    self.batchsize = args.batchsize
    self.max_megabatch_size = args.megabatch_size
    self.curr_megabatch_size = 1
    self.megabatch = []
    self.megabatch_anneal = args.megabatch_anneal
    self.increment = False

    self.sim_loss = nn.MarginRankingLoss(margin=self.delta)
    self.cosine = CosineSimilarity()

    self.embedding = nn.Embedding(len(self.vocab), self.args.dim)
    if self.vocab_fr is not None:
        self.embedding_fr = nn.Embedding(len(self.vocab_fr), self.args.dim)

    self.sp = None
    if args.sp_model:
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(args.sp_model)
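# Sketch of how self.cosine feeds self.sim_loss during training (the forward pass is
# not shown here; the embeddings and margin value below are hypothetical stand-ins).
# MarginRankingLoss with target 1 pushes the paraphrase score above the negative
# score by at least delta.
import torch
from torch.nn import CosineSimilarity, MarginRankingLoss

cosine, sim_loss = CosineSimilarity(), MarginRankingLoss(margin=0.4)  # 0.4 stands in for delta
h_src, h_par, h_neg = torch.randn(3, 8), torch.randn(3, 8), torch.randn(3, 8)
pos = cosine(h_src, h_par)   # similarity to the true paraphrase
neg = cosine(h_src, h_neg)   # similarity to a megabatch negative
loss = sim_loss(pos, neg, torch.ones_like(pos))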
def __init__(self, src_emb, tgt_emb, mapping, params):
    """
    Initialize trainer script.
    """
    self.src_emb = src_emb
    self.tgt_emb = tgt_emb
    self.src_dico = params.src_dico
    self.tgt_dico = getattr(params, 'tgt_dico', None)
    self.mapping = mapping
    self.params = params

    # optimizers
    if hasattr(params, 'mapping_optimizer'):
        optim_fn, optim_params = get_optimizer(params.mapping_optimizer)
        self.mapping_optimizer = optim_fn(mapping.parameters(), **optim_params)

    self.criterion = CosineSimilarity()  # ContrastiveLoss(margin=0.0, measure='cosine')
    self.criterionRCSLS = RCSLS()

    # best validation score
    self.best_valid_metric = -1e12

    self.decrease_lr = False
def __init__(self,
             vocab: Vocabulary,
             char_embedder: TextFieldEmbedder,
             word_embedder: TextFieldEmbedder,
             tokens_encoder: Seq2SeqEncoder,
             model_args,
             inp_drop_rate: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    """
    :param vocab: vocabulary from train and dev dataset
    :param char_embedder: character embedding + cnn encoder
    :param word_embedder: word embedding
    :param tokens_encoder: Bi-LSTM backbone for split
    :param model_args: model arguments
    :param inp_drop_rate: input dropout rate
    """
    super(FollowUpSnippetModel, self).__init__(vocab, regularizer)

    self.tokens_encoder = tokens_encoder

    self.projection_layer = torch.nn.Linear(
        in_features=word_embedder.get_output_dim() + 1 + char_embedder.get_output_dim(),
        out_features=self.tokens_encoder.get_input_dim(),
        bias=False)

    # integer to mark field, 0 or 1
    self.num_classes = 2
    self.num_conflicts = 2

    self._non_linear = torch.nn.PReLU()
    self.hidden_size = int(self.tokens_encoder.get_output_dim() / 2)

    self.policy_net = PolicyNet(self.tokens_encoder.get_output_dim() * 3, self.num_classes)

    self.token_field_embedding = word_embedder
    self.char_field_embedding = char_embedder

    self._scaled_value = 1.0
    self._self_attention = CosineMatrixAttention()

    self.margin_loss = MarginRankingLoss(margin=model_args.margin)

    # calculate span similarity
    self.cosine_similar = CosineSimilarity(dim=0)

    if inp_drop_rate > 0:
        self._variational_dropout = InputVariationalDropout(p=inp_drop_rate)
    else:
        self._variational_dropout = lambda x: x

    self.metrics = {
        "bleu": BLEUScore(),
        "reward": RewardScore(),
        "symbol": SymbolScore(),
        "reward_var": RewardScore(),
        "overall": RewardScore()
    }

    initializer(self)
if __name__ == '__main__':
    import cv2
    import dlib
    import matplotlib.pyplot as plt
    from torch.nn.modules.distance import PairwiseDistance, CosineSimilarity

    detector = dlib.get_frontal_face_detector()
    shaper = dlib.shape_predictor('./models/shape_predictor_68_face_landmarks.dat')
    model = InceptionResnetV1('casia-webface', device='cuda')
    model.eval()
    pwd = PairwiseDistance().cuda()
    cos = CosineSimilarity().cuda()

    image1 = cv2.imread('../Data/LFW/lfw-deepfunneled/Abdullah/Abdullah_0001.jpg')
    image1 = image1[:, :, ::-1]  # BGR -> RGB
    faces1 = detector(image1)
    alined1 = dlib.get_face_chip(image1, shaper(image1, faces1[0]), size=160)
    inp1 = prewhiten(torch.tensor(alined1, dtype=torch.float32))
    inp1 = inp1.permute(2, 0, 1).unsqueeze(0)  # HWC -> NCHW
    out1 = model(inp1.cuda())

    image2 = cv2.imread('../Data/LFW/lfw-deepfunneled/Abdullah/Abdullah_0003.jpg')
    image2 = image2[:, :, ::-1]  # BGR -> RGB
    faces2 = detector(image2)
    alined2 = dlib.get_face_chip(image2, shaper(image2, faces2[0]), size=160)
def __init__(self, args, device):
    super(CosineCoherence, self).__init__()
    self.seed = args.seed
    self.cos = CosineSimilarity(dim=-1)
    self.emb = GloveEmbedding(args)
    self.device = device
@author: tonyparker

Credits to [1] G. Lample *, A. Conneau * [*Cross-lingual Language Model Pretraining*](https://arxiv.org/abs/1901.07291)
"""

import os
import torch
import numpy.matlib
import numpy as np
from src.utils import AttrDict
from src.data.dictionary import Dictionary, BOS_WORD, EOS_WORD, PAD_WORD, UNK_WORD, MASK_WORD
from src.model.transformer import TransformerModel
import subprocess
from torch.nn.modules.distance import CosineSimilarity

cm = CosineSimilarity(dim=0)

# Initialize the model
model_path = './mlm_tlm_xnli15_1024.pth'
reloaded = torch.load(model_path)
params = AttrDict(reloaded['params'])
print("Supported languages: %s" % ", ".join(params.lang2id.keys()))

# build dictionary / update parameters
dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'], reloaded['dico_counts'])
params.n_words = len(dico)
params.bos_index = dico.index(BOS_WORD)
params.eos_index = dico.index(EOS_WORD)
params.pad_index = dico.index(PAD_WORD)
params.unk_index = dico.index(UNK_WORD)
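# Quick sanity check of cm (an illustration; the random vectors stand in for the
# 1024-dim hidden states this XLM checkpoint produces): dim=0 means the two inputs
# are single 1-D embedding vectors, compared along their only axis.
v1, v2 = torch.randn(1024), torch.randn(1024)
print(cm(v1, v2))  # 0-d tensor in [-1, 1]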
import tqdm
import numpy as np
import rouge
import edlib
import os
import pandas as pd
import re
import glob
import sys
import spacy  # needed below for spacy.load; was missing from the original imports

sys.path.append("./BERT/pytorch-pretrained-BERT-master")
sys.path.append("./BERT")
from pytorch_pretrained_bert import BertTokenizer, BertModel
from wmd import WMD
from torch.nn.modules.distance import CosineSimilarity

torch_emb_sim = CosineSimilarity()

from bert_score import score as bert_score

nlp = spacy.load('en_core_web_md')
nlp.add_pipe(WMD.SpacySimilarityHook(nlp), last=True)


def _clean_text(txt):
    return txt.lower()


class CFRInstance(object):

    def __init__(
            self,
            original_context: str,
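# torch_emb_sim above uses CosineSimilarity's default dim=1, so it expects batched
# embedding matrices of shape [batch, hidden] and returns one score per row pair
# (the shapes below are illustrative, e.g. BERT-base's 768-dim vectors).
import torch

_a = torch.randn(4, 768)
_b = torch.randn(4, 768)
print(torch_emb_sim(_a, _b).shape)  # torch.Size([4])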
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.similarity = CosineSimilarity(dim=1)
def __init__(self):
    super().__init__()
    self.cossim = CosineSimilarity()