Beispiel #1
0
 def cost(self, speech, simembed):
     """Return the loss between the encoded speech and ``simembed``.

     ``speech`` is passed through ``SpeechEncoderBottom`` and then
     ``SpeechEncoderTop``; ``simembed`` is used exactly as given, with no
     encoder applied to it. The pair is scored by ``loss.cosine_matrix``
     and the scores are handed to ``loss.contrastive`` together with the
     margin stored under ``self.config['margin_size']``.
     """
     lower_repr = self.SpeechEncoderBottom(speech)
     encoded_speech = self.SpeechEncoderTop(lower_repr)
     similarity = loss.cosine_matrix(encoded_speech, simembed)
     return loss.contrastive(similarity, margin=self.config['margin_size'])
Beispiel #2
0
 def cost(self, speech, text):
     """Return the loss between the encoded speech and the encoded text.

     Each input runs through its own bottom encoder followed by its own
     top encoder (speech first, then text). The two encodings are scored
     by ``loss.cosine_matrix`` and the scores are handed to
     ``loss.contrastive`` together with the margin stored under
     ``self.config['margin_size']``.
     """
     speech_lower = self.SpeechEncoderBottom(speech)
     encoded_speech = self.SpeechEncoderTop(speech_lower)
     text_lower = self.TextEncoderBottom(text)
     encoded_text = self.TextEncoderTop(text_lower)
     similarity = loss.cosine_matrix(encoded_speech, encoded_text)
     return loss.contrastive(similarity, margin=self.config['margin_size'])
Beispiel #3
0
 def cost(self, speech, image):
     """Return the loss between the encoded speech and the encoded image.

     ``speech`` is passed through ``SpeechEncoderBottom`` and then
     ``SpeechEncoderTop``; ``image`` is passed through ``ImageEncoder``.
     The two encodings are scored by ``loss.cosine_matrix`` and the
     scores are handed to ``loss.contrastive`` together with the margin
     stored under ``self.config['margin_size']``.
     """
     speech_lower = self.SpeechEncoderBottom(speech)
     encoded_speech = self.SpeechEncoderTop(speech_lower)
     encoded_image = self.ImageEncoder(image)
     similarity = loss.cosine_matrix(encoded_speech, encoded_image)
     return loss.contrastive(similarity, margin=self.config['margin_size'])
Beispiel #4
0
 def cost(self, text, image):
     """Return the loss between the encoded text and the encoded image.

     ``text`` is passed through ``TextEncoderBottom`` and then
     ``TextEncoderTop``; ``image`` is passed through ``ImageEncoder``.
     The two encodings are scored by ``loss.cosine_matrix`` and the
     scores are handed to ``loss.contrastive`` together with the margin
     stored under ``self.config['margin_size']``.
     """
     text_lower = self.TextEncoderBottom(text)
     encoded_text = self.TextEncoderTop(text_lower)
     encoded_image = self.ImageEncoder(image)
     similarity = loss.cosine_matrix(encoded_text, encoded_image)
     return loss.contrastive(similarity, margin=self.config['margin_size'])
Beispiel #5
0
 def cost(self, beg, end):
     """Return the loss between the projected ``beg`` and ``end`` inputs.

     Both inputs go through the shared ``Encode`` module and then through
     their own projection (``ProjBeg`` / ``ProjEnd``). Each projection is
     L2-normalised along dimension 1 before the pair is scored by
     ``cosine_matrix``; the scores are handed to ``contrastive`` together
     with the margin stored under ``self.config['audio']['margin_size']``.
     """
     # The "beg" branch is fully computed before the "end" branch starts.
     beg_projected = self.ProjBeg(self.Encode(beg))
     beg_unit = F.normalize(beg_projected, p=2, dim=1)

     end_projected = self.ProjEnd(self.Encode(end))
     end_unit = F.normalize(end_projected, p=2, dim=1)

     similarity = cosine_matrix(beg_unit, end_unit)
     return contrastive(similarity, margin=self.config['audio']['margin_size'])