def __init__(self, total_frames, video_size, spk_total_num):
    """Build the layers of the video query network.

    Args:
        total_frames: Stored unchanged as ``self.total_frames``.
        video_size: Stored unchanged as ``self.video_size``.
        spk_total_num: Stored as ``self.spk_total_num`` and used as the
            output width of the final linear layer (presumably the number
            of speakers -- TODO confirm against the caller).
    """
    super(VIDEO_QUERY, self).__init__()

    self.total_frames = total_frames
    self.video_size = video_size
    self.spk_total_num = spk_total_num

    # Pretrained image network. Note: element [2] of its output is the
    # final hidden state.
    self.images_net = myNet.inception_v3(pretrained=True)
    # Freeze every parameter of the image network.
    for frozen_param in self.images_net.parameters():
        frozen_param.requires_grad = False

    # Length of the hidden vector extracted per image; 2048 for Inception_v3.
    self.size_hidden_image = 2048

    lstm_options = dict(
        input_size=self.size_hidden_image,
        hidden_size=config.HIDDEN_UNITS,
        num_layers=config.NUM_LAYERS,
        batch_first=True,
        bidirectional=True,
    )
    self.lstm_layer = nn.LSTM(**lstm_options)

    # The LSTM is bidirectional, so its output width is twice HIDDEN_UNITS.
    lstm_output_width = 2 * config.HIDDEN_UNITS
    embedding_width = config.EMBEDDING_SIZE

    # Map the LSTM output onto EMBEDDING_SIZE dimensions.
    self.dense = nn.Linear(lstm_output_width, embedding_width)
    # Map the embedding onto spk_total_num outputs.
    self.Linear = nn.Linear(embedding_width, self.spk_total_num)
def __init__(self):
    """Create the linear layer and the pretrained Inception-v3 image network."""
    super(FACE_HIDDEN, self).__init__()

    # 3 * 299 * 299 inputs -- presumably a flattened 3-channel 299x299
    # image (TODO confirm against the forward pass).
    flat_input_width = 3 * 299 * 299
    hidden_width = 1024

    self.layer = nn.Linear(flat_input_width, hidden_width)
    self.image_net = myNet.inception_v3(pretrained=True)
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import myNet

# Fix the RNG seed so the random input below is reproducible.
torch.manual_seed(1)

# Earlier draft, kept for reference:
# class VIDEO_QUERY(nn.Module):
#     def __init__(self,total_frames,video_size):
#         self.images_net=models.inception_v3(pretrained=True)
#
#     def forward(self,x):

# Smoke test: run a random batch through the project's Inception-v3 and
# print the size of element [2] of its output.
# mm=models.inception_v3()
# NOTE(review): the positional 1 is presumably ``pretrained`` -- confirm
# against myNet.inception_v3's signature.
mm = myNet.inception_v3(1)
#
xx = Variable(torch.rand([2, 3, 299, 299]))  # standard size is 299*299.
# Parenthesised single-argument print: identical output on Python 2, and
# valid syntax on Python 3 (the bare print statement is not).
print(mm(xx)[2].size())