def __init__(self, pretrained_matrix, embed_size, **modelParams):
    super(HybridIMP, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    sigma = 1 if 'init_sigma' not in modelParams else modelParams['init_sigma']
    alpha = 0.1 if 'alpha' not in modelParams else modelParams['alpha']

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, padding_idx=0)
    # self.EmbedNorm = nn.LayerNorm(embed_size)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    hidden_dim = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    # self.Encoder = BiLstmCellEncoder(input_size=embed_size, **modelParams)
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)

    self.Decoder = CNNEncoder1D([hidden_dim, hidden_dim])

    # TODO: use Sigma
    self.Sigma = nn.Parameter(t.FloatTensor([sigma]))
    self.ALPHA = alpha
    self.Dim = hidden_dim
    self.NumClusterSteps = 1 if 'cluster_num_step' not in modelParams else modelParams['cluster_num_step']

    self.Clusters = None
    self.ClusterLabels = None
def __init__(self, pretrained_matrix, embed_size, hidden=128, layer_num=1,
             self_attention=False, self_att_dim=64, word_cnt=None):
    super(RelationNet, self).__init__()

    # trainable embedding layer
    if pretrained_matrix is not None:
        self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    else:
        self.Embedding = nn.Embedding(word_cnt, embedding_dim=embed_size, padding_idx=0)

    self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(embed_size,        # 64
                                 hidden_size=hidden,
                                 layer_num=layer_num,
                                 self_attention=self_attention,
                                 self_att_dim=self_att_dim,
                                 useBN=False)

    self.Relation = nn.Sequential()
def __init__(self, pretrained_matrix, embed_size, ntn_hidden=100,
             routing_iters=3, word_cnt=None, **modelParams):
    super(InductionNet, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False
    self.Iters = routing_iters

    if pretrained_matrix is not None:
        self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, padding_idx=0)
    else:
        self.Embedding = nn.Embedding(word_cnt, embedding_dim=embed_size, padding_idx=0)

    self.EmbedDrop = nn.Dropout(modelParams['dropout'])
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    self.MiddleEncoder = None

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
    self.Transformer = nn.Linear(hidden_size, hidden_size)
    self.NTN = NTN(hidden_size, hidden_size, ntn_hidden)
def __init__(self, pretrained_matrix, embed_size, feat_avg='pre',
             contrastive_factor=None, **modelParams):
    super(AFEAT, self).__init__()

    self.Avg = feat_avg
    self.ContraFac = contrastive_factor
    self.DisTempr = modelParams['temperature'] if 'temperature' in modelParams else 1

    # trainable embedding layer
    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)

    self.Decoder = CNNEncoder1D([
        (modelParams['bidirectional'] + 1) * modelParams['hidden_size'],
        (modelParams['bidirectional'] + 1) * modelParams['hidden_size']
    ])

    self.SetFunc = DeepAffine(
        embed_dim=(modelParams['bidirectional'] + 1) * modelParams['hidden_size'],
        dropout=modelParams['dropout'])
def __init__(self, n, pretrained_matrix, embed_size, seq_len, **kwargs):
    super(BaseLearner, self).__init__()

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix,
                                                  freeze=False,
                                                  padding_idx=0)
    self.EmbedNorm = nn.LayerNorm(embed_size)
    self.Encoder = BiLstmEncoder(input_size=embed_size, **kwargs)   # CNNEncoder1D(**kwargs)

    self.Attention = nn.Linear(2 * kwargs['hidden_size'], 1, bias=False)
    # self.Attention = AttnReduction(input_dim=2*kwargs['hidden_size'])

    # out_size = kwargs['hidden_size']
    # self.fc = nn.Linear(seq_len, n)
    # for a bidirectional LSTM the output dimension is twice the hidden size;
    # for a CNN it equals the embedding dimension
    self.fc = nn.Linear(kwargs['hidden_size'] * 2, n)

    # names of the parameters that need to be adapted
    self.adapted_keys = []
    # [
    #     # 'Attention.IntAtt.weight',
    #     # 'Attention.ExtAtt.weight',
    #     'Attention.weight',
    #     'fc.weight',
    #     'fc.bias']
    self.addAdaptedKeys()
def __init__(self, n, pretrained_matrix, embed_size, seq_len, **modelParams):
    super(BaseLearner, self).__init__()

    # names of the parameters that need to be adapted
    self.adapted_keys = [
        # 'Attention.IntAtt.weight',
        # 'Attention.ExtAtt.weight',
        'Attention.weight',
        'fc.weight',
        'fc.bias'
    ]

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, padding_idx=0)
    # self.EmbedNorm = nn.LayerNorm(embed_size)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)   # CNNEncoder1D(**kwargs)
    # self.Encoder = TemporalConvNet(**kwargs)

    directions = 1 + modelParams['bidirectional']
    self.Attention = nn.Linear(directions * modelParams['hidden_size'], 1, bias=False)
    # self.Attention = nn.Linear(2*kwargs['hidden_size'], 1, bias=False)
    # self.Attention = AttnReduction(input_dim=2*kwargs['hidden_size'])

    # out_size = kwargs['hidden_size']
    # self.fc = nn.Linear(seq_len, n)
    self.fc = nn.Linear(directions * modelParams['hidden_size'], n)
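# A minimal sketch (not the repository's implementation) of how `adapted_keys`
# is typically consumed in MAML-style meta-learning: only parameters whose
# names appear in the list take part in the inner-loop update. The helper name
# below is hypothetical.
def select_adapted_params(learner):
    """Return the (name, parameter) pairs listed in learner.adapted_keys."""
    return [(name, p) for name, p in learner.named_parameters()
            if name in learner.adapted_keys]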
def __init__(self, pretrained_matrix, embed_size, feat_avg='pre',
             contrastive_factor=None, **modelParams):
    super(FEAT, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False
    self.Avg = feat_avg
    self.ContraFac = contrastive_factor
    self.DisTempr = modelParams['temperature'] if 'temperature' in modelParams else 1

    # self.Encoder = FastTextEncoder(pretrained_matrix,
    #                                embed_size,
    #                                modelParams['dropout'])

    # trainable embedding layer
    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])
    # self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    self.MiddleEncoder = None
    # self.Encoder = TemporalConvNet(num_inputs=embed_size,
    #                                init_hidden_channel=modelParams['tcn_init_channel'],
    #                                num_channels=modelParams['tcn_channels'])

    hidden_size = (modelParams['bidirectional'] + 1) * modelParams['hidden_size']
    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
    # self.Decoder = StepMaxReduce()

    if modelParams['set_function'] == 'deepset':
        self.SetFunc = DeepSet(embed_dim=hidden_size, **modelParams)
    elif modelParams['set_function'] == 'transformer':
        self.SetFunc = TransformerSet(trans_input_size=hidden_size, **modelParams)
    else:
        raise ValueError('Unrecognized set function type:', modelParams['set_function'])
def __init__(self, n, pretrained_matrix, embed_size, **modelParams):
    super(BaseLearner, self).__init__()

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix,
                                                  freeze=False,
                                                  padding_idx=0)
    # self.EmbedNorm = nn.LayerNorm(embed_size)
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']
    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])

    # out_size = kwargs['hidden_size']
    # for a bidirectional LSTM the output dimension is twice the hidden size
    self.fc = nn.Linear(hidden_size, n)
def __init__(self, k, pretrained_matrix, embed_size, **modelParams):
    super(HAPNet, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    # trainable embedding layer
    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])
    # self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = TemporalConvNet(**modelParams)
    self.MiddleEncoder = None

    # dimension of the encoded feature vectors
    feature_dim = (1 + modelParams['bidirectional']) * modelParams['hidden_size']   # modelParams['num_channels'][-1]

    self.Decoder = CNNEncoder1D(num_channels=[feature_dim, feature_dim])

    # feature-attention module: the encoded support vectors are stacked into a
    # single-channel matrix, with one batch item per support set
    if k % 2 == 0:
        warnings.warn("k=%d is even, so the convolution kernels in feature_attention "
                      "have even height and the padding behaviour changes slightly" % k)
    attention_paddings = [(k // 2, 0), (k // 2, 0), (0, 0)]
    attention_channels = [1, 32, 64, 1]
    attention_strides = [(1, 1), (1, 1), (k, 1)]
    attention_kernels = [(k, 1), (k, 1), (k, 1)]
    attention_relus = ['leaky', 'leaky', 'leaky']

    self.FeatureAttention = nn.Sequential(*[
        CNNBlock2D(attention_channels[i],
                   attention_channels[i + 1],
                   attention_strides[i],
                   attention_kernels[i],
                   attention_paddings[i],
                   attention_relus[i],
                   pool=None)
        for i in range(len(attention_channels) - 1)
    ])

    # instance-attention module: the support set is repeated once per query and
    # each query repeated n*k times, because the support embedding differs under each query
    self.InstanceAttention = InstanceAttention(feature_dim, feature_dim)
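# A shape-only sketch of the feature-attention stack above, using plain nn.Conv2d
# in place of CNNBlock2D (batch norm and activations omitted) and assumed values
# k=5, feature_dim=256, n=5 classes. It shows how the k support embeddings per
# class collapse into one per-dimension attention vector, and why odd k matters.
import torch as t
import torch.nn as nn

k, d, n = 5, 256, 5
stack = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=(k, 1), stride=(1, 1), padding=(k // 2, 0)),
    nn.Conv2d(32, 64, kernel_size=(k, 1), stride=(1, 1), padding=(k // 2, 0)),
    nn.Conv2d(64, 1, kernel_size=(k, 1), stride=(k, 1), padding=(0, 0)),
)
support = t.randn(n, 1, k, d)        # k encoded support samples per class
print(stack(support).shape)          # torch.Size([5, 1, 1, 256]) when k is odd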
def __init__(self, k, pretrained_matrix, embed_size, **modelParams):
    super(ConvProtoNet, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    # trainable embedding layer
    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])
    # self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    self.MiddleEncoder = None
    # self.Encoder = TemporalConvNet(num_inputs=embed_size,
    #                                init_hidden_channel=modelParams['tcn_init_channel'],
    #                                num_channels=modelParams['tcn_channels'])

    self.Decoder = CNNEncoder1D([(modelParams['bidirectional'] + 1) * modelParams['hidden_size'],
                                 (modelParams['bidirectional'] + 1) * modelParams['hidden_size']])

    if k % 2 == 0:
        warnings.warn("k=%d is even, so the convolution kernels in the induction "
                      "module have even height and the padding behaviour changes slightly" % k)
    attention_paddings = [(k // 2, 0), (k // 2, 0), (0, 0)]
    attention_channels = [1, 32, 64, 1]
    attention_strides = [(1, 1), (1, 1), (k, 1)]
    attention_kernels = [(k, 1), (k, 1), (k, 1)]
    attention_relus = ['relu', 'relu', None]

    self.Induction = nn.Sequential(*[
        CNNBlock2D(attention_channels[i],
                   attention_channels[i + 1],
                   attention_strides[i],
                   attention_kernels[i],
                   attention_paddings[i],
                   attention_relus[i],
                   pool=None)
        for i in range(len(attention_channels) - 1)
    ])
def __init__(self, pretrained_matrix, embed_size, **modelParams):
    super(SIMPLE, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    sigma = 1 if 'init_sigma' not in modelParams else modelParams['init_sigma']
    alpha = 0.1 if 'alpha' not in modelParams else modelParams['alpha']

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, padding_idx=0)
    self.EmbedNorm = nn.LayerNorm(embed_size)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    # --------------------------------------------------------------------------
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = BiLstmCellEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = TransformerEncoder(embed_size=embed_size, **modelParams)
    # --------------------------------------------------------------------------

    # --------------------------------------------------------------------------
    self.MiddleEncoder = None   # MultiHeadAttention(mhatt_input_size=hidden_size, **modelParams)
    # --------------------------------------------------------------------------

    # --------------------------------------------------------------------------
    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
    # self.Decoder = SelfAttnReduction(input_size=hidden_size, **modelParams)
    # self.Decoder = BiliAttnReduction(input_dim=hidden_size, **modelParams)
    # self.Decoder = StepMaxReduce()
    # --------------------------------------------------------------------------

    # TODO: use Sigma
    self.Sigma = nn.Parameter(t.FloatTensor([sigma]))
    self.ALPHA = alpha
    self.Dim = hidden_size
    self.NumClusterSteps = 1 if 'cluster_num_step' not in modelParams else modelParams['cluster_num_step']

    self.Clusters = None
    self.ClusterLabels = None
def __init__(self, pretrained_matrix, embed_size, **modelParams):
    super(NnNet, self).__init__()

    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    # trainable embedding layer
    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = BiLstmCellEncoder(input_size=embed_size, **modelParams)

    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
def __init__(self, n, loss_fn, pretrained_matrix, embed_size,
             word_cnt=None, lr=0.01, **modelParams):
    super(FT, self).__init__()

    self.Lr = lr
    self.LossFn = loss_fn
    self.DistTemp = modelParams['temperature'] if 'temperature' in modelParams else 1
    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    # trainable embedding layer
    if pretrained_matrix is not None:
        self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    else:
        self.Embedding = nn.Embedding(word_cnt, embedding_dim=embed_size, padding_idx=0)

    # self.EmbedNorm = nn.LayerNorm(embed_size)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    # ------------------------------------------------------------------------
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = BiLstmCellEncoder(input_size=embed_size, **modelParams)
    # ------------------------------------------------------------------------

    # ------------------------------------------------------------------------
    self.MiddleEncoder = None   # MultiHeadAttention(mhatt_input_size=hidden_size, **modelParams)
    # ------------------------------------------------------------------------

    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
    self.Classifier = nn.Linear(hidden_size, n)
def __init__(self,
             channels=[1, 32, 64, 64],      # 3 convolution layers by default
             lstm_input_size=64 * 2 * 2,    # a 10x10 matrix becomes 2x2 after two poolings
             strides=None,
             hidden_size=64,
             layer_num=1,
             self_att_dim=32):
    super(CNNLstmProtoNet, self).__init__()

    self.Embedding = CNNEncoder(channels=channels,
                                strides=strides,
                                flatten=False,   # keep the sequence information
                                pools=[True, True, False])

    self.LstmEncoder = BiLstmEncoder(input_size=lstm_input_size,
                                     hidden_size=hidden_size,
                                     layer_num=layer_num,
                                     self_att_dim=self_att_dim)
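# A quick, self-contained shape check (with assumed values) behind the
# lstm_input_size=64*2*2 comment above: a 10x10 input passed through two 2x2
# max-poolings ends up as 2x2, so 64 channels flatten to 64*2*2=256 per step.
import torch as t
import torch.nn as nn

x = t.randn(1, 64, 10, 10)       # [batch, channels, H, W]
pool = nn.MaxPool2d(kernel_size=2)
print(pool(pool(x)).shape)       # torch.Size([1, 64, 2, 2])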
def __init__(self, n, pretrained_matrix, embed_size, seq_len, **modelParams):
    super(BaseLearner, self).__init__()

    # names of the parameters that need to be adapted
    self.adapted_keys = [
        # 'Attention.IntAtt.weight',
        # 'Attention.ExtAtt.weight',
        # 'Attention.Encoder.0.0.weight',
        # 'Attention.Encoder.0.1.weight',
        # 'Attention.Encoder.0.1.bias',
        'Attention.weight',
        'fc.weight',
        'fc.bias'
    ]

    self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix,
                                                  freeze=False,
                                                  padding_idx=0)
    # self.EmbedNorm = nn.LayerNorm(embed_size)

    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)   # CNNEncoder1D(**kwargs)
    # self.Encoder = TemporalConvNet(**kwargs)
    # self.Encoder = TransformerEncoder(embed_size=embed_size, **kwargs)

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    # self.Attention = nn.Linear(kwargs['num_channels'][-1], 1, bias=False)
    self.Attention = nn.Linear(hidden_size, 1, bias=False)
    # self.Attention = CNNEncoder1D(dims=[kwargs['hidden_size']*2, 256], bn=[False])
    # self.Attention = AttnReduction(input_dim=2*kwargs['hidden_size'])

    # out_size = kwargs['hidden_size']
    # self.fc = nn.Linear(seq_len, n)
    self.fc = nn.Linear(hidden_size, n)
def __init__(self, pretrained_matrix, embed_size, word_cnt=None, **modelParams):
    super(ProtoNet, self).__init__()

    self.DistTemp = modelParams['temperature'] if 'temperature' in modelParams else 1
    self.DataParallel = modelParams['data_parallel'] if 'data_parallel' in modelParams else False

    # trainable embedding layer
    if pretrained_matrix is not None:
        self.Embedding = nn.Embedding.from_pretrained(pretrained_matrix, freeze=False)
    else:
        self.Embedding = nn.Embedding(word_cnt, embedding_dim=embed_size, padding_idx=0)

    # self.EmbedNorm = nn.LayerNorm(embed_size)
    self.EmbedDrop = nn.Dropout(modelParams['dropout'])

    hidden_size = (1 + modelParams['bidirectional']) * modelParams['hidden_size']

    # ------------------------------------------------------------------------
    self.Encoder = BiLstmEncoder(input_size=embed_size, **modelParams)
    # self.Encoder = BiLstmCellEncoder(input_size=embed_size, **modelParams)
    # ------------------------------------------------------------------------

    # ------------------------------------------------------------------------
    self.MiddleEncoder = None   # MultiHeadAttention(mhatt_input_size=hidden_size, **modelParams)
    # ------------------------------------------------------------------------

    # Other encoder/decoder variants that were experimented with:
    # self.Encoder = TransformerEncoder(embed_size=embed_size, **modelParams)
    # self.Encoder = CNNEncoder2D(dims=[1, 64, 128, 256, 256],
    #                             kernel_sizes=[3, 3, 3, 3],
    #                             paddings=[1, 1, 1, 1],
    #                             relus=[True, True, True, True],
    #                             pools=['max', 'max', 'max', 'ada'])
    # self.Encoder = CNNEncoder1D(**modelParams)
    # self.Encoder = TemporalConvNet(**modelParams)
    # self.Encoder = nn.ModuleList([
    #     BiLstmEncoder(embed_size, hidden_size=hidden, layer_num=1,
    #                   self_att_dim=self_att_dim, useBN=False),
    #     BiLstmEncoder(2 * hidden, hidden_size=hidden, layer_num=1,
    #                   self_att_dim=self_att_dim, useBN=False)
    # ])
    # self.EncoderNorm = nn.ModuleList([nn.LayerNorm(2 * hidden), nn.LayerNorm(2 * hidden)])
    # self.Decoder = StepMaxReduce()

    self.Decoder = CNNEncoder1D([hidden_size, hidden_size])
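# A hypothetical instantiation sketch (not from the repository): the keyword
# names mirror the keys read by ProtoNet above, but the concrete values, the
# vocabulary size, and the embedding dimension are illustrative assumptions,
# and BiLstmEncoder/CNNEncoder1D are assumed to accept the forwarded kwargs.
import torch as t

pretrained = t.randn(5000, 300)            # placeholder [vocab, embed_size] matrix
model = ProtoNet(pretrained_matrix=pretrained,
                 embed_size=300,
                 dropout=0.2,
                 bidirectional=True,
                 hidden_size=128,
                 temperature=1,
                 data_parallel=False)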
    # ---------------------------- feature-wise affine --------------------------
    weight = weight.expand_as(x)
    bias = bias.expand_as(x)
    # ----------------------------------------------------------------------------
    return weight * x + bias

    def penalizedNorm(self):
        return self.WeightMuplier.norm(), self.BiasMuplier.norm()


if __name__ == '__main__':
    model = BiLstmEncoder(input_size=64)
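# A minimal, self-contained sketch of the feature-wise affine idea used above.
# The surrounding module and the names WeightMuplier/BiasMuplier belong to the
# repository; this standalone version is only illustrative and treats them as
# plain per-dimension parameters.
import torch as t
import torch.nn as nn

class FeatureWiseAffine(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.WeightMuplier = nn.Parameter(t.ones(dim))    # per-dimension scale
        self.BiasMuplier = nn.Parameter(t.zeros(dim))     # per-dimension shift

    def forward(self, x):                                 # x: [..., dim]
        weight = self.WeightMuplier.expand_as(x)
        bias = self.BiasMuplier.expand_as(x)
        return weight * x + bias

    def penalizedNorm(self):
        return self.WeightMuplier.norm(), self.BiasMuplier.norm()

print(FeatureWiseAffine(8)(t.randn(4, 8)).shape)          # torch.Size([4, 8])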