def __init__(self,
             input_dim: int,
             hidden_dim: int,
             projection_dim: int,
             feedforward_hidden_dim: int,
             num_layers: int,
             num_attention_heads: int,
             use_positional_encoding: bool = True,
             dropout_prob: float = 0.2) -> None:
    super(StackedSelfAttentionEncoder, self).__init__()
    self._use_positional_encoding = use_positional_encoding
    self._attention_layers: List[MultiHeadSelfAttention] = []
    self._feedforward_layers: List[FeedForward] = []
    self._layer_norm_layers: List[LayerNorm] = []
    self._feed_forward_layer_norm_layers: List[LayerNorm] = []

    feedforward_input_dim = input_dim
    for i in range(num_layers):
        feedforward = FeedForward(feedforward_input_dim,
                                  activations=[Activation.by_name('relu')(),
                                               Activation.by_name('linear')()],
                                  hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                  num_layers=2,
                                  dropout=dropout_prob)
        self.add_module(f"feedforward_{i}", feedforward)
        self._feedforward_layers.append(feedforward)

        feedforward_layer_norm = LayerNorm(feedforward.get_output_dim())
        self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                input_dim=hidden_dim,
                                                attention_dim=projection_dim,
                                                values_dim=projection_dim)
        self.add_module(f"self_attention_{i}", self_attention)
        self._attention_layers.append(self_attention)

        layer_norm = LayerNorm(self_attention.get_output_dim())
        self.add_module(f"layer_norm_{i}", layer_norm)
        self._layer_norm_layers.append(layer_norm)

        # After the first block, the feedforward consumes the hidden size.
        feedforward_input_dim = hidden_dim

    self.dropout = Dropout(dropout_prob)
    self._input_dim = input_dim
    self._output_dim = self._attention_layers[-1].get_output_dim()
def __init__(self, configuration):
    AbstractTorchModule.__init__(self)
    self.layers = configuration["model_parameters"]["gnn_layers"]
    self.configuration = configuration
    self.max_nodes = configuration["task"]["max_nodes"]
    self.max_query_size = configuration["task"]["max_query_size"]
    self.max_candidates = configuration["task"]["max_candidates"]

    embedding_input_dim = 300

    # n_edge_types is expected to be defined elsewhere (e.g. as a class attribute).
    self.gcn = QaGNN(dim=512,
                     n_layers=self.layers,
                     n_relations=self.n_edge_types,
                     share_parameters=True)

    self.node_compress_mlp = torch.nn.Sequential(
        XavierLinear(embedding_input_dim, 256),
        torch.nn.Tanh(),
        torch.nn.Dropout(p=0.2))

    self.node_mlp = torch.nn.Sequential(
        XavierLinear(512, 1024),
        torch.nn.Tanh(),
        torch.nn.Dropout(p=0.2),
        XavierLinear(1024, 512),
        torch.nn.Tanh(),
        torch.nn.Dropout(p=0.2))

    # self.lstm = LSTM(3072, 256, 2, batch_first=True, bidirectional=True)
    self.lstm1 = torch.nn.LSTM(embedding_input_dim, 256, num_layers=1,
                               batch_first=True, bidirectional=True, dropout=0)
    self.lstm2 = torch.nn.LSTM(512, 128, num_layers=1,
                               batch_first=True, bidirectional=True, dropout=0)
    self.query_dropout = Dropout(p=0.2)

    self.second_mlp = torch.nn.Sequential(
        XavierLinear(768, 128),
        torch.nn.Tanh(),
        XavierLinear(128, 1),
        torch.nn.Dropout(p=0.2))

    self.loss = CrossEntropyLoss(reduction="none")
def __init__(self,
             input_dim: int,
             hidden_dim: int,
             output_dim: int,
             dropout_p: float) -> None:
    """Encoder module.

    :param input_dim: Input dimensionality.
    :type input_dim: int
    :param hidden_dim: Hidden dimensionality.
    :type hidden_dim: int
    :param output_dim: Output dimensionality.
    :type output_dim: int
    :param dropout_p: Dropout probability.
    :type dropout_p: float
    """
    super(Encoder, self).__init__()

    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim

    self.dropout: Module = Dropout(p=dropout_p)

    # Bidirectional outputs concatenate both directions, hence the * 2 input sizes.
    self.gru_1: GRU = GRU(input_size=self.input_dim, hidden_size=self.hidden_dim,
                          num_layers=1, bias=True, batch_first=True,
                          bidirectional=True)
    self.gru_2: GRU = GRU(input_size=self.hidden_dim * 2, hidden_size=self.hidden_dim,
                          num_layers=1, bias=True, batch_first=True,
                          bidirectional=True)
    self.gru_3: GRU = GRU(input_size=self.hidden_dim * 2, hidden_size=self.output_dim,
                          num_layers=1, bias=True, batch_first=True,
                          bidirectional=True)

    self.gru_1.flatten_parameters()
    self.gru_2.flatten_parameters()
    self.gru_3.flatten_parameters()
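# A minimal smoke test for the Encoder above (assumed usage: the forward pass,
# not shown in this snippet, presumably chains gru_1 -> gru_2 -> gru_3 with
# dropout in between).
import torch

enc = Encoder(input_dim=64, hidden_dim=128, output_dim=32, dropout_p=0.2)
x = torch.randn(4, 100, 64)               # (batch, time, features)
out_1, _ = enc.gru_1(x)                   # -> (4, 100, 256): 2 directions * hidden_dim
out_2, _ = enc.gru_2(enc.dropout(out_1))  # -> (4, 100, 256)
out_3, _ = enc.gru_3(enc.dropout(out_2))
print(out_3.shape)                        # torch.Size([4, 100, 64]): 2 * output_dim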
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             pos_tag_embedding: Embedding = None,
             users_embedding: Embedding = None,
             dropout: float = 0.1,
             label_namespace: str = "labels",
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None) -> None:
    super().__init__(vocab, regularizer)
    self._label_namespace = label_namespace
    self._dropout = Dropout(dropout)
    self._text_field_embedder = text_field_embedder
    self._pos_tag_embedding = pos_tag_embedding or None

    representation_dim = self._text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += self._pos_tag_embedding.get_output_dim()

    self._report_cnn = CnnEncoder(representation_dim, 25)
    self._comment_cnn = CnnEncoder(representation_dim, 25)

    lstm_input_dim = self._comment_cnn.get_output_dim()
    self._user_embedding = users_embedding or None
    if users_embedding is not None:
        lstm_input_dim += self._user_embedding.get_output_dim()

    rnn = nn.LSTM(input_size=lstm_input_dim, hidden_size=150,
                  batch_first=True, bidirectional=True)
    self._encoder = PytorchSeq2SeqWrapper(rnn)
    self._seq2vec = CnnEncoder(self._encoder.get_output_dim(), 25)

    self._num_class = self.vocab.get_vocab_size(self._label_namespace)
    self._bilinear_sim = BilinearSimilarity(self._encoder.get_output_dim(),
                                            self._encoder.get_output_dim())
    self._projector = FeedForward(self._seq2vec.get_output_dim(), 2,
                                  [50, self._num_class],
                                  Activation.by_name("sigmoid")(), dropout)

    self._golden_instances = None
    self._golden_instances_labels = None
    self._golden_instances_id = None

    self._metrics = {
        "accuracy": CategoricalAccuracy(),
        "f-measure": F1Measure(
            positive_label=vocab.get_token_index("feature", "labels")),
    }
    self._loss = torch.nn.CrossEntropyLoss()
    self._contrastive_loss = ContrastiveLoss()
    self._mse_loss = torch.nn.MSELoss()
    initializer(self)
def __init__(self, input_size, hidden_size, bilstm_layers, weights_matrix,
             cam_type, device, context='art', pos_dim=100, src_dim=100,
             pos_quartiles=4, nr_srcs=3):
    super(ContextAwareModel, self).__init__()

    self.input_size = input_size
    self.hidden_size = hidden_size  # + pos_dim + src_dim
    self.bilstm_layers = bilstm_layers
    self.device = device
    self.cam_type = cam_type
    self.context = context

    # Store pretrained embeddings to use as representations of sentences.
    self.weights_matrix = torch.tensor(weights_matrix, dtype=torch.float,
                                       device=self.device)
    self.embedding = Embedding.from_pretrained(self.weights_matrix)
    # Option to embed the position of the target sentence in the article.
    self.embedding_pos = Embedding(pos_quartiles, pos_dim)
    self.embedding_src = Embedding(nr_srcs, src_dim)
    self.emb_size = weights_matrix.shape[1]

    # Initialise LSTMs for article and event context.
    self.lstm_art = LSTM(self.input_size, self.hidden_size,
                         num_layers=self.bilstm_layers, bidirectional=True,
                         dropout=0.2)
    self.lstm_ev1 = LSTM(self.input_size, self.hidden_size,
                         num_layers=self.bilstm_layers, bidirectional=True,
                         dropout=0.2)
    self.lstm_ev2 = LSTM(self.input_size, self.hidden_size,
                         num_layers=self.bilstm_layers, bidirectional=True,
                         dropout=0.2)

    # Attention-related attributes (currently unused):
    # self.attention = BahdanauAttention(self.hidden_size, key_size=self.hidden_size * 2, query_size=self.emb_size)
    # self.rob_squeezer = nn.Linear(self.emb_size, self.hidden_size)

    self.dropout = Dropout(0.6)
    self.num_labels = 2
    self.pad_index = 0

    if self.context == 'art':
        # Size of target sentence + 1 article context.
        self.context_rep_dim = self.emb_size + self.hidden_size * 2
    else:
        # Size of target sentence + 3 article contexts.
        self.context_rep_dim = self.emb_size + self.hidden_size * 6

    if self.cam_type == 'cim*':
        self.context_rep_dim += src_dim  # add representation of the source

    self.half_context_rep_dim = int(self.context_rep_dim * 0.5)
    self.dense = nn.Linear(self.context_rep_dim, self.half_context_rep_dim)

    if self.cam_type == 'cnm':
        # Optional context-naive setting.
        self.classifier = Linear(self.emb_size, self.num_labels)
    else:
        self.classifier = Linear(self.half_context_rep_dim, self.num_labels)
        # self.classifier = Linear(self.half_context_rep_dim + self.emb_size + src_dim, 2)
    # self.sigm = Sigmoid()
def __init__(
    self,
    out_channels,
    kernel_size,
    num_layers,
    inner_dim,
    input_shape,
    stride=1,
    beta=1,
    dropout=0.15,
    activation=Swish,
    se_activation=torch.nn.Sigmoid,
    norm=BatchNorm1d,
    residual=True,
):
    super().__init__()
    self.residual = residual

    self.Convs = Sequential(input_shape=input_shape)
    for i in range(num_layers):
        # Only the last conv in the block applies the requested stride.
        self.Convs.append(
            DepthwiseSeparableConv1d,
            out_channels,
            kernel_size,
            stride=stride if i == num_layers - 1 else 1,
        )
        self.Convs.append(norm)

    self.SE = SEmodule(
        input_shape=self.Convs.get_output_shape(),
        inner_dim=inner_dim,
        activation=se_activation,
        norm=norm,
    )
    self.drop = Dropout(dropout)

    self.reduced_cov = None
    if residual:
        self.reduced_cov = Sequential(input_shape=input_shape)
        self.reduced_cov.append(
            Conv1d,
            out_channels,
            kernel_size=3,
            stride=stride,
        )
        self.reduced_cov.append(norm)

    # `activation` is a class, not an instance, so compare with `is`:
    # isinstance(Swish, Swish) would always be False.
    if activation is Swish:
        self.activation = activation(beta)
    else:
        self.activation = activation()

    self._reset_params()
def __init__(self, args):
    super(MultiHeadedAttentionMIL, self).__init__()
    self.dropout = args.dropout
    width_fe = is_in_args(args, 'width_fe', 64)
    atn_dim = is_in_args(args, 'atn_dim', 256)
    self.num_heads = is_in_args(args, 'num_heads', 1)
    self.feature_depth = is_in_args(args, 'feature_depth', 512)
    self.dim_heads = atn_dim // self.num_heads
    assert self.dim_heads * self.num_heads == atn_dim, \
        "atn_dim must be divisible by num_heads"

    self.attention = Sequential(
        MultiHeadAttention(args)
        # Softmax(dim=-2)
    )
    self.classifier = Sequential(
        Linear(int(args.feature_depth * self.num_heads), width_fe),
        ReLU(),                   # Added 25/09
        Dropout(p=args.dropout),  # Added 25/09
        Linear(width_fe, width_fe),
        ReLU(),                   # Added 25/09
        Dropout(p=args.dropout),  # Added 25/09
        Linear(width_fe, 1),      # Added 25/09
        Sigmoid())
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
             attention_dropout=0.1, drop_path_rate=0.1):
    super(TransformerEncoderLayer, self).__init__()
    self.pre_norm = LayerNorm(d_model)
    self.self_attn = Attention(dim=d_model, num_heads=nhead,
                               attention_dropout=attention_dropout,
                               projection_dropout=dropout)

    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout1 = Dropout(dropout)
    self.norm1 = LayerNorm(d_model)
    self.linear2 = Linear(dim_feedforward, d_model)
    self.dropout2 = Dropout(dropout)

    # Stochastic depth: randomly skip the residual branch during training.
    self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0 \
        else Identity()

    self.activation = F.gelu
def __init__(self, input_dim, dis_dims, pack=10):
    super(Discriminator, self).__init__()
    # "Packing" concatenates `pack` samples into a single discriminator input.
    dim = input_dim * pack
    self.pack = pack
    self.packdim = dim
    seq = []
    for item in list(dis_dims):
        seq += [
            Linear(dim, item),
            LeakyReLU(0.2),
            Dropout(0.5)
        ]
        dim = item
    seq += [Linear(dim, 1)]
    self.seq = Sequential(*seq)
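# A hedged sketch of how the packed discriminator might be driven (assumed
# PacGAN-style usage as in CTGAN; shapes only, no training loop).
import torch

disc = Discriminator(input_dim=30, dis_dims=(256, 256), pack=10)
batch = torch.randn(50, 30)            # batch size must be divisible by pack
packed = batch.view(-1, disc.packdim)  # -> (5, 300)
scores = disc.seq(packed)              # -> (5, 1), one score per pack
print(scores.shape)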
def __init__(self, input_size, embedding_size, n_classes, dropout=False,
             k=5, aggr='max', pool_op='max'):
    super(DECSeq, self).__init__()
    self.conv1 = EdgeConv(MLP([2 * input_size, 64, 64, 64], batch_norm=True),
                          aggr)
    self.conv2 = DynamicEdgeConv(MLP([2 * 64, 128], batch_norm=True), k, aggr)
    self.lin1 = MLP([128 + 64, 1024])
    # Note: self.pool is only defined when pool_op == 'max'.
    if pool_op == 'max':
        self.pool = global_max_pool
    if dropout:
        self.mlp = Seq(MLP([1024, 512], batch_norm=True), Dropout(0.5),
                       MLP([512, 256], batch_norm=True), Dropout(0.5),
                       Lin(256, n_classes))
    else:
        self.mlp = Seq(MLP([1024, 512]), MLP([512, 256]),
                       Lin(256, n_classes))
def build_fc_layers(fc_layers: List, activation: Optional[str] = None,
                    dropout: float = 0.0):
    components = []
    for i, (x, y) in enumerate(zip(fc_layers[:-1], fc_layers[1:])):
        # Interleave activation/dropout between layers, but not before the first.
        if i != 0:
            if activation:
                components.append(ACTIVATION_MAPPING[activation])
            if dropout > 0:
                components.append(Dropout(dropout))
        components.append(Linear(x, y))
    return nn.Sequential(*components)
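# A hedged usage sketch: ACTIVATION_MAPPING is assumed to map names to
# nn.Module instances, e.g. {"relu": nn.ReLU()}; the mapping below is
# hypothetical, for illustration only. [128, 64, 10] yields
# Linear(128, 64) -> ReLU -> Dropout -> Linear(64, 10).
import torch.nn as nn

ACTIVATION_MAPPING = {"relu": nn.ReLU()}  # hypothetical mapping
head = build_fc_layers([128, 64, 10], activation="relu", dropout=0.1)
print(head)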
def __init__(self, hidden_size, output_size, embeddings, dropout_p=0.1,
             max_length=40):
    super(AttnDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.embedding_size = embeddings.size(1)
    self.output_size = output_size
    self.dropout_p = dropout_p
    self.max_length = max_length

    self.embedding = Embedding(self.output_size, self.embedding_size)
    self.embedding.weight = Parameter(embeddings)
    # Attention weights are computed over a fixed maximum source length.
    self.attn = Linear(self.hidden_size + self.embedding_size, self.max_length)
    self.attn_combine = Linear(self.hidden_size + self.embedding_size,
                               self.hidden_size)
    self.dropout = Dropout(self.dropout_p)
    self.gru = GRU(self.hidden_size, self.hidden_size)
    self.out = Linear(self.hidden_size, self.output_size)
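# A minimal instantiation sketch (assumed usage): `embeddings` is a pretrained
# (vocab_size, embedding_dim) matrix; random here, for illustration only.
import torch

vocab_size, emb_dim = 1000, 300
embeddings = torch.randn(vocab_size, emb_dim)
decoder = AttnDecoderRNN(hidden_size=256, output_size=vocab_size,
                         embeddings=embeddings, dropout_p=0.1, max_length=40)
# The attention layer scores one weight per source position, up to max_length:
print(decoder.attn)  # Linear(in_features=556, out_features=40, ...)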
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
             activation="relu", layer_norm_eps=1e-5, batch_first=False):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout,
                                        batch_first=batch_first)
    # Implementation of the feedforward model.
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)

    self.norm1 = LayerNorm(d_model, eps=layer_norm_eps)
    self.norm2 = LayerNorm(d_model, eps=layer_norm_eps)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
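# A hedged sketch of the post-norm forward pass this layer implements; it
# mirrors torch.nn.TransformerEncoderLayer and is not part of the snippet above.
def forward(self, src, src_mask=None, src_key_padding_mask=None):
    # Self-attention block with residual connection and layer norm.
    src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                          key_padding_mask=src_key_padding_mask)[0]
    src = self.norm1(src + self.dropout1(src2))
    # Position-wise feedforward block.
    src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
    src = self.norm2(src + self.dropout2(src2))
    return src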
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
             activation="relu", gating="moe"):
    super(TransformerDecoderLayer, self).__init__()
    if gating == "moe":
        self.self_attn = MoE(d_model, nhead, num_experts=4, dropout=dropout)
    elif gating == "mog":
        self.self_attn = MoG(d_model, nhead, num_experts=4, dropout=dropout)
    else:
        raise ValueError(
            f"Provide a valid gating procedure: can either be moe or mog, "
            f"you have provided {gating}")

    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.norm3 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.dropout3 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
def __init__(self, in_channel, out_channel, bn, dr_p, no_tail=False):
    super(LinearBlock, self).__init__()
    mylist = ModuleList()
    mylist.append(Linear(in_channel, out_channel))
    if not no_tail:
        if bn == 1:
            mylist.append(BatchNorm1d(out_channel))
        mylist.append(ReLU())
        if dr_p > 0:
            mylist.append(Dropout(dr_p))
    self.block = Sequential(*mylist)
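# A hedged usage sketch, assuming LinearBlock's forward (not shown) simply
# applies self.block: stack blocks into a small MLP head, creating the final
# block tail-less so it ends in a raw Linear output.
import torch

blocks = [LinearBlock(128, 64, bn=1, dr_p=0.2),
          LinearBlock(64, 10, bn=0, dr_p=0.0, no_tail=True)]
mlp = torch.nn.Sequential(*blocks)
print(mlp(torch.randn(8, 128)).shape)  # torch.Size([8, 10])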
def __init__(self, model_name_or_path: str, dropout: float,
             num_intent_labels: int, use_observers: bool = False):
    super(ExampleIntentBertModel, self).__init__()
    # Note: building from a config alone yields randomly initialised weights;
    # the pretrained variant is kept below for reference.
    # self.bert_model = BertModel.from_pretrained(model_name_or_path)
    self.bert_model = BertModel(
        BertConfig.from_pretrained(model_name_or_path,
                                   output_attentions=True))

    self.dropout = Dropout(dropout)
    self.num_intent_labels = num_intent_labels
    self.use_observers = use_observers
    self.all_outputs = []
def __init__(self, config):
    super(Attention, self).__init__()
    self.config = config
    self.num_attention_heads = config.num_heads
    self.attention_head_size = int(config.hidden_size / config.num_heads)
    self.all_head_size = self.num_attention_heads * self.attention_head_size

    self.query = Linear(config.hidden_size, self.all_head_size)
    self.key = Linear(config.hidden_size, self.all_head_size)
    self.value = Linear(config.hidden_size, self.all_head_size)
    self.o_proj = Linear(config.hidden_size, config.hidden_size)

    self.dropout = Dropout(config.dropout_prob)
    self.softmax = Softmax(dim=-1)
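# A minimal smoke test for the Attention module (assumed usage): the config
# only needs num_heads, hidden_size and dropout_prob, so SimpleNamespace suffices.
from types import SimpleNamespace
import torch

config = SimpleNamespace(num_heads=8, hidden_size=512, dropout_prob=0.1)
attn = Attention(config)
x = torch.randn(2, 16, 512)
# all_head_size = 8 * (512 // 8) = 512, so each projection preserves the width:
print(attn.query(x).shape)  # torch.Size([2, 16, 512])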
def __init__(self, in_dim, hidden_dim, out_dim, dropout=0., name='gat',
             residual=True, use_mlp=False, join_with_mlp=False):
    super(GNNModelDGL, self).__init__()
    self.name = name
    self.use_mlp = use_mlp
    self.join_with_mlp = join_with_mlp
    self.normalize_input_columns = True

    if use_mlp:
        self.mlp = MLPRegressor(in_dim, hidden_dim, out_dim)
        if join_with_mlp:
            in_dim += out_dim
        else:
            in_dim = out_dim

    if name == 'gat':
        # 8 heads of size hidden_dim // 8 concatenate back to hidden_dim.
        self.l1 = GATConvDGL(in_dim, hidden_dim // 8, 8,
                             feat_drop=dropout, attn_drop=dropout,
                             residual=False, activation=F.elu)
        self.l2 = GATConvDGL(hidden_dim, out_dim, 1,
                             feat_drop=dropout, attn_drop=dropout,
                             residual=residual, activation=None)
    elif name == 'gcn':
        self.l1 = GraphConv(in_dim, hidden_dim, activation=F.elu)
        self.l2 = GraphConv(hidden_dim, out_dim, activation=F.elu)
        self.drop = Dropout(p=dropout)
    elif name == 'cheb':
        self.l1 = ChebConvDGL(in_dim, hidden_dim, k=3)
        self.l2 = ChebConvDGL(hidden_dim, out_dim, k=3)
        self.drop = Dropout(p=dropout)
    elif name == 'agnn':
        self.lin1 = Sequential(Dropout(p=dropout),
                               Linear(in_dim, hidden_dim), ELU())
        self.l1 = AGNNConvDGL(learn_beta=False)
        self.l2 = AGNNConvDGL(learn_beta=True)
        self.lin2 = Sequential(Dropout(p=dropout),
                               Linear(hidden_dim, out_dim), ELU())
    elif name == 'appnp':
        self.lin1 = Sequential(Dropout(p=dropout),
                               Linear(in_dim, hidden_dim), ReLU(),
                               Dropout(p=dropout),
                               Linear(hidden_dim, out_dim))
        self.l1 = APPNPConv(k=10, alpha=0.1, edge_drop=0.)
def __init__(self, channels=3, flat_length=968, labels=2, dropout=0.25):
    super().__init__(
        Conv2d(in_channels=channels, out_channels=16, kernel_size=(3, 3),
               padding=1),
        ReLU(),
        BatchNorm2d(16),
        MaxPool2d(kernel_size=(3, 3), padding=1),
        BatchNorm2d(16),
        Conv2d(in_channels=16, out_channels=8, kernel_size=(3, 3),
               padding=1),
        ReLU(),
        BatchNorm2d(8),
        MaxPool2d(kernel_size=(3, 3), padding=1),
        Flatten(),
        Dropout(p=dropout),
        Linear(flat_length, flat_length // 2),
        Sigmoid(),
        Dropout(p=dropout),
        Linear(flat_length // 2, labels),
    )
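# A shape sanity-check for the default flat_length=968 (a hedged illustration:
# with 96x96 inputs, the two stride-3 pools give 96 -> 32 -> 11 per side,
# and 8 channels * 11 * 11 = 968). `SimpleCNN` is a hypothetical name for the
# Sequential subclass above, which is unnamed in the snippet.
import torch

net = SimpleCNN()
x = torch.randn(2, 3, 96, 96)
print(net(x).shape)  # torch.Size([2, 2])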
def __init__(self):
    super(TagDecoder, self).__init__()
    self.tag_decoder = Sequential(
        Linear(1152, 512),
        BatchNorm1d(512),
        ReLU(),
        Dropout(.25),
        Linear(512, 512),
        BatchNorm1d(512),
        ReLU(),
        Linear(512, 1000),
        BatchNorm1d(1000),
        Sigmoid(),
    )
def __init__(self, config, dropout=0.2):
    super(TDConway, self).__init__()
    self.stack_1 = ModuleList([
        ResidualConvStack(3, 64, layer_structure=[1, 2, 2],
                          initial_depth=config.num_channels),
        ResidualConvStack(5, 64, layer_structure=[1, 2, 2],
                          initial_depth=config.num_channels),
    ])
    self.stack_2 = ModuleList([
        ResidualConvStack(1, 64 * 2, layer_structure=[0, 2, 2]),
        ResidualConvStack(3, 64 * 2, layer_structure=[0, 2, 2]),
        ResidualConvStack(5, 64 * 2, layer_structure=[0, 2, 2]),
    ])
    self.fc = Sequential(
        Flatten(),
        Linear(64 * 2 * 3 * config.rows * config.cols, 512),
        SELU(),
        Dropout(dropout),
        Linear(512, 2048),
        SELU(),
        Dropout(dropout),
        Linear(2048, 1),
        Sigmoid())
def __init__(self):
    super(AudioEncoder, self).__init__()
    # Five stride-2 convs halve the 96x96 input down to 3x3, so the flattened
    # output is 128 * 3 * 3 = 1152, matching fc_audio below.
    self.audio_encoder = Sequential(
        Conv2d(1, 128, kernel_size=4, stride=2, padding=1,
               padding_mode='zeros'),
        BatchNorm2d(128),
        ReLU(),  # 128x48x48
        Dropout(.25),
        Conv2d(128, 128, kernel_size=4, stride=2, padding=1,
               padding_mode='zeros'),
        BatchNorm2d(128),
        ReLU(),  # 128x24x24
        Dropout(.25),
        Conv2d(128, 128, kernel_size=4, stride=2, padding=1,
               padding_mode='zeros'),
        BatchNorm2d(128),
        ReLU(),  # 128x12x12
        Dropout(.25),
        Conv2d(128, 128, kernel_size=4, stride=2, padding=1,
               padding_mode='zeros'),
        BatchNorm2d(128),
        ReLU(),  # 128x6x6
        Dropout(.25),
        Conv2d(128, 128, kernel_size=4, stride=2, padding=1,
               padding_mode='zeros'),
        BatchNorm2d(128),
        ReLU(),  # 128x3x3
        Dropout(.25),
        Flatten(),
    )
    self.fc_audio = Sequential(
        Linear(1152, 1152, bias=False),
        Dropout(0.25),
    )
def __init__(self, input_size, embedding_size, n_classes, dropout=True, k=5,
             aggr='max', pool_op='max', k_global=25):
    super(DECSeqGlob, self).__init__()
    self.k_global = k_global
    self.conv1 = EdgeConv(MLP([2 * 3, 64, 64, 64]), aggr)
    self.conv2 = DynamicEdgeConv(MLP([2 * 64, 128]), k, aggr)
    self.lin1 = MLP([128 + 64, 1024])
    if pool_op == 'max':
        self.pool = global_max_pool
    if dropout:
        self.mlp = Seq(MLP([1024, 512]), Dropout(0.5),
                       MLP([512, 256]), Dropout(0.5),
                       MLP([256, 32]))
    else:
        self.mlp = Seq(MLP([1024, 512]), MLP([512, 256]), MLP([256, 32]))
    self.lin = Lin(256, n_classes)
    # self.conv_glob = EdgeConv(MLP([2 * 32, 32]), aggr)
    # 8 concatenated heads of size 32 give a 256-dim output, matching self.lin.
    self.conv_glob = GATConv(32, 32, heads=8, dropout=0.5, concat=True)
def __init__(self, hyper_param, word_embedding, char_embedding,
             label_vocab_size, device):
    super(BaseLM, self).__init__()
    self.device = device

    word_embeddings_weight = torch.FloatTensor(word_embedding)
    self.word_matrix = Embedding.from_pretrained(word_embeddings_weight,
                                                 freeze=False)
    char_embeddings_weight = torch.FloatTensor(char_embedding)
    self.char_matrix = Embedding.from_pretrained(char_embeddings_weight,
                                                 freeze=False)

    self.char_cnn = CharCNN(hyper_param.drop_out,
                            hyper_param.char_embed_dim,
                            hyper_param.char_cnn_kernels)
    # Word embedding concatenated with the char-CNN outputs
    # (the * 3 suggests three kernel sizes).
    self.lstm_input_size = (hyper_param.word_embed_dim
                            + hyper_param.char_cnn_kernels * 3)
    self.lstm = LSTM(self.lstm_input_size, hyper_param.lstm_hidden,
                     batch_first=True, bidirectional=True)
    self.drop_out = Dropout(p=hyper_param.drop_out)
def __init__(self, chunk_size=32, bits=8, rounds=4, masked=False,
             softmax_temp=None, dropout_rate=0.1):
    super(ReformerAttention, self).__init__()
    self.chunk_size = chunk_size
    self.bits = bits
    self.rounds = rounds
    self.masked = masked
    self.softmax_temp = softmax_temp
    self.dropout = Dropout(dropout_rate)
def forward(self, inputs):
    """Return the outputs from a forward pass of the network

    :param inputs: batch of input images, of shape
     (BATCH_SIZE, n_channels, height, width)
    :type inputs: torch.Tensor
    :return: outputs of SimpleCNN, of shape (BATCH_SIZE, NUM_CLASSES)
    :rtype: torch.Tensor
    """
    # Uses the functional API (assumes `import torch.nn.functional as F`).
    layer = self.conv1(inputs)
    layer = F.relu(layer)
    layer = self.conv2(layer)
    layer = F.relu(layer)
    layer = F.max_pool2d(layer, kernel_size=(2, 2))
    # Pass self.training so dropout is disabled in eval mode; a freshly
    # constructed Dropout() module, as in the original, is always in
    # training mode and would keep dropping units at inference time.
    layer = F.dropout(layer, 0.25, training=self.training)
    layer = layer.view(layer.size(0), -1)
    layer = self.linear1(layer)
    layer = F.dropout(layer, 0.5, training=self.training)
    outputs = self.linear2(layer)
    return outputs
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
             activation="relu"):
    super(TransformerDecoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    # Implementation of the feedforward model.
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.norm3 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.dropout3 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
def __init__(self):
    super(Net1, self).__init__()
    self.features = nn.Sequential(
        DSC(5, 64, 3, 2, 1),
        BatchNorm2d(64),
        ReLU(inplace=True),
        DSC(64, 192, 3, 2, 1),
        BatchNorm2d(192),
        ReLU(inplace=True),
        DSC(192, 384, 3, 1, 1),
        BatchNorm2d(384),
        ReLU(inplace=True),
        MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1,
                  ceil_mode=False),
        DSC(384, 512, 3, 1, 1),
        BatchNorm2d(512),
        ReLU(inplace=True),
        MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1,
                  ceil_mode=False),
        DSC(512, 512, 3, 1, 1),
        BatchNorm2d(512),
        ReLU(inplace=True),
        DSC(512, 1024, 3, 1, 1),
        BatchNorm2d(1024),
        ReLU(inplace=True),
        MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1,
                  ceil_mode=False),
        DSC(1024, 1024, 3, 1, 1),
        BatchNorm2d(1024),
        ReLU(inplace=True),
        DSC(1024, 512, 3, 1, 1),
        BatchNorm2d(512),
        ReLU(inplace=True),
        DSC(512, 256, 3, 1, 1),
        BatchNorm2d(256),
        ReLU(inplace=True),
        CBAM(256))
    self.avgpool = AdaptiveAvgPool2d(output_size=(6, 6))
    # 256 channels * 6 * 6 = 9216 flattened features.
    self.classifier = nn.Sequential(
        Dropout(p=0.5),
        Linear(in_features=9216, out_features=2048, bias=True),
        ReLU(inplace=True),
        Dropout(p=0.5),
        Linear(in_features=2048, out_features=1024, bias=True),
        ReLU(inplace=True),
        Dropout(p=0.5),
        Linear(in_features=1024, out_features=3, bias=True))
def __init__(self, input_size, num_layers, mode='ir', drop_ratio=0.4,
             affine=True):
    super(Backbone, self).__init__()
    assert input_size in [112, 224], "input_size should be 112 or 224"
    assert num_layers in [50, 100, 152], \
        "num_layers should be 50, 100 or 152"
    assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"

    blocks = get_blocks(num_layers)
    if mode == 'ir':
        unit_module = bottleneck_IR
    elif mode == 'ir_se':
        unit_module = bottleneck_IR_SE

    self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                  BatchNorm2d(64),
                                  PReLU(64))
    # The spatial resolution at the head is input_size / 16 (7x7 or 14x14).
    if input_size == 112:
        self.output_layer = Sequential(BatchNorm2d(512),
                                       Dropout(drop_ratio),
                                       Flatten(),
                                       Linear(512 * 7 * 7, 512),
                                       BatchNorm1d(512, affine=affine))
    else:
        self.output_layer = Sequential(BatchNorm2d(512),
                                       Dropout(drop_ratio),
                                       Flatten(),
                                       Linear(512 * 14 * 14, 512),
                                       BatchNorm1d(512, affine=affine))

    modules = []
    for block in blocks:
        for bottleneck in block:
            modules.append(
                unit_module(bottleneck.in_channel,
                            bottleneck.depth,
                            bottleneck.stride))
    self.body = Sequential(*modules)
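# A hedged usage sketch (assumes the class's forward, not shown above, chains
# input_layer, body and output_layer, as is common in face-recognition backbones):
import torch

backbone = Backbone(input_size=112, num_layers=50, mode='ir_se')
faces = torch.randn(4, 3, 112, 112)
embeddings = backbone.output_layer(backbone.body(backbone.input_layer(faces)))
print(embeddings.shape)  # torch.Size([4, 512])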
def __init__(self, bert_model, n_bertlayers=1, dropout=0., num_labels=1,
             no_pooler=False):
    super().__init__()
    if isinstance(bert_model, str):
        self.bert = get_pretrained_bert(bert_model,
                                        num_hidden_layers=n_bertlayers)
    elif isinstance(bert_model, BertModel):
        self.bert = bert_model
    else:
        # Otherwise self.bert would be undefined and the next line would fail.
        raise TypeError("bert_model must be a model name (str) or a BertModel")
    self.dropout = Dropout(dropout)
    self.classifier = Linear(self.bert.config.hidden_size, num_labels)
    self.no_pooler = no_pooler
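# A hedged usage sketch; `BertClassifier` is a hypothetical name for the class
# above, which is unnamed in the snippet, and the tiny random BertModel is for
# illustration only (the str branch goes through get_pretrained_bert instead).
import torch
from transformers import BertConfig, BertModel

bert = BertModel(BertConfig(num_hidden_layers=1))
model = BertClassifier(bert, dropout=0.1, num_labels=2)
ids = torch.randint(0, bert.config.vocab_size, (2, 16))
pooled = bert(ids).pooler_output                   # (2, 768)
logits = model.classifier(model.dropout(pooled))   # (2, 2)
print(logits.shape)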